Mercurial > emacs
annotate lisp/gnus/rfc1843.el @ 48478:a94c995f94de
*** empty log message ***
| author | Stefan Monnier <monnier@iro.umontreal.ca> |
|---|---|
| date | Wed, 20 Nov 2002 18:54:25 +0000 |
| parents | a26d9b55abb6 |
| children | 0d8b17d428b5 |
| rev | line source |
|---|---|
| 31717 | 1 ;;; rfc1843.el --- HZ (rfc1843) decoding |
| 2 ;; Copyright (c) 1998, 1999, 2000 Free Software Foundation, Inc. | |
| 3 | |
| 4 ;; Author: Shenghuo Zhu <zsh@cs.rochester.edu> | |
| 5 ;; Keywords: news HZ HZ+ mail i18n | |
| 6 | |
|
38413
a26d9b55abb6
Some fixes to follow coding conventions in files from Gnus.
Pavel Jan?k <Pavel@Janik.cz>
parents:
32178
diff
changeset
|
7 ;; This file is part of GNU Emacs. |
| 31717 | 8 |
| 9 ;; GNU Emacs is free software; you can redistribute it and/or modify | |
| 10 ;; it under the terms of the GNU General Public License as published | |
| 11 ;; by the Free Software Foundation; either version 2, or (at your | |
| 12 ;; option) any later version. | |
| 13 | |
| 14 ;; GNU Emacs is distributed in the hope that it will be useful, but | |
| 15 ;; WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 16 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
| 17 ;; General Public License for more details. | |
| 18 | |
| 19 ;; You should have received a copy of the GNU General Public License | |
| 20 ;; along with GNU Emacs; see the file COPYING. If not, write to the | |
| 21 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, | |
| 22 ;; Boston, MA 02111-1307, USA. | |
| 23 | |
| 24 ;;; Commentary: | |
| 25 | |
| 26 ;; Usage: | |
| 27 ;; (require 'rfc1843) | |
| 28 ;; (rfc1843-gnus-setup) | |
| 29 ;; | |
| 30 ;; Test: | |
| 31 ;; (rfc1843-decode-string "~{<:Ky2;S{#,NpJ)l6HK!#~}") | |
| 32 | |
| 33 ;;; Code: | |
| 34 | |
| 32178 | 35 (eval-when-compile (require 'cl)) |
| 31717 | 36 (require 'mm-util) |
| 37 | |
| 38 (defvar rfc1843-word-regexp | |
| 39 "~\\({\\([\041-\167][\041-\176]\\| \\)+\\)\\(~}\\|$\\)") | |
| 40 | |
| 41 (defvar rfc1843-word-regexp-strictly | |
| 42 "~\\({\\([\041-\167][\041-\176]\\)+\\)\\(~}\\|$\\)") | |
| 43 | |
| 44 (defvar rfc1843-hzp-word-regexp | |
| 45 "~\\({\\([\041-\167][\041-\176]\\| \\)+\\|\ | |
| 46 [<>]\\([\041-\175][\041-\176]\\| \\)+\\)\\(~}\\|$\\)") | |
| 47 | |
| 48 (defvar rfc1843-hzp-word-regexp-strictly | |
| 49 "~\\({\\([\041-\167][\041-\176]\\)+\\|\ | |
| 50 [<>]\\([\041-\175][\041-\176]\\)+\\)\\(~}\\|$\\)") | |
| 51 | |
| 52 (defcustom rfc1843-decode-loosely nil | |
| 53 "Loosely check HZ encoding if non-nil. | |
| 54 When it is set non-nil, only buffers or strings with strictly | |
| 55 HZ-encoded are decoded." | |
| 56 :type 'boolean | |
| 57 :group 'gnus) | |
| 58 | |
| 59 (defcustom rfc1843-decode-hzp t | |
| 60 "HZ+ decoding support if non-nil. | |
| 61 HZ+ specification (also known as HZP) is to provide a standardized | |
| 62 7-bit representation of mixed Big5, GB, and ASCII text for convenient | |
| 63 e-mail transmission, news posting, etc. | |
| 64 The document of HZ+ 0.78 specification can be found at | |
| 65 ftp://ftp.math.psu.edu/pub/simpson/chinese/hzp/hzp.doc" | |
| 66 :type 'boolean | |
| 67 :group 'gnus) | |
| 68 | |
| 69 (defcustom rfc1843-newsgroups-regexp "chinese\\|hz" | |
| 70 "Regexp of newsgroups in which might be HZ encoded." | |
| 71 :type 'string | |
| 72 :group 'gnus) | |
| 73 | |
| 74 (defun rfc1843-decode-region (from to) | |
| 75 "Decode HZ in the region between FROM and TO." | |
| 76 (interactive "r") | |
| 77 (let (str firstc) | |
| 78 (save-excursion | |
| 79 (goto-char from) | |
| 80 (if (or rfc1843-decode-loosely | |
| 81 (re-search-forward (if rfc1843-decode-hzp | |
| 82 rfc1843-hzp-word-regexp-strictly | |
| 83 rfc1843-word-regexp-strictly) to t)) | |
| 84 (save-restriction | |
| 85 (narrow-to-region from to) | |
| 86 (goto-char (point-min)) | |
| 87 (while (re-search-forward (if rfc1843-decode-hzp | |
| 88 rfc1843-hzp-word-regexp | |
| 89 rfc1843-word-regexp) (point-max) t) | |
| 90 ;;; Text with extents may cause XEmacs crash | |
| 91 (setq str (buffer-substring-no-properties | |
| 92 (match-beginning 1) | |
| 93 (match-end 1))) | |
| 94 (setq firstc (aref str 0)) | |
| 95 (insert (mm-decode-coding-string | |
| 96 (rfc1843-decode | |
| 97 (prog1 | |
| 98 (substring str 1) | |
| 99 (delete-region (match-beginning 0) (match-end 0))) | |
| 100 firstc) | |
| 101 (if (eq firstc ?{) 'cn-gb-2312 'cn-big5)))) | |
| 102 (goto-char (point-min)) | |
| 103 (while (search-forward "~" (point-max) t) | |
| 104 (cond ((eq (char-after) ?\n) | |
| 105 (delete-char -1) | |
| 106 (delete-char 1)) | |
| 107 ((eq (char-after) ?~) | |
| 108 (delete-char 1))))))))) | |
| 109 | |
| 110 (defun rfc1843-decode-string (string) | |
| 111 "Decode HZ STRING and return the results." | |
| 112 (let ((m (mm-multibyte-p))) | |
| 113 (with-temp-buffer | |
| 114 (when m | |
| 115 (mm-enable-multibyte)) | |
| 116 (insert string) | |
| 117 (inline | |
| 118 (rfc1843-decode-region (point-min) (point-max))) | |
| 119 (buffer-string)))) | |
| 120 | |
| 121 (defun rfc1843-decode (word &optional firstc) | |
| 122 "Decode HZ WORD and return it." | |
| 123 (let ((i -1) (s (substring word 0)) v) | |
| 124 (if (or (not firstc) (eq firstc ?{)) | |
| 125 (while (< (incf i) (length s)) | |
| 126 (if (eq (setq v (aref s i)) ? ) nil | |
| 127 (aset s i (+ 128 v)))) | |
| 128 (while (< (incf i) (length s)) | |
| 129 (if (eq (setq v (aref s i)) ? ) nil | |
| 130 (setq v (+ (* 94 v) (aref s (1+ i)) -3135)) | |
| 131 (aset s i (+ (/ v 157) (if (eq firstc ?<) 201 161))) | |
| 132 (setq v (% v 157)) | |
| 133 (aset s (incf i) (+ v (if (< v 63) 64 98)))))) | |
| 134 s)) | |
| 135 | |
| 136 (defun rfc1843-decode-article-body () | |
| 137 "Decode HZ encoded text in the article body." | |
| 138 (if (string-match (concat "\\<\\(" rfc1843-newsgroups-regexp "\\)\\>") | |
| 139 (or gnus-newsgroup-name "")) | |
| 140 (save-excursion | |
| 141 (save-restriction | |
| 142 (message-narrow-to-head) | |
| 143 (let* ((inhibit-point-motion-hooks t) | |
| 144 (case-fold-search t) | |
| 145 (ct (message-fetch-field "Content-Type" t)) | |
| 146 (ctl (and ct (ignore-errors | |
| 147 (mail-header-parse-content-type ct))))) | |
| 148 (if (and ctl (not (string-match "/" (car ctl)))) | |
| 149 (setq ctl nil)) | |
| 150 (goto-char (point-max)) | |
| 151 (widen) | |
| 152 (forward-line 1) | |
| 153 (narrow-to-region (point) (point-max)) | |
| 154 (when (or (not ctl) | |
| 155 (equal (car ctl) "text/plain")) | |
| 156 (rfc1843-decode-region (point) (point-max)))))))) | |
| 157 | |
| 158 (defvar rfc1843-old-gnus-decode-header-function nil) | |
| 159 (defvar gnus-decode-header-methods) | |
| 160 (defvar gnus-decode-encoded-word-methods) | |
| 161 | |
| 162 (defun rfc1843-gnus-setup () | |
| 163 "Setup HZ decoding for Gnus." | |
| 164 (require 'gnus-art) | |
| 165 (require 'gnus-sum) | |
| 166 (add-hook 'gnus-article-decode-hook 'rfc1843-decode-article-body t) | |
| 167 (setq gnus-decode-encoded-word-function | |
| 168 'gnus-multi-decode-encoded-word-string | |
| 169 gnus-decode-header-function | |
| 170 'gnus-multi-decode-header | |
| 171 gnus-decode-encoded-word-methods | |
| 172 (nconc gnus-decode-encoded-word-methods | |
| 173 (list | |
| 174 (cons (concat "\\<\\(" rfc1843-newsgroups-regexp "\\)\\>") | |
| 175 'rfc1843-decode-string))) | |
| 176 gnus-decode-header-methods | |
| 177 (nconc gnus-decode-header-methods | |
| 178 (list | |
| 179 (cons (concat "\\<\\(" rfc1843-newsgroups-regexp "\\)\\>") | |
| 180 'rfc1843-decode-region))))) | |
| 181 | |
| 182 (provide 'rfc1843) | |
| 183 | |
| 184 ;;; rfc1843.el ends here |
