Mercurial > emacs
annotate lisp/gnus/rfc1843.el @ 106746:e09f67191b6c
Merge from mainline.
author | Katsumi Yamaoka <yamaoka@jpl.org> |
---|---|
date | Wed, 06 Jan 2010 09:16:36 +0000 |
parents | a9dc0e7c3f2b |
children | 1d1d5d9bd884 |
rev | line source |
---|---|
31717 | 1 ;;; rfc1843.el --- HZ (rfc1843) decoding |
64754
fafd692d1e40
Update years in copyright notice; nfc.
Thien-Thi Nguyen <ttn@gnuvola.org>
parents:
64085
diff
changeset
|
2 |
74547 | 3 ;; Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, |
100908 | 4 ;; 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc. |
31717 | 5 |
6 ;; Author: Shenghuo Zhu <zsh@cs.rochester.edu> | |
7 ;; Keywords: news HZ HZ+ mail i18n | |
8 | |
38413
a26d9b55abb6
Some fixes to follow coding conventions in files from Gnus.
Pavel Janík <Pavel@Janik.cz>
parents:
32178
diff
changeset
|
9 ;; This file is part of GNU Emacs. |
31717 | 10 |
94662
f42ef85caf91
Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents:
93975
diff
changeset
|
11 ;; GNU Emacs is free software: you can redistribute it and/or modify |
f42ef85caf91
Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents:
93975
diff
changeset
|
12 ;; it under the terms of the GNU General Public License as published by |
f42ef85caf91
Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents:
93975
diff
changeset
|
13 ;; the Free Software Foundation, either version 3 of the License, or |
f42ef85caf91
Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents:
93975
diff
changeset
|
14 ;; (at your option) any later version. |
31717 | 15 |
94662
f42ef85caf91
Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents:
93975
diff
changeset
|
16 ;; GNU Emacs is distributed in the hope that it will be useful, |
f42ef85caf91
Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents:
93975
diff
changeset
|
17 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of |
f42ef85caf91
Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents:
93975
diff
changeset
|
18 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
f42ef85caf91
Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents:
93975
diff
changeset
|
19 ;; GNU General Public License for more details. |
31717 | 20 |
21 ;; You should have received a copy of the GNU General Public License | |
94662
f42ef85caf91
Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents:
93975
diff
changeset
|
22 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. |
31717 | 23 |
24 ;;; Commentary: | |
25 | |
26 ;; Usage: | |
27 ;; (require 'rfc1843) | |
28 ;; (rfc1843-gnus-setup) | |
29 ;; | |
30 ;; Test: | |
31 ;; (rfc1843-decode-string "~{<:Ky2;S{#,NpJ)l6HK!#~}") | |
32 | |
33 ;;; Code: | |
34 | |
87229
e4e336c0677e
Add declare-function compatibility definition.
Glenn Morris <rgm@gnu.org>
parents:
78224
diff
changeset
|
35 ;; For Emacs < 22.2. |
e4e336c0677e
Add declare-function compatibility definition.
Glenn Morris <rgm@gnu.org>
parents:
78224
diff
changeset
|
;; Define `declare-function' as a macro that expands to nothing when
;; the running Emacs does not already provide it, so the
;; `declare-function' form used later in this file is harmless there.
(eval-and-compile
  (or (fboundp 'declare-function)
      (defmacro declare-function (&rest ignored))))
e4e336c0677e
Add declare-function compatibility definition.
Glenn Morris <rgm@gnu.org>
parents:
78224
diff
changeset
|
38 |
32178 | 39 (eval-when-compile (require 'cl)) |
31717 | 40 (require 'mm-util) |
41 | |
65283
48de3c91b9bc
(gnus-decode-encoded-word-function, gnus-decode-header-function,
Juanma Barranquero <lekktu@gmail.com>
parents:
64754
diff
changeset
|
42 (defvar gnus-decode-encoded-word-function) |
48de3c91b9bc
(gnus-decode-encoded-word-function, gnus-decode-header-function,
Juanma Barranquero <lekktu@gmail.com>
parents:
64754
diff
changeset
|
43 (defvar gnus-decode-header-function) |
48de3c91b9bc
(gnus-decode-encoded-word-function, gnus-decode-header-function,
Juanma Barranquero <lekktu@gmail.com>
parents:
64754
diff
changeset
|
44 (defvar gnus-newsgroup-name) |
48de3c91b9bc
(gnus-decode-encoded-word-function, gnus-decode-header-function,
Juanma Barranquero <lekktu@gmail.com>
parents:
64754
diff
changeset
|
45 |
(defvar rfc1843-word-regexp
  "~\\({\\([\041-\167][\041-\176]\\| \\)+\\)\\(~}\\|$\\)"
  "Regexp matching an HZ encoded word, loosely.
Group 1 is the opening brace followed by the encoded text, which is
a run of two-character pairs and single spaces.  The word ends at
the closing tilde-brace sequence or at the end of the line.")

(defvar rfc1843-word-regexp-strictly
  "~\\({\\([\041-\167][\041-\176]\\)+\\)\\(~}\\|$\\)"
  "Regexp matching an HZ encoded word, strictly.
Like `rfc1843-word-regexp', except that single spaces are not
accepted inside the encoded text.")

(defvar rfc1843-hzp-word-regexp
  "~\\({\\([\041-\167][\041-\176]\\| \\)+\\|\
\[<>]\\([\041-\175][\041-\176]\\| \\)+\\)\\(~}\\|$\\)"
  "Regexp matching an HZ or HZ+ encoded word, loosely.
Group 1 is the marker character after the tilde, which is an
opening brace, a less-than sign or a greater-than sign, followed by
the encoded text.  The encoded text is a run of two-character pairs
and single spaces.")

(defvar rfc1843-hzp-word-regexp-strictly
  "~\\({\\([\041-\167][\041-\176]\\)+\\|\
\[<>]\\([\041-\175][\041-\176]\\)+\\)\\(~}\\|$\\)"
  "Regexp matching an HZ or HZ+ encoded word, strictly.
Like `rfc1843-hzp-word-regexp', except that single spaces are not
accepted inside the encoded text.")
31717 | 59 |
(defcustom rfc1843-decode-loosely nil
  "Loosely check HZ encoding if non-nil.
When it is nil, only buffers or strings that contain at least one
strictly HZ-encoded word are decoded.  When it is non-nil, that
check is skipped and decoding is always attempted."
  :type 'boolean
  :group 'mime)
31717 | 66 |
(defcustom rfc1843-decode-hzp t
  "Non-nil means HZ+ encoded words are decoded as well.
The HZ+ specification (also known as HZP) provides a standardized
7-bit representation of mixed Big5, GB, and ASCII text for
convenient e-mail transmission, news posting, etc.
The document of HZ+ 0.78 specification can be found at
ftp://ftp.math.psu.edu/pub/simpson/chinese/hzp/hzp.doc"
  :group 'mime
  :type 'boolean)
31717 | 76 |
(defcustom rfc1843-newsgroups-regexp "chinese\\|hz"
  "Regexp matching names of newsgroups whose articles might be HZ encoded.
It is matched as a whole word against the newsgroup name."
  :group 'mime
  :type 'string)
31717 | 81 |
(defun rfc1843-decode-region (from to)
  "Decode HZ in the region between FROM and TO.
Unless `rfc1843-decode-loosely' is non-nil, nothing is changed when
the region contains no strictly HZ-encoded word.  Otherwise every
encoded word is replaced by its decoded text; afterwards each \"~\"
directly followed by a newline is removed together with that
newline, and each \"~~\" is reduced to a single \"~\".
HZ+ words are recognized only when `rfc1843-decode-hzp' is non-nil."
  (interactive "r")
  (let ((strict-re (if rfc1843-decode-hzp
                       rfc1843-hzp-word-regexp-strictly
                     rfc1843-word-regexp-strictly))
        (loose-re (if rfc1843-decode-hzp
                      rfc1843-hzp-word-regexp
                    rfc1843-word-regexp))
        word marker word-beg word-end)
    (save-excursion
      (goto-char from)
      (when (or rfc1843-decode-loosely
                (re-search-forward strict-re to t))
        (save-restriction
          (narrow-to-region from to)
          ;; Pass 1: replace each encoded word by its decoded text.
          (goto-char (point-min))
          (while (re-search-forward loose-re (point-max) t)
            (setq word-beg (match-beginning 0)
                  word-end (match-end 0)
                  ;; Text with extents may cause XEmacs crash
                  word (buffer-substring-no-properties
                        (match-beginning 1) (match-end 1))
                  ;; The marker character selects the coding system.
                  marker (aref word 0))
            ;; Remove the encoded form first; point is then left where
            ;; the decoded text has to be inserted.
            (delete-region word-beg word-end)
            (insert (mm-decode-coding-string
                     (rfc1843-decode (substring word 1) marker)
                     (if (eq marker ?{) 'cn-gb-2312 'cn-big5))))
          ;; Pass 2: resolve the remaining tilde escapes.
          (goto-char (point-min))
          (while (search-forward "~" (point-max) t)
            (let ((next (char-after)))
              (cond ((eq next ?\n)
                     ;; Tilde + newline: drop both.
                     (delete-char -1)
                     (delete-char 1))
                    ((eq next ?~)
                     ;; Doubled tilde: keep only one.
                     (delete-char 1))))))))))
117 | |
(defun rfc1843-decode-string (string)
  "Decode HZ STRING and return the results.
STRING is inserted into a temporary buffer, decoded there with
`rfc1843-decode-region', and the buffer contents are returned.
`mm-enable-multibyte' is called in the temporary buffer when
`mm-multibyte-p', called before switching to it, returned non-nil."
  (let ((multibyte (mm-multibyte-p)))
    (with-temp-buffer
      (if multibyte
          (mm-enable-multibyte))
      (insert string)
      (inline
        (rfc1843-decode-region (point-min) (point-max)))
      (buffer-string))))
128 | |
(defun rfc1843-decode (word &optional firstc)
  "Decode HZ WORD and return it.
WORD itself is not modified; a converted copy is returned.
FIRSTC is the marker character of the encoded word.  When FIRSTC is
nil or ?{, 128 is added to every character other than space.
Otherwise each two-character pair is mapped arithmetically to a new
pair, whose first character is offset by 201 when FIRSTC is ?< and
by 161 for any other value; spaces are again left alone."
  (let ((out (substring word 0))
        (pos 0)
        ch)
    (if (or (not firstc) (eq firstc ?{))
        ;; Add 128 to each non-space character.
        (while (< pos (length out))
          (setq ch (aref out pos))
          (or (eq ch ?\ )
              (aset out pos (+ 128 ch)))
          (setq pos (1+ pos)))
      (let ((lead-base (if (eq firstc ?<) 201 161))
            idx)
        (while (< pos (length out))
          (setq ch (aref out pos))
          (if (eq ch ?\ )
              nil
            ;; Linear index of the pair in a 94-wide grid whose origin
            ;; is the pair (33, 33): 94*(c1-33) + (c2-33).
            (setq idx (+ (* 94 ch) (aref out (1+ pos)) -3135))
            ;; Re-split the index into rows of 157 positions.
            (aset out pos (+ (/ idx 157) lead-base))
            (setq idx (% idx 157))
            (setq pos (1+ pos))
            ;; Positions 0..62 become 64..126, 63..156 become 161..254.
            (aset out pos (+ idx (if (< idx 63) 64 98))))
          (setq pos (1+ pos)))))
    out))
143 | |
87229
e4e336c0677e
Add declare-function compatibility definition.
Glenn Morris <rgm@gnu.org>
parents:
78224
diff
changeset
|
144 (autoload 'mail-header-parse-content-type "mail-parse") |
e4e336c0677e
Add declare-function compatibility definition.
Glenn Morris <rgm@gnu.org>
parents:
78224
diff
changeset
|
145 (autoload 'message-narrow-to-head "message") |
e4e336c0677e
Add declare-function compatibility definition.
Glenn Morris <rgm@gnu.org>
parents:
78224
diff
changeset
|
146 (declare-function message-fetch-field "message" (header &optional not-all)) |
e4e336c0677e
Add declare-function compatibility definition.
Glenn Morris <rgm@gnu.org>
parents:
78224
diff
changeset
|
147 |
(defun rfc1843-decode-article-body ()
  "Decode HZ encoded text in the article body.
Nothing is done unless `gnus-newsgroup-name' matches
`rfc1843-newsgroups-regexp' as a whole word.  The body is decoded
with `rfc1843-decode-region' when the article has no usable
Content-Type header or when its type is \"text/plain\"."
  (when (string-match (concat "\\<\\(" rfc1843-newsgroups-regexp "\\)\\>")
                      (or gnus-newsgroup-name ""))
    (save-excursion
      (save-restriction
        (message-narrow-to-head)
        (let* ((inhibit-point-motion-hooks t)
               (case-fold-search t)
               (raw-type (message-fetch-field "Content-Type" t))
               (parsed (and raw-type
                            (mail-header-parse-content-type raw-type)))
               ;; A parsed header whose type contains no "/" is
               ;; treated the same as a missing header.
               (content-type (and parsed
                                  (string-match "/" (car parsed))
                                  parsed)))
          ;; Step from the end of the head to the start of the body
          ;; and restrict the buffer to the body.
          (goto-char (point-max))
          (widen)
          (forward-line 1)
          (narrow-to-region (point) (point-max))
          (if (or (null content-type)
                  (equal (car content-type) "text/plain"))
              (rfc1843-decode-region (point) (point-max))))))))
168 | |
169 (defvar rfc1843-old-gnus-decode-header-function nil) | |
170 (defvar gnus-decode-header-methods) | |
171 (defvar gnus-decode-encoded-word-methods) | |
172 | |
(defun rfc1843-gnus-setup ()
  "Setup HZ decoding for Gnus.
Add `rfc1843-decode-article-body' to `gnus-article-decode-hook', make
Gnus use its multi-method header and encoded-word decoders, and
register `rfc1843-decode-string' and `rfc1843-decode-region' as
decoding methods for newsgroups matching `rfc1843-newsgroups-regexp'.
Calling this function again does not register the methods twice.
Return the resulting value of `gnus-decode-header-methods'."
  (require 'gnus-art)
  (require 'gnus-sum)
  (add-hook 'gnus-article-decode-hook 'rfc1843-decode-article-body t)
  (let* ((group-regexp (concat "\\<\\(" rfc1843-newsgroups-regexp "\\)\\>"))
         (word-method (cons group-regexp 'rfc1843-decode-string))
         (header-method (cons group-regexp 'rfc1843-decode-region)))
    (setq gnus-decode-encoded-word-function
          'gnus-multi-decode-encoded-word-string
          gnus-decode-header-function
          'gnus-multi-decode-header)
    ;; Append without mutating the existing lists, and only when an
    ;; equal entry is not present yet, so that repeated setup calls
    ;; do not pile up duplicate methods.
    (unless (member word-method gnus-decode-encoded-word-methods)
      (setq gnus-decode-encoded-word-methods
            (append gnus-decode-encoded-word-methods
                    (list word-method))))
    (unless (member header-method gnus-decode-header-methods)
      (setq gnus-decode-header-methods
            (append gnus-decode-header-methods
                    (list header-method))))
    gnus-decode-header-methods))
192 | |
193 (provide 'rfc1843) | |
194 | |
93975
1e3a407766b9
Fix up comment convention on the arch-tag lines.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
87649
diff
changeset
|
195 ;; arch-tag: 5149c301-a6ca-4731-9c9d-ba616e2cb687 |
31717 | 196 ;;; rfc1843.el ends here |