annotate lisp/gnus/utf7.el @ 98182:19ec1646fe6c

The Rmail/mbox merge has been abandoned in favor of a restart using the current rmail.el file. A comprehensive list of changes will be supplied when pmail.el is morphed back into rmail.el The current status is that pmail.el supports basic Rmail navigation (no summary support) and shows the current message in a special buffer using buffer-swap-text. No decoding is done yet. That is the next step.
author Paul Reilly <pmr@pajato.com>
date Mon, 15 Sep 2008 20:56:53 +0000
parents f42ef85caf91
children a9dc0e7c3f2b
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
56927
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents: 52401
diff changeset
1 ;;; utf7.el --- UTF-7 encoding/decoding for Emacs -*-coding: iso-8859-1;-*-
64754
fafd692d1e40 Update years in copyright notice; nfc.
Thien-Thi Nguyen <ttn@gnuvola.org>
parents: 64085
diff changeset
2
74548
ce127a46b1ca Update copyright years.
Glenn Morris <rgm@gnu.org>
parents: 69135
diff changeset
3 ;; Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004,
79708
1cb31606209f Add 2008 to copyright years.
Glenn Morris <rgm@gnu.org>
parents: 78224
diff changeset
4 ;; 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
31717
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
5
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
6 ;; Author: Jon K Hellan <hellan@acm.org>
56927
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents: 52401
diff changeset
7 ;; Maintainer: bugs@gnus.org
31717
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
8 ;; Keywords: mail
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
9
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
10 ;; This file is part of GNU Emacs.
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
11
94662
f42ef85caf91 Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents: 93975
diff changeset
12 ;; GNU Emacs is free software: you can redistribute it and/or modify
31717
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
13 ;; it under the terms of the GNU General Public License as published by
94662
f42ef85caf91 Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents: 93975
diff changeset
14 ;; the Free Software Foundation, either version 3 of the License, or
f42ef85caf91 Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents: 93975
diff changeset
15 ;; (at your option) any later version.
31717
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
16
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
17 ;; GNU Emacs is distributed in the hope that it will be useful,
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
18 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
19 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
20 ;; GNU General Public License for more details.
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
21
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
22 ;; You should have received a copy of the GNU General Public License
94662
f42ef85caf91 Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents: 93975
diff changeset
23 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
31717
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
24
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
25 ;;; Commentary:
56927
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents: 52401
diff changeset
26
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents: 52401
diff changeset
27 ;; UTF-7 - A Mail-Safe Transformation Format of Unicode - RFC 2152
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents: 52401
diff changeset
28 ;; This is a transformation format of Unicode that contains only 7-bit
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents: 52401
diff changeset
29 ;; ASCII octets and is intended to be readable by humans in the limiting
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents: 52401
diff changeset
30 ;; case that the document consists of characters from the US-ASCII
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents: 52401
diff changeset
31 ;; repertoire.
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents: 52401
diff changeset
32 ;; In short, runs of characters outside US-ASCII are encoded as base64
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents: 52401
diff changeset
33 ;; inside delimiters.
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents: 52401
diff changeset
34 ;; A variation of UTF-7 is specified in IMAP 4rev1 (RFC 2060) as the way
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents: 52401
diff changeset
35 ;; to represent characters outside US-ASCII in mailbox names in IMAP.
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents: 52401
diff changeset
36 ;; This library supports both variants, but the IMAP variation was the
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents: 52401
diff changeset
37 ;; reason I wrote it.
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents: 52401
diff changeset
38 ;; The routines convert UTF-7 -> UTF-16 (16 bit encoding of Unicode)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents: 52401
diff changeset
39 ;; -> current character set, and vice versa.
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents: 52401
diff changeset
40 ;; However, until Emacs supports Unicode, the only Emacs character set
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents: 52401
diff changeset
41 ;; supported here is ISO-8859.1, which can trivially be converted to/from
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents: 52401
diff changeset
42 ;; Unicode.
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents: 52401
diff changeset
43 ;; When decoding results in a character outside the Emacs character set,
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents: 52401
diff changeset
44 ;; an error is thrown. It is up to the application to recover.
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents: 52401
diff changeset
45
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents: 52401
diff changeset
46 ;; UTF-7 should be done by providing a coding system. Mule-UCS does
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents: 52401
diff changeset
47 ;; already, but I don't know if it does the IMAP version and it's not
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents: 52401
diff changeset
48 ;; clear whether that should really be a coding system. The UTF-16
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents: 52401
diff changeset
49 ;; part of the conversion can be done with coding systems available
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents: 52401
diff changeset
50 ;; with Mule-UCS or some versions of Emacs. Unfortunately these were
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents: 52401
diff changeset
51 ;; done wrongly (regarding handling of byte-order marks and how the
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents: 52401
diff changeset
52 ;; variants were named), so we don't have a consistent name for the
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents: 52401
diff changeset
53 ;; necessary coding system. The code below doesn't seem to DTRT
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents: 52401
diff changeset
54 ;; generally. E.g.:
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents: 52401
diff changeset
55 ;;
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents: 52401
diff changeset
56 ;; (utf7-encode "a+£")
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents: 52401
diff changeset
57 ;; => "a+ACsAow-"
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents: 52401
diff changeset
58 ;;
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents: 52401
diff changeset
59 ;; $ echo "a+£"|iconv -f iso-8859-1 -t utf-7
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents: 52401
diff changeset
60 ;; a+-+AKM
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents: 52401
diff changeset
61 ;;
59996
aac0a33f5772 Change release version from 21.4 to 22.1 throughout.
Kim F. Storm <storm@cua.dk>
parents: 56927
diff changeset
62 ;; -- fx
56927
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents: 52401
diff changeset
63
31717
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
64
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
65 ;;; Code:
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
66
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
67 (require 'base64)
33119
e08be2ed6301 Require cl when compiling.
Dave Love <fx@gnu.org>
parents: 31717
diff changeset
68 (eval-when-compile (require 'cl))
56927
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents: 52401
diff changeset
69 (require 'mm-util)
31717
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
70
56927
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents: 52401
diff changeset
71 (defconst utf7-direct-encoding-chars " -%'-*,-[]-}"
31717
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
72 "Character ranges which do not need escaping in UTF-7.")
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
73
56927
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents: 52401
diff changeset
74 (defconst utf7-imap-direct-encoding-chars
31717
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
75 (concat utf7-direct-encoding-chars "+\\~")
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
76 "Character ranges which do not need escaping in the IMAP variant of UTF-7.")
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
77
56927
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents: 52401
diff changeset
78 (defconst utf7-utf-16-coding-system
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents: 52401
diff changeset
79 (cond ((mm-coding-system-p 'utf-16-be-no-signature) ; Mule-UCS
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents: 52401
diff changeset
80 'utf-16-be-no-signature)
69135
12d289b6e5de Revision: emacs@sv.gnu.org/emacs--devo--0--patch-118
Miles Bader <miles@gnu.org>
parents: 68633
diff changeset
81 ((and (mm-coding-system-p 'utf-16-be) ; Emacs 21.3, Emacs 22
56927
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents: 52401
diff changeset
82 ;; Avoid versions with BOM.
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents: 52401
diff changeset
83 (= 2 (length (encode-coding-string "a" 'utf-16-be))))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents: 52401
diff changeset
84 'utf-16-be)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents: 52401
diff changeset
85 ((mm-coding-system-p 'utf-16-be-nosig) ; ?
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents: 52401
diff changeset
86 'utf-16-be-nosig))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents: 52401
diff changeset
87 "Coding system which encodes big endian UTF-16 without a BOM signature.")
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents: 52401
diff changeset
88
31717
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
89 (defsubst utf7-imap-get-pad-length (len modulus)
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
90 "Return required length of padding for IMAP modified base64 fragment."
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
91 (mod (- len) modulus))
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
92
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
93 (defun utf7-encode-internal (&optional for-imap)
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
94 "Encode text in (temporary) buffer as UTF-7.
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
95 Use IMAP modification if FOR-IMAP is non-nil."
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
96 (let ((start (point-min))
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
97 (end (point-max)))
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
98 (narrow-to-region start end)
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
99 (goto-char start)
56927
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents: 52401
diff changeset
100 (let* ((esc-char (if for-imap ?& ?+))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents: 52401
diff changeset
101 (direct-encoding-chars
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents: 52401
diff changeset
102 (if for-imap utf7-imap-direct-encoding-chars
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents: 52401
diff changeset
103 utf7-direct-encoding-chars))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents: 52401
diff changeset
104 (not-direct-encoding-chars (concat "^" direct-encoding-chars)))
31717
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
105 (while (not (eobp))
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
106 (skip-chars-forward direct-encoding-chars)
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
107 (unless (eobp)
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
108 (insert esc-char)
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
109 (let ((p (point))
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
110 (fc (following-char))
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
111 (run-length
56927
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents: 52401
diff changeset
112 (skip-chars-forward not-direct-encoding-chars)))
31717
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
113 (if (and (= fc esc-char)
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
114 (= run-length 1)) ; Lone esc-char?
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
115 (delete-backward-char 1) ; Now there's one too many
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
116 (utf7-fragment-encode p (point) for-imap))
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
117 (insert "-")))))))
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
118
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
119 (defun utf7-fragment-encode (start end &optional for-imap)
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
120 "Encode text from START to END in buffer as UTF-7 escape fragment.
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
121 Use IMAP modification if FOR-IMAP is non-nil."
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
122 (save-restriction
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
123 (narrow-to-region start end)
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
124 (funcall (utf7-get-u16char-converter 'to-utf-16))
56927
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents: 52401
diff changeset
125 (mm-with-unibyte-current-buffer
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents: 52401
diff changeset
126 (base64-encode-region start (point-max)))
31717
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
127 (goto-char start)
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
128 (let ((pm (point-max)))
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
129 (when for-imap
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
130 (while (search-forward "/" nil t)
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
131 (replace-match ",")))
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
132 (skip-chars-forward "^= \t\n" pm)
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
133 (delete-region (point) pm))))
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
134
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
135 (defun utf7-decode-internal (&optional for-imap)
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
136 "Decode UTF-7 text in (temporary) buffer.
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
137 Use IMAP modification if FOR-IMAP is non-nil."
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
138 (let ((start (point-min))
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
139 (end (point-max)))
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
140 (goto-char start)
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
141 (let* ((esc-pattern (concat "^" (char-to-string (if for-imap ?& ?+))))
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
142 (base64-chars (concat "A-Za-z0-9+"
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
143 (char-to-string (if for-imap ?, ?/)))))
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
144 (while (not (eobp))
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
145 (skip-chars-forward esc-pattern)
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
146 (unless (eobp)
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
147 (forward-char)
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
148 (let ((p (point))
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
149 (run-length (skip-chars-forward base64-chars)))
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
150 (when (and (not (eobp)) (= (following-char) ?-))
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
151 (delete-char 1))
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
152 (unless (= run-length 0) ; Encoded lone esc-char?
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
153 (save-excursion
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
154 (utf7-fragment-decode p (point) for-imap)
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
155 (goto-char p)
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
156 (delete-backward-char 1)))))))))
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
157
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
158 (defun utf7-fragment-decode (start end &optional for-imap)
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
159 "Decode base64 encoded fragment from START to END of UTF-7 text in buffer.
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
160 Use IMAP modification if FOR-IMAP is non-nil."
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
161 (save-restriction
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
162 (narrow-to-region start end)
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
163 (when for-imap
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
164 (goto-char start)
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
165 (while (search-forward "," nil 'move-to-end) (replace-match "/")))
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
166 (let ((pl (utf7-imap-get-pad-length (- end start) 4)))
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
167 (insert (make-string pl ?=))
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
168 (base64-decode-region start (+ end pl)))
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
169 (funcall (utf7-get-u16char-converter 'from-utf-16))))
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
170
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
171 (defun utf7-get-u16char-converter (which-way)
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
172 "Return a function to convert between UTF-16 and current character set."
56927
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents: 52401
diff changeset
173 (if utf7-utf-16-coding-system
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents: 52401
diff changeset
174 (if (eq which-way 'to-utf-16)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents: 52401
diff changeset
175 (lambda ()
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents: 52401
diff changeset
176 (encode-coding-region (point-min) (point-max)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents: 52401
diff changeset
177 utf7-utf-16-coding-system))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents: 52401
diff changeset
178 (lambda ()
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents: 52401
diff changeset
179 (decode-coding-region (point-min) (point-max)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents: 52401
diff changeset
180 utf7-utf-16-coding-system)))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents: 52401
diff changeset
181 ;; Add test to check if we are really Latin-1.
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents: 52401
diff changeset
182 (if (eq which-way 'to-utf-16)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents: 52401
diff changeset
183 'utf7-latin1-u16-char-converter
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents: 52401
diff changeset
184 'utf7-u16-latin1-char-converter)))
31717
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
185
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
186 (defun utf7-latin1-u16-char-converter ()
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
187 "Convert latin 1 (ISO-8859.1) characters to 16 bit Unicode.
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
188 Characters are converted to raw byte pairs in narrowed buffer."
56927
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents: 52401
diff changeset
189 (mm-encode-coding-region (point-min) (point-max) 'iso-8859-1)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents: 52401
diff changeset
190 (mm-disable-multibyte)
31717
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
191 (goto-char (point-min))
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
192 (while (not (eobp))
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
193 (insert 0)
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
194 (forward-char)))
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
195
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
196 (defun utf7-u16-latin1-char-converter ()
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
197 "Convert 16 bit Unicode characters to latin 1 (ISO-8859.1).
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
198 Characters are in raw byte pairs in narrowed buffer."
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
199 (goto-char (point-min))
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
200 (while (not (eobp))
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
201 (if (= 0 (following-char))
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
202 (delete-char 1)
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
203 (error "Unable to convert from Unicode"))
56927
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents: 52401
diff changeset
204 (forward-char))
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents: 52401
diff changeset
205 (mm-decode-coding-region (point-min) (point-max) 'iso-8859-1)
55fd4f77387a Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Miles Bader <miles@gnu.org>
parents: 52401
diff changeset
206 (mm-enable-multibyte))
31717
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
207
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
208 (defun utf7-encode (string &optional for-imap)
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
209 "Encode UTF-7 STRING. Use IMAP modification if FOR-IMAP is non-nil."
86255
50442c77e242 (utf7-encode, utf7-decode): Use coding system
Reiner Steib <Reiner.Steib@gmx.de>
parents: 78224
diff changeset
210 (if (and (coding-system-p 'utf-7) (coding-system-p 'utf-7-imap))
50442c77e242 (utf7-encode, utf7-decode): Use coding system
Reiner Steib <Reiner.Steib@gmx.de>
parents: 78224
diff changeset
211 ;; Emacs 23 with proper support for IMAP
50442c77e242 (utf7-encode, utf7-decode): Use coding system
Reiner Steib <Reiner.Steib@gmx.de>
parents: 78224
diff changeset
212 (encode-coding-string string (if for-imap 'utf-7-imap 'utf-7))
50442c77e242 (utf7-encode, utf7-decode): Use coding system
Reiner Steib <Reiner.Steib@gmx.de>
parents: 78224
diff changeset
213 (let ((default-enable-multibyte-characters t))
50442c77e242 (utf7-encode, utf7-decode): Use coding system
Reiner Steib <Reiner.Steib@gmx.de>
parents: 78224
diff changeset
214 (with-temp-buffer
50442c77e242 (utf7-encode, utf7-decode): Use coding system
Reiner Steib <Reiner.Steib@gmx.de>
parents: 78224
diff changeset
215 (insert string)
50442c77e242 (utf7-encode, utf7-decode): Use coding system
Reiner Steib <Reiner.Steib@gmx.de>
parents: 78224
diff changeset
216 (utf7-encode-internal for-imap)
50442c77e242 (utf7-encode, utf7-decode): Use coding system
Reiner Steib <Reiner.Steib@gmx.de>
parents: 78224
diff changeset
217 (buffer-string)))))
31717
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
218
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
219 (defun utf7-decode (string &optional for-imap)
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
220 "Decode UTF-7 STRING. Use IMAP modification if FOR-IMAP is non-nil."
86255
50442c77e242 (utf7-encode, utf7-decode): Use coding system
Reiner Steib <Reiner.Steib@gmx.de>
parents: 78224
diff changeset
221 (if (and (coding-system-p 'utf-7) (coding-system-p 'utf-7-imap))
50442c77e242 (utf7-encode, utf7-decode): Use coding system
Reiner Steib <Reiner.Steib@gmx.de>
parents: 78224
diff changeset
222 ;; Emacs 23 with proper support for IMAP
50442c77e242 (utf7-encode, utf7-decode): Use coding system
Reiner Steib <Reiner.Steib@gmx.de>
parents: 78224
diff changeset
223 (decode-coding-string string (if for-imap 'utf-7-imap 'utf-7))
50442c77e242 (utf7-encode, utf7-decode): Use coding system
Reiner Steib <Reiner.Steib@gmx.de>
parents: 78224
diff changeset
224 (let ((default-enable-multibyte-characters nil))
50442c77e242 (utf7-encode, utf7-decode): Use coding system
Reiner Steib <Reiner.Steib@gmx.de>
parents: 78224
diff changeset
225 (with-temp-buffer
50442c77e242 (utf7-encode, utf7-decode): Use coding system
Reiner Steib <Reiner.Steib@gmx.de>
parents: 78224
diff changeset
226 (insert string)
50442c77e242 (utf7-encode, utf7-decode): Use coding system
Reiner Steib <Reiner.Steib@gmx.de>
parents: 78224
diff changeset
227 (utf7-decode-internal for-imap)
50442c77e242 (utf7-encode, utf7-decode): Use coding system
Reiner Steib <Reiner.Steib@gmx.de>
parents: 78224
diff changeset
228 (mm-enable-multibyte)
50442c77e242 (utf7-encode, utf7-decode): Use coding system
Reiner Steib <Reiner.Steib@gmx.de>
parents: 78224
diff changeset
229 (buffer-string)))))
31717
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
230
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
231 (provide 'utf7)
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
232
93975
1e3a407766b9 Fix up comment convention on the arch-tag lines.
Stefan Monnier <monnier@iro.umontreal.ca>
parents: 87649
diff changeset
233 ;; arch-tag: 96078b55-85c7-4161-aed2-932c24b282c7
31717
6b20b7e85e3c *** empty log message ***
Gerd Moellmann <gerd@gnu.org>
parents:
diff changeset
234 ;;; utf7.el ends here