Mercurial > emacs
changeset 89427:e25f0fe176b0
*** empty log message ***
author | Dave Love <fx@gnu.org> |
---|---|
date | Thu, 08 May 2003 17:58:17 +0000 |
parents | 2013b9934ed0 |
children | eed327e0bad3 |
files | lisp/ChangeLog lisp/international/utf-7.el |
diffstat | 2 files changed, 149 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- a/lisp/ChangeLog Thu May 08 17:57:14 2003 +0000 +++ b/lisp/ChangeLog Thu May 08 17:58:17 2003 +0000 @@ -1,3 +1,9 @@ +2003-05-08 Dave Love <fx@gnu.org> + + * international/utf-7.el: New file. + + * international/mule-conf.el (utf-7): New. + 2003-05-06 Kenichi Handa <handa@m17n.org> * international/mule-conf.el (utf-16-be)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lisp/international/utf-7.el Thu May 08 17:58:17 2003 +0000 @@ -0,0 +1,143 @@ +;;; utf-7.el --- utf-7 coding system + +;; Copyright (C) 2003 Free Software Foundation, Inc. + +;; Author: Dave Love <fx@gnu.org> +;; Keywords: i18n, mail + +;; This file is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 2, or (at your option) +;; any later version. + +;; This file is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GNU Emacs; see the file COPYING. If not, write to +;; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, +;; Boston, MA 02111-1307, USA. + +;;; Commentary: + +;; Defines a coding system for UTF-7, defined in RFC 2152. Non-ASCII +;; segments are encoded as base64-encoded big endian UTF-16. Also +;; defines a variation required for IMAP (RFC 2060). + +;; The encoding and decoding was originally taken from Jon K Hellan's +;; implementation in Gnus, but has been substantially re-done. + +;; This probably needs more attention. In particular, it's not +;; completely consistent with iconv's behaviour. It's arguable +;; whether the IMAP version should be a coding system since it's +;; apparently only used for IMAP mailbox names. + +;;; Code: + +;; See mule-conf. +;; (define-coding-system 'utf-7 +;; "UTF-7 encoding of Unicode (RFC 2152)" +;; :coding-type 'utf-8 +;; :mnemonic ?U +;; :mime-charset 'utf-7 +;; :charset-list '(unicode) +;; :pre-write-conversion 'utf-7-pre-write-conversion +;; :post-read-conversion 'utf-7-post-read-conversion) + +;; (define-coding-system 'utf-7-imap +;; "UTF-7 encoding of Unicode, IMAP version (RFC 2060)" +;; :coding-type 'utf-8 +;; :mnemonic ?U +;; :mime-charset 'utf-7 +;; :charset-list '(unicode) +;; :pre-write-conversion 'utf-7-imap-pre-write-conversion +;; :post-read-conversion 'utf-7-imap-post-read-conversion)) + +;;;###autoload +(defun utf-7-decode (len imap) + "Decode LEN bytes of UTF-7 at point. +IMAP non-nil means use the IMAP version." + (save-excursion + (save-restriction + (narrow-to-region (point) (+ (point) len)) + (let ((not-esc (if imap "^&" "^+")) + (skip-chars (if imap "A-Za-z0-9+," "A-Za-z0-9+/"))) + (while (not (eobp)) + (skip-chars-forward not-esc) + (unless (eobp) + (forward-char) + (let ((p (point)) + (run-length (skip-chars-forward skip-chars))) + (if (eq ?- (char-after)) + (delete-char 1)) + (unless (= run-length 0) ; encoded lone esc-char + (let ((pl (mod (- run-length) 4))) + (insert-char ?= pl) + (if imap + (subst-char-in-region p (point) ?, ?/)) + (base64-decode-region p (point))) + (decode-coding-region p (point) 'utf-16-be) + (save-excursion + (goto-char p) + (delete-backward-char 1))))))) + (- (point-max) (point-min))))) + +(defun utf-7-post-read-conversion (len) + (utf-7-decode len nil)) + +(defun utf-7-imap-post-read-conversion (len) + (utf-7-decode len t)) + +;;;###autoload +(defun utf-7-encode (from to imap) + "Encode bytes between FROM and TO to UTF-7. +ESC and SKIP-CHARS are adjusted for the normal and IMAP versions." + (let* ((old-buf (current-buffer)) + (esc (if imap ?& ?+)) + ;; These are characters which can be encoded asis. + (skip-chars (if imap + "\t\n\r\x20-\x25\x27-\x7e" ; rfc2060 + ;; This includes the rfc2152 optional set. + ;; Perhaps it shouldn't (like iconv). + "\t\n\r -*,-[]-}")) + (not-skip-chars (format "^%s%c" skip-chars esc))) + (set-buffer (generate-new-buffer " *temp*")) + (if (stringp from) + (insert from) + (insert-buffer-substring old-buf from to)) + (goto-char (point-min)) + (while (not (eobp)) + (skip-chars-forward skip-chars) + (if (eq ?+ (char-after)) + (progn (forward-char) + (insert ?-)) + (unless (eobp) + (insert esc) + (let ((p (point))) + (skip-chars-forward not-skip-chars) + (save-restriction + ;; encode-coding-region doesn't preserve point + (narrow-to-region p (point)) + (encode-coding-region p (point-max) 'utf-16-be) + (base64-encode-region p (point-max)) + (if imap + (subst-char-in-region p (point-max) ?/ ?,)) + (goto-char p) + ;; As I read the RFC, this isn't correct, but it's + ;; consistent with iconv, at least regarding `='. + (skip-chars-forward "^= \t\n") + (delete-region (point) (point-max)))) + (unless (eobp) + (insert ?-))))) + nil)) + +(defun utf-7-pre-write-conversion (from to) + (utf-7-encode from to nil)) + +(defun utf-7-imap-pre-write-conversion (from to) + (utf-7-encode from to t)) + +(provide 'utf-7) +;;; utf-7.el ends here