Mercurial > emacs
diff lisp/international/utf-7.el @ 50897:289523dd4fa7
*** empty log message ***
author | Dave Love <fx@gnu.org> |
---|---|
date | Thu, 08 May 2003 17:54:14 +0000 |
parents | |
children | 90215b7be64d |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lisp/international/utf-7.el Thu May 08 17:54:14 2003 +0000 @@ -0,0 +1,139 @@ +;;; utf-7.el --- utf-7 coding system + +;; Copyright (C) 2003 Free Software Foundation, Inc. + +;; Author: Dave Love <fx@gnu.org> +;; Keywords: i18n, mail + +;; This file is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 2, or (at your option) +;; any later version. + +;; This file is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GNU Emacs; see the file COPYING. If not, write to +;; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, +;; Boston, MA 02111-1307, USA. + +;;; Commentary: + +;; Defines a coding system for UTF-7, defined in RFC 2152. Non-ASCII +;; segments are encoded as base64-encoded big endian UTF-16. Also +;; defines a variation required for IMAP (RFC 2060). + +;; The encoding and decoding was originally taken from Jon K Hellan's +;; implementation in Gnus, but has been substantially re-done. + +;; This probably needs more attention. In particular, it's not +;; completely consistent with iconv's behaviour. It's arguable +;; whether the IMAP version should be a coding system since it's +;; apparently only used for IMAP mailbox names, so it's commented out. + +;;; Code: + +(make-coding-system + 'utf-7 0 ?U + "UTF-7 encoding of Unicode (RFC 2152)" + nil + `((safe-chars . ,(coding-system-get 'utf-16-be 'safe-chars)) + (mime-charset . utf-7) + (pre-write-conversion . utf-7-pre-write-conversion) + (post-read-conversion . utf-7-post-read-conversion))) + +;; (make-coding-system +;; 'utf-7-imap 0 ?u +;; "UTF-7 encoding of Unicode, IMAP version (RFC 2060)" +;; nil +;; `((safe-chars . ,(coding-system-get 'utf-16-be 'safe-chars)) +;; (pre-write-conversion . utf-7-imap-pre-write-conversion) +;; (post-read-conversion . utf-7-imap-post-read-conversion))) + +(defun utf-7-decode (len imap) + "Decode LEN bytes of UTF-7 at point. +IMAP non-nil means use the IMAP version." + (save-excursion + (save-restriction + (narrow-to-region (point) (+ (point) len)) + (let ((not-esc (if imap "^&" "^+")) + (skip-chars (if imap "A-Za-z0-9+," "A-Za-z0-9+/"))) + (while (not (eobp)) + (skip-chars-forward not-esc) + (unless (eobp) + (forward-char) + (let ((p (point)) + (run-length (skip-chars-forward skip-chars))) + (if (eq ?- (char-after)) + (delete-char 1)) + (unless (= run-length 0) ; encoded lone esc-char + (let ((pl (mod (- run-length) 4))) + (insert-char ?= pl) + (if imap + (subst-char-in-region p (point) ?, ?/)) + (base64-decode-region p (point))) + (decode-coding-region p (point) 'utf-16-be) + (save-excursion + (goto-char p) + (delete-backward-char 1))))))) + (- (point-max) (point-min))))) + +(defun utf-7-post-read-conversion (len) + (utf-7-decode len nil)) + +;; (defun utf-7-imap-post-read-conversion (len) +;; (utf-7-decode len t)) + +(defun utf-7-encode (from to imap) + "Encode bytes between FROM and TO to UTF-7. +ESC and SKIP-CHARS are adjusted for the normal and IMAP versions." + (let* ((old-buf (current-buffer)) + (esc (if imap ?& ?+)) + ;; These are characters which can be encoded asis. + (skip-chars (if imap + "\t\n\r\x20-\x25\x27-\x7e" ; rfc2060 + ;; This includes the rfc2152 optional set. + ;; Perhaps it shouldn't (like iconv). + "\t\n\r -*,-[]-}")) + (not-skip-chars (format "^%s%c" skip-chars esc))) + (set-buffer (generate-new-buffer " *temp*")) + (if (stringp from) + (insert from) + (insert-buffer-substring old-buf from to)) + (goto-char (point-min)) + (while (not (eobp)) + (skip-chars-forward skip-chars) + (if (eq ?+ (char-after)) + (progn (forward-char) + (insert ?-)) + (unless (eobp) + (insert esc) + (let ((p (point))) + (skip-chars-forward not-skip-chars) + (save-restriction + ;; encode-coding-region doesn't preserve point + (narrow-to-region p (point)) + (encode-coding-region p (point-max) 'utf-16-be) + (base64-encode-region p (point-max)) + (if imap + (subst-char-in-region p (point-max) ?/ ?,)) + (goto-char p) + ;; As I read the RFC, this isn't correct, but it's + ;; consistent with iconv, at least regarding `='. + (skip-chars-forward "^= \t\n") + (delete-region (point) (point-max)))) + (unless (eobp) + (insert ?-))))) + nil)) + +(defun utf-7-pre-write-conversion (from to) + (utf-7-encode from to nil)) + +;; (defun utf-7-imap-pre-write-conversion (from to) +;; (utf-7-encode from to t)) + +(provide 'utf-7) +;;; utf-7.el ends here