Mercurial > emacs
changeset 50897:289523dd4fa7
*** empty log message ***
author | Dave Love <fx@gnu.org> |
---|---|
date | Thu, 08 May 2003 17:54:14 +0000 |
parents | 0aba658cfdd2 |
children | 8806616471e2 |
files | etc/NEWS lisp/ChangeLog lisp/international/utf-7.el src/ChangeLog |
diffstat | 4 files changed, 161 insertions(+), 8 deletions(-) [+] |
line wrap: on
line diff
--- a/etc/NEWS Thu May 08 17:49:05 2003 +0000 +++ b/etc/NEWS Thu May 08 17:54:14 2003 +0000 @@ -88,6 +88,8 @@ * Changes in Emacs 21.4 +** A UTF-7 coding system is available in the library `utf-7'. + ** GUD mode has its own tool bar for controlling execution of the inferior and other common debugger commands. @@ -224,13 +226,17 @@ library. These include complete versions of most of those in codepage.el, based on Unicode mappings. -** The utf-8 coding system has been enhanced. Untranslatable utf-8 -sequences (mostly representing CJK characters) are composed into -single quasi-characters. User option `utf-translate-cjk-mode' -arranges to translate many utf-8 CJK character sequences into real -Emacs characters in a similar way to the Mule-UCS system. The utf-8 -coding system will now encode characters from most of Emacs's +** The utf-8/16 coding systems have been enhanced. +By default, untranslatable utf-8 sequences (mostly representing CJK +characters) are simply composed into single quasi-characters. User +option `utf-translate-cjk' arranges to translate many utf-8 CJK +character sequences into real Emacs characters in a similar way to the +Mule-UCS system. This uses significant space, so is not the default. +You can augment/amend the CJK translation via hash tables +`ucs-mule-cjk-to-unicode' and `ucs-unicode-to-mule-cjk'. The utf-8 +coding system now also encodes characters from most of Emacs's one-dimensional internal charsets, specifically the ISO-8859 ones. +The utf-16 coding system is affected similarly. ** iso-10646-1 (`Unicode') fonts can be used to display any range of characters encodable by the utf-8 coding system. Just specify the @@ -405,7 +411,7 @@ ** Speedbar has moved from the "Tools" top level menu to "Show/Hide". +++ -** Emacs can now indicate in the mode-line the presence of new e-mails +** Emacs can now indicate in the mode-line the presence of new e-mail in a directory or in a file. 
See the documentation of the user option `display-time-mail-directory'. @@ -1208,7 +1214,7 @@ --- ** The obsolete C mode (c-mode.el) has been removed to avoid problems -with Custom. +with Custom. cplus-md.el, which required it, has also been removed. ** New package benchmark.el contains simple support for convenient timing measurements of code (including the garbage collection component).
--- a/lisp/ChangeLog Thu May 08 17:49:05 2003 +0000 +++ b/lisp/ChangeLog Thu May 08 17:54:14 2003 +0000 @@ -1,3 +1,7 @@ +2003-05-08 Dave Love <fx@gnu.org> + + * international/utf-7.el: New file. + 2003-05-07 Francis J. Wright <F.J.Wright@qmul.ac.uk> * files.el (insert-file-contents-literally): Allow it to be called
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/lisp/international/utf-7.el Thu May 08 17:54:14 2003 +0000
@@ -0,0 +1,139 @@
+;;; utf-7.el --- utf-7 coding system
+
+;; Copyright (C) 2003 Free Software Foundation, Inc.
+
+;; Author: Dave Love <fx@gnu.org>
+;; Keywords: i18n, mail
+
+;; This file is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 2, or (at your option)
+;; any later version.
+
+;; This file is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GNU Emacs; see the file COPYING.  If not, write to
+;; the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+;; Boston, MA 02111-1307, USA.
+
+;;; Commentary:
+
+;; Defines a coding system for UTF-7, defined in RFC 2152.  Non-ASCII
+;; segments are encoded as base64-encoded big endian UTF-16.  Also
+;; defines a variation required for IMAP (RFC 2060).
+
+;; The encoding and decoding was originally taken from Jon K Hellan's
+;; implementation in Gnus, but has been substantially re-done.
+
+;; This probably needs more attention.  In particular, it's not
+;; completely consistent with iconv's behaviour.  It's arguable
+;; whether the IMAP version should be a coding system since it's
+;; apparently only used for IMAP mailbox names, so it's commented out.
+
+;;; Code:
+
+(make-coding-system
+ 'utf-7 0 ?U
+ "UTF-7 encoding of Unicode (RFC 2152)"
+ nil
+ `((safe-chars . ,(coding-system-get 'utf-16-be 'safe-chars))
+   (mime-charset . utf-7)
+   (pre-write-conversion . utf-7-pre-write-conversion)
+   (post-read-conversion . utf-7-post-read-conversion)))
+
+;; (make-coding-system
+;;  'utf-7-imap 0 ?u
+;;  "UTF-7 encoding of Unicode, IMAP version (RFC 2060)"
+;;  nil
+;;  `((safe-chars . ,(coding-system-get 'utf-16-be 'safe-chars))
+;;    (pre-write-conversion . utf-7-imap-pre-write-conversion)
+;;    (post-read-conversion . utf-7-imap-post-read-conversion)))
+
+(defun utf-7-decode (len imap)
+  "Decode LEN bytes of UTF-7 at point and return the decoded length.
+IMAP non-nil means use the IMAP version (`&' shift char, `,' for `/')."
+  (save-excursion
+    (save-restriction
+      (narrow-to-region (point) (+ (point) len))
+      (let ((not-esc (if imap "^&" "^+")) ; everything but the shift char
+            (skip-chars (if imap "A-Za-z0-9+," "A-Za-z0-9+/"))) ; base64 set
+        (while (not (eobp))
+          (skip-chars-forward not-esc)
+          (unless (eobp)
+            (forward-char)              ; step over the shift char
+            (let ((p (point))
+                  (run-length (skip-chars-forward skip-chars))) ; base64 length
+              (if (eq ?- (char-after))  ; drop the explicit run terminator
+                  (delete-char 1))
+              (unless (= run-length 0)  ; encoded lone esc-char
+                (let ((pl (mod (- run-length) 4))) ; `=' padding to restore
+                  (insert-char ?= pl)
+                  (if imap
+                      (subst-char-in-region p (point) ?, ?/))
+                  (base64-decode-region p (point)))
+                (decode-coding-region p (point) 'utf-16-be)
+                (save-excursion
+                  (goto-char p)
+                  (delete-backward-char 1))))))) ; remove the shift char itself
+      (- (point-max) (point-min)))))    ; size of the narrowed, decoded text
+
+(defun utf-7-post-read-conversion (len) ; `post-read-conversion' of `utf-7'
+  (utf-7-decode len nil))               ; nil: RFC 2152 form, not IMAP
+
+;; (defun utf-7-imap-post-read-conversion (len)
+;;   (utf-7-decode len t))
+
+(defun utf-7-encode (from to imap)
+  "Encode bytes between FROM and TO (or the string FROM) to UTF-7.
+IMAP non-nil means use the IMAP version.  Result is in a new current buffer."
+  (let* ((old-buf (current-buffer))
+         (esc (if imap ?& ?+))
+         ;; These are characters which can be encoded asis.
+         (skip-chars (if imap
+                         "\t\n\r\x20-\x25\x27-\x7e" ; rfc2060
+                       ;; This includes the rfc2152 optional set.
+                       ;; Perhaps it shouldn't (like iconv).
+                       "\t\n\r -*,-[]-}"))
+         (not-skip-chars (format "^%s%c" skip-chars esc)))
+    (set-buffer (generate-new-buffer " *temp*"))
+    (if (stringp from)
+        (insert from)
+      (insert-buffer-substring old-buf from to))
+    (goto-char (point-min))
+    (while (not (eobp))
+      (skip-chars-forward skip-chars)
+      (if (eq esc (char-after))         ; literal shift char: emit ESC then `-'
+          (progn (forward-char)
+                 (insert ?-))
+        (unless (eobp)
+          (insert esc)
+          (let ((p (point)))
+            (skip-chars-forward not-skip-chars)
+            (save-restriction
+              ;; encode-coding-region doesn't preserve point
+              (narrow-to-region p (point))
+              (encode-coding-region p (point-max) 'utf-16-be)
+              (base64-encode-region p (point-max) t) ; t: no line breaks, which the trim below would truncate at
+              (if imap
+                  (subst-char-in-region p (point-max) ?/ ?,))
+              (goto-char p)
+              ;; As I read the RFC, this isn't correct, but it's
+              ;; consistent with iconv, at least regarding `='.
+              (skip-chars-forward "^= \t\n")
+              (delete-region (point) (point-max))))
+          (if (or imap (not (eobp)))    ; RFC 2060: names must end in US-ASCII
+              (insert ?-)))))
+    nil))                               ; output is the temp buffer, left current
+
+(defun utf-7-pre-write-conversion (from to) ; `pre-write-conversion' of `utf-7'
+  (utf-7-encode from to nil))               ; nil: RFC 2152 form, not IMAP
+
+;; (defun utf-7-imap-pre-write-conversion (from to)
+;;   (utf-7-encode from to t))
+
+(provide 'utf-7)
+;;; utf-7.el ends here
--- a/src/ChangeLog Thu May 08 17:49:05 2003 +0000 +++ b/src/ChangeLog Thu May 08 17:54:14 2003 +0000 @@ -1,3 +1,7 @@ +2003-05-08 Dave Love <fx@gnu.org> + + * coding.c (Vlast_coding_system_used): Doc fix. + 2003-05-07 Jason Rumney <jasonr@gnu.org> * fileio.c (Ffile_symlink_p): Let handlers handle symlinks even