comparison lisp/international/ucs-tables.el @ 88155:d7ddb3e565de

sync with trunk
author Henrik Enberg <henrik.enberg@telia.com>
date Mon, 16 Jan 2006 00:03:54 +0000
parents 0d8b17d428b5
children
comparison
equal deleted inserted replaced
88154:8ce476d3ba36 88155:d7ddb3e565de
1 ;;; ucs-tables.el --- translation to, from and via Unicode -*- coding: iso-2022-7bit -*- 1 ;;; ucs-tables.el --- translation to, from and via Unicode -*- coding: iso-2022-7bit -*-
2 2
3 ;; Copyright (C) 2001, 2002 Free Software Foundation, Inc. 3 ;; Copyright (C) 2001, 2002, 2003, 2005 Free Software Foundation, Inc.
4 ;; Copyright (C) 2002, 2003
5 ;; National Institute of Advanced Industrial Science and Technology (AIST)
6 ;; Registration Number H14PRO021
4 7
5 ;; Author: Dave Love <fx@gnu.org> 8 ;; Author: Dave Love <fx@gnu.org>
6 ;; Keywords: i18n 9 ;; Keywords: i18n
7 10
8 ;; This file is part of GNU Emacs. 11 ;; This file is part of GNU Emacs.
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 20 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 ;; GNU General Public License for more details. 21 ;; GNU General Public License for more details.
19 22
20 ;; You should have received a copy of the GNU General Public License 23 ;; You should have received a copy of the GNU General Public License
21 ;; along with GNU Emacs; see the file COPYING. If not, write to the 24 ;; along with GNU Emacs; see the file COPYING. If not, write to the
22 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, 25 ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
23 ;; Boston, MA 02111-1307, USA. 26 ;; Boston, MA 02110-1301, USA.
24 27
25 ;;; Commentary: 28 ;;; Commentary:
26 29
27 ;; This file provides tables mapping between Unicode numbers and 30 ;; This file provides tables mapping between Unicode numbers and
28 ;; emacs-mule characters from the iso-8859 charsets (and others). It 31 ;; emacs-mule characters from the iso-8859 charsets (and others). It
1178 ;; translation-table. 1181 ;; translation-table.
1179 (remove-hook 'minibuffer-setup-hook 'ucs-minibuffer-setup)) 1182 (remove-hook 'minibuffer-setup-hook 'ucs-minibuffer-setup))
1180 1183
1181 (when for-encode 1184 (when for-encode
1182 ;; Make mule-utf-* encode all characters in ucs-mule-to-mule-unicode. 1185 ;; Make mule-utf-* encode all characters in ucs-mule-to-mule-unicode.
1183 (let ((coding-list '(mule-utf-8 mule-utf-16-be mule-utf-16-le))) 1186 (let ((coding-list '(mule-utf-8 mule-utf-16be mule-utf-16le
1187 mule-utf-16be-with-signature
1188 mule-utf-16le-with-signature)))
1184 (define-translation-table 'utf-translation-table-for-encode 1189 (define-translation-table 'utf-translation-table-for-encode
1185 ucs-mule-to-mule-unicode) 1190 ucs-mule-to-mule-unicode)
1186 (dolist (coding coding-list) 1191 (dolist (coding coding-list)
1187 (set-char-table-parent (coding-system-get coding 'safe-chars) 1192 (set-char-table-parent (coding-system-get coding 'safe-chars)
1188 ucs-mule-to-mule-unicode) 1193 ucs-mule-to-mule-unicode)))
1189 (register-char-codings coding ucs-mule-to-mule-unicode)))
1190 1194
1191 ;; Adjust the 8859 coding systems to fragment the unified characters 1195 ;; Adjust the 8859 coding systems to fragment the unified characters
1192 ;; on encoding. 1196 ;; on encoding.
1193 (dolist (n '(1 2 3 4 5 7 8 9 14 15)) 1197 (dolist (n '(1 2 3 4 5 7 8 9 14 15))
1194 (let* ((coding-system 1198 (let* ((coding-system
1198 (safe (coding-system-get coding-system 'safe-chars))) 1202 (safe (coding-system-get coding-system 'safe-chars)))
1199 ;; Actually, the coding system's safe-chars are not normally 1203 ;; Actually, the coding system's safe-chars are not normally
1200 ;; used after they've been registered, but we might as well 1204 ;; used after they've been registered, but we might as well
1201 ;; record them. Setting the parent here is a convenience. 1205 ;; record them. Setting the parent here is a convenience.
1202 (set-char-table-parent safe table) 1206 (set-char-table-parent safe table)
1203 ;; Update the table of what encodes to what.
1204 (register-char-codings coding-system table)
1205 (coding-system-put coding-system 'translation-table-for-encode table))) 1207 (coding-system-put coding-system 'translation-table-for-encode table)))
1206 (add-hook 'minibuffer-setup-hook 'ucs-minibuffer-setup)) 1208 (add-hook 'minibuffer-setup-hook 'ucs-minibuffer-setup)))
1207 (optimize-char-coding-system-table))
1208 1209
1209 (defun ucs-fragment-8859 (for-encode for-decode) 1210 (defun ucs-fragment-8859 (for-encode for-decode)
1210 "Undo the unification done by `ucs-unify-8859'. 1211 "Undo the unification done by `ucs-unify-8859'.
1211 With prefix arg, undo unification on encoding only, i.e. don't undo 1212 With prefix arg, undo unification on encoding only, i.e. don't undo
1212 unification on input operations." 1213 unification on input operations."
1221 (when for-encode 1222 (when for-encode
1222 ;; Disable mule-utf-* encoding for all characters in 1223 ;; Disable mule-utf-* encoding for all characters in
1223 ;; ucs-mule-to-mule-unicode except what was originally supported 1224 ;; ucs-mule-to-mule-unicode except what was originally supported
1224 ;; and what is translated by utf-translation-table-for-decode when 1225 ;; and what is translated by utf-translation-table-for-decode when
1225 ;; `utf-fragment-on-decoding' is non-nil. 1226 ;; `utf-fragment-on-decoding' is non-nil.
1226 (let ((coding-list '(mule-utf-8 mule-utf-16-be mule-utf-16-le)) 1227 (let ((coding-list '(mule-utf-8 mule-utf-16be mule-utf-16le
1228 mule-utf-16be-with-signature
1229 mule-utf-16le-with-signature))
1227 (safe (coding-system-get 'mule-utf-8 'safe-chars))) 1230 (safe (coding-system-get 'mule-utf-8 'safe-chars)))
1228 (dolist (coding coding-list) 1231 (dolist (coding coding-list)
1229 (set-char-table-parent (coding-system-get coding 'safe-chars) nil)) 1232 (set-char-table-parent (coding-system-get coding 'safe-chars) nil))
1230 ;; Here we assume that all mule-utf-* have the same character
1231 ;; repertory, thus we can use SAFE for all of them.
1232 (map-char-table
1233 (lambda (key val)
1234 (if (and (>= key 128) val
1235 (not (aref safe key)))
1236 (aset char-coding-system-table key
1237 (remq 'mule-utf-8
1238 (remq 'mule-utf-16-le
1239 (remq 'mule-utf-16-be
1240 (aref char-coding-system-table key)))))))
1241 ucs-mule-to-mule-unicode)
1242
1243 (if (not utf-fragment-on-decoding) 1233 (if (not utf-fragment-on-decoding)
1244 (define-translation-table 'utf-translation-table-for-encode) 1234 (define-translation-table 'utf-translation-table-for-encode)
1245 (define-translation-table 'utf-translation-table-for-encode 1235 (define-translation-table 'utf-translation-table-for-encode
1246 utf-defragmentation-table) 1236 utf-defragmentation-table)))
1247 (dolist (coding coding-list) 1237
1248 (register-char-codings coding utf-defragmentation-table)))) 1238 ;; For each charset, remove the parent of `safe-chars' property of
1249 1239 ;; the corresponding coding system.
1250 ;; For each charset, remove the entries in
1251 ;; `char-coding-system-table' added to its safe-chars table (as
1252 ;; its parent).
1253 (dolist (n '(1 2 3 4 5 7 8 9 14 15)) 1240 (dolist (n '(1 2 3 4 5 7 8 9 14 15))
1254 (let* ((coding-system 1241 (let* ((coding-system
1255 (coding-system-base (intern (format "iso-8859-%d" n)))) 1242 (coding-system-base (intern (format "iso-8859-%d" n))))
1256 (table (symbol-value
1257 (intern (format "ucs-8859-%d-encode-table" n))))
1258 (safe (coding-system-get coding-system 'safe-chars))) 1243 (safe (coding-system-get coding-system 'safe-chars)))
1259 (when (char-table-parent safe) 1244 (if (char-table-parent safe)
1260 (map-char-table 1245 (set-char-table-parent safe nil))
1261 (lambda (key val)
1262 (if (and (>= key 128) val)
1263 (let ((codings (aref char-coding-system-table key)))
1264 (aset char-coding-system-table key
1265 (remq coding-system codings)))))
1266 (char-table-parent safe))
1267 (set-char-table-parent safe nil))
1268 (coding-system-put coding-system 'translation-table-for-encode nil))) 1246 (coding-system-put coding-system 'translation-table-for-encode nil)))
1269 (optimize-char-coding-system-table) 1247 (remove-hook 'minibuffer-setup-hook 'ucs-minibuffer-setup)))
1270 (remove-hook 'minibuffer-setup-hook 'ucs-minibuffer-setup))
1271 (optimize-char-coding-system-table))
1272 1248
1273 (defun ucs-insert (arg) 1249 (defun ucs-insert (arg)
1274 "Insert the Emacs character representation of the given Unicode. 1250 "Insert the Emacs character representation of the given Unicode.
1275 Interactively, prompts for a hex string giving the code." 1251 Interactively, prompts for a hex string giving the code."
1276 (interactive "sUnicode (hex): ") 1252 (interactive "sUnicode (hex): ")
1277 (let ((c (decode-char 'ucs (if (integerp arg) 1253 (or (integerp arg)
1278 arg 1254 (setq arg (string-to-number arg 16)))
1279 (string-to-number arg 16))))) 1255 (let ((c (decode-char 'ucs arg)))
1280 (if c 1256 (if c
1281 (insert c) 1257 (insert c)
1282 (error "Character can't be decoded to UCS")))) 1258 (if (or (< arg 0) (> arg #x10FFFF))
1259 (error "Not a Unicode character code: 0x%X" arg)
1260 (error "Character U+%04X is not yet supported" arg)))))
1283 1261
1284 ;;; Dealing with non-8859 character sets. 1262 ;;; Dealing with non-8859 character sets.
1285 1263
1286 ;; We only set up translation on encoding to utf-8. Also translation 1264 ;; We only set up translation on encoding to utf-8. Also translation
1287 ;; tables ucs-CS-encode-table are constructed for some coding systems 1265 ;; tables ucs-CS-encode-table are constructed for some coding systems
2459 "Set up translation-tables for unifying ISO 8859 characters on encoding. 2437 "Set up translation-tables for unifying ISO 8859 characters on encoding.
2460 2438
2461 The ISO 8859 characters sets overlap, e.g. 8859-1 (Latin-1) and 2439 The ISO 8859 characters sets overlap, e.g. 8859-1 (Latin-1) and
2462 8859-15 (Latin-9) differ only in a few characters. Emacs normally 2440 8859-15 (Latin-9) differ only in a few characters. Emacs normally
2463 distinguishes equivalent characters from those ISO-8859 character sets 2441 distinguishes equivalent characters from those ISO-8859 character sets
2464 which are built in to Emacs. This behaviour is essentially inherited 2442 which are built in to Emacs. This behavior is essentially inherited
2465 from the European-originated international standards. Treating them 2443 from the European-originated international standards. Treating them
2466 equivalently, by translating to and from a single representation is 2444 equivalently, by translating to and from a single representation is
2467 called `unification'. (The `utf-8' coding system treats the 2445 called `unification'. (The `utf-8' coding system treats the
2468 characters of European scripts in a unified manner.) 2446 characters of European scripts in a unified manner.)
2469 2447
2519 ;; Arrange to set up the translation-table for keyboard input. This 2497 ;; Arrange to set up the translation-table for keyboard input. This
2520 ;; is called from get-buffer-create, set-buffer-file-coding-system, 2498 ;; is called from get-buffer-create, set-buffer-file-coding-system,
2521 ;; normal-mode and minibuffer-setup-hook. 2499 ;; normal-mode and minibuffer-setup-hook.
2522 (defun ucs-set-table-for-input (&optional buffer) 2500 (defun ucs-set-table-for-input (&optional buffer)
2523 "Set up an appropriate `translation-table-for-input' for BUFFER. 2501 "Set up an appropriate `translation-table-for-input' for BUFFER.
2524 BUFFER defaults to the current buffer." 2502 BUFFER defaults to the current buffer. This function is
2503 automatically called directly at the end of `get-buffer-create'."
2525 (when (and unify-8859-on-encoding-mode 2504 (when (and unify-8859-on-encoding-mode
2505 (not unify-8859-on-decoding-mode)
2526 (char-table-p translation-table-for-input)) 2506 (char-table-p translation-table-for-input))
2527 (let ((cs (and buffer-file-coding-system 2507 (let ((cs (and buffer-file-coding-system
2528 (coding-system-base buffer-file-coding-system))) 2508 (coding-system-base buffer-file-coding-system)))
2529 table) 2509 table)
2530 (if (or (null cs) 2510 (if (or (null cs)
2532 (setq cs 2512 (setq cs
2533 (and default-buffer-file-coding-system 2513 (and default-buffer-file-coding-system
2534 (coding-system-base default-buffer-file-coding-system)))) 2514 (coding-system-base default-buffer-file-coding-system))))
2535 (when cs 2515 (when cs
2536 (setq table (coding-system-get cs 'translation-table-for-encode)) 2516 (setq table (coding-system-get cs 'translation-table-for-encode))
2517 (if (and table (symbolp table))
2518 (setq table (get table 'translation-table)))
2537 (unless (char-table-p table) 2519 (unless (char-table-p table)
2538 (setq table (coding-system-get cs 'translation-table-for-input))) 2520 (setq table (coding-system-get cs 'translation-table-for-input))
2521 (if (and table (symbolp table))
2522 (setq table (get table 'translation-table))))
2539 (when (char-table-p table) 2523 (when (char-table-p table)
2540 (if buffer 2524 (if buffer
2541 (with-current-buffer buffer 2525 (with-current-buffer buffer
2542 (set (make-variable-buffer-local 'translation-table-for-input) 2526 (set (make-local-variable 'translation-table-for-input)
2543 table)) 2527 table))
2544 (set (make-variable-buffer-local 'translation-table-for-input) 2528 (set (make-local-variable 'translation-table-for-input)
2545 table))))))) 2529 table)))))))
2546 2530
2547 ;; The minibuffer needs to acquire a `buffer-file-coding-system' for 2531 ;; The minibuffer needs to acquire a `buffer-file-coding-system' for
2548 ;; the above to work in it. 2532 ;; the above to work in it.
2549 (defun ucs-minibuffer-setup () 2533 (defun ucs-minibuffer-setup ()
2556 buffer-file-coding-system)) 2540 buffer-file-coding-system))
2557 (ucs-set-table-for-input)) 2541 (ucs-set-table-for-input))
2558 2542
2559 (provide 'ucs-tables) 2543 (provide 'ucs-tables)
2560 2544
2545 ;; arch-tag: b497e22b-7fe1-486a-9352-e2d7f7d76a76
2561 ;;; ucs-tables.el ends here 2546 ;;; ucs-tables.el ends here