Mercurial > emacs
comparison lisp/international/ucs-tables.el @ 88155:d7ddb3e565de
sync with trunk
author | Henrik Enberg <henrik.enberg@telia.com> |
---|---|
date | Mon, 16 Jan 2006 00:03:54 +0000 |
parents | 0d8b17d428b5 |
children |
comparison
equal
deleted
inserted
replaced
88154:8ce476d3ba36 | 88155:d7ddb3e565de |
---|---|
1 ;;; ucs-tables.el --- translation to, from and via Unicode -*- coding: iso-2022-7bit -*- | 1 ;;; ucs-tables.el --- translation to, from and via Unicode -*- coding: iso-2022-7bit -*- |
2 | 2 |
3 ;; Copyright (C) 2001, 2002 Free Software Foundation, Inc. | 3 ;; Copyright (C) 2001, 2002, 2003, 2005 Free Software Foundation, Inc. |
4 ;; Copyright (C) 2002, 2003 | |
5 ;; National Institute of Advanced Industrial Science and Technology (AIST) | |
6 ;; Registration Number H14PRO021 | |
4 | 7 |
5 ;; Author: Dave Love <fx@gnu.org> | 8 ;; Author: Dave Love <fx@gnu.org> |
6 ;; Keywords: i18n | 9 ;; Keywords: i18n |
7 | 10 |
8 ;; This file is part of GNU Emacs. | 11 ;; This file is part of GNU Emacs. |
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 20 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
18 ;; GNU General Public License for more details. | 21 ;; GNU General Public License for more details. |
19 | 22 |
20 ;; You should have received a copy of the GNU General Public License | 23 ;; You should have received a copy of the GNU General Public License |
21 ;; along with GNU Emacs; see the file COPYING. If not, write to the | 24 ;; along with GNU Emacs; see the file COPYING. If not, write to the |
22 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, | 25 ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
23 ;; Boston, MA 02111-1307, USA. | 26 ;; Boston, MA 02110-1301, USA. |
24 | 27 |
25 ;;; Commentary: | 28 ;;; Commentary: |
26 | 29 |
27 ;; This file provides tables mapping between Unicode numbers and | 30 ;; This file provides tables mapping between Unicode numbers and |
28 ;; emacs-mule characters from the iso-8859 charsets (and others). It | 31 ;; emacs-mule characters from the iso-8859 charsets (and others). It |
1178 ;; translation-table. | 1181 ;; translation-table. |
1179 (remove-hook 'minibuffer-setup-hook 'ucs-minibuffer-setup)) | 1182 (remove-hook 'minibuffer-setup-hook 'ucs-minibuffer-setup)) |
1180 | 1183 |
1181 (when for-encode | 1184 (when for-encode |
1182 ;; Make mule-utf-* encode all characters in ucs-mule-to-mule-unicode. | 1185 ;; Make mule-utf-* encode all characters in ucs-mule-to-mule-unicode. |
1183 (let ((coding-list '(mule-utf-8 mule-utf-16-be mule-utf-16-le))) | 1186 (let ((coding-list '(mule-utf-8 mule-utf-16be mule-utf-16le |
1187 mule-utf-16be-with-signature | |
1188 mule-utf-16le-with-signature))) | |
1184 (define-translation-table 'utf-translation-table-for-encode | 1189 (define-translation-table 'utf-translation-table-for-encode |
1185 ucs-mule-to-mule-unicode) | 1190 ucs-mule-to-mule-unicode) |
1186 (dolist (coding coding-list) | 1191 (dolist (coding coding-list) |
1187 (set-char-table-parent (coding-system-get coding 'safe-chars) | 1192 (set-char-table-parent (coding-system-get coding 'safe-chars) |
1188 ucs-mule-to-mule-unicode) | 1193 ucs-mule-to-mule-unicode))) |
1189 (register-char-codings coding ucs-mule-to-mule-unicode))) | |
1190 | 1194 |
1191 ;; Adjust the 8859 coding systems to fragment the unified characters | 1195 ;; Adjust the 8859 coding systems to fragment the unified characters |
1192 ;; on encoding. | 1196 ;; on encoding. |
1193 (dolist (n '(1 2 3 4 5 7 8 9 14 15)) | 1197 (dolist (n '(1 2 3 4 5 7 8 9 14 15)) |
1194 (let* ((coding-system | 1198 (let* ((coding-system |
1198 (safe (coding-system-get coding-system 'safe-chars))) | 1202 (safe (coding-system-get coding-system 'safe-chars))) |
1199 ;; Actually, the coding system's safe-chars are not normally | 1203 ;; Actually, the coding system's safe-chars are not normally |
1200 ;; used after they've been registered, but we might as well | 1204 ;; used after they've been registered, but we might as well |
1201 ;; record them. Setting the parent here is a convenience. | 1205 ;; record them. Setting the parent here is a convenience. |
1202 (set-char-table-parent safe table) | 1206 (set-char-table-parent safe table) |
1203 ;; Update the table of what encodes to what. | |
1204 (register-char-codings coding-system table) | |
1205 (coding-system-put coding-system 'translation-table-for-encode table))) | 1207 (coding-system-put coding-system 'translation-table-for-encode table))) |
1206 (add-hook 'minibuffer-setup-hook 'ucs-minibuffer-setup)) | 1208 (add-hook 'minibuffer-setup-hook 'ucs-minibuffer-setup))) |
1207 (optimize-char-coding-system-table)) | |
1208 | 1209 |
1209 (defun ucs-fragment-8859 (for-encode for-decode) | 1210 (defun ucs-fragment-8859 (for-encode for-decode) |
1210 "Undo the unification done by `ucs-unify-8859'. | 1211 "Undo the unification done by `ucs-unify-8859'. |
1211 With prefix arg, undo unification on encoding only, i.e. don't undo | 1212 With prefix arg, undo unification on encoding only, i.e. don't undo |
1212 unification on input operations." | 1213 unification on input operations." |
1221 (when for-encode | 1222 (when for-encode |
1222 ;; Disable mule-utf-* encoding for all characters in | 1223 ;; Disable mule-utf-* encoding for all characters in |
1223 ;; ucs-mule-to-mule-unicode except what was originally supported | 1224 ;; ucs-mule-to-mule-unicode except what was originally supported |
1224 ;; and what is translated by utf-translation-table-for-decode when | 1225 ;; and what is translated by utf-translation-table-for-decode when |
1225 ;; `utf-fragment-on-decoding' is non-nil. | 1226 ;; `utf-fragment-on-decoding' is non-nil. |
1226 (let ((coding-list '(mule-utf-8 mule-utf-16-be mule-utf-16-le)) | 1227 (let ((coding-list '(mule-utf-8 mule-utf-16be mule-utf-16le |
1228 mule-utf-16be-with-signature | |
1229 mule-utf-16le-with-signature)) | |
1227 (safe (coding-system-get 'mule-utf-8 'safe-chars))) | 1230 (safe (coding-system-get 'mule-utf-8 'safe-chars))) |
1228 (dolist (coding coding-list) | 1231 (dolist (coding coding-list) |
1229 (set-char-table-parent (coding-system-get coding 'safe-chars) nil)) | 1232 (set-char-table-parent (coding-system-get coding 'safe-chars) nil)) |
1230 ;; Here we assume that all mule-utf-* have the same character | |
1231 ;; repertory, thus we can use SAFE for all of them. | |
1232 (map-char-table | |
1233 (lambda (key val) | |
1234 (if (and (>= key 128) val | |
1235 (not (aref safe key))) | |
1236 (aset char-coding-system-table key | |
1237 (remq 'mule-utf-8 | |
1238 (remq 'mule-utf-16-le | |
1239 (remq 'mule-utf-16-be | |
1240 (aref char-coding-system-table key))))))) | |
1241 ucs-mule-to-mule-unicode) | |
1242 | |
1243 (if (not utf-fragment-on-decoding) | 1233 (if (not utf-fragment-on-decoding) |
1244 (define-translation-table 'utf-translation-table-for-encode) | 1234 (define-translation-table 'utf-translation-table-for-encode) |
1245 (define-translation-table 'utf-translation-table-for-encode | 1235 (define-translation-table 'utf-translation-table-for-encode |
1246 utf-defragmentation-table) | 1236 utf-defragmentation-table))) |
1247 (dolist (coding coding-list) | 1237 |
1248 (register-char-codings coding utf-defragmentation-table)))) | 1238 ;; For each charset, remove the parent of `safe-chars' property of |
1249 | 1239 ;; the corresponding coding system. |
1250 ;; For each charset, remove the entries in | |
1251 ;; `char-coding-system-table' added to its safe-chars table (as | |
1252 ;; its parent). | |
1253 (dolist (n '(1 2 3 4 5 7 8 9 14 15)) | 1240 (dolist (n '(1 2 3 4 5 7 8 9 14 15)) |
1254 (let* ((coding-system | 1241 (let* ((coding-system |
1255 (coding-system-base (intern (format "iso-8859-%d" n)))) | 1242 (coding-system-base (intern (format "iso-8859-%d" n)))) |
1256 (table (symbol-value | |
1257 (intern (format "ucs-8859-%d-encode-table" n)))) | |
1258 (safe (coding-system-get coding-system 'safe-chars))) | 1243 (safe (coding-system-get coding-system 'safe-chars))) |
1259 (when (char-table-parent safe) | 1244 (if (char-table-parent safe) |
1260 (map-char-table | 1245 (set-char-table-parent safe nil)) |
1261 (lambda (key val) | |
1262 (if (and (>= key 128) val) | |
1263 (let ((codings (aref char-coding-system-table key))) | |
1264 (aset char-coding-system-table key | |
1265 (remq coding-system codings))))) | |
1266 (char-table-parent safe)) | |
1267 (set-char-table-parent safe nil)) | |
1268 (coding-system-put coding-system 'translation-table-for-encode nil))) | 1246 (coding-system-put coding-system 'translation-table-for-encode nil))) |
1269 (optimize-char-coding-system-table) | 1247 (remove-hook 'minibuffer-setup-hook 'ucs-minibuffer-setup))) |
1270 (remove-hook 'minibuffer-setup-hook 'ucs-minibuffer-setup)) | |
1271 (optimize-char-coding-system-table)) | |
1272 | 1248 |
1273 (defun ucs-insert (arg) | 1249 (defun ucs-insert (arg) |
1274 "Insert the Emacs character representation of the given Unicode. | 1250 "Insert the Emacs character representation of the given Unicode. |
1275 Interactively, prompts for a hex string giving the code." | 1251 Interactively, prompts for a hex string giving the code." |
1276 (interactive "sUnicode (hex): ") | 1252 (interactive "sUnicode (hex): ") |
1277 (let ((c (decode-char 'ucs (if (integerp arg) | 1253 (or (integerp arg) |
1278 arg | 1254 (setq arg (string-to-number arg 16))) |
1279 (string-to-number arg 16))))) | 1255 (let ((c (decode-char 'ucs arg))) |
1280 (if c | 1256 (if c |
1281 (insert c) | 1257 (insert c) |
1282 (error "Character can't be decoded to UCS")))) | 1258 (if (or (< arg 0) (> arg #x10FFFF)) |
1259 (error "Not a Unicode character code: 0x%X" arg) | |
1260 (error "Character U+%04X is not yet supported" arg))))) | |
1283 | 1261 |
1284 ;;; Dealing with non-8859 character sets. | 1262 ;;; Dealing with non-8859 character sets. |
1285 | 1263 |
1286 ;; We only set up translation on encoding to utf-8. Also translation | 1264 ;; We only set up translation on encoding to utf-8. Also translation |
1287 ;; tables ucs-CS-encode-table are constructed for some coding systems | 1265 ;; tables ucs-CS-encode-table are constructed for some coding systems |
2459 "Set up translation-tables for unifying ISO 8859 characters on encoding. | 2437 "Set up translation-tables for unifying ISO 8859 characters on encoding. |
2460 | 2438 |
2461 The ISO 8859 characters sets overlap, e.g. 8859-1 (Latin-1) and | 2439 The ISO 8859 characters sets overlap, e.g. 8859-1 (Latin-1) and |
2462 8859-15 (Latin-9) differ only in a few characters. Emacs normally | 2440 8859-15 (Latin-9) differ only in a few characters. Emacs normally |
2463 distinguishes equivalent characters from those ISO-8859 character sets | 2441 distinguishes equivalent characters from those ISO-8859 character sets |
2464 which are built in to Emacs. This behaviour is essentially inherited | 2442 which are built in to Emacs. This behavior is essentially inherited |
2465 from the European-originated international standards. Treating them | 2443 from the European-originated international standards. Treating them |
2466 equivalently, by translating to and from a single representation is | 2444 equivalently, by translating to and from a single representation is |
2467 called `unification'. (The `utf-8' coding system treats the | 2445 called `unification'. (The `utf-8' coding system treats the |
2468 characters of European scripts in a unified manner.) | 2446 characters of European scripts in a unified manner.) |
2469 | 2447 |
2519 ;; Arrange to set up the translation-table for keyboard input. This | 2497 ;; Arrange to set up the translation-table for keyboard input. This |
2520 ;; is called from get-buffer-create, set-buffer-file-coding-system, | 2498 ;; is called from get-buffer-create, set-buffer-file-coding-system, |
2521 ;; normal-mode and minibuffer-setup-hook. | 2499 ;; normal-mode and minibuffer-setup-hook. |
2522 (defun ucs-set-table-for-input (&optional buffer) | 2500 (defun ucs-set-table-for-input (&optional buffer) |
2523 "Set up an appropriate `translation-table-for-input' for BUFFER. | 2501 "Set up an appropriate `translation-table-for-input' for BUFFER. |
2524 BUFFER defaults to the current buffer." | 2502 BUFFER defaults to the current buffer. This function is |
2503 automatically called directly at the end of `get-buffer-create'." | |
2525 (when (and unify-8859-on-encoding-mode | 2504 (when (and unify-8859-on-encoding-mode |
2505 (not unify-8859-on-decoding-mode) | |
2526 (char-table-p translation-table-for-input)) | 2506 (char-table-p translation-table-for-input)) |
2527 (let ((cs (and buffer-file-coding-system | 2507 (let ((cs (and buffer-file-coding-system |
2528 (coding-system-base buffer-file-coding-system))) | 2508 (coding-system-base buffer-file-coding-system))) |
2529 table) | 2509 table) |
2530 (if (or (null cs) | 2510 (if (or (null cs) |
2532 (setq cs | 2512 (setq cs |
2533 (and default-buffer-file-coding-system | 2513 (and default-buffer-file-coding-system |
2534 (coding-system-base default-buffer-file-coding-system)))) | 2514 (coding-system-base default-buffer-file-coding-system)))) |
2535 (when cs | 2515 (when cs |
2536 (setq table (coding-system-get cs 'translation-table-for-encode)) | 2516 (setq table (coding-system-get cs 'translation-table-for-encode)) |
2517 (if (and table (symbolp table)) | |
2518 (setq table (get table 'translation-table))) | |
2537 (unless (char-table-p table) | 2519 (unless (char-table-p table) |
2538 (setq table (coding-system-get cs 'translation-table-for-input))) | 2520 (setq table (coding-system-get cs 'translation-table-for-input)) |
2521 (if (and table (symbolp table)) | |
2522 (setq table (get table 'translation-table)))) | |
2539 (when (char-table-p table) | 2523 (when (char-table-p table) |
2540 (if buffer | 2524 (if buffer |
2541 (with-current-buffer buffer | 2525 (with-current-buffer buffer |
2542 (set (make-variable-buffer-local 'translation-table-for-input) | 2526 (set (make-local-variable 'translation-table-for-input) |
2543 table)) | 2527 table)) |
2544 (set (make-variable-buffer-local 'translation-table-for-input) | 2528 (set (make-local-variable 'translation-table-for-input) |
2545 table))))))) | 2529 table))))))) |
2546 | 2530 |
2547 ;; The minibuffer needs to acquire a `buffer-file-coding-system' for | 2531 ;; The minibuffer needs to acquire a `buffer-file-coding-system' for |
2548 ;; the above to work in it. | 2532 ;; the above to work in it. |
2549 (defun ucs-minibuffer-setup () | 2533 (defun ucs-minibuffer-setup () |
2556 buffer-file-coding-system)) | 2540 buffer-file-coding-system)) |
2557 (ucs-set-table-for-input)) | 2541 (ucs-set-table-for-input)) |
2558 | 2542 |
2559 (provide 'ucs-tables) | 2543 (provide 'ucs-tables) |
2560 | 2544 |
2545 ;; arch-tag: b497e22b-7fe1-486a-9352-e2d7f7d76a76 | |
2561 ;;; ucs-tables.el ends here | 2546 ;;; ucs-tables.el ends here |