comparison lisp/international/ucs-tables.el @ 46506:a7f933a7b003

Optimize tables. Deal with some non-8859 charsets. (ucs-mule-to-mule-unicode): New. (ucs-unify-8859): Use utf-8-fragment-on-decoding, set up Quail translation. (ucs-fragment-8859): Modified consistent with ucs-unify-8859. (unify-8859-on-encoding-mode): Doc mod. Fix custom version. (unify-8859-on-decoding-mode): Doc mod. Change code. Fix custom version. Add custom dependencies. (ucs-insert): Check for null from decode-char. (translation-table-for-input, ucs-quail-activate) (ucs-minibuffer-setup, ccl-encode-unicode-font) (ucs-tables-unload-hook): New.
author Dave Love <fx@gnu.org>
date Wed, 17 Jul 2002 19:21:41 +0000
parents aee6ae77d23b
children 6262acd6c7ac
comparison
equal deleted inserted replaced
46505:005d282a48ed 46506:a7f933a7b003
23 ;; Boston, MA 02111-1307, USA. 23 ;; Boston, MA 02111-1307, USA.
24 24
25 ;;; Commentary: 25 ;;; Commentary:
26 26
27 ;; This file provides tables mapping between Unicode numbers and 27 ;; This file provides tables mapping between Unicode numbers and
28 ;; emacs-mule characters from the iso8859 charsets (and others). It 28 ;; emacs-mule characters from the iso-8859 charsets (and others). It
29 ;; also provides some auxiliary functions. 29 ;; also provides some auxiliary functions.
30 30
31 ;; These tables are used to construct other mappings between the Mule 31 ;; These tables are used to construct other mappings between the Mule
32 ;; iso8859 charsets and the emacs-unicode charsets and a table that 32 ;; iso8859 charsets and the emacs-unicode charsets and a table that
33 ;; unifies iso8859 characters using a single charset as far as 33 ;; unifies iso-8859 characters using a single charset as far as
34 ;; possible. These tables are used by latin1-disp.el to display some 34 ;; possible. These tables are used by latin1-disp.el to display some
35 ;; Unicode characters without a Unicode font and by utf-8.el to unify 35 ;; Unicode characters without a Unicode font and by utf-8.el to unify
36 ;; Latin-N as far as possible on encoding. 36 ;; Latin-N as far as possible on encoding.
37 37
38 ;; More drastically, they can be used to unify 8859 into Latin-1 plus 38 ;; More drastically, they can be used to unify 8859 into Latin-1 plus
42 ;; which are supposed to contain distinct 8859 charsets. Also, it can 42 ;; which are supposed to contain distinct 8859 charsets. Also, it can
43 ;; make reading and writing of emacs-mule and iso-2022-based encodings 43 ;; make reading and writing of emacs-mule and iso-2022-based encodings
44 ;; not idempotent. 44 ;; not idempotent.
45 45
46 ;; Global minor modes are provided to unify on encoding and decoding. 46 ;; Global minor modes are provided to unify on encoding and decoding.
47 47 ;; These could be extended to non-iso-8859 charsets. However 8859 is
48 ;; The translation table `ucs-mule-to-mule-unicode' is populated. 48 ;; all that users normally care about unifying although, for instance,
49 ;; This is used by the `mule-utf-8' coding system to encode extra 49 ;; Greek occurs in as many as nine Emacs charsets.
50 ;; characters. 50
51 ;; The translation table `ucs-mule-to-mule-unicode' is populated,
52 ;; which could be used for more general unification on decoding. This
53 ;; is used by the `mule-utf-8' coding system to encode extra
54 ;; characters, and also by the coding systems set up by code-pages.el.
55 ;; The decoding tables here take account of
56 ;; `utf-8-fragment-on-decoding' which may specify decoding Greek and
57 ;; Cyrillic into 8859 charsets.
58
59 ;; Unification also puts a `translation-table-for-input' property on
60 ;; relevant coding coding systems and arranges for the
61 ;; `translation-table-for-input' variable to be set either globally or
62 ;; locally. This is used by Quail input methods to translate input
63 ;; characters appropriately for the buffer's coding system (if
64 ;; possible). Unification on decoding sets it globally to translate
65 ;; to Unicode. Unification on encoding uses hooks to set it up
66 ;; locally to buffers. Thus in the latter case, typing `"a' into a
67 ;; Latin-1 buffer using the `latin-2-prefix' method translates the
68 ;; generated latin-iso8859-2 `,Bd(B' into latin-iso8859-1 `,Ad(B'.
69
70 ;; NB, this code depends on the default value of
71 ;; `enable-character-translation'. (Making it nil would anyway lead
72 ;; to inconsistent behaviour between CCL-based coding systems which
73 ;; use explicit translation tables and the rest.)
51 74
52 ;; Command `ucs-insert' is convenient for inserting a given Unicode. 75 ;; Command `ucs-insert' is convenient for inserting a given Unicode.
53 ;; (See also the `ucs' input method.) 76 ;; (See also the `ucs' input method.)
77
78 ;; A replacement CCL program is provided which allows characters in
79 ;; the `ucs-mule-to-mule-unicode' table to be displayed with an
80 ;; iso-10646-encoded font. E.g. to use a `Unicode' font for Cyrillic:
81 ;;
82 ;; (set-fontset-font "fontset-startup"
83 ;; (cons (make-char 'cyrillic-iso8859-5 160)
84 ;; (make-char 'cyrillic-iso8859-5 255))
85 ;; '(nil . "ISO10646-1"))
54 86
55 ;;; Code: 87 ;;; Code:
56 88
57 ;;; Define tables, to be populated later. 89 ;;; Define tables, to be populated later.
58 90
1065 l) 1097 l)
1066 (while (< i 256) 1098 (while (< i 256)
1067 (push (cons (make-char 'latin-iso8859-1 (- i 128)) i) 1099 (push (cons (make-char 'latin-iso8859-1 (- i 128)) i)
1068 l) 1100 l)
1069 (setq i (1+ i))) 1101 (setq i (1+ i)))
1070 (nreverse l))) 1102 (nreverse l))))
1071
1072 ;; (case-table (standard-case-table))
1073 ;; (syntax-table (standard-syntax-table))
1074 )
1075 1103
1076 ;; Convert the lists to the basic char tables. 1104 ;; Convert the lists to the basic char tables.
1077 (dolist (n (list 15 14 9 8 7 5 4 3 2 1)) 1105 (dolist (n (list 15 14 9 8 7 5 4 3 2 1))
1078 (let ((alist (symbol-value (intern (format "ucs-8859-%d-alist" n))))) 1106 (let ((alist (symbol-value (intern (format "ucs-8859-%d-alist" n)))))
1079 (dolist (pair alist) 1107 (dolist (pair alist)
1082 (mu (decode-char 'ucs (cdr pair)))) 1110 (mu (decode-char 'ucs (cdr pair))))
1083 (aset ucs-mule-8859-to-ucs-table mule uc) 1111 (aset ucs-mule-8859-to-ucs-table mule uc)
1084 ;; (aset ucs-ucs-to-mule-8859-table uc mule) 1112 ;; (aset ucs-ucs-to-mule-8859-table uc mule)
1085 ;; (aset ucs-mule-unicode-to-mule-8859 mu mule) 1113 ;; (aset ucs-mule-unicode-to-mule-8859 mu mule)
1086 (aset ucs-mule-8859-to-mule-unicode mule mu) 1114 (aset ucs-mule-8859-to-mule-unicode mule mu)
1087 (aset ucs-mule-to-mule-unicode mule mu))) 1115 (aset ucs-mule-to-mule-unicode mule mu)))))
1088 ;; I think this is actually done OK in characters.el. 1116 ;; The table optimizing here and elsewhere probably isn't very
1089 ;; Probably things like accents shouldn't have word syntax, but the 1117 ;; useful, but seems good practice.
1090 ;; Latin-N syntax tables currently aren't consistent for such 1118 (optimize-char-table ucs-mule-to-mule-unicode)
1091 ;; characters anyhow. 1119 (optimize-char-table ucs-mule-8859-to-mule-unicode)
1092 ;; ;; Make the mule-unicode characters inherit syntax and case info
1093 ;; ;; if they don't already have it.
1094 ;; (dolist (pair alist)
1095 ;; (let ((mule (car pair))
1096 ;; (uc (cdr pair))
1097 ;; (mu (decode-char 'ucs (cdr pair))))
1098 ;; (let ((syntax (aref syntax-table mule)))
1099 ;; (if (eq mule (downcase mule))
1100 ;; (if (eq mule (upcase mule)) ; non-letter or uncased letter
1101 ;; (progn
1102 ;; (if (= 4 (car syntax)) ; left delim
1103 ;; (progn
1104 ;; (aset syntax-table
1105 ;; mu
1106 ;; (cons 4 (aref ucs-mule-8859-to-mule-unicode
1107 ;; (cdr syntax))))
1108 ;; (aset syntax-table
1109 ;; (aref ucs-mule-8859-to-mule-unicode
1110 ;; (cdr syntax))
1111 ;; (cons 5 mu)))
1112 ;; (aset syntax-table mu syntax))
1113 ;; (aset case-table mu mu)))
1114 ;; ;; Upper case letter
1115 ;; (let ((lower (aref ucs-mule-8859-to-mule-unicode
1116 ;; (aref case-table mule))))
1117 ;; (aset case-table mu lower)
1118 ;; (aset case-table lower lower)
1119 ;; (modify-syntax-entry lower "w " syntax-table)
1120 ;; (modify-syntax-entry mu "w " syntax-table))))))
1121 ))
1122 ;; Derive tables that can be used as per-coding-system 1120 ;; Derive tables that can be used as per-coding-system
1123 ;; `translation-table-for-encode's. 1121 ;; `translation-table-for-encode's.
1124 (dolist (n (list 15 14 9 8 7 5 4 3 2 1)) 1122 (dolist (n (list 15 14 9 8 7 5 4 3 2 1))
1125 (let* ((alist (symbol-value (intern (format "ucs-8859-%d-alist" n)))) 1123 (let* ((alist (symbol-value (intern (format "ucs-8859-%d-alist" n))))
1126 (encode-translator (set (intern (format "ucs-8859-%d-encode-table" 1124 (encode-translator (set (intern (format "ucs-8859-%d-encode-table"
1136 ;; unicode as some character in this set. 1134 ;; unicode as some character in this set.
1137 (map-char-table (lambda (k v) 1135 (map-char-table (lambda (k v)
1138 (if (and (setq elt (rassq v alist)) 1136 (if (and (setq elt (rassq v alist))
1139 (not (assq k alist))) 1137 (not (assq k alist)))
1140 (aset encode-translator k (car elt)))) 1138 (aset encode-translator k (car elt))))
1141 ucs-mule-8859-to-ucs-table)))) 1139 ucs-mule-8859-to-ucs-table)
1140 (optimize-char-table encode-translator))))
1142 1141
1143 ;; Register for use in CCL. 1142 ;; Register for use in CCL.
1144 (define-translation-table 'ucs-mule-8859-to-mule-unicode 1143 (define-translation-table 'ucs-mule-8859-to-mule-unicode
1145 ucs-mule-8859-to-mule-unicode) 1144 ucs-mule-8859-to-mule-unicode)
1146 1145 (define-translation-table 'ucs-mule-to-mule-unicode
1147 ;; Fixme: Make this reversible, which means frobbing 1146 ucs-mule-to-mule-unicode)
1148 ;; `char-coding-system-table' directly to remove what we added -- see 1147
1149 ;; codepages.el. Also make it a user option.
1150 (defun ucs-unify-8859 (&optional encode-only) 1148 (defun ucs-unify-8859 (&optional encode-only)
1151 "Set up translation tables for unifying characters from ISO 8859. 1149 "Set up translation tables for unifying characters from ISO 8859.
1152 1150
1153 On decoding, non-ASCII characters are mapped into the `iso-latin-1' 1151 On decoding, non-ASCII characters are mapped into the `iso-latin-1'
1154 and `mule-unicode-0100-24ff' charsets. On encoding, these are mapped 1152 and `mule-unicode-0100-24ff' charsets. On encoding, these are mapped
1157 With prefix arg, do unification on encoding only, i.e. don't unify 1155 With prefix arg, do unification on encoding only, i.e. don't unify
1158 everything on input operations." 1156 everything on input operations."
1159 (interactive "P") 1157 (interactive "P")
1160 (unless encode-only 1158 (unless encode-only
1161 ;; Unify 8859 on decoding. (Non-CCL coding systems only.) 1159 ;; Unify 8859 on decoding. (Non-CCL coding systems only.)
1162 (unify-8859-on-decoding-mode 1)) 1160 (if utf-8-fragment-on-decoding
1161 (map-char-table
1162 (lambda (k v)
1163 (if v (aset ucs-mule-to-mule-unicode v nil)))
1164 utf-8-translation-table-for-decode)
1165 ;; Reset in case it was changed.
1166 (map-char-table
1167 (lambda (k v)
1168 (if v (aset ucs-mule-to-mule-unicode v k)))
1169 utf-8-translation-table-for-decode))
1170 (set-char-table-parent standard-translation-table-for-decode
1171 ucs-mule-8859-to-mule-unicode)
1172 ;; Translate Quail input globally.
1173 (setq-default translation-table-for-input ucs-mule-to-mule-unicode)
1174 ;; In case these are set up, but we should use the global
1175 ;; translation table.
1176 (remove-hook 'quail-activate-hook 'ucs-quail-activate)
1177 (remove-hook 'minibuffer-setup-hook 'ucs-minibuffer-setup))
1163 ;; Adjust the 8859 coding systems to fragment the unified characters 1178 ;; Adjust the 8859 coding systems to fragment the unified characters
1164 ;; on encoding. 1179 ;; on encoding.
1165 (dolist (n '(1 2 3 4 5 7 8 9 14 15)) 1180 (dolist (n '(1 2 3 4 5 7 8 9 14 15))
1166 (let* ((coding-system 1181 (let* ((coding-system
1167 (coding-system-base (intern (format "iso-8859-%d" n)))) 1182 (coding-system-base (intern (format "iso-8859-%d" n))))
1172 ;; used after they've been registered, but we might as well 1187 ;; used after they've been registered, but we might as well
1173 ;; record them. Setting the parent here is a convenience. 1188 ;; record them. Setting the parent here is a convenience.
1174 (set-char-table-parent safe table) 1189 (set-char-table-parent safe table)
1175 ;; Update the table of what encodes to what. 1190 ;; Update the table of what encodes to what.
1176 (register-char-codings coding-system table) 1191 (register-char-codings coding-system table)
1177 (coding-system-put coding-system 'translation-table-for-encode table))) 1192 (coding-system-put coding-system 'translation-table-for-encode table)
1178 1193 (coding-system-put coding-system 'translation-table-for-input table)))
1179 ;;; The following works for the bundled coding systems, but it's 1194 ;; Arrange local translation tables for Quail input.
1180 ;;; better to use the Unicode-based ones and make it irrelevant. 1195 (add-hook 'quail-activate-hook 'ucs-quail-activate)
1181 1196 (add-hook 'minibuffer-setup-hook 'ucs-minibuffer-setup))
1182 ;;; ;; Update the Cyrillic special cases.
1183 ;;; ;; `translation-table-for-encode' doesn't work for CCL coding
1184 ;;; ;; systems, and `standard-translation-table-for-decode' isn't
1185 ;;; ;; applied.
1186 ;;; (let ((table (get 'cyrillic-koi8-r-encode-table 'translation-table)))
1187 ;;; (map-char-table
1188 ;;; (lambda (k v)
1189 ;;; (aset table
1190 ;;; (or (aref ucs-8859-5-encode-table k)
1191 ;;; k)
1192 ;;; v))
1193 ;;; table)
1194 ;;; (register-char-codings 'cyrillic-koi8 table))
1195 ;;; (let ((table (get 'cyrillic-koi8-r-nonascii-translation-table
1196 ;;; 'translation-table)))
1197 ;;; (map-char-table
1198 ;;; (lambda (k v)
1199 ;;; (if v (aset table k (or (aref ucs-mule-8859-to-mule-unicode v)
1200 ;;; v))))
1201 ;;; table))
1202 ;;; ;; Redefine this, since the orginal only translated 8859-5.
1203 ;;; (define-ccl-program ccl-encode-koi8
1204 ;;; `(1
1205 ;;; ((loop
1206 ;;; (read-multibyte-character r0 r1)
1207 ;;; (translate-character cyrillic-koi8-r-encode-table r0 r1)
1208 ;;; (write-repeat r1))))
1209 ;;; "CCL program to encode KOI8.")
1210 ;;; (let ((table (get 'cyrillic-alternativnyj-encode-table 'translation-table)))
1211 ;;; (map-char-table
1212 ;;; (lambda (k v)
1213 ;;; (aset table
1214 ;;; (or (aref ucs-8859-5-encode-table k)
1215 ;;; k)
1216 ;;; v))
1217 ;;; table)
1218 ;;; (register-char-codings 'cyrillic-alternativnyj table))
1219 ;;; (let ((table (get 'cyrillic-alternativnyj-nonascii-translation-table
1220 ;;; 'translation-table)))
1221 ;;; (map-char-table
1222 ;;; (lambda (k v)
1223 ;;; (if v (aset table
1224 ;;; k
1225 ;;; (or (aref ucs-mule-8859-to-mule-unicode v)
1226 ;;; v))))
1227 ;;; table))
1228 )
1229 1197
1230 (defun ucs-fragment-8859 (&optional encode-only) 1198 (defun ucs-fragment-8859 (&optional encode-only)
1231 "Undo the unification done by `ucs-unify-8859'. 1199 "Undo the unification done by `ucs-unify-8859'.
1232 With prefix arg, undo unification on encoding only, i.e. don't undo 1200 With prefix arg, undo unification on encoding only, i.e. don't undo
1233 unification on input operations." 1201 unification on input operations."
1234 (interactive "P") 1202 (interactive "P")
1235 ;; Maybe fix decoding. 1203 ;; Maybe fix decoding.
1236 (unless encode-only 1204 (unless encode-only
1237 ;; Unify 8859 on decoding. (Non-CCL coding systems only.) 1205 ;; Unify 8859 on decoding. (Non-CCL coding systems only.)
1238 (unify-8859-on-decoding-mode -1)) 1206 (set-char-table-parent standard-translation-table-for-decode nil)
1207 (setq-default translation-table-for-input nil))
1239 ;; Fix encoding. For each charset, remove the entries in 1208 ;; Fix encoding. For each charset, remove the entries in
1240 ;; `char-coding-system-table' added to its safe-chars table (as its 1209 ;; `char-coding-system-table' added to its safe-chars table (as its
1241 ;; parent). 1210 ;; parent).
1242 (dolist (n '(1 2 3 4 5 7 8 9 14 15)) 1211 (dolist (n '(1 2 3 4 5 7 8 9 14 15))
1243 (let* ((coding-system 1212 (let* ((coding-system
1251 (let ((codings (aref char-coding-system-table key))) 1220 (let ((codings (aref char-coding-system-table key)))
1252 (aset char-coding-system-table key 1221 (aset char-coding-system-table key
1253 (delq coding-system codings))))) 1222 (delq coding-system codings)))))
1254 (char-table-parent safe)) 1223 (char-table-parent safe))
1255 (set-char-table-parent safe nil) 1224 (set-char-table-parent safe nil)
1256 (coding-system-put coding-system 'translation-table-for-encode nil)))) 1225 (coding-system-put coding-system 'translation-table-for-encode nil)
1226 (coding-system-put coding-system 'translation-table-for-input nil)))
1227 (optimize-char-table char-coding-system-table)
1228 (remove-hook 'quail-activate-hook 'ucs-quail-activate)
1229 (remove-hook 'minibuffer-setup-hook 'ucs-minibuffer-setup))
1257 1230
1258 (define-minor-mode unify-8859-on-encoding-mode 1231 (define-minor-mode unify-8859-on-encoding-mode
1259 "Set up translation tables for unifying ISO 8859 characters on encoding. 1232 "Set up translation tables for unifying ISO 8859 characters on encoding.
1260 1233
1261 The ISO 8859 characters sets overlap, e.g. 8859-1 (Latin-1) and 1234 The ISO 8859 characters sets overlap, e.g. 8859-1 (Latin-1) and
1274 unified representation) in a buffer saved as Latin-9 will be encoded 1247 unified representation) in a buffer saved as Latin-9 will be encoded
1275 directly to a byte value 233. By default, in contrast, you would be 1248 directly to a byte value 233. By default, in contrast, you would be
1276 prompted for a general coding system to use for saving the file, which 1249 prompted for a general coding system to use for saving the file, which
1277 can cope with separate Latin-1 and Latin-9 representations of e-acute. 1250 can cope with separate Latin-1 and Latin-9 representations of e-acute.
1278 1251
1252 Also sets hooks that arrange `translation-table-for-input' to be set
1253 up locally when Quail input methods are activated. This will often
1254 allow input generated by Quail input methods to conform with what the
1255 buffer's file coding system can encode. Thus you could use a Latin-2
1256 input method to search for e-acute in a Latin-1 buffer.
1257
1279 See also command `unify-8859-on-decoding-mode'." 1258 See also command `unify-8859-on-decoding-mode'."
1280 :group 'mule 1259 :group 'mule
1281 :global t 1260 :global t
1282 :version "21.3" ; who knows...?
1283 :init-value nil 1261 :init-value nil
1284 (if unify-8859-on-encoding-mode 1262 (if unify-8859-on-encoding-mode
1285 (ucs-unify-8859 t) 1263 (ucs-unify-8859 t)
1286 (ucs-fragment-8859 t))) 1264 (ucs-fragment-8859 t)))
1287 1265
1266 (custom-add-version 'unify-8859-on-encoding-mode "21.4")
1267
1288 (define-minor-mode unify-8859-on-decoding-mode 1268 (define-minor-mode unify-8859-on-decoding-mode
1289 "Set up translation table for unifying ISO 8859 characters on decoding. 1269 "Set up translation tables for unifying ISO 8859 characters on decoding.
1290 On decoding -- i.e. input operations -- non-ASCII characters from the 1270 On decoding, i.e. input operations, non-ASCII characters from the
1291 built-in ISO 8859 charsets are unified by mapping them into the 1271 built-in ISO 8859 charsets are unified by mapping them into the
1292 `iso-latin-1' and `mule-unicode-0100-24ff' charsets. 1272 `iso-latin-1' and `mule-unicode-0100-24ff' charsets.
1293 1273
1294 This sets the parent of `standard-translation-table-for-decode'.
1295 Also sets `translation-table-for-input' globally, so that Quail input 1274 Also sets `translation-table-for-input' globally, so that Quail input
1296 methods produce unified characters. 1275 methods produce unified characters.
1297 1276
1298 See also command `unify-8859-on-encoding-mode'." 1277 See also command `unify-8859-on-encoding-mode' and the user option
1278 `utf-8-fragment-on-decoding'."
1299 :group 'mule 1279 :group 'mule
1300 :global t 1280 :global t
1301 :version "21.3" ; who knows...?
1302 :init-value nil 1281 :init-value nil
1303 (let ((table (if unify-8859-on-decoding-mode ucs-mule-8859-to-mule-unicode))) 1282 (if unify-8859-on-decoding-mode
1304 (set-char-table-parent standard-translation-table-for-decode table) 1283 (ucs-unify-8859)
1305 (setq-default translation-table-for-input table))) 1284 (ucs-fragment-8859)))
1285
1286 (custom-add-dependencies 'unify-8859-on-decoding-mode
1287 '(utf-8-fragment-on-decoding))
1288 (custom-add-version 'unify-8859-on-decoding-mode "21.4")
1306 1289
1307 (defun ucs-insert (arg) 1290 (defun ucs-insert (arg)
1308 "Insert the Emacs character representation of the given Unicode. 1291 "Insert the Emacs character representation of the given Unicode.
1309 Interactively, prompts for a hex string giving the code." 1292 Interactively, prompts for a hex string giving the code."
1310 (interactive "sUnicode (hex): ") 1293 (interactive "sUnicode (hex): ")
1311 (insert (or (decode-char 'ucs (if (integerp arg) 1294 (let ((c (decode-char 'ucs (if (integerp arg)
1312 arg 1295 arg
1313 (string-to-number arg 16))) 1296 (string-to-number arg 16)))))
1314 (error "Unknown Unicode character")))) 1297 (if c
1298 (insert c)
1299 (error "Character can't be decoded to UCS"))))
1315 1300
1316 ;;; Dealing with non-8859 character sets. 1301 ;;; Dealing with non-8859 character sets.
1317 1302
1318 ;; We only set up translation on encoding to utf-8. Also translation 1303 ;; We only set up translation on encoding to utf-8. Also translation
1319 ;; tables ucs-CS-encode-table are constructed for some coding systems 1304 ;; tables ucs-CS-encode-table are constructed for some coding systems
2456 (make-translation-table))))) 2441 (make-translation-table)))))
2457 (dolist (pair (symbol-value cs)) 2442 (dolist (pair (symbol-value cs))
2458 (aset ucs-mule-to-mule-unicode (car pair) (cdr pair)) 2443 (aset ucs-mule-to-mule-unicode (car pair) (cdr pair))
2459 (if encode-translator 2444 (if encode-translator
2460 (aset encode-translator (cdr pair) (car pair)))) 2445 (aset encode-translator (cdr pair) (car pair))))
2446 (if encode-translator
2447 (optimize-char-table encode-translator))
2461 (if (charsetp cs) 2448 (if (charsetp cs)
2462 (push cs safe-charsets) 2449 (push cs safe-charsets)
2463 (setq safe-charsets 2450 (setq safe-charsets
2464 (append (delq 'ascii (coding-system-get cs 'safe-charsets)) 2451 (append (delq 'ascii (coding-system-get cs 'safe-charsets))
2465 safe-charsets))))) 2452 safe-charsets)))
2453 (cond ((eq cs 'vietnamese-viscii)
2454 (coding-system-put 'vietnamese-viscii
2455 'translation-table-for-input
2456 encode-translator)
2457 (coding-system-put 'vietnamese-viqr
2458 'translation-table-for-input
2459 encode-translator))
2460 ((memq cs '(lao thai-tis620 tibetan-iso-8bit))
2461 (coding-system-put cs 'translation-table-for-input cs)))))
2462 (optimize-char-table ucs-mule-to-mule-unicode)
2466 (dolist (c safe-charsets) 2463 (dolist (c safe-charsets)
2467 (aset table (make-char c) t)) 2464 (aset table (make-char c) t))
2468 (coding-system-put 'mule-utf-8 'safe-charsets 2465 (coding-system-put 'mule-utf-8 'safe-charsets
2469 (append (coding-system-get 'mule-utf-8 'safe-charsets) 2466 (append (coding-system-get 'mule-utf-8 'safe-charsets)
2470 safe-charsets)) 2467 safe-charsets))
2471 (register-char-codings 'mule-utf-8 table))) 2468 (register-char-codings 'mule-utf-8 table)))
2472 2469
2470 (defvar translation-table-for-input (make-translation-table))
2471
2472 ;; Arrange to set up the translation table for Quail. This probably
2473 ;; isn't foolproof.
2474 (defun ucs-quail-activate ()
2475 "Set up an appropriate `translation-table-for-input' for current buffer.
2476 Intended to be added to `quail-activate-hook'."
2477 (let ((cs (coding-system-base buffer-file-coding-system)))
2478 (if (eq cs 'undecided)
2479 (setq cs (coding-system-base default-buffer-file-coding-system)))
2480 (if (coding-system-get cs 'translation-table-for-input)
2481 (set (make-variable-buffer-local 'translation-table-for-input)
2482 (coding-system-get cs 'translation-table-for-input)))))
2483
2484 ;; The minibuffer needs to acquire a `buffer-file-coding-system' for
2485 ;; the above to work in it.
2486 (defun ucs-minibuffer-setup ()
2487 "Set up an appropriate `buffer-file-coding-system' for current buffer.
2488 Does so by inheriting it from the cadr of the current buffer list.
2489 Intended to be added to `minibuffer-setup-hook'."
2490 (set (make-local-variable 'buffer-file-coding-system)
2491 (with-current-buffer (cadr (buffer-list))
2492 buffer-file-coding-system)))
2493
2494 ;; Modified to allow display of arbitrary characters with an
2495 ;; iso-10646-encoded (`Unicode') font.
2496 (define-ccl-program ccl-encode-unicode-font
2497 `(0
2498 ((if (r0 == ,(charset-id 'ascii))
2499 ((r2 = r1)
2500 (r1 = 0))
2501 (
2502 ;; Look for a translation for non-ASCII chars. For a 2D
2503 ;; charset, produce a single code for the translation.
2504 ;; Official 2D sets are in the charset id range [#x90,#x99],
2505 ;; private ones in the range [#xf0,#xfe] (with #xff not used).
2506 ;; Fixme: Is there a better way to do this?
2507 (r3 = (r0 >= #x90))
2508 (r3 &= (r0 <= #x99))
2509 (r3 |= (r0 >= #xf0))
2510 (if r3 ; 2D input
2511 (r1 = ((r1 << 7) | r2)))
2512 (translate-character ucs-mule-to-mule-unicode r0 r1)
2513 (r3 = (r0 >= #x90))
2514 (r3 &= (r0 <= #x99))
2515 (r3 |= (r0 >= #xf0))
2516 (if r3 ; 2D translation
2517 ((r2 = (r1 & 127))
2518 (r1 = (r1 >> 7))))
2519 (if (r0 == ,(charset-id 'latin-iso8859-1))
2520 ((r2 = (r1 + 128))
2521 (r1 = 0))
2522 (if (r0 == ,(charset-id 'mule-unicode-0100-24ff))
2523 ((r1 *= 96)
2524 (r1 += r2)
2525 (r1 += ,(- #x100 (* 32 96) 32))
2526 (r1 >8= 0)
2527 (r2 = r7))
2528 (if (r0 == ,(charset-id 'mule-unicode-2500-33ff))
2529 ((r1 *= 96)
2530 (r1 += r2)
2531 (r1 += ,(- #x2500 (* 32 96) 32))
2532 (r1 >8= 0)
2533 (r2 = r7))
2534 (if (r0 == ,(charset-id 'mule-unicode-e000-ffff))
2535 ((r1 *= 96)
2536 (r1 += r2)
2537 (r1 += ,(- #xe000 (* 32 96) 32))
2538 (r1 >8= 0)
2539 (r2 = r7))))))))))
2540 "Encode characters for display with iso10646 font.
2541 Translate through table `ucs-mule-to-mule-unicode' initially.")
2542
2543 (defalias 'ucs-tables-unload-hook 'ucs-fragment-8859)
2544
2473 (provide 'ucs-tables) 2545 (provide 'ucs-tables)
2474 2546
2475 ;;; ucs-tables.el ends here 2547 ;;; ucs-tables.el ends here