Mercurial > emacs
comparison lisp/international/ucs-tables.el @ 46506:a7f933a7b003
Optimize tables. Deal with some
non-8859 charsets.
(ucs-mule-to-mule-unicode): New.
(ucs-unify-8859): Use utf-8-fragment-on-decoding, set up Quail
translation.
(ucs-fragment-8859): Modified consistent with ucs-unify-8859.
(unify-8859-on-encoding-mode): Doc mod. Fix custom version.
(unify-8859-on-decoding-mode): Doc mod. Change code. Fix custom
version. Add custom dependencies.
(ucs-insert): Check for null from decode-char.
(translation-table-for-input, ucs-quail-activate)
(ucs-minibuffer-setup, ccl-encode-unicode-font)
(ucs-tables-unload-hook): New.
author | Dave Love <fx@gnu.org> |
---|---|
date | Wed, 17 Jul 2002 19:21:41 +0000 |
parents | aee6ae77d23b |
children | 6262acd6c7ac |
comparison
equal
deleted
inserted
replaced
46505:005d282a48ed | 46506:a7f933a7b003 |
---|---|
23 ;; Boston, MA 02111-1307, USA. | 23 ;; Boston, MA 02111-1307, USA. |
24 | 24 |
25 ;;; Commentary: | 25 ;;; Commentary: |
26 | 26 |
27 ;; This file provides tables mapping between Unicode numbers and | 27 ;; This file provides tables mapping between Unicode numbers and |
28 ;; emacs-mule characters from the iso8859 charsets (and others). It | 28 ;; emacs-mule characters from the iso-8859 charsets (and others). It |
29 ;; also provides some auxiliary functions. | 29 ;; also provides some auxiliary functions. |
30 | 30 |
31 ;; These tables are used to construct other mappings between the Mule | 31 ;; These tables are used to construct other mappings between the Mule |
32 ;; iso8859 charsets and the emacs-unicode charsets and a table that | 32 ;; iso8859 charsets and the emacs-unicode charsets and a table that |
33 ;; unifies iso8859 characters using a single charset as far as | 33 ;; unifies iso-8859 characters using a single charset as far as |
34 ;; possible. These tables are used by latin1-disp.el to display some | 34 ;; possible. These tables are used by latin1-disp.el to display some |
35 ;; Unicode characters without a Unicode font and by utf-8.el to unify | 35 ;; Unicode characters without a Unicode font and by utf-8.el to unify |
36 ;; Latin-N as far as possible on encoding. | 36 ;; Latin-N as far as possible on encoding. |
37 | 37 |
38 ;; More drastically, they can be used to unify 8859 into Latin-1 plus | 38 ;; More drastically, they can be used to unify 8859 into Latin-1 plus |
42 ;; which are supposed to contain distinct 8859 charsets. Also, it can | 42 ;; which are supposed to contain distinct 8859 charsets. Also, it can |
43 ;; make reading and writing of emacs-mule and iso-2022-based encodings | 43 ;; make reading and writing of emacs-mule and iso-2022-based encodings |
44 ;; not idempotent. | 44 ;; not idempotent. |
45 | 45 |
46 ;; Global minor modes are provided to unify on encoding and decoding. | 46 ;; Global minor modes are provided to unify on encoding and decoding. |
47 | 47 ;; These could be extended to non-iso-8859 charsets. However 8859 is |
48 ;; The translation table `ucs-mule-to-mule-unicode' is populated. | 48 ;; all that users normally care about unifying although, for instance, |
49 ;; This is used by the `mule-utf-8' coding system to encode extra | 49 ;; Greek occurs in as many as nine Emacs charsets. |
50 ;; characters. | 50 |
51 ;; The translation table `ucs-mule-to-mule-unicode' is populated, | |
52 ;; which could be used for more general unification on decoding. This | |
53 ;; is used by the `mule-utf-8' coding system to encode extra | |
54 ;; characters, and also by the coding systems set up by code-pages.el. | |
55 ;; The decoding tables here take account of | |
56 ;; `utf-8-fragment-on-decoding' which may specify decoding Greek and | |
57 ;; Cyrillic into 8859 charsets. | |
58 | |
59 ;; Unification also puts a `translation-table-for-input' property on | |
60 ;; relevant coding coding systems and arranges for the | |
61 ;; `translation-table-for-input' variable to be set either globally or | |
62 ;; locally. This is used by Quail input methods to translate input | |
63 ;; characters appropriately for the buffer's coding system (if | |
64 ;; possible). Unification on decoding sets it globally to translate | |
65 ;; to Unicode. Unification on encoding uses hooks to set it up | |
66 ;; locally to buffers. Thus in the latter case, typing `"a' into a | |
67 ;; Latin-1 buffer using the `latin-2-prefix' method translates the | |
68 ;; generated latin-iso8859-2 `,Bd(B' into latin-iso8859-1 `,Ad(B'. | |
69 | |
70 ;; NB, this code depends on the default value of | |
71 ;; `enable-character-translation'. (Making it nil would anyway lead | |
72 ;; to inconsistent behaviour between CCL-based coding systems which | |
73 ;; use explicit translation tables and the rest.) | |
51 | 74 |
52 ;; Command `ucs-insert' is convenient for inserting a given Unicode. | 75 ;; Command `ucs-insert' is convenient for inserting a given Unicode. |
53 ;; (See also the `ucs' input method.) | 76 ;; (See also the `ucs' input method.) |
77 | |
78 ;; A replacement CCL program is provided which allows characters in | |
79 ;; the `ucs-mule-to-mule-unicode' table to be displayed with an | |
80 ;; iso-10646-encoded font. E.g. to use a `Unicode' font for Cyrillic: | |
81 ;; | |
82 ;; (set-fontset-font "fontset-startup" | |
83 ;; (cons (make-char 'cyrillic-iso8859-5 160) | |
84 ;; (make-char 'cyrillic-iso8859-5 255)) | |
85 ;; '(nil . "ISO10646-1")) | |
54 | 86 |
55 ;;; Code: | 87 ;;; Code: |
56 | 88 |
57 ;;; Define tables, to be populated later. | 89 ;;; Define tables, to be populated later. |
58 | 90 |
1065 l) | 1097 l) |
1066 (while (< i 256) | 1098 (while (< i 256) |
1067 (push (cons (make-char 'latin-iso8859-1 (- i 128)) i) | 1099 (push (cons (make-char 'latin-iso8859-1 (- i 128)) i) |
1068 l) | 1100 l) |
1069 (setq i (1+ i))) | 1101 (setq i (1+ i))) |
1070 (nreverse l))) | 1102 (nreverse l)))) |
1071 | |
1072 ;; (case-table (standard-case-table)) | |
1073 ;; (syntax-table (standard-syntax-table)) | |
1074 ) | |
1075 | 1103 |
1076 ;; Convert the lists to the basic char tables. | 1104 ;; Convert the lists to the basic char tables. |
1077 (dolist (n (list 15 14 9 8 7 5 4 3 2 1)) | 1105 (dolist (n (list 15 14 9 8 7 5 4 3 2 1)) |
1078 (let ((alist (symbol-value (intern (format "ucs-8859-%d-alist" n))))) | 1106 (let ((alist (symbol-value (intern (format "ucs-8859-%d-alist" n))))) |
1079 (dolist (pair alist) | 1107 (dolist (pair alist) |
1082 (mu (decode-char 'ucs (cdr pair)))) | 1110 (mu (decode-char 'ucs (cdr pair)))) |
1083 (aset ucs-mule-8859-to-ucs-table mule uc) | 1111 (aset ucs-mule-8859-to-ucs-table mule uc) |
1084 ;; (aset ucs-ucs-to-mule-8859-table uc mule) | 1112 ;; (aset ucs-ucs-to-mule-8859-table uc mule) |
1085 ;; (aset ucs-mule-unicode-to-mule-8859 mu mule) | 1113 ;; (aset ucs-mule-unicode-to-mule-8859 mu mule) |
1086 (aset ucs-mule-8859-to-mule-unicode mule mu) | 1114 (aset ucs-mule-8859-to-mule-unicode mule mu) |
1087 (aset ucs-mule-to-mule-unicode mule mu))) | 1115 (aset ucs-mule-to-mule-unicode mule mu))))) |
1088 ;; I think this is actually done OK in characters.el. | 1116 ;; The table optimizing here and elsewhere probably isn't very |
1089 ;; Probably things like accents shouldn't have word syntax, but the | 1117 ;; useful, but seems good practice. |
1090 ;; Latin-N syntax tables currently aren't consistent for such | 1118 (optimize-char-table ucs-mule-to-mule-unicode) |
1091 ;; characters anyhow. | 1119 (optimize-char-table ucs-mule-8859-to-mule-unicode) |
1092 ;; ;; Make the mule-unicode characters inherit syntax and case info | |
1093 ;; ;; if they don't already have it. | |
1094 ;; (dolist (pair alist) | |
1095 ;; (let ((mule (car pair)) | |
1096 ;; (uc (cdr pair)) | |
1097 ;; (mu (decode-char 'ucs (cdr pair)))) | |
1098 ;; (let ((syntax (aref syntax-table mule))) | |
1099 ;; (if (eq mule (downcase mule)) | |
1100 ;; (if (eq mule (upcase mule)) ; non-letter or uncased letter | |
1101 ;; (progn | |
1102 ;; (if (= 4 (car syntax)) ; left delim | |
1103 ;; (progn | |
1104 ;; (aset syntax-table | |
1105 ;; mu | |
1106 ;; (cons 4 (aref ucs-mule-8859-to-mule-unicode | |
1107 ;; (cdr syntax)))) | |
1108 ;; (aset syntax-table | |
1109 ;; (aref ucs-mule-8859-to-mule-unicode | |
1110 ;; (cdr syntax)) | |
1111 ;; (cons 5 mu))) | |
1112 ;; (aset syntax-table mu syntax)) | |
1113 ;; (aset case-table mu mu))) | |
1114 ;; ;; Upper case letter | |
1115 ;; (let ((lower (aref ucs-mule-8859-to-mule-unicode | |
1116 ;; (aref case-table mule)))) | |
1117 ;; (aset case-table mu lower) | |
1118 ;; (aset case-table lower lower) | |
1119 ;; (modify-syntax-entry lower "w " syntax-table) | |
1120 ;; (modify-syntax-entry mu "w " syntax-table)))))) | |
1121 )) | |
1122 ;; Derive tables that can be used as per-coding-system | 1120 ;; Derive tables that can be used as per-coding-system |
1123 ;; `translation-table-for-encode's. | 1121 ;; `translation-table-for-encode's. |
1124 (dolist (n (list 15 14 9 8 7 5 4 3 2 1)) | 1122 (dolist (n (list 15 14 9 8 7 5 4 3 2 1)) |
1125 (let* ((alist (symbol-value (intern (format "ucs-8859-%d-alist" n)))) | 1123 (let* ((alist (symbol-value (intern (format "ucs-8859-%d-alist" n)))) |
1126 (encode-translator (set (intern (format "ucs-8859-%d-encode-table" | 1124 (encode-translator (set (intern (format "ucs-8859-%d-encode-table" |
1136 ;; unicode as some character in this set. | 1134 ;; unicode as some character in this set. |
1137 (map-char-table (lambda (k v) | 1135 (map-char-table (lambda (k v) |
1138 (if (and (setq elt (rassq v alist)) | 1136 (if (and (setq elt (rassq v alist)) |
1139 (not (assq k alist))) | 1137 (not (assq k alist))) |
1140 (aset encode-translator k (car elt)))) | 1138 (aset encode-translator k (car elt)))) |
1141 ucs-mule-8859-to-ucs-table)))) | 1139 ucs-mule-8859-to-ucs-table) |
1140 (optimize-char-table encode-translator)))) | |
1142 | 1141 |
1143 ;; Register for use in CCL. | 1142 ;; Register for use in CCL. |
1144 (define-translation-table 'ucs-mule-8859-to-mule-unicode | 1143 (define-translation-table 'ucs-mule-8859-to-mule-unicode |
1145 ucs-mule-8859-to-mule-unicode) | 1144 ucs-mule-8859-to-mule-unicode) |
1146 | 1145 (define-translation-table 'ucs-mule-to-mule-unicode |
1147 ;; Fixme: Make this reversible, which means frobbing | 1146 ucs-mule-to-mule-unicode) |
1148 ;; `char-coding-system-table' directly to remove what we added -- see | 1147 |
1149 ;; codepages.el. Also make it a user option. | |
1150 (defun ucs-unify-8859 (&optional encode-only) | 1148 (defun ucs-unify-8859 (&optional encode-only) |
1151 "Set up translation tables for unifying characters from ISO 8859. | 1149 "Set up translation tables for unifying characters from ISO 8859. |
1152 | 1150 |
1153 On decoding, non-ASCII characters are mapped into the `iso-latin-1' | 1151 On decoding, non-ASCII characters are mapped into the `iso-latin-1' |
1154 and `mule-unicode-0100-24ff' charsets. On encoding, these are mapped | 1152 and `mule-unicode-0100-24ff' charsets. On encoding, these are mapped |
1157 With prefix arg, do unification on encoding only, i.e. don't unify | 1155 With prefix arg, do unification on encoding only, i.e. don't unify |
1158 everything on input operations." | 1156 everything on input operations." |
1159 (interactive "P") | 1157 (interactive "P") |
1160 (unless encode-only | 1158 (unless encode-only |
1161 ;; Unify 8859 on decoding. (Non-CCL coding systems only.) | 1159 ;; Unify 8859 on decoding. (Non-CCL coding systems only.) |
1162 (unify-8859-on-decoding-mode 1)) | 1160 (if utf-8-fragment-on-decoding |
1161 (map-char-table | |
1162 (lambda (k v) | |
1163 (if v (aset ucs-mule-to-mule-unicode v nil))) | |
1164 utf-8-translation-table-for-decode) | |
1165 ;; Reset in case it was changed. | |
1166 (map-char-table | |
1167 (lambda (k v) | |
1168 (if v (aset ucs-mule-to-mule-unicode v k))) | |
1169 utf-8-translation-table-for-decode)) | |
1170 (set-char-table-parent standard-translation-table-for-decode | |
1171 ucs-mule-8859-to-mule-unicode) | |
1172 ;; Translate Quail input globally. | |
1173 (setq-default translation-table-for-input ucs-mule-to-mule-unicode) | |
1174 ;; In case these are set up, but we should use the global | |
1175 ;; translation table. | |
1176 (remove-hook 'quail-activate-hook 'ucs-quail-activate) | |
1177 (remove-hook 'minibuffer-setup-hook 'ucs-minibuffer-setup)) | |
1163 ;; Adjust the 8859 coding systems to fragment the unified characters | 1178 ;; Adjust the 8859 coding systems to fragment the unified characters |
1164 ;; on encoding. | 1179 ;; on encoding. |
1165 (dolist (n '(1 2 3 4 5 7 8 9 14 15)) | 1180 (dolist (n '(1 2 3 4 5 7 8 9 14 15)) |
1166 (let* ((coding-system | 1181 (let* ((coding-system |
1167 (coding-system-base (intern (format "iso-8859-%d" n)))) | 1182 (coding-system-base (intern (format "iso-8859-%d" n)))) |
1172 ;; used after they've been registered, but we might as well | 1187 ;; used after they've been registered, but we might as well |
1173 ;; record them. Setting the parent here is a convenience. | 1188 ;; record them. Setting the parent here is a convenience. |
1174 (set-char-table-parent safe table) | 1189 (set-char-table-parent safe table) |
1175 ;; Update the table of what encodes to what. | 1190 ;; Update the table of what encodes to what. |
1176 (register-char-codings coding-system table) | 1191 (register-char-codings coding-system table) |
1177 (coding-system-put coding-system 'translation-table-for-encode table))) | 1192 (coding-system-put coding-system 'translation-table-for-encode table) |
1178 | 1193 (coding-system-put coding-system 'translation-table-for-input table))) |
1179 ;;; The following works for the bundled coding systems, but it's | 1194 ;; Arrange local translation tables for Quail input. |
1180 ;;; better to use the Unicode-based ones and make it irrelevant. | 1195 (add-hook 'quail-activate-hook 'ucs-quail-activate) |
1181 | 1196 (add-hook 'minibuffer-setup-hook 'ucs-minibuffer-setup)) |
1182 ;;; ;; Update the Cyrillic special cases. | |
1183 ;;; ;; `translation-table-for-encode' doesn't work for CCL coding | |
1184 ;;; ;; systems, and `standard-translation-table-for-decode' isn't | |
1185 ;;; ;; applied. | |
1186 ;;; (let ((table (get 'cyrillic-koi8-r-encode-table 'translation-table))) | |
1187 ;;; (map-char-table | |
1188 ;;; (lambda (k v) | |
1189 ;;; (aset table | |
1190 ;;; (or (aref ucs-8859-5-encode-table k) | |
1191 ;;; k) | |
1192 ;;; v)) | |
1193 ;;; table) | |
1194 ;;; (register-char-codings 'cyrillic-koi8 table)) | |
1195 ;;; (let ((table (get 'cyrillic-koi8-r-nonascii-translation-table | |
1196 ;;; 'translation-table))) | |
1197 ;;; (map-char-table | |
1198 ;;; (lambda (k v) | |
1199 ;;; (if v (aset table k (or (aref ucs-mule-8859-to-mule-unicode v) | |
1200 ;;; v)))) | |
1201 ;;; table)) | |
1202 ;;; ;; Redefine this, since the orginal only translated 8859-5. | |
1203 ;;; (define-ccl-program ccl-encode-koi8 | |
1204 ;;; `(1 | |
1205 ;;; ((loop | |
1206 ;;; (read-multibyte-character r0 r1) | |
1207 ;;; (translate-character cyrillic-koi8-r-encode-table r0 r1) | |
1208 ;;; (write-repeat r1)))) | |
1209 ;;; "CCL program to encode KOI8.") | |
1210 ;;; (let ((table (get 'cyrillic-alternativnyj-encode-table 'translation-table))) | |
1211 ;;; (map-char-table | |
1212 ;;; (lambda (k v) | |
1213 ;;; (aset table | |
1214 ;;; (or (aref ucs-8859-5-encode-table k) | |
1215 ;;; k) | |
1216 ;;; v)) | |
1217 ;;; table) | |
1218 ;;; (register-char-codings 'cyrillic-alternativnyj table)) | |
1219 ;;; (let ((table (get 'cyrillic-alternativnyj-nonascii-translation-table | |
1220 ;;; 'translation-table))) | |
1221 ;;; (map-char-table | |
1222 ;;; (lambda (k v) | |
1223 ;;; (if v (aset table | |
1224 ;;; k | |
1225 ;;; (or (aref ucs-mule-8859-to-mule-unicode v) | |
1226 ;;; v)))) | |
1227 ;;; table)) | |
1228 ) | |
1229 | 1197 |
1230 (defun ucs-fragment-8859 (&optional encode-only) | 1198 (defun ucs-fragment-8859 (&optional encode-only) |
1231 "Undo the unification done by `ucs-unify-8859'. | 1199 "Undo the unification done by `ucs-unify-8859'. |
1232 With prefix arg, undo unification on encoding only, i.e. don't undo | 1200 With prefix arg, undo unification on encoding only, i.e. don't undo |
1233 unification on input operations." | 1201 unification on input operations." |
1234 (interactive "P") | 1202 (interactive "P") |
1235 ;; Maybe fix decoding. | 1203 ;; Maybe fix decoding. |
1236 (unless encode-only | 1204 (unless encode-only |
1237 ;; Unify 8859 on decoding. (Non-CCL coding systems only.) | 1205 ;; Unify 8859 on decoding. (Non-CCL coding systems only.) |
1238 (unify-8859-on-decoding-mode -1)) | 1206 (set-char-table-parent standard-translation-table-for-decode nil) |
1207 (setq-default translation-table-for-input nil)) | |
1239 ;; Fix encoding. For each charset, remove the entries in | 1208 ;; Fix encoding. For each charset, remove the entries in |
1240 ;; `char-coding-system-table' added to its safe-chars table (as its | 1209 ;; `char-coding-system-table' added to its safe-chars table (as its |
1241 ;; parent). | 1210 ;; parent). |
1242 (dolist (n '(1 2 3 4 5 7 8 9 14 15)) | 1211 (dolist (n '(1 2 3 4 5 7 8 9 14 15)) |
1243 (let* ((coding-system | 1212 (let* ((coding-system |
1251 (let ((codings (aref char-coding-system-table key))) | 1220 (let ((codings (aref char-coding-system-table key))) |
1252 (aset char-coding-system-table key | 1221 (aset char-coding-system-table key |
1253 (delq coding-system codings))))) | 1222 (delq coding-system codings))))) |
1254 (char-table-parent safe)) | 1223 (char-table-parent safe)) |
1255 (set-char-table-parent safe nil) | 1224 (set-char-table-parent safe nil) |
1256 (coding-system-put coding-system 'translation-table-for-encode nil)))) | 1225 (coding-system-put coding-system 'translation-table-for-encode nil) |
1226 (coding-system-put coding-system 'translation-table-for-input nil))) | |
1227 (optimize-char-table char-coding-system-table) | |
1228 (remove-hook 'quail-activate-hook 'ucs-quail-activate) | |
1229 (remove-hook 'minibuffer-setup-hook 'ucs-minibuffer-setup)) | |
1257 | 1230 |
1258 (define-minor-mode unify-8859-on-encoding-mode | 1231 (define-minor-mode unify-8859-on-encoding-mode |
1259 "Set up translation tables for unifying ISO 8859 characters on encoding. | 1232 "Set up translation tables for unifying ISO 8859 characters on encoding. |
1260 | 1233 |
1261 The ISO 8859 characters sets overlap, e.g. 8859-1 (Latin-1) and | 1234 The ISO 8859 characters sets overlap, e.g. 8859-1 (Latin-1) and |
1274 unified representation) in a buffer saved as Latin-9 will be encoded | 1247 unified representation) in a buffer saved as Latin-9 will be encoded |
1275 directly to a byte value 233. By default, in contrast, you would be | 1248 directly to a byte value 233. By default, in contrast, you would be |
1276 prompted for a general coding system to use for saving the file, which | 1249 prompted for a general coding system to use for saving the file, which |
1277 can cope with separate Latin-1 and Latin-9 representations of e-acute. | 1250 can cope with separate Latin-1 and Latin-9 representations of e-acute. |
1278 | 1251 |
1252 Also sets hooks that arrange `translation-table-for-input' to be set | |
1253 up locally when Quail input methods are activated. This will often | |
1254 allow input generated by Quail input methods to conform with what the | |
1255 buffer's file coding system can encode. Thus you could use a Latin-2 | |
1256 input method to search for e-acute in a Latin-1 buffer. | |
1257 | |
1279 See also command `unify-8859-on-decoding-mode'." | 1258 See also command `unify-8859-on-decoding-mode'." |
1280 :group 'mule | 1259 :group 'mule |
1281 :global t | 1260 :global t |
1282 :version "21.3" ; who knows...? | |
1283 :init-value nil | 1261 :init-value nil |
1284 (if unify-8859-on-encoding-mode | 1262 (if unify-8859-on-encoding-mode |
1285 (ucs-unify-8859 t) | 1263 (ucs-unify-8859 t) |
1286 (ucs-fragment-8859 t))) | 1264 (ucs-fragment-8859 t))) |
1287 | 1265 |
1266 (custom-add-version 'unify-8859-on-encoding-mode "21.4") | |
1267 | |
1288 (define-minor-mode unify-8859-on-decoding-mode | 1268 (define-minor-mode unify-8859-on-decoding-mode |
1289 "Set up translation table for unifying ISO 8859 characters on decoding. | 1269 "Set up translation tables for unifying ISO 8859 characters on decoding. |
1290 On decoding -- i.e. input operations -- non-ASCII characters from the | 1270 On decoding, i.e. input operations, non-ASCII characters from the |
1291 built-in ISO 8859 charsets are unified by mapping them into the | 1271 built-in ISO 8859 charsets are unified by mapping them into the |
1292 `iso-latin-1' and `mule-unicode-0100-24ff' charsets. | 1272 `iso-latin-1' and `mule-unicode-0100-24ff' charsets. |
1293 | 1273 |
1294 This sets the parent of `standard-translation-table-for-decode'. | |
1295 Also sets `translation-table-for-input' globally, so that Quail input | 1274 Also sets `translation-table-for-input' globally, so that Quail input |
1296 methods produce unified characters. | 1275 methods produce unified characters. |
1297 | 1276 |
1298 See also command `unify-8859-on-encoding-mode'." | 1277 See also command `unify-8859-on-encoding-mode' and the user option |
1278 `utf-8-fragment-on-decoding'." | |
1299 :group 'mule | 1279 :group 'mule |
1300 :global t | 1280 :global t |
1301 :version "21.3" ; who knows...? | |
1302 :init-value nil | 1281 :init-value nil |
1303 (let ((table (if unify-8859-on-decoding-mode ucs-mule-8859-to-mule-unicode))) | 1282 (if unify-8859-on-decoding-mode |
1304 (set-char-table-parent standard-translation-table-for-decode table) | 1283 (ucs-unify-8859) |
1305 (setq-default translation-table-for-input table))) | 1284 (ucs-fragment-8859))) |
1285 | |
1286 (custom-add-dependencies 'unify-8859-on-decoding-mode | |
1287 '(utf-8-fragment-on-decoding)) | |
1288 (custom-add-version 'unify-8859-on-decoding-mode "21.4") | |
1306 | 1289 |
1307 (defun ucs-insert (arg) | 1290 (defun ucs-insert (arg) |
1308 "Insert the Emacs character representation of the given Unicode. | 1291 "Insert the Emacs character representation of the given Unicode. |
1309 Interactively, prompts for a hex string giving the code." | 1292 Interactively, prompts for a hex string giving the code." |
1310 (interactive "sUnicode (hex): ") | 1293 (interactive "sUnicode (hex): ") |
1311 (insert (or (decode-char 'ucs (if (integerp arg) | 1294 (let ((c (decode-char 'ucs (if (integerp arg) |
1312 arg | 1295 arg |
1313 (string-to-number arg 16))) | 1296 (string-to-number arg 16))))) |
1314 (error "Unknown Unicode character")))) | 1297 (if c |
1298 (insert c) | |
1299 (error "Character can't be decoded to UCS")))) | |
1315 | 1300 |
1316 ;;; Dealing with non-8859 character sets. | 1301 ;;; Dealing with non-8859 character sets. |
1317 | 1302 |
1318 ;; We only set up translation on encoding to utf-8. Also translation | 1303 ;; We only set up translation on encoding to utf-8. Also translation |
1319 ;; tables ucs-CS-encode-table are constructed for some coding systems | 1304 ;; tables ucs-CS-encode-table are constructed for some coding systems |
2456 (make-translation-table))))) | 2441 (make-translation-table))))) |
2457 (dolist (pair (symbol-value cs)) | 2442 (dolist (pair (symbol-value cs)) |
2458 (aset ucs-mule-to-mule-unicode (car pair) (cdr pair)) | 2443 (aset ucs-mule-to-mule-unicode (car pair) (cdr pair)) |
2459 (if encode-translator | 2444 (if encode-translator |
2460 (aset encode-translator (cdr pair) (car pair)))) | 2445 (aset encode-translator (cdr pair) (car pair)))) |
2446 (if encode-translator | |
2447 (optimize-char-table encode-translator)) | |
2461 (if (charsetp cs) | 2448 (if (charsetp cs) |
2462 (push cs safe-charsets) | 2449 (push cs safe-charsets) |
2463 (setq safe-charsets | 2450 (setq safe-charsets |
2464 (append (delq 'ascii (coding-system-get cs 'safe-charsets)) | 2451 (append (delq 'ascii (coding-system-get cs 'safe-charsets)) |
2465 safe-charsets))))) | 2452 safe-charsets))) |
2453 (cond ((eq cs 'vietnamese-viscii) | |
2454 (coding-system-put 'vietnamese-viscii | |
2455 'translation-table-for-input | |
2456 encode-translator) | |
2457 (coding-system-put 'vietnamese-viqr | |
2458 'translation-table-for-input | |
2459 encode-translator)) | |
2460 ((memq cs '(lao thai-tis620 tibetan-iso-8bit)) | |
2461 (coding-system-put cs 'translation-table-for-input cs))))) | |
2462 (optimize-char-table ucs-mule-to-mule-unicode) | |
2466 (dolist (c safe-charsets) | 2463 (dolist (c safe-charsets) |
2467 (aset table (make-char c) t)) | 2464 (aset table (make-char c) t)) |
2468 (coding-system-put 'mule-utf-8 'safe-charsets | 2465 (coding-system-put 'mule-utf-8 'safe-charsets |
2469 (append (coding-system-get 'mule-utf-8 'safe-charsets) | 2466 (append (coding-system-get 'mule-utf-8 'safe-charsets) |
2470 safe-charsets)) | 2467 safe-charsets)) |
2471 (register-char-codings 'mule-utf-8 table))) | 2468 (register-char-codings 'mule-utf-8 table))) |
2472 | 2469 |
2470 (defvar translation-table-for-input (make-translation-table)) | |
2471 | |
2472 ;; Arrange to set up the translation table for Quail. This probably | |
2473 ;; isn't foolproof. | |
2474 (defun ucs-quail-activate () | |
2475 "Set up an appropriate `translation-table-for-input' for current buffer. | |
2476 Intended to be added to `quail-activate-hook'." | |
2477 (let ((cs (coding-system-base buffer-file-coding-system))) | |
2478 (if (eq cs 'undecided) | |
2479 (setq cs (coding-system-base default-buffer-file-coding-system))) | |
2480 (if (coding-system-get cs 'translation-table-for-input) | |
2481 (set (make-variable-buffer-local 'translation-table-for-input) | |
2482 (coding-system-get cs 'translation-table-for-input))))) | |
2483 | |
2484 ;; The minibuffer needs to acquire a `buffer-file-coding-system' for | |
2485 ;; the above to work in it. | |
2486 (defun ucs-minibuffer-setup () | |
2487 "Set up an appropriate `buffer-file-coding-system' for current buffer. | |
2488 Does so by inheriting it from the cadr of the current buffer list. | |
2489 Intended to be added to `minibuffer-setup-hook'." | |
2490 (set (make-local-variable 'buffer-file-coding-system) | |
2491 (with-current-buffer (cadr (buffer-list)) | |
2492 buffer-file-coding-system))) | |
2493 | |
2494 ;; Modified to allow display of arbitrary characters with an | |
2495 ;; iso-10646-encoded (`Unicode') font. | |
2496 (define-ccl-program ccl-encode-unicode-font | |
2497 `(0 | |
2498 ((if (r0 == ,(charset-id 'ascii)) | |
2499 ((r2 = r1) | |
2500 (r1 = 0)) | |
2501 ( | |
2502 ;; Look for a translation for non-ASCII chars. For a 2D | |
2503 ;; charset, produce a single code for the translation. | |
2504 ;; Official 2D sets are in the charset id range [#x90,#x99], | |
2505 ;; private ones in the range [#xf0,#xfe] (with #xff not used). | |
2506 ;; Fixme: Is there a better way to do this? | |
2507 (r3 = (r0 >= #x90)) | |
2508 (r3 &= (r0 <= #x99)) | |
2509 (r3 |= (r0 >= #xf0)) | |
2510 (if r3 ; 2D input | |
2511 (r1 = ((r1 << 7) | r2))) | |
2512 (translate-character ucs-mule-to-mule-unicode r0 r1) | |
2513 (r3 = (r0 >= #x90)) | |
2514 (r3 &= (r0 <= #x99)) | |
2515 (r3 |= (r0 >= #xf0)) | |
2516 (if r3 ; 2D translation | |
2517 ((r2 = (r1 & 127)) | |
2518 (r1 = (r1 >> 7)))) | |
2519 (if (r0 == ,(charset-id 'latin-iso8859-1)) | |
2520 ((r2 = (r1 + 128)) | |
2521 (r1 = 0)) | |
2522 (if (r0 == ,(charset-id 'mule-unicode-0100-24ff)) | |
2523 ((r1 *= 96) | |
2524 (r1 += r2) | |
2525 (r1 += ,(- #x100 (* 32 96) 32)) | |
2526 (r1 >8= 0) | |
2527 (r2 = r7)) | |
2528 (if (r0 == ,(charset-id 'mule-unicode-2500-33ff)) | |
2529 ((r1 *= 96) | |
2530 (r1 += r2) | |
2531 (r1 += ,(- #x2500 (* 32 96) 32)) | |
2532 (r1 >8= 0) | |
2533 (r2 = r7)) | |
2534 (if (r0 == ,(charset-id 'mule-unicode-e000-ffff)) | |
2535 ((r1 *= 96) | |
2536 (r1 += r2) | |
2537 (r1 += ,(- #xe000 (* 32 96) 32)) | |
2538 (r1 >8= 0) | |
2539 (r2 = r7)))))))))) | |
2540 "Encode characters for display with iso10646 font. | |
2541 Translate through table `ucs-mule-to-mule-unicode' initially.") | |
2542 | |
2543 (defalias 'ucs-tables-unload-hook 'ucs-fragment-8859) | |
2544 | |
2473 (provide 'ucs-tables) | 2545 (provide 'ucs-tables) |
2474 | 2546 |
2475 ;;; ucs-tables.el ends here | 2547 ;;; ucs-tables.el ends here |