Mercurial > emacs
comparison lisp/international/utf-8.el @ 57727:c3945be39e09
(utf-translate-cjk-unicode-range-string):
New variable.
(utf-translate-cjk-set-unicode-range): New function.
(utf-translate-cjk-unicode-range): Make it customizable.
(utf-8-post-read-conversion): Use
utf-translate-cjk-unicode-range-string.
author | Kenichi Handa <handa@m17n.org> |
---|---|
date | Wed, 27 Oct 2004 06:01:59 +0000 |
parents | 752ef76fcc08 |
children | e425df7605c9 |
comparison
equal
deleted
inserted
replaced
57726:66e97a54985f | 57727:c3945be39e09 |
---|---|
195 chinese-big5-1 chinese-big5-2 | 195 chinese-big5-1 chinese-big5-2 |
196 japanese-jisx0208 japanese-jisx0212 | 196 japanese-jisx0208 japanese-jisx0212 |
197 korean-ksc5601) | 197 korean-ksc5601) |
198 "List of charsets supported by `utf-translate-cjk-mode'.") | 198 "List of charsets supported by `utf-translate-cjk-mode'.") |
199 | 199 |
200 (defconst utf-translate-cjk-unicode-range | 200 (defvar utf-translate-cjk-lang-env nil |
201 '((#x2e80 . #xd7a3) | 201 "Language environment in which tables for `utf-translate-cjk-mode' is loaded. |
202 (#xff00 . #xffef)) | 202 The value nil means that the tables are not yet loaded.") |
203 "List of Unicode code ranges supported by `utf-translate-cjk-mode'.") | 203 |
204 (defvar utf-translate-cjk-unicode-range) | |
205 | |
206 ;; String generated from utf-translate-cjk-unicode-range. It is | |
207 ;; suitable for an argument to skip-chars-forward. | |
208 (defvar utf-translate-cjk-unicode-range-string nil) | |
209 | |
210 (defun utf-translate-cjk-set-unicode-range (range) | |
211 (setq utf-translate-cjk-unicode-range range) | |
212 (setq utf-translate-cjk-unicode-range-string | |
213 (let ((decode-char-no-trans | |
214 #'(lambda (x) | |
215 (cond ((< x #x100) (make-char 'latin-iso8859-1 x)) | |
216 ((< x #x2500) | |
217 (setq x (- x #x100)) | |
218 (make-char 'mule-unicode-0100-24ff | |
219 (+ (/ x 96) 32) (+ (% x 96) 32))) | |
220 ((< x #x3400) | |
221 (setq x (- x #x2500)) | |
222 (make-char 'mule-unicode-2500-33ff | |
223 (+ (/ x 96) 32) (+ (% x 96) 32))) | |
224 (t | |
225 (setq x (- x #xe000)) | |
226 (make-char 'mule-unicode-e000-ffff | |
227 (+ (/ x 96) 32) (+ (% x 96) 32)))))) | |
228 ranges from to) | |
229 (dolist (elt range) | |
230 (setq from (max #xA0 (car elt)) to (min #xffff (cdr elt))) | |
231 (if (and (>= to #x3400) (< to #xE000)) | |
232 (setq to #x33FF)) | |
233 (cond ((< from #x100) | |
234 (if (>= to #xE000) | |
235 (setq ranges (cons (cons #xE000 to) ranges) | |
236 to #x33FF)) | |
237 (if (>= to #x2500) | |
238 (setq ranges (cons (cons #x2500 to) ranges) | |
239 to #x24FF)) | |
240 (if (>= to #x100) | |
241 (setq ranges (cons (cons #x100 to) ranges) | |
242 to #xFF))) | |
243 ((< from #x2500) | |
244 (if (>= to #xE000) | |
245 (setq ranges (cons (cons #xE000 to) ranges) | |
246 to #x33FF)) | |
247 (if (>= to #x2500) | |
248 (setq ranges (cons (cons #x2500 to) ranges) | |
249 to #x24FF))) | |
250 ((< from #x3400) | |
251 (if (>= to #xE000) | |
252 (setq ranges (cons (cons #xE000 to) ranges) | |
253 to #x33FF)))) | |
254 (if (<= from to) | |
255 (setq ranges (cons (cons from to) ranges)))) | |
256 (mapconcat #'(lambda (x) | |
257 (format "%c-%c" | |
258 (funcall decode-char-no-trans (car x)) | |
259 (funcall decode-char-no-trans (cdr x)))) | |
260 ranges ""))) | |
261 ;; This forces loading tables for utf-translate-cjk-mode. | |
262 (setq utf-translate-cjk-lang-env nil)) | |
263 | |
264 (defcustom utf-translate-cjk-unicode-range '((#x2e80 . #xd7a3) | |
265 (#xff00 . #xffef)) | |
266 "List of Unicode code ranges supported by `utf-translate-cjk-mode'. | |
267 Setting this variable directly does not take effect; | |
268 use either \\[customize] or the function | |
269 `utf-translate-cjk-set-unicode-range'." | |
270 :version "21.4" | |
271 :type '(repeat (cons integer integer)) | |
272 :set (lambda (symbol value) | |
273 (utf-translate-cjk-set-unicode-range value)) | |
274 :group 'mule) | |
204 | 275 |
205 ;; Return non-nil if CODE-POINT is in `utf-translate-cjk-unicode-range'. | 276 ;; Return non-nil if CODE-POINT is in `utf-translate-cjk-unicode-range'. |
206 (defsubst utf-translate-cjk-substitutable-p (code-point) | 277 (defsubst utf-translate-cjk-substitutable-p (code-point) |
207 (let ((tail utf-translate-cjk-unicode-range) | 278 (let ((tail utf-translate-cjk-unicode-range) |
208 elt) | 279 elt) |
210 (setq elt (car tail) tail (cdr tail)) | 281 (setq elt (car tail) tail (cdr tail)) |
211 (if (and (>= code-point (car elt)) (<= code-point (cdr elt))) | 282 (if (and (>= code-point (car elt)) (<= code-point (cdr elt))) |
212 (setq tail nil) | 283 (setq tail nil) |
213 (setq elt nil))) | 284 (setq elt nil))) |
214 elt)) | 285 elt)) |
215 | |
216 (defvar utf-translate-cjk-lang-env nil | |
217 "Language environment in which tables for `utf-translate-cjk-mode' is loaded. | |
218 The value nil means that the tables are not yet loaded.") | |
219 | 286 |
220 (defun utf-translate-cjk-load-tables () | 287 (defun utf-translate-cjk-load-tables () |
221 "Load tables for `utf-translate-cjk-mode'." | 288 "Load tables for `utf-translate-cjk-mode'." |
222 ;; Fixme: Allow the use of the CJK charsets to be | 289 ;; Fixme: Allow the use of the CJK charsets to be |
223 ;; customized by reordering and possible omission. | 290 ;; customized by reordering and possible omission. |
872 (let ((range (string-as-multibyte "^\xc0-\xc3\xe1-\xf7")) | 939 (let ((range (string-as-multibyte "^\xc0-\xc3\xe1-\xf7")) |
873 (buffer-multibyte enable-multibyte-characters) | 940 (buffer-multibyte enable-multibyte-characters) |
874 hash-table ch) | 941 hash-table ch) |
875 (set-buffer-multibyte t) | 942 (set-buffer-multibyte t) |
876 (when utf-translate-cjk-mode | 943 (when utf-translate-cjk-mode |
877 (if (not utf-translate-cjk-lang-env) | 944 (unless utf-translate-cjk-lang-env |
878 ;; Check these characters: | 945 ;; Check these characters in utf-translate-cjk-range. |
879 ;; "U+2e80-U+33ff", "U+ff00-U+ffef" | 946 ;; We may have to translate them to CJK charsets. |
880 ;; We may have to translate them to CJK charsets. | 947 (skip-chars-forward |
881 (let ((range2 "$,29@(B-$,2G$,3r`(B-$,3u/(B")) | 948 (concat range utf-translate-cjk-unicode-range-string)) |
882 (skip-chars-forward (concat range range2)) | 949 (unless (eobp) |
883 (unless (eobp) | 950 (utf-translate-cjk-load-tables) |
884 (utf-translate-cjk-load-tables) | 951 (setq range |
885 (setq range (concat range range2))) | 952 (concat range utf-translate-cjk-unicode-range-string)))) |
886 (setq hash-table (get 'utf-subst-table-for-decode | 953 (setq hash-table (get 'utf-subst-table-for-decode |
887 'translation-hash-table))))) | 954 'translation-hash-table))) |
888 (while (and (skip-chars-forward range) | 955 (while (and (skip-chars-forward range) |
889 (not (eobp))) | 956 (not (eobp))) |
890 (setq ch (following-char)) | 957 (setq ch (following-char)) |
891 (if (< ch 256) | 958 (if (< ch 256) |
892 (utf-8-compose hash-table) | 959 (utf-8-compose hash-table) |