comparison lisp/international/utf-8.el @ 57727:c3945be39e09

(utf-translate-cjk-unicode-range-string): New variable. (utf-translate-cjk-set-unicode-range): New function. (utf-translate-cjk-unicode-range): Make it customizable. (utf-8-post-read-conversion): Use utf-translate-cjk-unicode-range-string.
author Kenichi Handa <handa@m17n.org>
date Wed, 27 Oct 2004 06:01:59 +0000
parents 752ef76fcc08
children e425df7605c9
comparison
equal deleted inserted replaced
57726:66e97a54985f 57727:c3945be39e09
195 chinese-big5-1 chinese-big5-2 195 chinese-big5-1 chinese-big5-2
196 japanese-jisx0208 japanese-jisx0212 196 japanese-jisx0208 japanese-jisx0212
197 korean-ksc5601) 197 korean-ksc5601)
198 "List of charsets supported by `utf-translate-cjk-mode'.") 198 "List of charsets supported by `utf-translate-cjk-mode'.")
199 199
200 (defconst utf-translate-cjk-unicode-range 200 (defvar utf-translate-cjk-lang-env nil
201 '((#x2e80 . #xd7a3) 201 "Language environment in which tables for `utf-translate-cjk-mode' is loaded.
202 (#xff00 . #xffef)) 202 The value nil means that the tables are not yet loaded.")
203 "List of Unicode code ranges supported by `utf-translate-cjk-mode'.") 203
204 (defvar utf-translate-cjk-unicode-range)
205
206 ;; String generated from utf-translate-cjk-unicode-range. It is
207 ;; suitable for an argument to skip-chars-forward.
208 (defvar utf-translate-cjk-unicode-range-string nil)
209
210 (defun utf-translate-cjk-set-unicode-range (range)
211 (setq utf-translate-cjk-unicode-range range)
212 (setq utf-translate-cjk-unicode-range-string
213 (let ((decode-char-no-trans
214 #'(lambda (x)
215 (cond ((< x #x100) (make-char 'latin-iso8859-1 x))
216 ((< x #x2500)
217 (setq x (- x #x100))
218 (make-char 'mule-unicode-0100-24ff
219 (+ (/ x 96) 32) (+ (% x 96) 32)))
220 ((< x #x3400)
221 (setq x (- x #x2500))
222 (make-char 'mule-unicode-2500-33ff
223 (+ (/ x 96) 32) (+ (% x 96) 32)))
224 (t
225 (setq x (- x #xe000))
226 (make-char 'mule-unicode-e000-ffff
227 (+ (/ x 96) 32) (+ (% x 96) 32))))))
228 ranges from to)
229 (dolist (elt range)
230 (setq from (max #xA0 (car elt)) to (min #xffff (cdr elt)))
231 (if (and (>= to #x3400) (< to #xE000))
232 (setq to #x33FF))
233 (cond ((< from #x100)
234 (if (>= to #xE000)
235 (setq ranges (cons (cons #xE000 to) ranges)
236 to #x33FF))
237 (if (>= to #x2500)
238 (setq ranges (cons (cons #x2500 to) ranges)
239 to #x24FF))
240 (if (>= to #x100)
241 (setq ranges (cons (cons #x100 to) ranges)
242 to #xFF)))
243 ((< from #x2500)
244 (if (>= to #xE000)
245 (setq ranges (cons (cons #xE000 to) ranges)
246 to #x33FF))
247 (if (>= to #x2500)
248 (setq ranges (cons (cons #x2500 to) ranges)
249 to #x24FF)))
250 ((< from #x3400)
251 (if (>= to #xE000)
252 (setq ranges (cons (cons #xE000 to) ranges)
253 to #x33FF))))
254 (if (<= from to)
255 (setq ranges (cons (cons from to) ranges))))
256 (mapconcat #'(lambda (x)
257 (format "%c-%c"
258 (funcall decode-char-no-trans (car x))
259 (funcall decode-char-no-trans (cdr x))))
260 ranges "")))
261 ;; This forces loading tables for utf-translate-cjk-mode.
262 (setq utf-translate-cjk-lang-env nil))
263
264 (defcustom utf-translate-cjk-unicode-range '((#x2e80 . #xd7a3)
265 (#xff00 . #xffef))
266 "List of Unicode code ranges supported by `utf-translate-cjk-mode'.
267 Setting this variable directly does not take effect;
268 use either \\[customize] or the function
269 `utf-translate-cjk-set-unicode-range'."
270 :version "21.4"
271 :type '(repeat (cons integer integer))
272 :set (lambda (symbol value)
273 (utf-translate-cjk-set-unicode-range value))
274 :group 'mule)
204 275
205 ;; Return non-nil if CODE-POINT is in `utf-translate-cjk-unicode-range'. 276 ;; Return non-nil if CODE-POINT is in `utf-translate-cjk-unicode-range'.
206 (defsubst utf-translate-cjk-substitutable-p (code-point) 277 (defsubst utf-translate-cjk-substitutable-p (code-point)
207 (let ((tail utf-translate-cjk-unicode-range) 278 (let ((tail utf-translate-cjk-unicode-range)
208 elt) 279 elt)
210 (setq elt (car tail) tail (cdr tail)) 281 (setq elt (car tail) tail (cdr tail))
211 (if (and (>= code-point (car elt)) (<= code-point (cdr elt))) 282 (if (and (>= code-point (car elt)) (<= code-point (cdr elt)))
212 (setq tail nil) 283 (setq tail nil)
213 (setq elt nil))) 284 (setq elt nil)))
214 elt)) 285 elt))
215
216 (defvar utf-translate-cjk-lang-env nil
217 "Language environment in which tables for `utf-translate-cjk-mode' is loaded.
218 The value nil means that the tables are not yet loaded.")
219 286
220 (defun utf-translate-cjk-load-tables () 287 (defun utf-translate-cjk-load-tables ()
221 "Load tables for `utf-translate-cjk-mode'." 288 "Load tables for `utf-translate-cjk-mode'."
222 ;; Fixme: Allow the use of the CJK charsets to be 289 ;; Fixme: Allow the use of the CJK charsets to be
223 ;; customized by reordering and possible omission. 290 ;; customized by reordering and possible omission.
872 (let ((range (string-as-multibyte "^\xc0-\xc3\xe1-\xf7")) 939 (let ((range (string-as-multibyte "^\xc0-\xc3\xe1-\xf7"))
873 (buffer-multibyte enable-multibyte-characters) 940 (buffer-multibyte enable-multibyte-characters)
874 hash-table ch) 941 hash-table ch)
875 (set-buffer-multibyte t) 942 (set-buffer-multibyte t)
876 (when utf-translate-cjk-mode 943 (when utf-translate-cjk-mode
877 (if (not utf-translate-cjk-lang-env) 944 (unless utf-translate-cjk-lang-env
878 ;; Check these characters: 945 ;; Check these characters in utf-translate-cjk-range.
879 ;; "U+2e80-U+33ff", "U+ff00-U+ffef" 946 ;; We may have to translate them to CJK charsets.
880 ;; We may have to translate them to CJK charsets. 947 (skip-chars-forward
881 (let ((range2 "$,29@(B-$,2G$,3r`(B-$,3u/(B")) 948 (concat range utf-translate-cjk-unicode-range-string))
882 (skip-chars-forward (concat range range2)) 949 (unless (eobp)
883 (unless (eobp) 950 (utf-translate-cjk-load-tables)
884 (utf-translate-cjk-load-tables) 951 (setq range
885 (setq range (concat range range2))) 952 (concat range utf-translate-cjk-unicode-range-string))))
886 (setq hash-table (get 'utf-subst-table-for-decode 953 (setq hash-table (get 'utf-subst-table-for-decode
887 'translation-hash-table))))) 954 'translation-hash-table)))
888 (while (and (skip-chars-forward range) 955 (while (and (skip-chars-forward range)
889 (not (eobp))) 956 (not (eobp)))
890 (setq ch (following-char)) 957 (setq ch (following-char))
891 (if (< ch 256) 958 (if (< ch 256)
892 (utf-8-compose hash-table) 959 (utf-8-compose hash-table)