annotate lisp/international/utf-8.el @ 79526:65ae26667e85

*** empty log message ***
author Glenn Morris <rgm@gnu.org>
date Sun, 02 Dec 2007 03:58:04 +0000
parents 2daf9c28b3a4
children b6fdfff4ae81 b98604865ea0
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
46496
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
1 ;;; utf-8.el --- UTF-8 decoding/encoding support -*- coding: iso-2022-7bit -*-
35542
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
2
75347
e3694f1cb928 Add 2007 to copyright years.
Glenn Morris <rgm@gnu.org>
parents: 74544
diff changeset
3 ;; Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007
74544
43cc94d955c2 Update copyright years.
Glenn Morris <rgm@gnu.org>
parents: 73349
diff changeset
4 ;; Free Software Foundation, Inc.
75347
e3694f1cb928 Add 2007 to copyright years.
Glenn Morris <rgm@gnu.org>
parents: 74544
diff changeset
5 ;; Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007
62274
c36561fe0657 Fix copyrights.
Kenichi Handa <handa@m17n.org>
parents: 59996
diff changeset
6 ;; National Institute of Advanced Industrial Science and Technology (AIST)
c36561fe0657 Fix copyrights.
Kenichi Handa <handa@m17n.org>
parents: 59996
diff changeset
7 ;; Registration Number H14PRO021
35542
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
8
37097
b095952a8678 (ccl-encode-mule-utf-8): Fix handling of eight-bit-control chars.
Kenichi Handa <handa@m17n.org>
parents: 36522
diff changeset
9 ;; Author: TAKAHASHI Naoto <ntakahas@m17n.org>
46496
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
10 ;; Maintainer: FSF
36243
a05ae5420f85 Doc and commentary fixes.
Dave Love <fx@gnu.org>
parents: 35542
diff changeset
11 ;; Keywords: multilingual, Unicode, UTF-8, i18n
35542
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
12
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
13 ;; This file is part of GNU Emacs.
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
14
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
15 ;; GNU Emacs is free software; you can redistribute it and/or modify
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
16 ;; it under the terms of the GNU General Public License as published by
78310
2daf9c28b3a4 Restore comma mistakenly removed in last change.
Glenn Morris <rgm@gnu.org>
parents: 78301
diff changeset
17 ;; the Free Software Foundation; either version 3, or (at your option)
35542
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
18 ;; any later version.
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
19
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
20 ;; GNU Emacs is distributed in the hope that it will be useful,
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
21 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
22 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
23 ;; GNU General Public License for more details.
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
24
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
25 ;; You should have received a copy of the GNU General Public License
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
26 ;; along with GNU Emacs; see the file COPYING. If not, write to the
64085
18a818a2ee7c Update FSF's address.
Lute Kamstra <lute@gnu.org>
parents: 62274
diff changeset
27 ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
18a818a2ee7c Update FSF's address.
Lute Kamstra <lute@gnu.org>
parents: 62274
diff changeset
28 ;; Boston, MA 02110-1301, USA.
35542
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
29
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
30 ;;; Commentary:
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
31
41873
16ee1ffbef65 (ucs-mule-to-mule-unicode): New
Dave Love <fx@gnu.org>
parents: 38436
diff changeset
32 ;; The coding-system `mule-utf-8' basically supports encoding/decoding
16ee1ffbef65 (ucs-mule-to-mule-unicode): New
Dave Love <fx@gnu.org>
parents: 38436
diff changeset
33 ;; of the following character sets to and from UTF-8:
35542
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
34 ;;
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
35 ;; ascii
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
36 ;; eight-bit-control
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
37 ;; latin-iso8859-1
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
38 ;; mule-unicode-0100-24ff
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
39 ;; mule-unicode-2500-33ff
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
40 ;; mule-unicode-e000-ffff
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
41 ;;
36243
a05ae5420f85 Doc and commentary fixes.
Dave Love <fx@gnu.org>
parents: 35542
diff changeset
42 ;; On decoding, Unicode characters that do not fit into the above
a05ae5420f85 Doc and commentary fixes.
Dave Love <fx@gnu.org>
parents: 35542
diff changeset
43 ;; character sets are handled as `eight-bit-control' or
a05ae5420f85 Doc and commentary fixes.
Dave Love <fx@gnu.org>
parents: 35542
diff changeset
44 ;; `eight-bit-graphic' characters to retain the information about the
46496
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
45 ;; original byte sequence and text properties record the corresponding
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
46 ;; Unicode code point.
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
47 ;;
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
48 ;; Fixme: note that reading and writing invalid UTF-8 may not be
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
49 ;; idempotent -- fixing that needs a new charset to represent the bytes.
41873
16ee1ffbef65 (ucs-mule-to-mule-unicode): New
Dave Love <fx@gnu.org>
parents: 38436
diff changeset
50 ;;
47703
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
51 ;; Characters from other character sets can be encoded with mule-utf-8
48848
4eb835c1257d (ucs-mule-cjk-to-unicode)
Dave Love <fx@gnu.org>
parents: 47720
diff changeset
52 ;; by populating the translation table
50179
65bb5afb37ef (utf-fragment-on-decoding): Don't call
Kenichi Handa <handa@m17n.org>
parents: 50085
diff changeset
53 ;; `utf-translation-table-for-encode'. Hash tables
47703
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
54 ;; `utf-subst-table-for-decode' and `utf-subst-table-for-encode' are
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
55 ;; used to support encoding and decoding of about a quarter of the CJK
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
56 ;; space between U+3400 and U+DFFF.
36243
a05ae5420f85 Doc and commentary fixes.
Dave Love <fx@gnu.org>
parents: 35542
diff changeset
57
54304
d61b01de8cdf UTF-8 is now RFC 3629.
Eli Zaretskii <eliz@gnu.org>
parents: 52725
diff changeset
58 ;; UTF-8 is defined in RFC 3629. A sketch of the encoding is:
35542
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
59
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
60 ;; scalar | utf-8
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
61 ;; value | 1st byte | 2nd byte | 3rd byte
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
62 ;; --------------------+-----------+-----------+----------
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
63 ;; 0000 0000 0xxx xxxx | 0xxx xxxx | |
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
64 ;; 0000 0yyy yyxx xxxx | 110y yyyy | 10xx xxxx |
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
65 ;; zzzz yyyy yyxx xxxx | 1110 zzzz | 10yy yyyy | 10xx xxxx
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
66
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
67 ;;; Code:
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
68
47703
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
69 (defvar ucs-mule-to-mule-unicode (make-char-table 'translation-table nil)
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
70 "Char table mapping characters to latin-iso8859-1 or mule-unicode-*.
46496
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
71
47703
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
72 If `unify-8859-on-encoding-mode' is non-nil, this table populates the
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
73 translation-table named `utf-translation-table-for-encode'.")
46496
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
74
47703
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
75 (define-translation-table 'utf-translation-table-for-encode)
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
76
46496
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
77
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
78 ;; Map Cyrillic and Greek to iso-8859 charsets, which take half the
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
79 ;; space of mule-unicode. For Latin scripts this isn't very
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
80 ;; important. Hebrew and Arabic might go here too when there's proper
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
81 ;; support for them.
47703
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
82
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
83 (defvar utf-fragmentation-table (make-char-table 'translation-table nil)
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
84 "Char-table normally mapping non-Latin mule-unicode-* chars to iso-8859-*.
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
85
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
86 If `utf-fragment-on-decoding' is non-nil, this table populates the
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
87 translation-table named `utf-translation-table-for-decode'.")
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
88
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
89 (defvar utf-defragmentation-table (make-char-table 'translation-table nil)
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
90 "Char-table for reverse mapping of `utf-fragmentation-table'.
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
91
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
92 If `utf-fragment-on-decoding' is non-nil and
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
93 `unify-8859-on-encoding-mode' is nil, this table populates the
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
94 translation-table named `utf-translation-table-for-encode'.")
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
95
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
96 (define-translation-table 'utf-translation-table-for-decode)
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
97
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
98
48882
d17c0d3e36ba (ucs-mule-cjk-to-unicode)
Dave Love <fx@gnu.org>
parents: 48848
diff changeset
99 (defvar ucs-mule-cjk-to-unicode (make-hash-table :test 'eq)
47703
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
100 "Hash table mapping Emacs CJK character sets to Unicode code points.
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
101
55437
6e677a935fe9 Fix references to utf-translate-cjk into utf-translate-cjk-mode.
Andreas Schwab <schwab@suse.de>
parents: 54304
diff changeset
102 If `utf-translate-cjk-mode' is non-nil, this table populates the
47703
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
103 translation-hash-table named `utf-subst-table-for-encode'.")
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
104
48882
d17c0d3e36ba (ucs-mule-cjk-to-unicode)
Dave Love <fx@gnu.org>
parents: 48848
diff changeset
105 (define-translation-hash-table 'utf-subst-table-for-encode
d17c0d3e36ba (ucs-mule-cjk-to-unicode)
Dave Love <fx@gnu.org>
parents: 48848
diff changeset
106 ucs-mule-cjk-to-unicode)
47703
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
107
48882
d17c0d3e36ba (ucs-mule-cjk-to-unicode)
Dave Love <fx@gnu.org>
parents: 48848
diff changeset
108 (defvar ucs-unicode-to-mule-cjk (make-hash-table :test 'eq)
47703
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
109 "Hash table mapping Unicode code points to Emacs CJK character sets.
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
110
55437
6e677a935fe9 Fix references to utf-translate-cjk into utf-translate-cjk-mode.
Andreas Schwab <schwab@suse.de>
parents: 54304
diff changeset
111 If `utf-translate-cjk-mode' is non-nil, this table populates the
47703
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
112 translation-hash-table named `utf-subst-table-for-decode'.")
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
113
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
114 (define-translation-hash-table 'utf-subst-table-for-decode
48882
d17c0d3e36ba (ucs-mule-cjk-to-unicode)
Dave Love <fx@gnu.org>
parents: 48848
diff changeset
115 ucs-unicode-to-mule-cjk)
47703
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
116
46496
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
117 (mapc
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
118 (lambda (pair)
47703
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
119 (aset utf-fragmentation-table (car pair) (cdr pair))
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
120 (aset utf-defragmentation-table (cdr pair) (car pair)))
46496
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
121 '((?$,1&d(B . ?,F4(B) (?$,1&e(B . ?,F5(B) (?$,1&f(B . ?,F6(B) (?$,1&h(B . ?,F8(B) (?$,1&i(B . ?,F9(B)
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
122 (?$,1&j(B . ?,F:(B) (?$,1&l(B . ?,F<(B) (?$,1&n(B . ?,F>(B) (?$,1&o(B . ?,F?(B) (?$,1&p(B . ?,F@(B)
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
123 (?$,1&q(B . ?,FA(B) (?$,1&r(B . ?,FB(B) (?$,1&s(B . ?,FC(B) (?$,1&t(B . ?,FD(B) (?$,1&u(B . ?,FE(B)
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
124 (?$,1&v(B . ?,FF(B) (?$,1&w(B . ?,FG(B) (?$,1&x(B . ?,FH(B) (?$,1&y(B . ?,FI(B) (?$,1&z(B . ?,FJ(B)
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
125 (?$,1&{(B . ?,FK(B) (?$,1&|(B . ?,FL(B) (?$,1&}(B . ?,FM(B) (?$,1&~(B . ?,FN(B) (?$,1&(B . ?,FO(B)
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
126 (?$,1' (B . ?,FP(B) (?$,1'!(B . ?,FQ(B) (?$,1'#(B . ?,FS(B) (?$,1'$(B . ?,FT(B) (?$,1'%(B . ?,FU(B)
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
127 (?$,1'&(B . ?,FV(B) (?$,1''(B . ?,FW(B) (?$,1'((B . ?,FX(B) (?$,1')(B . ?,FY(B) (?$,1'*(B . ?,FZ(B)
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
128 (?$,1'+(B . ?,F[(B) (?$,1',(B . ?,F\(B) (?$,1'-(B . ?,F](B) (?$,1'.(B . ?,F^(B) (?$,1'/(B . ?,F_(B)
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
129 (?$,1'0(B . ?,F`(B) (?$,1'1(B . ?,Fa(B) (?$,1'2(B . ?,Fb(B) (?$,1'3(B . ?,Fc(B) (?$,1'4(B . ?,Fd(B)
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
130 (?$,1'5(B . ?,Fe(B) (?$,1'6(B . ?,Ff(B) (?$,1'7(B . ?,Fg(B) (?$,1'8(B . ?,Fh(B) (?$,1'9(B . ?,Fi(B)
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
131 (?$,1':(B . ?,Fj(B) (?$,1';(B . ?,Fk(B) (?$,1'<(B . ?,Fl(B) (?$,1'=(B . ?,Fm(B) (?$,1'>(B . ?,Fn(B)
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
132 (?$,1'?(B . ?,Fo(B) (?$,1'@(B . ?,Fp(B) (?$,1'A(B . ?,Fq(B) (?$,1'B(B . ?,Fr(B) (?$,1'C(B . ?,Fs(B)
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
133 (?$,1'D(B . ?,Ft(B) (?$,1'E(B . ?,Fu(B) (?$,1'F(B . ?,Fv(B) (?$,1'G(B . ?,Fw(B) (?$,1'H(B . ?,Fx(B)
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
134 (?$,1'I(B . ?,Fy(B) (?$,1'J(B . ?,Fz(B) (?$,1'K(B . ?,F{(B) (?$,1'L(B . ?,F|(B) (?$,1'M(B . ?,F}(B)
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
135 (?$,1'N(B . ?,F~(B)
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
136
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
137 (?$,1(!(B . ?,L!(B) (?$,1("(B . ?,L"(B) (?$,1(#(B . ?,L#(B) (?$,1($(B . ?,L$(B)
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
138 (?$,1(%(B . ?,L%(B) (?$,1(&(B . ?,L&(B) (?$,1('(B . ?,L'(B) (?$,1(((B . ?,L((B) (?$,1()(B . ?,L)(B)
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
139 (?$,1(*(B . ?,L*(B) (?$,1(+(B . ?,L+(B) (?$,1(,(B . ?,L,(B) (?$,1(.(B . ?,L.(B) (?$,1(/(B . ?,L/(B)
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
140 (?$,1(0(B . ?,L0(B) (?$,1(1(B . ?,L1(B) (?$,1(2(B . ?,L2(B) (?$,1(3(B . ?,L3(B) (?$,1(4(B . ?,L4(B)
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
141 (?$,1(5(B . ?,L5(B) (?$,1(6(B . ?,L6(B) (?$,1(7(B . ?,L7(B) (?$,1(8(B . ?,L8(B) (?$,1(9(B . ?,L9(B)
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
142 (?$,1(:(B . ?,L:(B) (?$,1(;(B . ?,L;(B) (?$,1(<(B . ?,L<(B) (?$,1(=(B . ?,L=(B) (?$,1(>(B . ?,L>(B)
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
143 (?$,1(?(B . ?,L?(B) (?$,1(@(B . ?,L@(B) (?$,1(A(B . ?,LA(B) (?$,1(B(B . ?,LB(B) (?$,1(C(B . ?,LC(B)
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
144 (?$,1(D(B . ?,LD(B) (?$,1(E(B . ?,LE(B) (?$,1(F(B . ?,LF(B) (?$,1(G(B . ?,LG(B) (?$,1(H(B . ?,LH(B)
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
145 (?$,1(I(B . ?,LI(B) (?$,1(J(B . ?,LJ(B) (?$,1(K(B . ?,LK(B) (?$,1(L(B . ?,LL(B) (?$,1(M(B . ?,LM(B)
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
146 (?$,1(N(B . ?,LN(B) (?$,1(O(B . ?,LO(B) (?$,1(P(B . ?,LP(B) (?$,1(Q(B . ?,LQ(B) (?$,1(R(B . ?,LR(B)
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
147 (?$,1(S(B . ?,LS(B) (?$,1(T(B . ?,LT(B) (?$,1(U(B . ?,LU(B) (?$,1(V(B . ?,LV(B) (?$,1(W(B . ?,LW(B)
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
148 (?$,1(X(B . ?,LX(B) (?$,1(Y(B . ?,LY(B) (?$,1(Z(B . ?,LZ(B) (?$,1([(B . ?,L[(B) (?$,1(\(B . ?,L\(B)
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
149 (?$,1(](B . ?,L](B) (?$,1(^(B . ?,L^(B) (?$,1(_(B . ?,L_(B) (?$,1(`(B . ?,L`(B) (?$,1(a(B . ?,La(B)
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
150 (?$,1(b(B . ?,Lb(B) (?$,1(c(B . ?,Lc(B) (?$,1(d(B . ?,Ld(B) (?$,1(e(B . ?,Le(B) (?$,1(f(B . ?,Lf(B)
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
151 (?$,1(g(B . ?,Lg(B) (?$,1(h(B . ?,Lh(B) (?$,1(i(B . ?,Li(B) (?$,1(j(B . ?,Lj(B) (?$,1(k(B . ?,Lk(B)
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
152 (?$,1(l(B . ?,Ll(B) (?$,1(m(B . ?,Lm(B) (?$,1(n(B . ?,Ln(B) (?$,1(o(B . ?,Lo(B) (?$,1(q(B . ?,Lq(B)
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
153 (?$,1(r(B . ?,Lr(B) (?$,1(s(B . ?,Ls(B) (?$,1(t(B . ?,Lt(B) (?$,1(u(B . ?,Lu(B) (?$,1(v(B . ?,Lv(B)
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
154 (?$,1(w(B . ?,Lw(B) (?$,1(x(B . ?,Lx(B) (?$,1(y(B . ?,Ly(B) (?$,1(z(B . ?,Lz(B) (?$,1({(B . ?,L{(B)
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
155 (?$,1(|(B . ?,L|(B) (?$,1(~(B . ?,L~(B) (?$,1((B . ?,L(B)))
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
156
47703
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
157
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
158 (defcustom utf-fragment-on-decoding nil
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
159 "Whether or not to decode some chars in UTF-8/16 text into iso8859 charsets.
46496
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
160 Setting this means that the relevant Cyrillic and Greek characters are
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
161 decoded into the iso8859 charsets rather than into
47231
2d6a05542b5b *** empty log message ***
Kenichi Handa <handa@m17n.org>
parents: 46676
diff changeset
162 mule-unicode-0100-24ff. The iso8859 charsets take half as much space
46496
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
163 in the buffer, but using them may affect how the buffer can be re-encoded
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
164 and may require a different input method to search for them, for instance.
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
165 See `unify-8859-on-decoding-mode' and `unify-8859-on-encoding-mode'
47231
2d6a05542b5b *** empty log message ***
Kenichi Handa <handa@m17n.org>
parents: 46676
diff changeset
166 for mechanisms to make this largely transparent.
2d6a05542b5b *** empty log message ***
Kenichi Handa <handa@m17n.org>
parents: 46676
diff changeset
167
2d6a05542b5b *** empty log message ***
Kenichi Handa <handa@m17n.org>
parents: 46676
diff changeset
168 Setting this variable outside customize has no effect."
46496
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
169 :set (lambda (s v)
47703
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
170 (if v
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
171 (progn
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
172 (define-translation-table 'utf-translation-table-for-decode
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
173 utf-fragmentation-table)
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
174 ;; Even if unify-8859-on-encoding-mode is off, make
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
175 ;; mule-utf-* encode characters in
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
176 ;; utf-fragmentation-table.
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
177 (unless (eq (get 'utf-translation-table-for-encode
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
178 'translation-table)
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
179 ucs-mule-to-mule-unicode)
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
180 (define-translation-table 'utf-translation-table-for-encode
50179
65bb5afb37ef (utf-fragment-on-decoding): Don't call
Kenichi Handa <handa@m17n.org>
parents: 50085
diff changeset
181 utf-defragmentation-table)))
47703
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
182 (define-translation-table 'utf-translation-table-for-decode)
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
183 ;; When unify-8859-on-encoding-mode is off, be sure to make
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
184 ;; mule-utf-* disabled for characters in
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
185 ;; utf-fragmentation-table.
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
186 (unless (eq (get 'utf-translation-table-for-encode
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
187 'translation-table)
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
188 ucs-mule-to-mule-unicode)
50179
65bb5afb37ef (utf-fragment-on-decoding): Don't call
Kenichi Handa <handa@m17n.org>
parents: 50085
diff changeset
189 (define-translation-table 'utf-translation-table-for-encode)))
46496
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
190 (set-default s v))
59996
aac0a33f5772 Change release version from 21.4 to 22.1 throughout.
Kim F. Storm <storm@cua.dk>
parents: 59096
diff changeset
191 :version "22.1"
46496
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
192 :type 'boolean
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
193 :group 'mule)
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
194
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
195
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
196 (defconst utf-translate-cjk-charsets '(chinese-gb2312
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
197 chinese-big5-1 chinese-big5-2
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
198 japanese-jisx0208 japanese-jisx0212
57761
13239a8e9e80 (utf-translate-cjk-charsets): Add katakana-jisx0201.
Kenichi Handa <handa@m17n.org>
parents: 57737
diff changeset
199 katakana-jisx0201
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
200 korean-ksc5601)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
201 "List of charsets supported by `utf-translate-cjk-mode'.")
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
202
57727
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
203 (defvar utf-translate-cjk-lang-env nil
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
204 "Language environment in which tables for `utf-translate-cjk-mode' are loaded.
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
205 The value nil means that the tables are not yet loaded.")
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
206
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
207 (defvar utf-translate-cjk-unicode-range)
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
208
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
209 ;; String generated from utf-translate-cjk-unicode-range. It is
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
210 ;; suitable for an argument to skip-chars-forward.
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
211 (defvar utf-translate-cjk-unicode-range-string nil)
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
212
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
213 (defun utf-translate-cjk-set-unicode-range (range)
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
214 (setq utf-translate-cjk-unicode-range range)
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
215 (setq utf-translate-cjk-unicode-range-string
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
216 (let ((decode-char-no-trans
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
217 #'(lambda (x)
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
218 (cond ((< x #x100) (make-char 'latin-iso8859-1 x))
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
219 ((< x #x2500)
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
220 (setq x (- x #x100))
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
221 (make-char 'mule-unicode-0100-24ff
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
222 (+ (/ x 96) 32) (+ (% x 96) 32)))
59996
aac0a33f5772 Change release version from 21.4 to 22.1 throughout.
Kim F. Storm <storm@cua.dk>
parents: 59096
diff changeset
223 ((< x #x3400)
57727
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
224 (setq x (- x #x2500))
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
225 (make-char 'mule-unicode-2500-33ff
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
226 (+ (/ x 96) 32) (+ (% x 96) 32)))
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
227 (t
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
228 (setq x (- x #xe000))
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
229 (make-char 'mule-unicode-e000-ffff
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
230 (+ (/ x 96) 32) (+ (% x 96) 32))))))
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
231 ranges from to)
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
232 (dolist (elt range)
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
233 (setq from (max #xA0 (car elt)) to (min #xffff (cdr elt)))
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
234 (if (and (>= to #x3400) (< to #xE000))
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
235 (setq to #x33FF))
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
236 (cond ((< from #x100)
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
237 (if (>= to #xE000)
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
238 (setq ranges (cons (cons #xE000 to) ranges)
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
239 to #x33FF))
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
240 (if (>= to #x2500)
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
241 (setq ranges (cons (cons #x2500 to) ranges)
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
242 to #x24FF))
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
243 (if (>= to #x100)
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
244 (setq ranges (cons (cons #x100 to) ranges)
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
245 to #xFF)))
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
246 ((< from #x2500)
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
247 (if (>= to #xE000)
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
248 (setq ranges (cons (cons #xE000 to) ranges)
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
249 to #x33FF))
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
250 (if (>= to #x2500)
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
251 (setq ranges (cons (cons #x2500 to) ranges)
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
252 to #x24FF)))
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
253 ((< from #x3400)
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
254 (if (>= to #xE000)
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
255 (setq ranges (cons (cons #xE000 to) ranges)
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
256 to #x33FF))))
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
257 (if (<= from to)
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
258 (setq ranges (cons (cons from to) ranges))))
59996
aac0a33f5772 Change release version from 21.4 to 22.1 throughout.
Kim F. Storm <storm@cua.dk>
parents: 59096
diff changeset
259 (mapconcat #'(lambda (x)
aac0a33f5772 Change release version from 21.4 to 22.1 throughout.
Kim F. Storm <storm@cua.dk>
parents: 59096
diff changeset
260 (format "%c-%c"
57727
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
261 (funcall decode-char-no-trans (car x))
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
262 (funcall decode-char-no-trans (cdr x))))
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
263 ranges "")))
57737
e425df7605c9 (ccl-decode-mule-utf-8): Check utf-subst-table-for-decode for more
Kenichi Handa <handa@m17n.org>
parents: 57727
diff changeset
264 ;; This forces loading and setting tables for
e425df7605c9 (ccl-decode-mule-utf-8): Check utf-subst-table-for-decode for more
Kenichi Handa <handa@m17n.org>
parents: 57727
diff changeset
265 ;; utf-translate-cjk-mode.
e425df7605c9 (ccl-decode-mule-utf-8): Check utf-subst-table-for-decode for more
Kenichi Handa <handa@m17n.org>
parents: 57727
diff changeset
266 (setq utf-translate-cjk-lang-env nil
e425df7605c9 (ccl-decode-mule-utf-8): Check utf-subst-table-for-decode for more
Kenichi Handa <handa@m17n.org>
parents: 57727
diff changeset
267 ucs-mule-cjk-to-unicode (make-hash-table :test 'eq)
e425df7605c9 (ccl-decode-mule-utf-8): Check utf-subst-table-for-decode for more
Kenichi Handa <handa@m17n.org>
parents: 57727
diff changeset
268 ucs-unicode-to-mule-cjk (make-hash-table :test 'eq)))
57727
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
269
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
270 (defcustom utf-translate-cjk-unicode-range '((#x2e80 . #xd7a3)
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
271 (#xff00 . #xffef))
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
272 "List of Unicode code ranges supported by `utf-translate-cjk-mode'.
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
273 Setting this variable directly does not take effect;
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
274 use either \\[customize] or the function
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
275 `utf-translate-cjk-set-unicode-range'."
59996
aac0a33f5772 Change release version from 21.4 to 22.1 throughout.
Kim F. Storm <storm@cua.dk>
parents: 59096
diff changeset
276 :version "22.1"
57727
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
277 :type '(repeat (cons integer integer))
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
278 :set (lambda (symbol value)
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
279 (utf-translate-cjk-set-unicode-range value))
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
280 :group 'mule)
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
281
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
282 ;; Return non-nil if CODE-POINT is in `utf-translate-cjk-unicode-range'.
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
283 (defsubst utf-translate-cjk-substitutable-p (code-point)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
284 (let ((tail utf-translate-cjk-unicode-range)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
285 elt)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
286 (while tail
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
287 (setq elt (car tail) tail (cdr tail))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
288 (if (and (>= code-point (car elt)) (<= code-point (cdr elt)))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
289 (setq tail nil)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
290 (setq elt nil)))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
291 elt))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
292
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
293 (defun utf-translate-cjk-load-tables ()
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
294 "Load tables for `utf-translate-cjk-mode'."
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
295 ;; Fixme: Allow the use of the CJK charsets to be
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
296 ;; customized by reordering and possible omission.
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
297 (let ((redefined (< (hash-table-size ucs-mule-cjk-to-unicode) 43000)))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
298 (if redefined
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
299 ;; Redefine them with realistic initial sizes and a
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
300 ;; smallish rehash size to avoid wasting significant
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
301 ;; space after they're built.
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
302 (setq ucs-mule-cjk-to-unicode
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
303 (make-hash-table :test 'eq :size 43000 :rehash-size 1000)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
304 ucs-unicode-to-mule-cjk
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
305 (make-hash-table :test 'eq :size 21500 :rehash-size 1000)))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
306
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
307 ;; Load the files explicitly, to avoid having to keep
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
308 ;; around the large tables they contain (as well as the
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
309 ;; ones which get built).
59096
2447136abfc1 (utf-translate-cjk-load-tables): Bind
Kenichi Handa <handa@m17n.org>
parents: 57761
diff changeset
310 ;; Here we bind coding-system-for-read to nil so that coding tags
2447136abfc1 (utf-translate-cjk-load-tables): Bind
Kenichi Handa <handa@m17n.org>
parents: 57761
diff changeset
311 ;; in the files are respected even if the files are not yet
2447136abfc1 (utf-translate-cjk-load-tables): Bind
Kenichi Handa <handa@m17n.org>
parents: 57761
diff changeset
312 ;; byte-compiled
73349
43ccf7c7d312 * international/utf-8.el (utf-translate-cjk-load-tables): Avoid
Chong Yidong <cyd@stupidchicken.com>
parents: 66062
diff changeset
313 (let ((coding-system-for-read nil)
43ccf7c7d312 * international/utf-8.el (utf-translate-cjk-load-tables): Avoid
Chong Yidong <cyd@stupidchicken.com>
parents: 66062
diff changeset
314 ;; We must avoid clobbering this variable, in case the load
43ccf7c7d312 * international/utf-8.el (utf-translate-cjk-load-tables): Avoid
Chong Yidong <cyd@stupidchicken.com>
parents: 66062
diff changeset
315 ;; files below use different coding systems.
43ccf7c7d312 * international/utf-8.el (utf-translate-cjk-load-tables): Avoid
Chong Yidong <cyd@stupidchicken.com>
parents: 66062
diff changeset
316 (last-coding-system-used last-coding-system-used))
59096
2447136abfc1 (utf-translate-cjk-load-tables): Bind
Kenichi Handa <handa@m17n.org>
parents: 57761
diff changeset
317 (cond ((string= "Korean" current-language-environment)
2447136abfc1 (utf-translate-cjk-load-tables): Bind
Kenichi Handa <handa@m17n.org>
parents: 57761
diff changeset
318 (load "subst-jis")
2447136abfc1 (utf-translate-cjk-load-tables): Bind
Kenichi Handa <handa@m17n.org>
parents: 57761
diff changeset
319 (load "subst-big5")
2447136abfc1 (utf-translate-cjk-load-tables): Bind
Kenichi Handa <handa@m17n.org>
parents: 57761
diff changeset
320 (load "subst-gb2312")
2447136abfc1 (utf-translate-cjk-load-tables): Bind
Kenichi Handa <handa@m17n.org>
parents: 57761
diff changeset
321 (load "subst-ksc"))
2447136abfc1 (utf-translate-cjk-load-tables): Bind
Kenichi Handa <handa@m17n.org>
parents: 57761
diff changeset
322 ((string= "Chinese-BIG5" current-language-environment)
2447136abfc1 (utf-translate-cjk-load-tables): Bind
Kenichi Handa <handa@m17n.org>
parents: 57761
diff changeset
323 (load "subst-jis")
2447136abfc1 (utf-translate-cjk-load-tables): Bind
Kenichi Handa <handa@m17n.org>
parents: 57761
diff changeset
324 (load "subst-ksc")
2447136abfc1 (utf-translate-cjk-load-tables): Bind
Kenichi Handa <handa@m17n.org>
parents: 57761
diff changeset
325 (load "subst-gb2312")
2447136abfc1 (utf-translate-cjk-load-tables): Bind
Kenichi Handa <handa@m17n.org>
parents: 57761
diff changeset
326 (load "subst-big5"))
2447136abfc1 (utf-translate-cjk-load-tables): Bind
Kenichi Handa <handa@m17n.org>
parents: 57761
diff changeset
327 ((string= "Chinese-GB" current-language-environment)
2447136abfc1 (utf-translate-cjk-load-tables): Bind
Kenichi Handa <handa@m17n.org>
parents: 57761
diff changeset
328 (load "subst-jis")
2447136abfc1 (utf-translate-cjk-load-tables): Bind
Kenichi Handa <handa@m17n.org>
parents: 57761
diff changeset
329 (load "subst-ksc")
2447136abfc1 (utf-translate-cjk-load-tables): Bind
Kenichi Handa <handa@m17n.org>
parents: 57761
diff changeset
330 (load "subst-big5")
2447136abfc1 (utf-translate-cjk-load-tables): Bind
Kenichi Handa <handa@m17n.org>
parents: 57761
diff changeset
331 (load "subst-gb2312"))
2447136abfc1 (utf-translate-cjk-load-tables): Bind
Kenichi Handa <handa@m17n.org>
parents: 57761
diff changeset
332 (t
2447136abfc1 (utf-translate-cjk-load-tables): Bind
Kenichi Handa <handa@m17n.org>
parents: 57761
diff changeset
333 (load "subst-ksc")
2447136abfc1 (utf-translate-cjk-load-tables): Bind
Kenichi Handa <handa@m17n.org>
parents: 57761
diff changeset
334 (load "subst-gb2312")
2447136abfc1 (utf-translate-cjk-load-tables): Bind
Kenichi Handa <handa@m17n.org>
parents: 57761
diff changeset
335 (load "subst-big5")
2447136abfc1 (utf-translate-cjk-load-tables): Bind
Kenichi Handa <handa@m17n.org>
parents: 57761
diff changeset
336 (load "subst-jis")))) ; jis covers as much as big5, gb2312
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
337
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
338 (when redefined
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
339 (define-translation-hash-table 'utf-subst-table-for-decode
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
340 ucs-unicode-to-mule-cjk)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
341 (define-translation-hash-table 'utf-subst-table-for-encode
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
342 ucs-mule-cjk-to-unicode)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
343 (set-char-table-extra-slot (get 'utf-translation-table-for-encode
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
344 'translation-table)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
345 1 ucs-mule-cjk-to-unicode))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
346
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
347 (setq utf-translate-cjk-lang-env current-language-environment)))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
348
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
349 (defun utf-lookup-subst-table-for-decode (code-point)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
350 (if (and utf-translate-cjk-mode
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
351 (not utf-translate-cjk-lang-env)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
352 (utf-translate-cjk-substitutable-p code-point))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
353 (utf-translate-cjk-load-tables))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
354 (gethash code-point
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
355 (get 'utf-subst-table-for-decode 'translation-hash-table)))
56562
9274a15c1400 (utf-translate-cjk-mode): Doc fix.
Luc Teirlinck <teirllm@auburn.edu>
parents: 56095
diff changeset
356
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
357
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
358 (defun utf-lookup-subst-table-for-encode (char)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
359 (if (and utf-translate-cjk-mode
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
360 (not utf-translate-cjk-lang-env)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
361 (memq (char-charset char) utf-translate-cjk-charsets))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
362 (utf-translate-cjk-load-tables))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
363 (gethash char
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
364 (get 'utf-subst-table-for-encode 'translation-hash-table)))
56562
9274a15c1400 (utf-translate-cjk-mode): Doc fix.
Luc Teirlinck <teirllm@auburn.edu>
parents: 56095
diff changeset
365
50341
f49a20cb84ed (utf-translate-cjk-mode): Minor mode,
Kai Großjohann <kgrossjo@eu.uu.net>
parents: 50207
diff changeset
366 (define-minor-mode utf-translate-cjk-mode
56562
9274a15c1400 (utf-translate-cjk-mode): Doc fix.
Luc Teirlinck <teirllm@auburn.edu>
parents: 56095
diff changeset
367 "Toggle whether UTF based coding systems de/encode CJK characters.
9274a15c1400 (utf-translate-cjk-mode): Doc fix.
Luc Teirlinck <teirllm@auburn.edu>
parents: 56095
diff changeset
368 If ARG is an integer, enable if ARG is positive and disable if
9274a15c1400 (utf-translate-cjk-mode): Doc fix.
Luc Teirlinck <teirllm@auburn.edu>
parents: 56095
diff changeset
369 zero or negative. This is a minor mode.
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
370 Enabling this allows the coding systems mule-utf-8,
51628
abfc7d48b476 (utf-translate-cjk-mode): Fix docstring.
Kenichi Handa <handa@m17n.org>
parents: 50766
diff changeset
371 mule-utf-16le and mule-utf-16be to encode characters in the charsets
48848
4eb835c1257d (ucs-mule-cjk-to-unicode)
Dave Love <fx@gnu.org>
parents: 47720
diff changeset
372 `korean-ksc5601', `chinese-gb2312', `chinese-big5-1',
4eb835c1257d (ucs-mule-cjk-to-unicode)
Dave Love <fx@gnu.org>
parents: 47720
diff changeset
373 `chinese-big5-2', `japanese-jisx0208' and `japanese-jisx0212', and to
4eb835c1257d (ucs-mule-cjk-to-unicode)
Dave Love <fx@gnu.org>
parents: 47720
diff changeset
374 decode the corresponding unicodes into such characters.
46496
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
375
48848
4eb835c1257d (ucs-mule-cjk-to-unicode)
Dave Love <fx@gnu.org>
parents: 47720
diff changeset
376 Where the charsets overlap, the one preferred for decoding is chosen
4eb835c1257d (ucs-mule-cjk-to-unicode)
Dave Love <fx@gnu.org>
parents: 47720
diff changeset
377 according to the language environment in effect when this option is
4eb835c1257d (ucs-mule-cjk-to-unicode)
Dave Love <fx@gnu.org>
parents: 47720
diff changeset
378 turned on: ksc5601 for Korean, gb2312 for Chinese-GB, big5 for
4eb835c1257d (ucs-mule-cjk-to-unicode)
Dave Love <fx@gnu.org>
parents: 47720
diff changeset
379 Chinese-Big5 and jisx for other environments.
4eb835c1257d (ucs-mule-cjk-to-unicode)
Dave Love <fx@gnu.org>
parents: 47720
diff changeset
380
56562
9274a15c1400 (utf-translate-cjk-mode): Doc fix.
Luc Teirlinck <teirllm@auburn.edu>
parents: 56095
diff changeset
381 This mode is on by default. If you are not interested in CJK
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
382 characters and want to avoid some overhead on encoding/decoding
56562
9274a15c1400 (utf-translate-cjk-mode): Doc fix.
Luc Teirlinck <teirllm@auburn.edu>
parents: 56095
diff changeset
383 by the above coding systems, you can customize the user option
9274a15c1400 (utf-translate-cjk-mode): Doc fix.
Luc Teirlinck <teirllm@auburn.edu>
parents: 56095
diff changeset
384 `utf-translate-cjk-mode' to nil."
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
385 :init-value t
59996
aac0a33f5772 Change release version from 21.4 to 22.1 throughout.
Kim F. Storm <storm@cua.dk>
parents: 59096
diff changeset
386 :version "22.1"
46496
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
387 :type 'boolean
50341
f49a20cb84ed (utf-translate-cjk-mode): Minor mode,
Kai Großjohann <kgrossjo@eu.uu.net>
parents: 50207
diff changeset
388 :group 'mule
f49a20cb84ed (utf-translate-cjk-mode): Minor mode,
Kai Großjohann <kgrossjo@eu.uu.net>
parents: 50207
diff changeset
389 :global t
f49a20cb84ed (utf-translate-cjk-mode): Minor mode,
Kai Großjohann <kgrossjo@eu.uu.net>
parents: 50207
diff changeset
390 (if utf-translate-cjk-mode
f49a20cb84ed (utf-translate-cjk-mode): Minor mode,
Kai Großjohann <kgrossjo@eu.uu.net>
parents: 50207
diff changeset
391 (progn
50766
fc9cb527333d (utf-translate-cjk-mode): Update the
Kenichi Handa <handa@m17n.org>
parents: 50549
diff changeset
392 (define-translation-hash-table 'utf-subst-table-for-decode
fc9cb527333d (utf-translate-cjk-mode): Update the
Kenichi Handa <handa@m17n.org>
parents: 50549
diff changeset
393 ucs-unicode-to-mule-cjk)
fc9cb527333d (utf-translate-cjk-mode): Update the
Kenichi Handa <handa@m17n.org>
parents: 50549
diff changeset
394 (define-translation-hash-table 'utf-subst-table-for-encode
fc9cb527333d (utf-translate-cjk-mode): Update the
Kenichi Handa <handa@m17n.org>
parents: 50549
diff changeset
395 ucs-mule-cjk-to-unicode)
fc9cb527333d (utf-translate-cjk-mode): Update the
Kenichi Handa <handa@m17n.org>
parents: 50549
diff changeset
396 (set-char-table-extra-slot (get 'utf-translation-table-for-encode
fc9cb527333d (utf-translate-cjk-mode): Update the
Kenichi Handa <handa@m17n.org>
parents: 50549
diff changeset
397 'translation-table)
fc9cb527333d (utf-translate-cjk-mode): Update the
Kenichi Handa <handa@m17n.org>
parents: 50549
diff changeset
398 1 ucs-mule-cjk-to-unicode))
50549
c8525ac04d76 (utf-translate-cjk-mode): Fix incorrect
Kenichi Handa <handa@m17n.org>
parents: 50341
diff changeset
399 (define-translation-hash-table 'utf-subst-table-for-decode
c8525ac04d76 (utf-translate-cjk-mode): Fix incorrect
Kenichi Handa <handa@m17n.org>
parents: 50341
diff changeset
400 (make-hash-table :test 'eq))
c8525ac04d76 (utf-translate-cjk-mode): Fix incorrect
Kenichi Handa <handa@m17n.org>
parents: 50341
diff changeset
401 (define-translation-hash-table 'utf-subst-table-for-encode
50766
fc9cb527333d (utf-translate-cjk-mode): Update the
Kenichi Handa <handa@m17n.org>
parents: 50549
diff changeset
402 (make-hash-table :test 'eq))
fc9cb527333d (utf-translate-cjk-mode): Update the
Kenichi Handa <handa@m17n.org>
parents: 50549
diff changeset
403 (set-char-table-extra-slot (get 'utf-translation-table-for-encode
fc9cb527333d (utf-translate-cjk-mode): Update the
Kenichi Handa <handa@m17n.org>
parents: 50549
diff changeset
404 'translation-table)
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
405 1 nil))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
406
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
407 ;; Update safe-chars of mule-utf-* coding systems.
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
408 (dolist (elt (coding-system-list t))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
409 (if (string-match "^mule-utf" (symbol-name elt))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
410 (let ((safe-charsets (coding-system-get elt 'safe-charsets))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
411 (safe-chars (coding-system-get elt 'safe-chars))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
412 (need-update nil))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
413 (dolist (charset utf-translate-cjk-charsets)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
414 (unless (eq utf-translate-cjk-mode (memq charset safe-charsets))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
415 (setq safe-charsets
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
416 (if utf-translate-cjk-mode
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
417 (cons charset safe-charsets)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
418 (delq charset safe-charsets))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
419 need-update t)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
420 (aset safe-chars (make-char charset) utf-translate-cjk-mode)))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
421 (when need-update
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
422 (coding-system-put elt 'safe-charsets safe-charsets)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
423 (define-coding-system-internal elt))))))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
424
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
425 (define-ccl-program ccl-mule-utf-untrans
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
426 ;; R0 is an untranslatable Unicode code-point (U+3400..U+DFFF or
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
427 ;; U+10000..U+10FFFF) or an invalid byte (#x00..#xFF). Write
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
428 ;; eight-bit-control/graphic sequence (2 to 4 chars) representing
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
429 ;; UTF-8 sequence of r0. Registers r4, r5, r6 are modified.
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
430 ;;
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
431 ;; This is a subroutine because we assume that this is called very
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
432 ;; rarely (so we don't have to worry about the overhead of the
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
433 ;; call).
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
434 `(0
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
435 ((r5 = ,(charset-id 'eight-bit-control))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
436 (r6 = ,(charset-id 'eight-bit-graphic))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
437 (if (r0 < #x100)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
438 ((r4 = ((r0 >> 6) | #xC0))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
439 (write-multibyte-character r6 r4))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
440 ((if (r0 < #x10000)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
441 ((r4 = ((r0 >> 12) | #xE0))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
442 (write-multibyte-character r6 r4))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
443 ((r4 = ((r0 >> 18) | #xF0))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
444 (write-multibyte-character r6 r4)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
445 (r4 = (((r0 >> 12) & #x3F) | #x80))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
446 (if (r4 < #xA0)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
447 (write-multibyte-character r5 r4)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
448 (write-multibyte-character r6 r4))))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
449 (r4 = (((r0 >> 6) & #x3F) | #x80))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
450 (if (r4 < #xA0)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
451 (write-multibyte-character r5 r4)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
452 (write-multibyte-character r6 r4))))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
453 (r4 = ((r0 & #x3F) | #x80))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
454 (if (r4 < #xA0)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
455 (write-multibyte-character r5 r4)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
456 (write-multibyte-character r6 r4)))))
46496
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
457
35542
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
458 (define-ccl-program ccl-decode-mule-utf-8
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
459 ;;
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
460 ;; charset | bytes in utf-8 | bytes in emacs
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
461 ;; -----------------------+----------------+---------------
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
462 ;; ascii | 1 | 1
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
463 ;; -----------------------+----------------+---------------
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
464 ;; eight-bit-control | 2 | 2
41873
16ee1ffbef65 (ucs-mule-to-mule-unicode): New
Dave Love <fx@gnu.org>
parents: 38436
diff changeset
465 ;; eight-bit-graphic | 2 | 1
35542
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
466 ;; latin-iso8859-1 | 2 | 2
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
467 ;; -----------------------+----------------+---------------
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
468 ;; mule-unicode-0100-24ff | 2 | 4
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
469 ;; (< 0800) | |
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
470 ;; -----------------------+----------------+---------------
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
471 ;; mule-unicode-0100-24ff | 3 | 4
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
472 ;; (>= 0800) | |
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
473 ;; mule-unicode-2500-33ff | 3 | 4
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
474 ;; mule-unicode-e000-ffff | 3 | 4
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
475 ;; -----------------------+----------------+---------------
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
476 ;; invalid byte | 1 | 2
35542
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
477 ;;
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
478 ;; Thus magnification factor is two.
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
479 ;;
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
480 `(2
56056
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
481 ((r6 = ,(charset-id 'latin-iso8859-1))
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
482 (read r0)
37934
88389fa9b713 (ccl-decode-mule-utf-8): Handle
Gerd Moellmann <gerd@gnu.org>
parents: 37097
diff changeset
483 (loop
35542
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
484 (if (r0 < #x80)
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
485 ;; 1-byte encoding, i.e., ascii
56056
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
486 (write-read-repeat r0))
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
487 (if (r0 < #xc2)
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
488 ;; continuation byte (invalid here) or 1st byte of overlong
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
489 ;; 2-byte sequence.
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
490 ((call ccl-mule-utf-untrans)
56056
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
491 (r6 = ,(charset-id 'latin-iso8859-1))
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
492 (read r0)
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
493 (repeat)))
35542
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
494
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
495 ;; Read the 2nd byte.
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
496 (read r1)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
497 (if ((r1 & #b11000000) != #b10000000) ; Invalid 2nd byte
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
498 ((call ccl-mule-utf-untrans)
56056
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
499 (r6 = ,(charset-id 'latin-iso8859-1))
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
500 ;; Handle it in the next loop.
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
501 (r0 = r1)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
502 (repeat)))
46496
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
503
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
504 (if (r0 < #xe0)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
505 ;; 2-byte encoding 00000yyyyyxxxxxx = 110yyyyy 10xxxxxx
56056
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
506 ((r1 &= #x3F)
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
507 (r1 |= ((r0 & #x1F) << 6))
57737
e425df7605c9 (ccl-decode-mule-utf-8): Check utf-subst-table-for-decode for more
Kenichi Handa <handa@m17n.org>
parents: 57727
diff changeset
508 ;; Now r1 holds scalar value. We don't have to check
56056
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
509 ;; `overlong sequence' because r0 >= 0xC2.
46496
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
510
56056
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
511 (if (r1 >= 256)
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
512 ;; mule-unicode-0100-24ff (< 0800)
57737
e425df7605c9 (ccl-decode-mule-utf-8): Check utf-subst-table-for-decode for more
Kenichi Handa <handa@m17n.org>
parents: 57727
diff changeset
513 ((r0 = r1)
e425df7605c9 (ccl-decode-mule-utf-8): Check utf-subst-table-for-decode for more
Kenichi Handa <handa@m17n.org>
parents: 57727
diff changeset
514 (lookup-integer utf-subst-table-for-decode r0 r1)
e425df7605c9 (ccl-decode-mule-utf-8): Check utf-subst-table-for-decode for more
Kenichi Handa <handa@m17n.org>
parents: 57727
diff changeset
515 (if (r7 == 0)
e425df7605c9 (ccl-decode-mule-utf-8): Check utf-subst-table-for-decode for more
Kenichi Handa <handa@m17n.org>
parents: 57727
diff changeset
516 ((r0 = ,(charset-id 'mule-unicode-0100-24ff))
e425df7605c9 (ccl-decode-mule-utf-8): Check utf-subst-table-for-decode for more
Kenichi Handa <handa@m17n.org>
parents: 57727
diff changeset
517 (r1 -= #x0100)
e425df7605c9 (ccl-decode-mule-utf-8): Check utf-subst-table-for-decode for more
Kenichi Handa <handa@m17n.org>
parents: 57727
diff changeset
518 (r2 = (((r1 / 96) + 32) << 7))
e425df7605c9 (ccl-decode-mule-utf-8): Check utf-subst-table-for-decode for more
Kenichi Handa <handa@m17n.org>
parents: 57727
diff changeset
519 (r1 %= 96)
e425df7605c9 (ccl-decode-mule-utf-8): Check utf-subst-table-for-decode for more
Kenichi Handa <handa@m17n.org>
parents: 57727
diff changeset
520 (r1 += (r2 + 32))
e425df7605c9 (ccl-decode-mule-utf-8): Check utf-subst-table-for-decode for more
Kenichi Handa <handa@m17n.org>
parents: 57727
diff changeset
521 (translate-character
e425df7605c9 (ccl-decode-mule-utf-8): Check utf-subst-table-for-decode for more
Kenichi Handa <handa@m17n.org>
parents: 57727
diff changeset
522 utf-translation-table-for-decode r0 r1)))
56056
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
523 (write-multibyte-character r0 r1)
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
524 (read r0)
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
525 (repeat))
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
526 (if (r1 >= 160)
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
527 ;; latin-iso8859-1
57737
e425df7605c9 (ccl-decode-mule-utf-8): Check utf-subst-table-for-decode for more
Kenichi Handa <handa@m17n.org>
parents: 57727
diff changeset
528 ((r0 = r1)
e425df7605c9 (ccl-decode-mule-utf-8): Check utf-subst-table-for-decode for more
Kenichi Handa <handa@m17n.org>
parents: 57727
diff changeset
529 (lookup-integer utf-subst-table-for-decode r0 r1)
e425df7605c9 (ccl-decode-mule-utf-8): Check utf-subst-table-for-decode for more
Kenichi Handa <handa@m17n.org>
parents: 57727
diff changeset
530 (if (r7 == 0)
e425df7605c9 (ccl-decode-mule-utf-8): Check utf-subst-table-for-decode for more
Kenichi Handa <handa@m17n.org>
parents: 57727
diff changeset
531 ((r1 -= 128)
e425df7605c9 (ccl-decode-mule-utf-8): Check utf-subst-table-for-decode for more
Kenichi Handa <handa@m17n.org>
parents: 57727
diff changeset
532 (write-multibyte-character r6 r1))
e425df7605c9 (ccl-decode-mule-utf-8): Check utf-subst-table-for-decode for more
Kenichi Handa <handa@m17n.org>
parents: 57727
diff changeset
533 ((write-multibyte-character r0 r1)))
56056
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
534 (read r0)
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
535 (repeat))
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
536 ;; eight-bit-control
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
537 ((r0 = ,(charset-id 'eight-bit-control))
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
538 (write-multibyte-character r0 r1)
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
539 (read r0)
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
540 (repeat))))))
46496
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
541
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
542 ;; Read the 3rd byte.
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
543 (read r2)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
544 (if ((r2 & #b11000000) != #b10000000) ; Invalid 3rd byte
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
545 ((call ccl-mule-utf-untrans)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
546 (r0 = r1)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
547 (call ccl-mule-utf-untrans)
56056
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
548 (r6 = ,(charset-id 'latin-iso8859-1))
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
549 ;; Handle it in the next loop.
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
550 (r0 = r2)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
551 (repeat)))
46496
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
552
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
553 (if (r0 < #xF0)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
554 ;; 3byte encoding
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
555 ;; zzzzyyyyyyxxxxxx = 1110zzzz 10yyyyyy 10xxxxxx
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
556 ((r3 = ((r0 & #xF) << 12))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
557 (r3 |= ((r1 & #x3F) << 6))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
558 (r3 |= (r2 & #x3F))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
559
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
560 (if (r3 < #x800) ; `overlong sequence'
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
561 ((call ccl-mule-utf-untrans)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
562 (r0 = r1)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
563 (call ccl-mule-utf-untrans)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
564 (r0 = r2)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
565 (call ccl-mule-utf-untrans)
56056
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
566 (r6 = ,(charset-id 'latin-iso8859-1))
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
567 (read r0)
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
568 (repeat)))
49598
0d8b17d428b5 Trailing whitepace deleted.
Juanma Barranquero <lekktu@gmail.com>
parents: 49028
diff changeset
569
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
570 (if (r3 < #x2500)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
571 ;; mule-unicode-0100-24ff (>= 0800)
57737
e425df7605c9 (ccl-decode-mule-utf-8): Check utf-subst-table-for-decode for more
Kenichi Handa <handa@m17n.org>
parents: 57727
diff changeset
572 ((r0 = r3)
e425df7605c9 (ccl-decode-mule-utf-8): Check utf-subst-table-for-decode for more
Kenichi Handa <handa@m17n.org>
parents: 57727
diff changeset
573 (lookup-integer utf-subst-table-for-decode r0 r1)
e425df7605c9 (ccl-decode-mule-utf-8): Check utf-subst-table-for-decode for more
Kenichi Handa <handa@m17n.org>
parents: 57727
diff changeset
574 (if (r7 == 0)
e425df7605c9 (ccl-decode-mule-utf-8): Check utf-subst-table-for-decode for more
Kenichi Handa <handa@m17n.org>
parents: 57727
diff changeset
575 ((r0 = ,(charset-id 'mule-unicode-0100-24ff))
e425df7605c9 (ccl-decode-mule-utf-8): Check utf-subst-table-for-decode for more
Kenichi Handa <handa@m17n.org>
parents: 57727
diff changeset
576 (r3 -= #x0100)
e425df7605c9 (ccl-decode-mule-utf-8): Check utf-subst-table-for-decode for more
Kenichi Handa <handa@m17n.org>
parents: 57727
diff changeset
577 (r3 //= 96)
e425df7605c9 (ccl-decode-mule-utf-8): Check utf-subst-table-for-decode for more
Kenichi Handa <handa@m17n.org>
parents: 57727
diff changeset
578 (r1 = (r7 + 32))
e425df7605c9 (ccl-decode-mule-utf-8): Check utf-subst-table-for-decode for more
Kenichi Handa <handa@m17n.org>
parents: 57727
diff changeset
579 (r1 += ((r3 + 32) << 7))
e425df7605c9 (ccl-decode-mule-utf-8): Check utf-subst-table-for-decode for more
Kenichi Handa <handa@m17n.org>
parents: 57727
diff changeset
580 (translate-character
e425df7605c9 (ccl-decode-mule-utf-8): Check utf-subst-table-for-decode for more
Kenichi Handa <handa@m17n.org>
parents: 57727
diff changeset
581 utf-translation-table-for-decode r0 r1)))
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
582 (write-multibyte-character r0 r1)
56056
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
583 (read r0)
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
584 (repeat)))
49598
0d8b17d428b5 Trailing whitepace deleted.
Juanma Barranquero <lekktu@gmail.com>
parents: 49028
diff changeset
585
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
586 (if (r3 < #x3400)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
587 ;; mule-unicode-2500-33ff
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
588 ((r0 = r3) ; don't zap r3
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
589 (lookup-integer utf-subst-table-for-decode r0 r1)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
590 (if (r7 == 0)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
591 ((r0 = ,(charset-id 'mule-unicode-2500-33ff))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
592 (r3 -= #x2500)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
593 (r3 //= 96)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
594 (r1 = (r7 + 32))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
595 (r1 += ((r3 + 32) << 7))))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
596 (write-multibyte-character r0 r1)
56056
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
597 (read r0)
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
598 (repeat)))
46496
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
599
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
600 (if (r3 < #xE000)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
601 ;; Try to convert to CJK chars, else
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
602 ;; keep them as eight-bit-{control|graphic}.
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
603 ((r0 = r3)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
604 (lookup-integer utf-subst-table-for-decode r3 r1)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
605 (if r7
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
606 ;; got a translation
56056
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
607 ((write-multibyte-character r3 r1)
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
608 (read r0)
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
609 (repeat))
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
610 ((call ccl-mule-utf-untrans)
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
611 (r6 = ,(charset-id 'latin-iso8859-1))
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
612 (read r0)
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
613 (repeat)))))
46496
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
614
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
615 ;; mule-unicode-e000-ffff
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
616 ;; Fixme: fffe and ffff are invalid.
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
617 (r0 = r3) ; don't zap r3
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
618 (lookup-integer utf-subst-table-for-decode r0 r1)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
619 (if (r7 == 0)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
620 ((r0 = ,(charset-id 'mule-unicode-e000-ffff))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
621 (r3 -= #xe000)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
622 (r3 //= 96)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
623 (r1 = (r7 + 32))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
624 (r1 += ((r3 + 32) << 7))))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
625 (write-multibyte-character r0 r1)
56056
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
626 (read r0)
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
627 (repeat)))
49598
0d8b17d428b5 Trailing whitepace deleted.
Juanma Barranquero <lekktu@gmail.com>
parents: 49028
diff changeset
628
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
629 ;; Read the 4th byte.
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
630 (read r3)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
631 (if ((r3 & #b11000000) != #b10000000) ; Invalid 4th byte
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
632 ((call ccl-mule-utf-untrans)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
633 (r0 = r1)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
634 (call ccl-mule-utf-untrans)
56056
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
635 (r0 = r2)
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
636 (call ccl-mule-utf-untrans)
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
637 (r6 = ,(charset-id 'latin-iso8859-1))
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
638 ;; Handle it in the next loop.
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
639 (r0 = r3)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
640 (repeat)))
35542
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
641
56056
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
642 (if (r0 < #xF8)
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
643 ;; 4-byte encoding:
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
644 ;; wwwzzzzzzyyyyyyxxxxxx = 11110www 10zzzzzz 10yyyyyy 10xxxxxx
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
645 ;; keep those bytes as eight-bit-{control|graphic}
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
646 ;; Fixme: allow lookup in utf-subst-table-for-decode.
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
647 ((r4 = ((r0 & #x7) << 18))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
648 (r4 |= ((r1 & #x3F) << 12))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
649 (r4 |= ((r2 & #x3F) << 6))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
650 (r4 |= (r3 & #x3F))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
651
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
652 (if (r4 < #x10000) ; `overlong sequence'
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
653 ((call ccl-mule-utf-untrans)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
654 (r0 = r1)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
655 (call ccl-mule-utf-untrans)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
656 (r0 = r2)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
657 (call ccl-mule-utf-untrans)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
658 (r0 = r3)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
659 (call ccl-mule-utf-untrans))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
660 ((r0 = r4)
56056
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
661 (call ccl-mule-utf-untrans))))
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
662
56056
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
663 ;; Unsupported sequence.
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
664 ((call ccl-mule-utf-untrans)
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
665 (r0 = r1)
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
666 (call ccl-mule-utf-untrans)
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
667 (r0 = r2)
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
668 (call ccl-mule-utf-untrans)
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
669 (r0 = r3)
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
670 (call ccl-mule-utf-untrans)))
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
671 (r6 = ,(charset-id 'latin-iso8859-1))
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
672 (read r0)
50085
575609f03daa (ccl-decode-mule-utf-8): Don't loose
Kenichi Handa <handa@m17n.org>
parents: 49598
diff changeset
673 (repeat)))
575609f03daa (ccl-decode-mule-utf-8): Don't loose
Kenichi Handa <handa@m17n.org>
parents: 49598
diff changeset
674
56056
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
675
50085
575609f03daa (ccl-decode-mule-utf-8): Don't loose
Kenichi Handa <handa@m17n.org>
parents: 49598
diff changeset
676 ;; At EOF...
575609f03daa (ccl-decode-mule-utf-8): Don't loose
Kenichi Handa <handa@m17n.org>
parents: 49598
diff changeset
677 (if (r0 >= 0)
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
678 ;; r0 >= #x80
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
679 ((call ccl-mule-utf-untrans)
50085
575609f03daa (ccl-decode-mule-utf-8): Don't loose
Kenichi Handa <handa@m17n.org>
parents: 49598
diff changeset
680 (if (r1 >= 0)
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
681 ((r0 = r1)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
682 (call ccl-mule-utf-untrans)
50085
575609f03daa (ccl-decode-mule-utf-8): Don't loose
Kenichi Handa <handa@m17n.org>
parents: 49598
diff changeset
683 (if (r2 >= 0)
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
684 ((r0 = r2)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
685 (call ccl-mule-utf-untrans)
50085
575609f03daa (ccl-decode-mule-utf-8): Don't loose
Kenichi Handa <handa@m17n.org>
parents: 49598
diff changeset
686 (if (r3 >= 0)
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
687 ((r0 = r3)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
688 (call ccl-mule-utf-untrans))))))))))
35542
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
689
36243
a05ae5420f85 Doc and commentary fixes.
Dave Love <fx@gnu.org>
parents: 35542
diff changeset
690 "CCL program to decode UTF-8.
36465
f968e313e8ad Doc fixes.
Dave Love <fx@gnu.org>
parents: 36423
diff changeset
691 Basic decoding is done into the charsets ascii, latin-iso8859-1 and
47703
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
692 mule-unicode-*, but see also `utf-fragmentation-table' and
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
693 `ucs-mule-cjk-to-unicode'.
46496
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
694 Encodings of un-representable Unicode characters are decoded asis into
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
695 eight-bit-control and eight-bit-graphic characters.")
35542
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
696
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
697 (define-ccl-program ccl-mule-utf-8-encode-untrans
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
698 ;; The UTF-8 decoder generates a UTF-8 sequence represented by a
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
699 ;; sequence of eight-bit-control/graphic chars for an untranslatable
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
700 ;; character and an invalid byte.
56562
9274a15c1400 (utf-translate-cjk-mode): Doc fix.
Luc Teirlinck <teirllm@auburn.edu>
parents: 56095
diff changeset
701 ;;
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
702 ;; This CCL parses that sequence (the first byte is already in r1),
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
703 ;; writes out the original bytes of that sequence, and sets r5 to
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
704 ;; -1.
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
705 ;;
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
706 ;; If the eight-bit-control/graphic sequence is shorter than what r1
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
707 ;; suggests, it sets r5 and r6 to the last character read that
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
708 ;; should be handled by the next loop of a caller.
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
709 ;;
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
710 ;; Note: For UTF-8 validation, we only check if a character is
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
711 ;; eight-bit-control/graphic or not. It may result in incorrect
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
712 ;; handling of random binary data, but such data can't be encoded
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
713 ;; by UTF-8 anyway. At least, UTF-8 decoders don't generate such
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
714 ;; a sequence even if a source contains an invalid byte-sequence.
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
715 `(0
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
716 (;; Read the 2nd byte.
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
717 (read-multibyte-character r5 r6)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
718 (r0 = (r5 != ,(charset-id 'eight-bit-control)))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
719 (if ((r5 != ,(charset-id 'eight-bit-graphic)) & r0)
56562
9274a15c1400 (utf-translate-cjk-mode): Doc fix.
Luc Teirlinck <teirllm@auburn.edu>
parents: 56095
diff changeset
720 ((write r1) ; invalid UTF-8
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
721 (r1 = -1)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
722 (end)))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
723
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
724 (if (r1 <= #xC3)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
725 ;; 2-byte sequence for an originally invalid byte.
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
726 ((r6 &= #x3F)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
727 (r6 |= ((r1 & #x1F) << 6))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
728 (write r6)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
729 (r5 = -1)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
730 (end)))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
731
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
732 (write r1 r6)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
733 (r2 = r1)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
734 (r1 = -1)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
735 ;; Read the 3rd byte.
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
736 (read-multibyte-character r5 r6)
56562
9274a15c1400 (utf-translate-cjk-mode): Doc fix.
Luc Teirlinck <teirllm@auburn.edu>
parents: 56095
diff changeset
737 (r0 = (r5 != ,(charset-id 'eight-bit-control)))
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
738 (if ((r5 != ,(charset-id 'eight-bit-graphic)) & r0)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
739 (end)) ; invalid UTF-8
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
740 (write r6)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
741 (if (r2 < #xF0)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
742 ;; 3-byte sequence for an untranslated character.
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
743 ((r5 = -1)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
744 (end)))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
745 ;; Read the 4th byte.
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
746 (read-multibyte-character r5 r6)
56562
9274a15c1400 (utf-translate-cjk-mode): Doc fix.
Luc Teirlinck <teirllm@auburn.edu>
parents: 56095
diff changeset
747 (r0 = (r5 != ,(charset-id 'eight-bit-control)))
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
748 (if ((r5 != ,(charset-id 'eight-bit-graphic)) & r0)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
749 (end)) ; invalid UTF-8
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
750 ;; 4-byte sequence for an untranslated character.
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
751 (write r6)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
752 (r5 = -1)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
753 (end))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
754
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
755 ;; At EOF...
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
756 ((r5 = -1)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
757 (if (r1 >= 0)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
758 (write r1)))))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
759
35542
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
760 (define-ccl-program ccl-encode-mule-utf-8
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
761 `(1
37097
b095952a8678 (ccl-encode-mule-utf-8): Fix handling of eight-bit-control chars.
Kenichi Handa <handa@m17n.org>
parents: 36522
diff changeset
762 ((r5 = -1)
b095952a8678 (ccl-encode-mule-utf-8): Fix handling of eight-bit-control chars.
Kenichi Handa <handa@m17n.org>
parents: 36522
diff changeset
763 (loop
b095952a8678 (ccl-encode-mule-utf-8): Fix handling of eight-bit-control chars.
Kenichi Handa <handa@m17n.org>
parents: 36522
diff changeset
764 (if (r5 < 0)
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
765 (read-multibyte-character r0 r1)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
766 ;; Pre-read character is in r5 (charset-ID) and r6 (code-point).
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
767 ((r0 = r5)
37097
b095952a8678 (ccl-encode-mule-utf-8): Fix handling of eight-bit-control chars.
Kenichi Handa <handa@m17n.org>
parents: 36522
diff changeset
768 (r1 = r6)
b095952a8678 (ccl-encode-mule-utf-8): Fix handling of eight-bit-control chars.
Kenichi Handa <handa@m17n.org>
parents: 36522
diff changeset
769 (r5 = -1)))
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
770 (translate-character utf-translation-table-for-encode r0 r1)
35542
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
771
37097
b095952a8678 (ccl-encode-mule-utf-8): Fix handling of eight-bit-control chars.
Kenichi Handa <handa@m17n.org>
parents: 36522
diff changeset
772 (if (r0 == ,(charset-id 'ascii))
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
773 (write-repeat r1))
37097
b095952a8678 (ccl-encode-mule-utf-8): Fix handling of eight-bit-control chars.
Kenichi Handa <handa@m17n.org>
parents: 36522
diff changeset
774
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
775 (if (r0 == ,(charset-id 'latin-iso8859-1))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
776 ;; r1 scalar utf-8
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
777 ;; 0000 0yyy yyxx xxxx 110y yyyy 10xx xxxx
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
778 ;; 20 0000 0000 1010 0000 1100 0010 1010 0000
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
779 ;; 7f 0000 0000 1111 1111 1100 0011 1011 1111
56095
4ec2da03a87c (ccl-encode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56056
diff changeset
780 ((write ((r1 >> 6) | #xc2))
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
781 (r1 &= #x3f)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
782 (r1 |= #x80)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
783 (write-repeat r1)))
35542
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
784
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
785 (if (r0 == ,(charset-id 'mule-unicode-0100-24ff))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
786 ((r0 = ((((r1 & #x3f80) >> 7) - 32) * 96))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
787 ;; #x3f80 == (0011 1111 1000 0000)b
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
788 (r1 &= #x7f)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
789 (r1 += (r0 + 224)) ; 224 == -32 + #x0100
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
790 ;; now r1 holds scalar value
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
791 (if (r1 < #x0800)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
792 ;; 2byte encoding
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
793 ((write ((r1 >> 6) | #xC0))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
794 (r1 &= #x3F)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
795 (r1 |= #x80)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
796 (write-repeat r1))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
797 ;; 3byte encoding
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
798 ((write ((r1 >> 12) | #xE0))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
799 (write (((r1 & #x0FC0) >> 6) | #x80))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
800 (r1 &= #x3F)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
801 (r1 |= #x80)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
802 (write-repeat r1)))))
37097
b095952a8678 (ccl-encode-mule-utf-8): Fix handling of eight-bit-control chars.
Kenichi Handa <handa@m17n.org>
parents: 36522
diff changeset
803
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
804 (if (r0 == ,(charset-id 'mule-unicode-2500-33ff))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
805 ((r0 = ((((r1 & #x3f80) >> 7) - 32) * 96))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
806 (r1 &= #x7f)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
807 (r1 += (r0 + 9440)) ; 9440 == -32 + #x2500
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
808 ;; now r1 holds scalar value
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
809 (write ((r1 >> 12) | #xE0))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
810 (write (((r1 & #x0FC0) >> 6) | #x80))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
811 (r1 &= #x3F)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
812 (r1 |= #x80)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
813 (write-repeat r1)))
35542
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
814
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
815 (if (r0 == ,(charset-id 'mule-unicode-e000-ffff))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
816 ((r0 = ((((r1 & #x3f80) >> 7) - 32) * 96))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
817 (r1 &= #x7f)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
818 (r1 += (r0 + 57312)) ; 57312 == -32 + #xe000
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
819 ;; now r1 holds scalar value
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
820 (write ((r1 >> 12) | #xE0))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
821 (write (((r1 & #x0FC0) >> 6) | #x80))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
822 (r1 &= #x3F)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
823 (r1 |= #x80)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
824 (write-repeat r1)))
37097
b095952a8678 (ccl-encode-mule-utf-8): Fix handling of eight-bit-control chars.
Kenichi Handa <handa@m17n.org>
parents: 36522
diff changeset
825
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
826 (if (r0 == ,(charset-id 'eight-bit-control))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
827 ;; r1 scalar utf-8
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
828 ;; 0000 0yyy yyxx xxxx 110y yyyy 10xx xxxx
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
829 ;; 80 0000 0000 1000 0000 1100 0010 1000 0000
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
830 ;; 9f 0000 0000 1001 1111 1100 0010 1001 1111
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
831 ((write #xC2)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
832 (write-repeat r1)))
35542
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
833
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
834 (if (r0 == ,(charset-id 'eight-bit-graphic))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
835 ;; r1 scalar utf-8
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
836 ;; 0000 0yyy yyxx xxxx 110y yyyy 10xx xxxx
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
837 ;; a0 0000 0000 1010 0000 1100 0010 1010 0000
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
838 ;; ff 0000 0000 1111 1111 1100 0011 1011 1111
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
839 ((r0 = (r1 >= #xC0))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
840 (r0 &= (r1 <= #xC3))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
841 (r4 = (r1 >= #xE1))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
842 (r4 &= (r1 <= #xF7))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
843 (r0 |= r4)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
844 (if r0
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
845 ((call ccl-mule-utf-8-encode-untrans)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
846 (repeat))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
847 (write-repeat r1))))
35542
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
848
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
849 (lookup-character utf-subst-table-for-encode r0 r1)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
850 (if r7 ; lookup succeeded
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
851 (if (r0 < #x800)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
852 ;; 2byte encoding
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
853 ((write ((r0 >> 6) | #xC0))
56095
4ec2da03a87c (ccl-encode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56056
diff changeset
854 (r0 = ((r0 & #x3F) | #x80))
4ec2da03a87c (ccl-encode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56056
diff changeset
855 (write-repeat r0))
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
856 ;; 3byte encoding
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
857 ((write ((r0 >> 12) | #xE0))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
858 (write (((r0 & #x0FC0) >> 6) | #x80))
56095
4ec2da03a87c (ccl-encode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56056
diff changeset
859 (r0 = ((r0 & #x3F) | #x80))
4ec2da03a87c (ccl-encode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56056
diff changeset
860 (write-repeat r0))))
35542
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
861
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
862 ;; Unsupported character.
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
863 ;; Output U+FFFD, which is `ef bf bd' in UTF-8.
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
864 (write #xef)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
865 (write #xbf)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
866 (write-repeat #xbd))))
46496
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
867 "CCL program to encode into UTF-8.")
35542
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
868
41873
16ee1ffbef65 (ucs-mule-to-mule-unicode): New
Dave Love <fx@gnu.org>
parents: 38436
diff changeset
869
46496
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
870 (define-ccl-program ccl-untranslated-to-ucs
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
871 `(0
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
872 (if (r1 == 0)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
873 nil
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
874 (if (r0 <= #xC3) ; 2-byte encoding
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
875 ((r0 = ((r0 & #x3) << 6))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
876 (r0 |= (r1 & #x3F))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
877 (r1 = 2))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
878 (if (r2 == 0)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
879 (r1 = 0)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
880 (if (r0 < #xF0) ; 3-byte encoding, as above
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
881 ((r0 = ((r0 & #xF) << 12))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
882 (r0 |= ((r1 & #x3F) << 6))
56056
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
883 (r0 |= (r2 & #x3F))
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
884 (r1 = 3))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
885 (if (r3 == 0)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
886 (r1 = 0)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
887 ((r0 = ((r0 & #x7) << 18))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
888 (r0 |= ((r1 & #x3F) << 12))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
889 (r0 |= ((r2 & #x3F) << 6))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
890 (r0 |= (r3 & #x3F))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
891 (r1 = 4))))))))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
892 "Decode 2-, 3-, or 4-byte sequences in r0, r1, r2 [,r3] to unicodes in r0.
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
893 Set r1 to the byte length. r1 == 0 for invalid sequence.")
46496
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
894
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
895 (defvar utf-8-ccl-regs (make-vector 8 0))
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
896
41873
16ee1ffbef65 (ucs-mule-to-mule-unicode): New
Dave Love <fx@gnu.org>
parents: 38436
diff changeset
897 (defsubst utf-8-untranslated-to-ucs ()
46496
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
898 "Return the UCS code for an untranslated sequence of raw bytes at point.
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
899 Handles 2-, 3- or 4-byte sequences."
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
900 (aset utf-8-ccl-regs 0 (or (char-after) 0))
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
901 (aset utf-8-ccl-regs 1 (or (char-after (1+ (point))) 0))
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
902 (aset utf-8-ccl-regs 2 (or (char-after (+ 2 (point))) 0))
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
903 (aset utf-8-ccl-regs 3 (or (char-after (+ 3 (point))) 0))
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
904 (ccl-execute 'ccl-untranslated-to-ucs utf-8-ccl-regs))
41873
16ee1ffbef65 (ucs-mule-to-mule-unicode): New
Dave Love <fx@gnu.org>
parents: 38436
diff changeset
905
16ee1ffbef65 (ucs-mule-to-mule-unicode): New
Dave Love <fx@gnu.org>
parents: 38436
diff changeset
906 (defun utf-8-help-echo (window object position)
16ee1ffbef65 (ucs-mule-to-mule-unicode): New
Dave Love <fx@gnu.org>
parents: 38436
diff changeset
907 (format "Untranslated Unicode U+%04X"
16ee1ffbef65 (ucs-mule-to-mule-unicode): New
Dave Love <fx@gnu.org>
parents: 38436
diff changeset
908 (get-char-property position 'untranslated-utf-8 object)))
16ee1ffbef65 (ucs-mule-to-mule-unicode): New
Dave Love <fx@gnu.org>
parents: 38436
diff changeset
909
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
910 ;; We compose the untranslatable sequences into a single character,
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
911 ;; and move point to the next character.
41873
16ee1ffbef65 (ucs-mule-to-mule-unicode): New
Dave Love <fx@gnu.org>
parents: 38436
diff changeset
912 ;; This is infelicitous for editing, because there's currently no
16ee1ffbef65 (ucs-mule-to-mule-unicode): New
Dave Love <fx@gnu.org>
parents: 38436
diff changeset
913 ;; mechanism for treating compositions as atomic, but is OK for
46496
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
914 ;; display. They are composed to U+FFFD with help-echo which
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
915 ;; indicates the unicodes they represent. This function GCs too much.
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
916
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
917 ;; If utf-translate-cjk-mode is non-nil, this function is called with
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
918 ;; HASH-TABLE, which translates CJK characters into some of the CJK
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
919 ;; charsets.
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
920
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
921 (defsubst utf-8-compose (hash-table)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
922 "Put a suitable composition on an untranslatable sequence at point.
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
923 If HASH-TABLE is non-nil, first try to translate CJK characters with it.
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
924 Move point to the end of the sequence."
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
925 (utf-8-untranslated-to-ucs)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
926 (let ((l (aref utf-8-ccl-regs 1))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
927 ch)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
928 (if (> l 0)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
929 (if (and hash-table
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
930 (setq ch (gethash (aref utf-8-ccl-regs 0) hash-table)))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
931 (progn
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
932 (insert ch)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
933 (delete-region (point) (min (point-max) (+ l (point)))))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
934 (setq ch (aref utf-8-ccl-regs 0))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
935 (put-text-property (point) (min (point-max) (+ l (point)))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
936 'untranslated-utf-8 ch)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
937 (put-text-property (point) (min (point-max) (+ l (point)))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
938 'help-echo 'utf-8-help-echo)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
939 (if (= l 2)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
940 (put-text-property (point) (min (point-max) (+ l (point)))
66062
b23c01e98a4b (utf-8-compose): Display an invalid UTF-8 byte with `escape-glyph'
Kenichi Handa <handa@m17n.org>
parents: 64085
diff changeset
941 'display (propertize (format "\\%03o" ch)
b23c01e98a4b (utf-8-compose): Display an invalid UTF-8 byte with `escape-glyph'
Kenichi Handa <handa@m17n.org>
parents: 64085
diff changeset
942 'face 'escape-glyph))
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
943 (compose-region (point) (+ l (point)) ?$,3u=(B))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
944 (forward-char l))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
945 (forward-char 1))))
41873
16ee1ffbef65 (ucs-mule-to-mule-unicode): New
Dave Love <fx@gnu.org>
parents: 38436
diff changeset
946
16ee1ffbef65 (ucs-mule-to-mule-unicode): New
Dave Love <fx@gnu.org>
parents: 38436
diff changeset
947 (defcustom utf-8-compose-scripts nil
46496
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
948 "*Non-nil means compose various scripts on decoding utf-8 text."
41873
16ee1ffbef65 (ucs-mule-to-mule-unicode): New
Dave Love <fx@gnu.org>
parents: 38436
diff changeset
949 :group 'mule
59996
aac0a33f5772 Change release version from 21.4 to 22.1 throughout.
Kim F. Storm <storm@cua.dk>
parents: 59096
diff changeset
950 :version "22.1"
46496
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
951 :type 'boolean)
41873
16ee1ffbef65 (ucs-mule-to-mule-unicode): New
Dave Love <fx@gnu.org>
parents: 38436
diff changeset
952
16ee1ffbef65 (ucs-mule-to-mule-unicode): New
Dave Love <fx@gnu.org>
parents: 38436
diff changeset
953 (defun utf-8-post-read-conversion (length)
16ee1ffbef65 (ucs-mule-to-mule-unicode): New
Dave Love <fx@gnu.org>
parents: 38436
diff changeset
954 "Compose untranslated utf-8 sequences into single characters.
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
955 If `utf-translate-cjk-mode' is non-nil, tries to translate CJK characters.
41873
16ee1ffbef65 (ucs-mule-to-mule-unicode): New
Dave Love <fx@gnu.org>
parents: 38436
diff changeset
956 Also compose particular scripts if `utf-8-compose-scripts' is non-nil."
16ee1ffbef65 (ucs-mule-to-mule-unicode): New
Dave Love <fx@gnu.org>
parents: 38436
diff changeset
957 (save-excursion
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
958 (save-restriction
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
959 (narrow-to-region (point) (+ (point) length))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
960 ;; Can't do eval-when-compile to insert a multibyte constant
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
961 ;; version of the string in the loop, since it's always loaded as
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
962 ;; unibyte from a byte-compiled file.
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
963 (let ((range (string-as-multibyte "^\xc0-\xc3\xe1-\xf7"))
56800
752ef76fcc08 (utf-8-post-read-conversion): If the
Kenichi Handa <handa@m17n.org>
parents: 56562
diff changeset
964 (buffer-multibyte enable-multibyte-characters)
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
965 hash-table ch)
56800
752ef76fcc08 (utf-8-post-read-conversion): If the
Kenichi Handa <handa@m17n.org>
parents: 56562
diff changeset
966 (set-buffer-multibyte t)
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
967 (when utf-translate-cjk-mode
57727
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
968 (unless utf-translate-cjk-lang-env
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
969 ;; Check these characters in utf-translate-cjk-range.
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
970 ;; We may have to translate them to CJK charsets.
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
971 (skip-chars-forward
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
972 (concat range utf-translate-cjk-unicode-range-string))
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
973 (unless (eobp)
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
974 (utf-translate-cjk-load-tables)
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
975 (setq range
57737
e425df7605c9 (ccl-decode-mule-utf-8): Check utf-subst-table-for-decode for more
Kenichi Handa <handa@m17n.org>
parents: 57727
diff changeset
976 (concat range utf-translate-cjk-unicode-range-string)))
e425df7605c9 (ccl-decode-mule-utf-8): Check utf-subst-table-for-decode for more
Kenichi Handa <handa@m17n.org>
parents: 57727
diff changeset
977 (setq hash-table (get 'utf-subst-table-for-decode
e425df7605c9 (ccl-decode-mule-utf-8): Check utf-subst-table-for-decode for more
Kenichi Handa <handa@m17n.org>
parents: 57727
diff changeset
978 'translation-hash-table))))
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
979 (while (and (skip-chars-forward range)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
980 (not (eobp)))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
981 (setq ch (following-char))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
982 (if (< ch 256)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
983 (utf-8-compose hash-table)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
984 (if (and hash-table
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
985 (setq ch (gethash (encode-char ch 'ucs) hash-table)))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
986 (progn
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
987 (insert ch)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
988 (delete-char 1))
56800
752ef76fcc08 (utf-8-post-read-conversion): If the
Kenichi Handa <handa@m17n.org>
parents: 56562
diff changeset
989 (forward-char 1))))
752ef76fcc08 (utf-8-post-read-conversion): If the
Kenichi Handa <handa@m17n.org>
parents: 56562
diff changeset
990 (or buffer-multibyte
752ef76fcc08 (utf-8-post-read-conversion): If the
Kenichi Handa <handa@m17n.org>
parents: 56562
diff changeset
991 (set-buffer-multibyte nil)))
41873
16ee1ffbef65 (ucs-mule-to-mule-unicode): New
Dave Love <fx@gnu.org>
parents: 38436
diff changeset
992
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
993 (when (and utf-8-compose-scripts (> length 1))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
994 ;; These currently have definitions which cover the relevant
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
995 ;; unicodes. We could avoid loading thai-util &c by checking
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
996 ;; whether the region contains any characters with the appropriate
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
997 ;; categories. There aren't yet Unicode-based rules for Tibetan.
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
998 (diacritic-compose-region (point-max) (point-min))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
999 (thai-compose-region (point-max) (point-min))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
1000 (lao-compose-region (point-max) (point-min))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
1001 (devanagari-compose-region (point-max) (point-min))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
1002 (malayalam-compose-region (point-max) (point-min))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
1003 (tamil-compose-region (point-max) (point-min)))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
1004 (- (point-max) (point-min)))))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
1005
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
1006 (defun utf-8-pre-write-conversion (beg end)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
1007 "Prepare for `utf-translate-cjk-mode' to encode text between BEG and END.
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
1008 This is used as a pre-write-conversion of utf-8 coding system."
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
1009 (if (and utf-translate-cjk-mode
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
1010 (not utf-translate-cjk-lang-env)
76112
bb3eab7f9463 (utf-8-pre-write-conversion): Handle the
Kenichi Handa <handa@m17n.org>
parents: 75347
diff changeset
1011 (if (stringp beg)
bb3eab7f9463 (utf-8-pre-write-conversion): Handle the
Kenichi Handa <handa@m17n.org>
parents: 75347
diff changeset
1012 (string-match "\\cc\\|\\cj\\|\\ch" beg)
bb3eab7f9463 (utf-8-pre-write-conversion): Handle the
Kenichi Handa <handa@m17n.org>
parents: 75347
diff changeset
1013 (save-excursion
bb3eab7f9463 (utf-8-pre-write-conversion): Handle the
Kenichi Handa <handa@m17n.org>
parents: 75347
diff changeset
1014 (goto-char beg)
bb3eab7f9463 (utf-8-pre-write-conversion): Handle the
Kenichi Handa <handa@m17n.org>
parents: 75347
diff changeset
1015 (re-search-forward "\\cc\\|\\cj\\|\\ch" end t))))
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
1016 (utf-translate-cjk-load-tables))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
1017 nil)
41873
16ee1ffbef65 (ucs-mule-to-mule-unicode): New
Dave Love <fx@gnu.org>
parents: 38436
diff changeset
1018
35542
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
1019 (make-coding-system
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
1020 'mule-utf-8 4 ?u
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
1021 "UTF-8 encoding for Emacs-supported Unicode characters.
47703
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
1022 It supports Unicode characters of these ranges:
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
1023 U+0000..U+33FF, U+E000..U+FFFF.
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
1024 They correspond to these Emacs character sets:
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
1025 ascii, latin-iso8859-1, mule-unicode-0100-24ff,
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
1026 mule-unicode-2500-33ff, mule-unicode-e000-ffff
35542
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
1027
47703
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
1028 On decoding (e.g. reading a file), Unicode characters not in the above
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
1029 ranges are decoded into sequences of eight-bit-control and
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
1030 eight-bit-graphic characters to preserve their byte sequences. The
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
1031 byte sequence is preserved on i/o for valid utf-8, but not necessarily
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
1032 for invalid utf-8.
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
1033
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
1034 On encoding (e.g. writing a file), Emacs characters not belonging to
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
1035 any of the character sets listed above are encoded into the UTF-8 byte
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
1036 sequence representing U+FFFD (REPLACEMENT CHARACTER)."
35542
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
1037
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
1038 '(ccl-decode-mule-utf-8 . ccl-encode-mule-utf-8)
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
1039 `((safe-charsets
35542
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
1040 ascii
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
1041 eight-bit-control
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
1042 eight-bit-graphic
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
1043 latin-iso8859-1
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
1044 mule-unicode-0100-24ff
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
1045 mule-unicode-2500-33ff
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
1046 mule-unicode-e000-ffff
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
1047 ,@(if utf-translate-cjk-mode
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
1048 utf-translate-cjk-charsets))
36371
f6bb3ed752b4 (mule-utf-8): Set correct value for valid-codes property.
Kenichi Handa <handa@m17n.org>
parents: 36243
diff changeset
1049 (mime-charset . utf-8)
36423
aa776838b660 (mule-utf-8): Set coding-category property to coding-category-utf-8.
Kenichi Handa <handa@m17n.org>
parents: 36371
diff changeset
1050 (coding-category . coding-category-utf-8)
41873
16ee1ffbef65 (ucs-mule-to-mule-unicode): New
Dave Love <fx@gnu.org>
parents: 38436
diff changeset
1051 (valid-codes (0 . 255))
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
1052 (pre-write-conversion . utf-8-pre-write-conversion)
47703
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
1053 (post-read-conversion . utf-8-post-read-conversion)
50766
fc9cb527333d (utf-translate-cjk-mode): Update the
Kenichi Handa <handa@m17n.org>
parents: 50549
diff changeset
1054 (translation-table-for-encode . utf-translation-table-for-encode)
47703
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
1055 (dependency unify-8859-on-encoding-mode
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
1056 unify-8859-on-decoding-mode
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
1057 utf-fragment-on-decoding
55437
6e677a935fe9 Fix references to utf-translate-cjk into utf-translate-cjk-mode.
Andreas Schwab <schwab@suse.de>
parents: 54304
diff changeset
1058 utf-translate-cjk-mode)))
35542
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
1059
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
1060 (define-coding-system-alias 'utf-8 'mule-utf-8)
38436
b174db545cfd Some fixes to follow coding conventions.
Pavel Janík <Pavel@Janik.cz>
parents: 37934
diff changeset
1061
41873
16ee1ffbef65 (ucs-mule-to-mule-unicode): New
Dave Love <fx@gnu.org>
parents: 38436
diff changeset
1062 ;; I think this needs special private charsets defined for the
16ee1ffbef65 (ucs-mule-to-mule-unicode): New
Dave Love <fx@gnu.org>
parents: 38436
diff changeset
1063 ;; untranslated sequences, if it's going to work well.
16ee1ffbef65 (ucs-mule-to-mule-unicode): New
Dave Love <fx@gnu.org>
parents: 38436
diff changeset
1064
16ee1ffbef65 (ucs-mule-to-mule-unicode): New
Dave Love <fx@gnu.org>
parents: 38436
diff changeset
1065 ;;; (defun utf-8-compose-function (pos to pattern &optional string)
16ee1ffbef65 (ucs-mule-to-mule-unicode): New
Dave Love <fx@gnu.org>
parents: 38436
diff changeset
1066 ;;; (let* ((prop (get-char-property pos 'composition string))
16ee1ffbef65 (ucs-mule-to-mule-unicode): New
Dave Love <fx@gnu.org>
parents: 38436
diff changeset
1067 ;;; (l (and prop (- (cadr prop) (car prop)))))
16ee1ffbef65 (ucs-mule-to-mule-unicode): New
Dave Love <fx@gnu.org>
parents: 38436
diff changeset
1068 ;;; (cond ((and l (> l (- to pos)))
16ee1ffbef65 (ucs-mule-to-mule-unicode): New
Dave Love <fx@gnu.org>
parents: 38436
diff changeset
1069 ;;; (delete-region pos to))
16ee1ffbef65 (ucs-mule-to-mule-unicode): New
Dave Love <fx@gnu.org>
parents: 38436
diff changeset
1070 ;;; ((and (> (char-after pos) 224)
16ee1ffbef65 (ucs-mule-to-mule-unicode): New
Dave Love <fx@gnu.org>
parents: 38436
diff changeset
1071 ;;; (< (char-after pos) 256)
16ee1ffbef65 (ucs-mule-to-mule-unicode): New
Dave Love <fx@gnu.org>
parents: 38436
diff changeset
1072 ;;; (save-restriction
16ee1ffbef65 (ucs-mule-to-mule-unicode): New
Dave Love <fx@gnu.org>
parents: 38436
diff changeset
1073 ;;; (narrow-to-region pos to)
16ee1ffbef65 (ucs-mule-to-mule-unicode): New
Dave Love <fx@gnu.org>
parents: 38436
diff changeset
1074 ;;; (utf-8-compose)))
16ee1ffbef65 (ucs-mule-to-mule-unicode): New
Dave Love <fx@gnu.org>
parents: 38436
diff changeset
1075 ;;; t))))
16ee1ffbef65 (ucs-mule-to-mule-unicode): New
Dave Love <fx@gnu.org>
parents: 38436
diff changeset
1076
16ee1ffbef65 (ucs-mule-to-mule-unicode): New
Dave Love <fx@gnu.org>
parents: 38436
diff changeset
1077 ;;; (dotimes (i 96)
16ee1ffbef65 (ucs-mule-to-mule-unicode): New
Dave Love <fx@gnu.org>
parents: 38436
diff changeset
1078 ;;; (aset composition-function-table
16ee1ffbef65 (ucs-mule-to-mule-unicode): New
Dave Love <fx@gnu.org>
parents: 38436
diff changeset
1079 ;;; (+ 128 i)
16ee1ffbef65 (ucs-mule-to-mule-unicode): New
Dave Love <fx@gnu.org>
parents: 38436
diff changeset
1080 ;;; `((,(string-as-multibyte "[\200-\237\240-\377]")
16ee1ffbef65 (ucs-mule-to-mule-unicode): New
Dave Love <fx@gnu.org>
parents: 38436
diff changeset
1081 ;;; . utf-8-compose-function))))
16ee1ffbef65 (ucs-mule-to-mule-unicode): New
Dave Love <fx@gnu.org>
parents: 38436
diff changeset
1082
52401
695cf19ef79e Add arch taglines
Miles Bader <miles@gnu.org>
parents: 52284
diff changeset
1083 ;;; arch-tag: b08735b7-753b-4ae6-b754-0f3efe4515c5
38436
b174db545cfd Some fixes to follow coding conventions.
Pavel Janík <Pavel@Janik.cz>
parents: 37934
diff changeset
1084 ;;; utf-8.el ends here