annotate lisp/international/utf-8.el @ 71710:dbbc0b93cfeb

(Forcing Redisplay, Displaying Messages, Temporary Displays, Font Selection, Auto Faces, Font Lookup, Fringe Indicators, Display Margins, Image Descriptors, Showing Images, Image Cache, Button Types, Making Buttons, Manipulating Buttons, Button Buffer Commands, Display Table Format, Glyphs): Remove @tindex.
author Eli Zaretskii <eliz@gnu.org>
date Sat, 08 Jul 2006 18:11:49 +0000
parents b23c01e98a4b
children 43ccf7c7d312
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
46496
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
1 ;;; utf-8.el --- UTF-8 decoding/encoding support -*- coding: iso-2022-7bit -*-
35542
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
2
62274
c36561fe0657 Fix copyrights.
Kenichi Handa <handa@m17n.org>
parents: 59996
diff changeset
3 ;; Copyright (C) 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
c36561fe0657 Fix copyrights.
Kenichi Handa <handa@m17n.org>
parents: 59996
diff changeset
4 ;; Copyright (C) 2001, 2002, 2003, 2004
c36561fe0657 Fix copyrights.
Kenichi Handa <handa@m17n.org>
parents: 59996
diff changeset
5 ;; National Institute of Advanced Industrial Science and Technology (AIST)
c36561fe0657 Fix copyrights.
Kenichi Handa <handa@m17n.org>
parents: 59996
diff changeset
6 ;; Registration Number H14PRO021
35542
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
7
37097
b095952a8678 (ccl-encode-mule-utf-8): Fix handling of eight-bit-control chars.
Kenichi Handa <handa@m17n.org>
parents: 36522
diff changeset
8 ;; Author: TAKAHASHI Naoto <ntakahas@m17n.org>
46496
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
9 ;; Maintainer: FSF
36243
a05ae5420f85 Doc and commentary fixes.
Dave Love <fx@gnu.org>
parents: 35542
diff changeset
10 ;; Keywords: multilingual, Unicode, UTF-8, i18n
35542
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
11
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
12 ;; This file is part of GNU Emacs.
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
13
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
14 ;; GNU Emacs is free software; you can redistribute it and/or modify
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
15 ;; it under the terms of the GNU General Public License as published by
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
16 ;; the Free Software Foundation; either version 2, or (at your option)
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
17 ;; any later version.
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
18
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
19 ;; GNU Emacs is distributed in the hope that it will be useful,
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
20 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
21 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
22 ;; GNU General Public License for more details.
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
23
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
24 ;; You should have received a copy of the GNU General Public License
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
25 ;; along with GNU Emacs; see the file COPYING. If not, write to the
64085
18a818a2ee7c Update FSF's address.
Lute Kamstra <lute@gnu.org>
parents: 62274
diff changeset
26 ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
18a818a2ee7c Update FSF's address.
Lute Kamstra <lute@gnu.org>
parents: 62274
diff changeset
27 ;; Boston, MA 02110-1301, USA.
35542
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
28
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
29 ;;; Commentary:
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
30
41873
16ee1ffbef65 (ucs-mule-to-mule-unicode): New
Dave Love <fx@gnu.org>
parents: 38436
diff changeset
31 ;; The coding-system `mule-utf-8' basically supports encoding/decoding
16ee1ffbef65 (ucs-mule-to-mule-unicode): New
Dave Love <fx@gnu.org>
parents: 38436
diff changeset
32 ;; of the following character sets to and from UTF-8:
35542
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
33 ;;
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
34 ;; ascii
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
35 ;; eight-bit-control
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
36 ;; latin-iso8859-1
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
37 ;; mule-unicode-0100-24ff
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
38 ;; mule-unicode-2500-33ff
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
39 ;; mule-unicode-e000-ffff
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
40 ;;
36243
a05ae5420f85 Doc and commentary fixes.
Dave Love <fx@gnu.org>
parents: 35542
diff changeset
41 ;; On decoding, Unicode characters that do not fit into the above
a05ae5420f85 Doc and commentary fixes.
Dave Love <fx@gnu.org>
parents: 35542
diff changeset
42 ;; character sets are handled as `eight-bit-control' or
a05ae5420f85 Doc and commentary fixes.
Dave Love <fx@gnu.org>
parents: 35542
diff changeset
43 ;; `eight-bit-graphic' characters to retain the information about the
46496
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
44 ;; original byte sequence and text properties record the corresponding
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
45 ;; Unicode code point.
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
46 ;;
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
47 ;; Fixme: note that reading and writing invalid UTF-8 may not be
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
48 ;; idempotent -- fixing that needs a new charset to represent the bytes.
41873
16ee1ffbef65 (ucs-mule-to-mule-unicode): New
Dave Love <fx@gnu.org>
parents: 38436
diff changeset
49 ;;
47703
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
50 ;; Characters from other character sets can be encoded with mule-utf-8
48848
4eb835c1257d (ucs-mule-cjk-to-unicode)
Dave Love <fx@gnu.org>
parents: 47720
diff changeset
51 ;; by populating the translation table
50179
65bb5afb37ef (utf-fragment-on-decoding): Don't call
Kenichi Handa <handa@m17n.org>
parents: 50085
diff changeset
52 ;; `utf-translation-table-for-encode'. Hash tables
47703
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
53 ;; `utf-subst-table-for-decode' and `utf-subst-table-for-encode' are
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
54 ;; used to support encoding and decoding of about a quarter of the CJK
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
55 ;; space between U+3400 and U+DFFF.
36243
a05ae5420f85 Doc and commentary fixes.
Dave Love <fx@gnu.org>
parents: 35542
diff changeset
56
54304
d61b01de8cdf UTF-8 is now RFC 3629.
Eli Zaretskii <eliz@gnu.org>
parents: 52725
diff changeset
57 ;; UTF-8 is defined in RFC 3629. A sketch of the encoding is:
35542
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
58
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
59 ;; scalar | utf-8
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
60 ;; value | 1st byte | 2nd byte | 3rd byte
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
61 ;; --------------------+-----------+-----------+----------
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
62 ;; 0000 0000 0xxx xxxx | 0xxx xxxx | |
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
63 ;; 0000 0yyy yyxx xxxx | 110y yyyy | 10xx xxxx |
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
64 ;; zzzz yyyy yyxx xxxx | 1110 zzzz | 10yy yyyy | 10xx xxxx
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
65
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
66 ;;; Code:
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
67
47703
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
68 (defvar ucs-mule-to-mule-unicode (make-char-table 'translation-table nil)
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
69 "Char table mapping characters to latin-iso8859-1 or mule-unicode-*.
46496
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
70
47703
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
71 If `unify-8859-on-encoding-mode' is non-nil, this table populates the
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
72 translation-table named `utf-translation-table-for-encode'.")
46496
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
73
47703
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
74 (define-translation-table 'utf-translation-table-for-encode)
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
75
46496
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
76
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
77 ;; Map Cyrillic and Greek to iso-8859 charsets, which take half the
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
78 ;; space of mule-unicode. For Latin scripts this isn't very
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
79 ;; important. Hebrew and Arabic might go here too when there's proper
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
80 ;; support for them.
47703
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
81
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
82 (defvar utf-fragmentation-table (make-char-table 'translation-table nil)
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
83 "Char-table normally mapping non-Latin mule-unicode-* chars to iso-8859-*.
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
84
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
85 If `utf-fragment-on-decoding' is non-nil, this table populates the
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
86 translation-table named `utf-translation-table-for-decode'.")
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
87
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
88 (defvar utf-defragmentation-table (make-char-table 'translation-table nil)
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
89 "Char-table for reverse mapping of `utf-fragmentation-table'.
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
90
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
91 If `utf-fragment-on-decoding' is non-nil and
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
92 `unify-8859-on-encoding-mode' is nil, this table populates the
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
93 translation-table named `utf-translation-table-for-encode'.")
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
94
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
95 (define-translation-table 'utf-translation-table-for-decode)
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
96
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
97
48882
d17c0d3e36ba (ucs-mule-cjk-to-unicode)
Dave Love <fx@gnu.org>
parents: 48848
diff changeset
98 (defvar ucs-mule-cjk-to-unicode (make-hash-table :test 'eq)
47703
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
99 "Hash table mapping Emacs CJK character sets to Unicode code points.
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
100
55437
6e677a935fe9 Fix references to utf-translate-cjk into utf-translate-cjk-mode.
Andreas Schwab <schwab@suse.de>
parents: 54304
diff changeset
101 If `utf-translate-cjk-mode' is non-nil, this table populates the
47703
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
102 translation-hash-table named `utf-subst-table-for-encode'.")
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
103
48882
d17c0d3e36ba (ucs-mule-cjk-to-unicode)
Dave Love <fx@gnu.org>
parents: 48848
diff changeset
104 (define-translation-hash-table 'utf-subst-table-for-encode
d17c0d3e36ba (ucs-mule-cjk-to-unicode)
Dave Love <fx@gnu.org>
parents: 48848
diff changeset
105 ucs-mule-cjk-to-unicode)
47703
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
106
48882
d17c0d3e36ba (ucs-mule-cjk-to-unicode)
Dave Love <fx@gnu.org>
parents: 48848
diff changeset
107 (defvar ucs-unicode-to-mule-cjk (make-hash-table :test 'eq)
47703
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
108 "Hash table mapping Unicode code points to Emacs CJK character sets.
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
109
55437
6e677a935fe9 Fix references to utf-translate-cjk into utf-translate-cjk-mode.
Andreas Schwab <schwab@suse.de>
parents: 54304
diff changeset
110 If `utf-translate-cjk-mode' is non-nil, this table populates the
47703
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
111 translation-hash-table named `utf-subst-table-for-decode'.")
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
112
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
113 (define-translation-hash-table 'utf-subst-table-for-decode
48882
d17c0d3e36ba (ucs-mule-cjk-to-unicode)
Dave Love <fx@gnu.org>
parents: 48848
diff changeset
114 ucs-unicode-to-mule-cjk)
47703
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
115
46496
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
116 (mapc
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
117 (lambda (pair)
47703
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
118 (aset utf-fragmentation-table (car pair) (cdr pair))
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
119 (aset utf-defragmentation-table (cdr pair) (car pair)))
46496
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
120 '((?$,1&d(B . ?,F4(B) (?$,1&e(B . ?,F5(B) (?$,1&f(B . ?,F6(B) (?$,1&h(B . ?,F8(B) (?$,1&i(B . ?,F9(B)
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
121 (?$,1&j(B . ?,F:(B) (?$,1&l(B . ?,F<(B) (?$,1&n(B . ?,F>(B) (?$,1&o(B . ?,F?(B) (?$,1&p(B . ?,F@(B)
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
122 (?$,1&q(B . ?,FA(B) (?$,1&r(B . ?,FB(B) (?$,1&s(B . ?,FC(B) (?$,1&t(B . ?,FD(B) (?$,1&u(B . ?,FE(B)
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
123 (?$,1&v(B . ?,FF(B) (?$,1&w(B . ?,FG(B) (?$,1&x(B . ?,FH(B) (?$,1&y(B . ?,FI(B) (?$,1&z(B . ?,FJ(B)
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
124 (?$,1&{(B . ?,FK(B) (?$,1&|(B . ?,FL(B) (?$,1&}(B . ?,FM(B) (?$,1&~(B . ?,FN(B) (?$,1&(B . ?,FO(B)
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
125 (?$,1' (B . ?,FP(B) (?$,1'!(B . ?,FQ(B) (?$,1'#(B . ?,FS(B) (?$,1'$(B . ?,FT(B) (?$,1'%(B . ?,FU(B)
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
126 (?$,1'&(B . ?,FV(B) (?$,1''(B . ?,FW(B) (?$,1'((B . ?,FX(B) (?$,1')(B . ?,FY(B) (?$,1'*(B . ?,FZ(B)
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
127 (?$,1'+(B . ?,F[(B) (?$,1',(B . ?,F\(B) (?$,1'-(B . ?,F](B) (?$,1'.(B . ?,F^(B) (?$,1'/(B . ?,F_(B)
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
128 (?$,1'0(B . ?,F`(B) (?$,1'1(B . ?,Fa(B) (?$,1'2(B . ?,Fb(B) (?$,1'3(B . ?,Fc(B) (?$,1'4(B . ?,Fd(B)
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
129 (?$,1'5(B . ?,Fe(B) (?$,1'6(B . ?,Ff(B) (?$,1'7(B . ?,Fg(B) (?$,1'8(B . ?,Fh(B) (?$,1'9(B . ?,Fi(B)
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
130 (?$,1':(B . ?,Fj(B) (?$,1';(B . ?,Fk(B) (?$,1'<(B . ?,Fl(B) (?$,1'=(B . ?,Fm(B) (?$,1'>(B . ?,Fn(B)
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
131 (?$,1'?(B . ?,Fo(B) (?$,1'@(B . ?,Fp(B) (?$,1'A(B . ?,Fq(B) (?$,1'B(B . ?,Fr(B) (?$,1'C(B . ?,Fs(B)
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
132 (?$,1'D(B . ?,Ft(B) (?$,1'E(B . ?,Fu(B) (?$,1'F(B . ?,Fv(B) (?$,1'G(B . ?,Fw(B) (?$,1'H(B . ?,Fx(B)
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
133 (?$,1'I(B . ?,Fy(B) (?$,1'J(B . ?,Fz(B) (?$,1'K(B . ?,F{(B) (?$,1'L(B . ?,F|(B) (?$,1'M(B . ?,F}(B)
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
134 (?$,1'N(B . ?,F~(B)
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
135
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
136 (?$,1(!(B . ?,L!(B) (?$,1("(B . ?,L"(B) (?$,1(#(B . ?,L#(B) (?$,1($(B . ?,L$(B)
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
137 (?$,1(%(B . ?,L%(B) (?$,1(&(B . ?,L&(B) (?$,1('(B . ?,L'(B) (?$,1(((B . ?,L((B) (?$,1()(B . ?,L)(B)
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
138 (?$,1(*(B . ?,L*(B) (?$,1(+(B . ?,L+(B) (?$,1(,(B . ?,L,(B) (?$,1(.(B . ?,L.(B) (?$,1(/(B . ?,L/(B)
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
139 (?$,1(0(B . ?,L0(B) (?$,1(1(B . ?,L1(B) (?$,1(2(B . ?,L2(B) (?$,1(3(B . ?,L3(B) (?$,1(4(B . ?,L4(B)
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
140 (?$,1(5(B . ?,L5(B) (?$,1(6(B . ?,L6(B) (?$,1(7(B . ?,L7(B) (?$,1(8(B . ?,L8(B) (?$,1(9(B . ?,L9(B)
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
141 (?$,1(:(B . ?,L:(B) (?$,1(;(B . ?,L;(B) (?$,1(<(B . ?,L<(B) (?$,1(=(B . ?,L=(B) (?$,1(>(B . ?,L>(B)
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
142 (?$,1(?(B . ?,L?(B) (?$,1(@(B . ?,L@(B) (?$,1(A(B . ?,LA(B) (?$,1(B(B . ?,LB(B) (?$,1(C(B . ?,LC(B)
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
143 (?$,1(D(B . ?,LD(B) (?$,1(E(B . ?,LE(B) (?$,1(F(B . ?,LF(B) (?$,1(G(B . ?,LG(B) (?$,1(H(B . ?,LH(B)
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
144 (?$,1(I(B . ?,LI(B) (?$,1(J(B . ?,LJ(B) (?$,1(K(B . ?,LK(B) (?$,1(L(B . ?,LL(B) (?$,1(M(B . ?,LM(B)
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
145 (?$,1(N(B . ?,LN(B) (?$,1(O(B . ?,LO(B) (?$,1(P(B . ?,LP(B) (?$,1(Q(B . ?,LQ(B) (?$,1(R(B . ?,LR(B)
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
146 (?$,1(S(B . ?,LS(B) (?$,1(T(B . ?,LT(B) (?$,1(U(B . ?,LU(B) (?$,1(V(B . ?,LV(B) (?$,1(W(B . ?,LW(B)
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
147 (?$,1(X(B . ?,LX(B) (?$,1(Y(B . ?,LY(B) (?$,1(Z(B . ?,LZ(B) (?$,1([(B . ?,L[(B) (?$,1(\(B . ?,L\(B)
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
148 (?$,1(](B . ?,L](B) (?$,1(^(B . ?,L^(B) (?$,1(_(B . ?,L_(B) (?$,1(`(B . ?,L`(B) (?$,1(a(B . ?,La(B)
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
149 (?$,1(b(B . ?,Lb(B) (?$,1(c(B . ?,Lc(B) (?$,1(d(B . ?,Ld(B) (?$,1(e(B . ?,Le(B) (?$,1(f(B . ?,Lf(B)
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
150 (?$,1(g(B . ?,Lg(B) (?$,1(h(B . ?,Lh(B) (?$,1(i(B . ?,Li(B) (?$,1(j(B . ?,Lj(B) (?$,1(k(B . ?,Lk(B)
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
151 (?$,1(l(B . ?,Ll(B) (?$,1(m(B . ?,Lm(B) (?$,1(n(B . ?,Ln(B) (?$,1(o(B . ?,Lo(B) (?$,1(q(B . ?,Lq(B)
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
152 (?$,1(r(B . ?,Lr(B) (?$,1(s(B . ?,Ls(B) (?$,1(t(B . ?,Lt(B) (?$,1(u(B . ?,Lu(B) (?$,1(v(B . ?,Lv(B)
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
153 (?$,1(w(B . ?,Lw(B) (?$,1(x(B . ?,Lx(B) (?$,1(y(B . ?,Ly(B) (?$,1(z(B . ?,Lz(B) (?$,1({(B . ?,L{(B)
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
154 (?$,1(|(B . ?,L|(B) (?$,1(~(B . ?,L~(B) (?$,1((B . ?,L(B)))
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
155
47703
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
156
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
157 (defcustom utf-fragment-on-decoding nil
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
158 "Whether or not to decode some chars in UTF-8/16 text into iso8859 charsets.
46496
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
159 Setting this means that the relevant Cyrillic and Greek characters are
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
160 decoded into the iso8859 charsets rather than into
47231
2d6a05542b5b *** empty log message ***
Kenichi Handa <handa@m17n.org>
parents: 46676
diff changeset
161 mule-unicode-0100-24ff. The iso8859 charsets take half as much space
46496
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
162 in the buffer, but using them may affect how the buffer can be re-encoded
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
163 and may require a different input method to search for them, for instance.
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
164 See `unify-8859-on-decoding-mode' and `unify-8859-on-encoding-mode'
47231
2d6a05542b5b *** empty log message ***
Kenichi Handa <handa@m17n.org>
parents: 46676
diff changeset
165 for mechanisms to make this largely transparent.
2d6a05542b5b *** empty log message ***
Kenichi Handa <handa@m17n.org>
parents: 46676
diff changeset
166
2d6a05542b5b *** empty log message ***
Kenichi Handa <handa@m17n.org>
parents: 46676
diff changeset
167 Setting this variable outside customize has no effect."
46496
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
168 :set (lambda (s v)
47703
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
169 (if v
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
170 (progn
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
171 (define-translation-table 'utf-translation-table-for-decode
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
172 utf-fragmentation-table)
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
173 ;; Even if unify-8859-on-encoding-mode is off, make
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
174 ;; mule-utf-* encode characters in
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
175 ;; utf-fragmentation-table.
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
176 (unless (eq (get 'utf-translation-table-for-encode
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
177 'translation-table)
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
178 ucs-mule-to-mule-unicode)
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
179 (define-translation-table 'utf-translation-table-for-encode
50179
65bb5afb37ef (utf-fragment-on-decoding): Don't call
Kenichi Handa <handa@m17n.org>
parents: 50085
diff changeset
180 utf-defragmentation-table)))
47703
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
181 (define-translation-table 'utf-translation-table-for-decode)
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
182 ;; When unify-8859-on-encoding-mode is off, be sure to make
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
183 ;; mule-utf-* disabled for characters in
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
184 ;; utf-fragmentation-table.
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
185 (unless (eq (get 'utf-translation-table-for-encode
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
186 'translation-table)
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
187 ucs-mule-to-mule-unicode)
50179
65bb5afb37ef (utf-fragment-on-decoding): Don't call
Kenichi Handa <handa@m17n.org>
parents: 50085
diff changeset
188 (define-translation-table 'utf-translation-table-for-encode)))
46496
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
189 (set-default s v))
59996
aac0a33f5772 Change release version from 21.4 to 22.1 throughout.
Kim F. Storm <storm@cua.dk>
parents: 59096
diff changeset
190 :version "22.1"
46496
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
191 :type 'boolean
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
192 :group 'mule)
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
193
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
194
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
195 (defconst utf-translate-cjk-charsets '(chinese-gb2312
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
196 chinese-big5-1 chinese-big5-2
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
197 japanese-jisx0208 japanese-jisx0212
57761
13239a8e9e80 (utf-translate-cjk-charsets): Add katakana-jisx0201.
Kenichi Handa <handa@m17n.org>
parents: 57737
diff changeset
198 katakana-jisx0201
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
199 korean-ksc5601)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
200 "List of charsets supported by `utf-translate-cjk-mode'.")
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
201
57727
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
202 (defvar utf-translate-cjk-lang-env nil
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
203 "Language environment in which tables for `utf-translate-cjk-mode' are loaded.
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
204 The value nil means that the tables are not yet loaded.")
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
205
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
206 (defvar utf-translate-cjk-unicode-range)
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
207
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
208 ;; String generated from utf-translate-cjk-unicode-range. It is
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
209 ;; suitable as an argument to `skip-chars-forward'.
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
210 (defvar utf-translate-cjk-unicode-range-string nil)
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
211
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
212 (defun utf-translate-cjk-set-unicode-range (range)
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
213 (setq utf-translate-cjk-unicode-range range)
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
214 (setq utf-translate-cjk-unicode-range-string
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
215 (let ((decode-char-no-trans
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
216 #'(lambda (x)
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
217 (cond ((< x #x100) (make-char 'latin-iso8859-1 x))
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
218 ((< x #x2500)
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
219 (setq x (- x #x100))
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
220 (make-char 'mule-unicode-0100-24ff
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
221 (+ (/ x 96) 32) (+ (% x 96) 32)))
59996
aac0a33f5772 Change release version from 21.4 to 22.1 throughout.
Kim F. Storm <storm@cua.dk>
parents: 59096
diff changeset
222 ((< x #x3400)
57727
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
223 (setq x (- x #x2500))
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
224 (make-char 'mule-unicode-2500-33ff
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
225 (+ (/ x 96) 32) (+ (% x 96) 32)))
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
226 (t
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
227 (setq x (- x #xe000))
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
228 (make-char 'mule-unicode-e000-ffff
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
229 (+ (/ x 96) 32) (+ (% x 96) 32))))))
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
230 ranges from to)
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
231 (dolist (elt range)
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
232 (setq from (max #xA0 (car elt)) to (min #xffff (cdr elt)))
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
233 (if (and (>= to #x3400) (< to #xE000))
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
234 (setq to #x33FF))
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
235 (cond ((< from #x100)
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
236 (if (>= to #xE000)
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
237 (setq ranges (cons (cons #xE000 to) ranges)
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
238 to #x33FF))
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
239 (if (>= to #x2500)
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
240 (setq ranges (cons (cons #x2500 to) ranges)
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
241 to #x24FF))
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
242 (if (>= to #x100)
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
243 (setq ranges (cons (cons #x100 to) ranges)
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
244 to #xFF)))
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
245 ((< from #x2500)
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
246 (if (>= to #xE000)
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
247 (setq ranges (cons (cons #xE000 to) ranges)
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
248 to #x33FF))
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
249 (if (>= to #x2500)
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
250 (setq ranges (cons (cons #x2500 to) ranges)
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
251 to #x24FF)))
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
252 ((< from #x3400)
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
253 (if (>= to #xE000)
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
254 (setq ranges (cons (cons #xE000 to) ranges)
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
255 to #x33FF))))
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
256 (if (<= from to)
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
257 (setq ranges (cons (cons from to) ranges))))
59996
aac0a33f5772 Change release version from 21.4 to 22.1 throughout.
Kim F. Storm <storm@cua.dk>
parents: 59096
diff changeset
258 (mapconcat #'(lambda (x)
aac0a33f5772 Change release version from 21.4 to 22.1 throughout.
Kim F. Storm <storm@cua.dk>
parents: 59096
diff changeset
259 (format "%c-%c"
57727
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
260 (funcall decode-char-no-trans (car x))
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
261 (funcall decode-char-no-trans (cdr x))))
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
262 ranges "")))
57737
e425df7605c9 (ccl-decode-mule-utf-8): Check utf-subst-table-for-decode for more
Kenichi Handa <handa@m17n.org>
parents: 57727
diff changeset
263 ;; This forces loading and setting of tables for
e425df7605c9 (ccl-decode-mule-utf-8): Check utf-subst-table-for-decode for more
Kenichi Handa <handa@m17n.org>
parents: 57727
diff changeset
264 ;; utf-translate-cjk-mode.
e425df7605c9 (ccl-decode-mule-utf-8): Check utf-subst-table-for-decode for more
Kenichi Handa <handa@m17n.org>
parents: 57727
diff changeset
265 (setq utf-translate-cjk-lang-env nil
e425df7605c9 (ccl-decode-mule-utf-8): Check utf-subst-table-for-decode for more
Kenichi Handa <handa@m17n.org>
parents: 57727
diff changeset
266 ucs-mule-cjk-to-unicode (make-hash-table :test 'eq)
e425df7605c9 (ccl-decode-mule-utf-8): Check utf-subst-table-for-decode for more
Kenichi Handa <handa@m17n.org>
parents: 57727
diff changeset
267 ucs-unicode-to-mule-cjk (make-hash-table :test 'eq)))
57727
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
268
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
269 (defcustom utf-translate-cjk-unicode-range '((#x2e80 . #xd7a3)
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
270 (#xff00 . #xffef))
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
271 "List of Unicode code ranges supported by `utf-translate-cjk-mode'.
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
272 Setting this variable directly does not take effect;
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
273 use either \\[customize] or the function
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
274 `utf-translate-cjk-set-unicode-range'."
59996
aac0a33f5772 Change release version from 21.4 to 22.1 throughout.
Kim F. Storm <storm@cua.dk>
parents: 59096
diff changeset
275 :version "22.1"
57727
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
276 :type '(repeat (cons integer integer))
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
277 :set (lambda (symbol value)
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
278 (utf-translate-cjk-set-unicode-range value))
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
279 :group 'mule)
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
280
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
281 ;; Return non-nil if CODE-POINT is in `utf-translate-cjk-unicode-range'.
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
282 (defsubst utf-translate-cjk-substitutable-p (code-point)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
283 (let ((tail utf-translate-cjk-unicode-range)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
284 elt)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
285 (while tail
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
286 (setq elt (car tail) tail (cdr tail))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
287 (if (and (>= code-point (car elt)) (<= code-point (cdr elt)))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
288 (setq tail nil)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
289 (setq elt nil)))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
290 elt))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
291
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
292 (defun utf-translate-cjk-load-tables ()
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
293 "Load tables for `utf-translate-cjk-mode'."
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
294 ;; Fixme: Allow the use of the CJK charsets to be
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
295 ;; customized by reordering and possible omission.
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
296 (let ((redefined (< (hash-table-size ucs-mule-cjk-to-unicode) 43000)))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
297 (if redefined
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
298 ;; Redefine them with realistic initial sizes and a
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
299 ;; smallish rehash size to avoid wasting significant
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
300 ;; space after they're built.
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
301 (setq ucs-mule-cjk-to-unicode
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
302 (make-hash-table :test 'eq :size 43000 :rehash-size 1000)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
303 ucs-unicode-to-mule-cjk
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
304 (make-hash-table :test 'eq :size 21500 :rehash-size 1000)))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
305
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
306 ;; Load the files explicitly, to avoid having to keep
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
307 ;; around the large tables they contain (as well as the
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
308 ;; ones which get built).
59096
2447136abfc1 (utf-translate-cjk-load-tables): Bind
Kenichi Handa <handa@m17n.org>
parents: 57761
diff changeset
309 ;; Here we bind coding-system-for-read to nil so that coding tags
2447136abfc1 (utf-translate-cjk-load-tables): Bind
Kenichi Handa <handa@m17n.org>
parents: 57761
diff changeset
310 ;; in the files are respected even if the files are not yet
2447136abfc1 (utf-translate-cjk-load-tables): Bind
Kenichi Handa <handa@m17n.org>
parents: 57761
diff changeset
311 ;; byte-compiled
2447136abfc1 (utf-translate-cjk-load-tables): Bind
Kenichi Handa <handa@m17n.org>
parents: 57761
diff changeset
312 (let ((coding-system-for-read nil))
2447136abfc1 (utf-translate-cjk-load-tables): Bind
Kenichi Handa <handa@m17n.org>
parents: 57761
diff changeset
313 (cond ((string= "Korean" current-language-environment)
2447136abfc1 (utf-translate-cjk-load-tables): Bind
Kenichi Handa <handa@m17n.org>
parents: 57761
diff changeset
314 (load "subst-jis")
2447136abfc1 (utf-translate-cjk-load-tables): Bind
Kenichi Handa <handa@m17n.org>
parents: 57761
diff changeset
315 (load "subst-big5")
2447136abfc1 (utf-translate-cjk-load-tables): Bind
Kenichi Handa <handa@m17n.org>
parents: 57761
diff changeset
316 (load "subst-gb2312")
2447136abfc1 (utf-translate-cjk-load-tables): Bind
Kenichi Handa <handa@m17n.org>
parents: 57761
diff changeset
317 (load "subst-ksc"))
2447136abfc1 (utf-translate-cjk-load-tables): Bind
Kenichi Handa <handa@m17n.org>
parents: 57761
diff changeset
318 ((string= "Chinese-BIG5" current-language-environment)
2447136abfc1 (utf-translate-cjk-load-tables): Bind
Kenichi Handa <handa@m17n.org>
parents: 57761
diff changeset
319 (load "subst-jis")
2447136abfc1 (utf-translate-cjk-load-tables): Bind
Kenichi Handa <handa@m17n.org>
parents: 57761
diff changeset
320 (load "subst-ksc")
2447136abfc1 (utf-translate-cjk-load-tables): Bind
Kenichi Handa <handa@m17n.org>
parents: 57761
diff changeset
321 (load "subst-gb2312")
2447136abfc1 (utf-translate-cjk-load-tables): Bind
Kenichi Handa <handa@m17n.org>
parents: 57761
diff changeset
322 (load "subst-big5"))
2447136abfc1 (utf-translate-cjk-load-tables): Bind
Kenichi Handa <handa@m17n.org>
parents: 57761
diff changeset
323 ((string= "Chinese-GB" current-language-environment)
2447136abfc1 (utf-translate-cjk-load-tables): Bind
Kenichi Handa <handa@m17n.org>
parents: 57761
diff changeset
324 (load "subst-jis")
2447136abfc1 (utf-translate-cjk-load-tables): Bind
Kenichi Handa <handa@m17n.org>
parents: 57761
diff changeset
325 (load "subst-ksc")
2447136abfc1 (utf-translate-cjk-load-tables): Bind
Kenichi Handa <handa@m17n.org>
parents: 57761
diff changeset
326 (load "subst-big5")
2447136abfc1 (utf-translate-cjk-load-tables): Bind
Kenichi Handa <handa@m17n.org>
parents: 57761
diff changeset
327 (load "subst-gb2312"))
2447136abfc1 (utf-translate-cjk-load-tables): Bind
Kenichi Handa <handa@m17n.org>
parents: 57761
diff changeset
328 (t
2447136abfc1 (utf-translate-cjk-load-tables): Bind
Kenichi Handa <handa@m17n.org>
parents: 57761
diff changeset
329 (load "subst-ksc")
2447136abfc1 (utf-translate-cjk-load-tables): Bind
Kenichi Handa <handa@m17n.org>
parents: 57761
diff changeset
330 (load "subst-gb2312")
2447136abfc1 (utf-translate-cjk-load-tables): Bind
Kenichi Handa <handa@m17n.org>
parents: 57761
diff changeset
331 (load "subst-big5")
2447136abfc1 (utf-translate-cjk-load-tables): Bind
Kenichi Handa <handa@m17n.org>
parents: 57761
diff changeset
332 (load "subst-jis")))) ; jis covers as much as big5, gb2312
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
333
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
334 (when redefined
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
335 (define-translation-hash-table 'utf-subst-table-for-decode
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
336 ucs-unicode-to-mule-cjk)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
337 (define-translation-hash-table 'utf-subst-table-for-encode
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
338 ucs-mule-cjk-to-unicode)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
339 (set-char-table-extra-slot (get 'utf-translation-table-for-encode
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
340 'translation-table)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
341 1 ucs-mule-cjk-to-unicode))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
342
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
343 (setq utf-translate-cjk-lang-env current-language-environment)))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
344
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
345 (defun utf-lookup-subst-table-for-decode (code-point)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
346 (if (and utf-translate-cjk-mode
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
347 (not utf-translate-cjk-lang-env)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
348 (utf-translate-cjk-substitutable-p code-point))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
349 (utf-translate-cjk-load-tables))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
350 (gethash code-point
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
351 (get 'utf-subst-table-for-decode 'translation-hash-table)))
56562
9274a15c1400 (utf-translate-cjk-mode): Doc fix.
Luc Teirlinck <teirllm@auburn.edu>
parents: 56095
diff changeset
352
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
353
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
354 (defun utf-lookup-subst-table-for-encode (char)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
355 (if (and utf-translate-cjk-mode
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
356 (not utf-translate-cjk-lang-env)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
357 (memq (char-charset char) utf-translate-cjk-charsets))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
358 (utf-translate-cjk-load-tables))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
359 (gethash char
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
360 (get 'utf-subst-table-for-encode 'translation-hash-table)))
56562
9274a15c1400 (utf-translate-cjk-mode): Doc fix.
Luc Teirlinck <teirllm@auburn.edu>
parents: 56095
diff changeset
361
50341
f49a20cb84ed (utf-translate-cjk-mode): Minor mode,
Kai Großjohann <kgrossjo@eu.uu.net>
parents: 50207
diff changeset
362 (define-minor-mode utf-translate-cjk-mode
56562
9274a15c1400 (utf-translate-cjk-mode): Doc fix.
Luc Teirlinck <teirllm@auburn.edu>
parents: 56095
diff changeset
363 "Toggle whether UTF based coding systems de/encode CJK characters.
9274a15c1400 (utf-translate-cjk-mode): Doc fix.
Luc Teirlinck <teirllm@auburn.edu>
parents: 56095
diff changeset
364 If ARG is an integer, enable if ARG is positive and disable if
9274a15c1400 (utf-translate-cjk-mode): Doc fix.
Luc Teirlinck <teirllm@auburn.edu>
parents: 56095
diff changeset
365 zero or negative. This is a minor mode.
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
366 Enabling this allows the coding systems mule-utf-8,
51628
abfc7d48b476 (utf-translate-cjk-mode): Fix docstring.
Kenichi Handa <handa@m17n.org>
parents: 50766
diff changeset
367 mule-utf-16le and mule-utf-16be to encode characters in the charsets
48848
4eb835c1257d (ucs-mule-cjk-to-unicode)
Dave Love <fx@gnu.org>
parents: 47720
diff changeset
368 `korean-ksc5601', `chinese-gb2312', `chinese-big5-1',
4eb835c1257d (ucs-mule-cjk-to-unicode)
Dave Love <fx@gnu.org>
parents: 47720
diff changeset
369 `chinese-big5-2', `japanese-jisx0208' and `japanese-jisx0212', and to
4eb835c1257d (ucs-mule-cjk-to-unicode)
Dave Love <fx@gnu.org>
parents: 47720
diff changeset
370 decode the corresponding unicodes into such characters.
46496
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
371
48848
4eb835c1257d (ucs-mule-cjk-to-unicode)
Dave Love <fx@gnu.org>
parents: 47720
diff changeset
372 Where the charsets overlap, the one preferred for decoding is chosen
4eb835c1257d (ucs-mule-cjk-to-unicode)
Dave Love <fx@gnu.org>
parents: 47720
diff changeset
373 according to the language environment in effect when this option is
4eb835c1257d (ucs-mule-cjk-to-unicode)
Dave Love <fx@gnu.org>
parents: 47720
diff changeset
374 turned on: ksc5601 for Korean, gb2312 for Chinese-GB, big5 for
4eb835c1257d (ucs-mule-cjk-to-unicode)
Dave Love <fx@gnu.org>
parents: 47720
diff changeset
375 Chinese-Big5 and jisx for other environments.
4eb835c1257d (ucs-mule-cjk-to-unicode)
Dave Love <fx@gnu.org>
parents: 47720
diff changeset
376
56562
9274a15c1400 (utf-translate-cjk-mode): Doc fix.
Luc Teirlinck <teirllm@auburn.edu>
parents: 56095
diff changeset
377 This mode is on by default. If you are not interested in CJK
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
378 characters and want to avoid some overhead on encoding/decoding
56562
9274a15c1400 (utf-translate-cjk-mode): Doc fix.
Luc Teirlinck <teirllm@auburn.edu>
parents: 56095
diff changeset
379 by the above coding systems, you can customize the user option
9274a15c1400 (utf-translate-cjk-mode): Doc fix.
Luc Teirlinck <teirllm@auburn.edu>
parents: 56095
diff changeset
380 `utf-translate-cjk-mode' to nil."
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
381 :init-value t
59996
aac0a33f5772 Change release version from 21.4 to 22.1 throughout.
Kim F. Storm <storm@cua.dk>
parents: 59096
diff changeset
382 :version "22.1"
46496
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
383 :type 'boolean
50341
f49a20cb84ed (utf-translate-cjk-mode): Minor mode,
Kai Großjohann <kgrossjo@eu.uu.net>
parents: 50207
diff changeset
384 :group 'mule
f49a20cb84ed (utf-translate-cjk-mode): Minor mode,
Kai Großjohann <kgrossjo@eu.uu.net>
parents: 50207
diff changeset
385 :global t
f49a20cb84ed (utf-translate-cjk-mode): Minor mode,
Kai Großjohann <kgrossjo@eu.uu.net>
parents: 50207
diff changeset
386 (if utf-translate-cjk-mode
f49a20cb84ed (utf-translate-cjk-mode): Minor mode,
Kai Großjohann <kgrossjo@eu.uu.net>
parents: 50207
diff changeset
387 (progn
50766
fc9cb527333d (utf-translate-cjk-mode): Update the
Kenichi Handa <handa@m17n.org>
parents: 50549
diff changeset
388 (define-translation-hash-table 'utf-subst-table-for-decode
fc9cb527333d (utf-translate-cjk-mode): Update the
Kenichi Handa <handa@m17n.org>
parents: 50549
diff changeset
389 ucs-unicode-to-mule-cjk)
fc9cb527333d (utf-translate-cjk-mode): Update the
Kenichi Handa <handa@m17n.org>
parents: 50549
diff changeset
390 (define-translation-hash-table 'utf-subst-table-for-encode
fc9cb527333d (utf-translate-cjk-mode): Update the
Kenichi Handa <handa@m17n.org>
parents: 50549
diff changeset
391 ucs-mule-cjk-to-unicode)
fc9cb527333d (utf-translate-cjk-mode): Update the
Kenichi Handa <handa@m17n.org>
parents: 50549
diff changeset
392 (set-char-table-extra-slot (get 'utf-translation-table-for-encode
fc9cb527333d (utf-translate-cjk-mode): Update the
Kenichi Handa <handa@m17n.org>
parents: 50549
diff changeset
393 'translation-table)
fc9cb527333d (utf-translate-cjk-mode): Update the
Kenichi Handa <handa@m17n.org>
parents: 50549
diff changeset
394 1 ucs-mule-cjk-to-unicode))
50549
c8525ac04d76 (utf-translate-cjk-mode): Fix incorrect
Kenichi Handa <handa@m17n.org>
parents: 50341
diff changeset
395 (define-translation-hash-table 'utf-subst-table-for-decode
c8525ac04d76 (utf-translate-cjk-mode): Fix incorrect
Kenichi Handa <handa@m17n.org>
parents: 50341
diff changeset
396 (make-hash-table :test 'eq))
c8525ac04d76 (utf-translate-cjk-mode): Fix incorrect
Kenichi Handa <handa@m17n.org>
parents: 50341
diff changeset
397 (define-translation-hash-table 'utf-subst-table-for-encode
50766
fc9cb527333d (utf-translate-cjk-mode): Update the
Kenichi Handa <handa@m17n.org>
parents: 50549
diff changeset
398 (make-hash-table :test 'eq))
fc9cb527333d (utf-translate-cjk-mode): Update the
Kenichi Handa <handa@m17n.org>
parents: 50549
diff changeset
399 (set-char-table-extra-slot (get 'utf-translation-table-for-encode
fc9cb527333d (utf-translate-cjk-mode): Update the
Kenichi Handa <handa@m17n.org>
parents: 50549
diff changeset
400 'translation-table)
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
401 1 nil))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
402
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
403 ;; Update safe-chars of mule-utf-* coding systems.
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
404 (dolist (elt (coding-system-list t))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
405 (if (string-match "^mule-utf" (symbol-name elt))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
406 (let ((safe-charsets (coding-system-get elt 'safe-charsets))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
407 (safe-chars (coding-system-get elt 'safe-chars))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
408 (need-update nil))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
409 (dolist (charset utf-translate-cjk-charsets)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
410 (unless (eq utf-translate-cjk-mode (memq charset safe-charsets))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
411 (setq safe-charsets
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
412 (if utf-translate-cjk-mode
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
413 (cons charset safe-charsets)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
414 (delq charset safe-charsets))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
415 need-update t)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
416 (aset safe-chars (make-char charset) utf-translate-cjk-mode)))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
417 (when need-update
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
418 (coding-system-put elt 'safe-charsets safe-charsets)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
419 (define-coding-system-internal elt))))))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
420
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
421 (define-ccl-program ccl-mule-utf-untrans
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
422 ;; R0 is an untranslatable Unicode code-point (U+3500..U+DFFF or
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
423 ;; U+10000..U+10FFFF) or an invaid byte (#x00..#xFF). Write
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
424 ;; eight-bit-control/graphic sequence (2 to 4 chars) representing
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
425 ;; UTF-8 sequence of r0. Registers r4, r5, r6 are modified.
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
426 ;;
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
427 ;; This is a subroutine because we assume that this is called very
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
428 ;; rarely (so we don't have to worry about the overhead of the
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
429 ;; call).
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
430 `(0
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
431 ((r5 = ,(charset-id 'eight-bit-control))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
432 (r6 = ,(charset-id 'eight-bit-graphic))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
433 (if (r0 < #x100)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
434 ((r4 = ((r0 >> 6) | #xC0))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
435 (write-multibyte-character r6 r4))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
436 ((if (r0 < #x10000)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
437 ((r4 = ((r0 >> 12) | #xE0))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
438 (write-multibyte-character r6 r4))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
439 ((r4 = ((r0 >> 18) | #xF0))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
440 (write-multibyte-character r6 r4)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
441 (r4 = (((r0 >> 12) & #x3F) | #x80))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
442 (if (r4 < #xA0)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
443 (write-multibyte-character r5 r4)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
444 (write-multibyte-character r6 r4))))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
445 (r4 = (((r0 >> 6) & #x3F) | #x80))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
446 (if (r4 < #xA0)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
447 (write-multibyte-character r5 r4)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
448 (write-multibyte-character r6 r4))))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
449 (r4 = ((r0 & #x3F) | #x80))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
450 (if (r4 < #xA0)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
451 (write-multibyte-character r5 r4)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
452 (write-multibyte-character r6 r4)))))
46496
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
453
35542
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
454 (define-ccl-program ccl-decode-mule-utf-8
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
455 ;;
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
456 ;; charset | bytes in utf-8 | bytes in emacs
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
457 ;; -----------------------+----------------+---------------
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
458 ;; ascii | 1 | 1
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
459 ;; -----------------------+----------------+---------------
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
460 ;; eight-bit-control | 2 | 2
41873
16ee1ffbef65 (ucs-mule-to-mule-unicode): New
Dave Love <fx@gnu.org>
parents: 38436
diff changeset
461 ;; eight-bit-graphic | 2 | 1
35542
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
462 ;; latin-iso8859-1 | 2 | 2
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
463 ;; -----------------------+----------------+---------------
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
464 ;; mule-unicode-0100-24ff | 2 | 4
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
465 ;; (< 0800) | |
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
466 ;; -----------------------+----------------+---------------
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
467 ;; mule-unicode-0100-24ff | 3 | 4
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
468 ;; (>= 0800) | |
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
469 ;; mule-unicode-2500-33ff | 3 | 4
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
470 ;; mule-unicode-e000-ffff | 3 | 4
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
471 ;; -----------------------+----------------+---------------
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
472 ;; invalid byte | 1 | 2
35542
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
473 ;;
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
474 ;; Thus magnification factor is two.
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
475 ;;
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
476 `(2
56056
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
477 ((r6 = ,(charset-id 'latin-iso8859-1))
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
478 (read r0)
37934
88389fa9b713 (ccl-decode-mule-utf-8): Handle
Gerd Moellmann <gerd@gnu.org>
parents: 37097
diff changeset
479 (loop
35542
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
480 (if (r0 < #x80)
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
481 ;; 1-byte encoding, i.e., ascii
56056
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
482 (write-read-repeat r0))
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
483 (if (r0 < #xc2)
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
484 ;; continuation byte (invalid here) or 1st byte of overlong
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
485 ;; 2-byte sequence.
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
486 ((call ccl-mule-utf-untrans)
56056
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
487 (r6 = ,(charset-id 'latin-iso8859-1))
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
488 (read r0)
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
489 (repeat)))
35542
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
490
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
491 ;; Read the 2nd byte.
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
492 (read r1)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
493 (if ((r1 & #b11000000) != #b10000000) ; Invalid 2nd byte
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
494 ((call ccl-mule-utf-untrans)
56056
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
495 (r6 = ,(charset-id 'latin-iso8859-1))
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
496 ;; Handle it in the next loop.
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
497 (r0 = r1)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
498 (repeat)))
46496
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
499
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
500 (if (r0 < #xe0)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
501 ;; 2-byte encoding 00000yyyyyxxxxxx = 110yyyyy 10xxxxxx
56056
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
502 ((r1 &= #x3F)
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
503 (r1 |= ((r0 & #x1F) << 6))
57737
e425df7605c9 (ccl-decode-mule-utf-8): Check utf-subst-table-for-decode for more
Kenichi Handa <handa@m17n.org>
parents: 57727
diff changeset
504 ;; Now r1 holds scalar value. We don't have to check
56056
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
505 ;; `overlong sequence' because r0 >= 0xC2.
46496
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
506
56056
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
507 (if (r1 >= 256)
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
508 ;; mule-unicode-0100-24ff (< 0800)
57737
e425df7605c9 (ccl-decode-mule-utf-8): Check utf-subst-table-for-decode for more
Kenichi Handa <handa@m17n.org>
parents: 57727
diff changeset
509 ((r0 = r1)
e425df7605c9 (ccl-decode-mule-utf-8): Check utf-subst-table-for-decode for more
Kenichi Handa <handa@m17n.org>
parents: 57727
diff changeset
510 (lookup-integer utf-subst-table-for-decode r0 r1)
e425df7605c9 (ccl-decode-mule-utf-8): Check utf-subst-table-for-decode for more
Kenichi Handa <handa@m17n.org>
parents: 57727
diff changeset
511 (if (r7 == 0)
e425df7605c9 (ccl-decode-mule-utf-8): Check utf-subst-table-for-decode for more
Kenichi Handa <handa@m17n.org>
parents: 57727
diff changeset
512 ((r0 = ,(charset-id 'mule-unicode-0100-24ff))
e425df7605c9 (ccl-decode-mule-utf-8): Check utf-subst-table-for-decode for more
Kenichi Handa <handa@m17n.org>
parents: 57727
diff changeset
513 (r1 -= #x0100)
e425df7605c9 (ccl-decode-mule-utf-8): Check utf-subst-table-for-decode for more
Kenichi Handa <handa@m17n.org>
parents: 57727
diff changeset
514 (r2 = (((r1 / 96) + 32) << 7))
e425df7605c9 (ccl-decode-mule-utf-8): Check utf-subst-table-for-decode for more
Kenichi Handa <handa@m17n.org>
parents: 57727
diff changeset
515 (r1 %= 96)
e425df7605c9 (ccl-decode-mule-utf-8): Check utf-subst-table-for-decode for more
Kenichi Handa <handa@m17n.org>
parents: 57727
diff changeset
516 (r1 += (r2 + 32))
e425df7605c9 (ccl-decode-mule-utf-8): Check utf-subst-table-for-decode for more
Kenichi Handa <handa@m17n.org>
parents: 57727
diff changeset
517 (translate-character
e425df7605c9 (ccl-decode-mule-utf-8): Check utf-subst-table-for-decode for more
Kenichi Handa <handa@m17n.org>
parents: 57727
diff changeset
518 utf-translation-table-for-decode r0 r1)))
56056
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
519 (write-multibyte-character r0 r1)
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
520 (read r0)
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
521 (repeat))
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
522 (if (r1 >= 160)
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
523 ;; latin-iso8859-1
57737
e425df7605c9 (ccl-decode-mule-utf-8): Check utf-subst-table-for-decode for more
Kenichi Handa <handa@m17n.org>
parents: 57727
diff changeset
524 ((r0 = r1)
e425df7605c9 (ccl-decode-mule-utf-8): Check utf-subst-table-for-decode for more
Kenichi Handa <handa@m17n.org>
parents: 57727
diff changeset
525 (lookup-integer utf-subst-table-for-decode r0 r1)
e425df7605c9 (ccl-decode-mule-utf-8): Check utf-subst-table-for-decode for more
Kenichi Handa <handa@m17n.org>
parents: 57727
diff changeset
526 (if (r7 == 0)
e425df7605c9 (ccl-decode-mule-utf-8): Check utf-subst-table-for-decode for more
Kenichi Handa <handa@m17n.org>
parents: 57727
diff changeset
527 ((r1 -= 128)
e425df7605c9 (ccl-decode-mule-utf-8): Check utf-subst-table-for-decode for more
Kenichi Handa <handa@m17n.org>
parents: 57727
diff changeset
528 (write-multibyte-character r6 r1))
e425df7605c9 (ccl-decode-mule-utf-8): Check utf-subst-table-for-decode for more
Kenichi Handa <handa@m17n.org>
parents: 57727
diff changeset
529 ((write-multibyte-character r0 r1)))
56056
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
530 (read r0)
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
531 (repeat))
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
532 ;; eight-bit-control
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
533 ((r0 = ,(charset-id 'eight-bit-control))
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
534 (write-multibyte-character r0 r1)
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
535 (read r0)
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
536 (repeat))))))
46496
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
537
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
538 ;; Read the 3rd byte.
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
539 (read r2)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
540 (if ((r2 & #b11000000) != #b10000000) ; Invalid 3rd byte
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
541 ((call ccl-mule-utf-untrans)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
542 (r0 = r1)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
543 (call ccl-mule-utf-untrans)
56056
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
544 (r6 = ,(charset-id 'latin-iso8859-1))
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
545 ;; Handle it in the next loop.
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
546 (r0 = r2)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
547 (repeat)))
46496
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
548
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
549 (if (r0 < #xF0)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
550 ;; 3-byte encoding
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
551 ;; zzzzyyyyyyxxxxxx = 1110zzzz 10yyyyyy 10xxxxxx
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
552 ((r3 = ((r0 & #xF) << 12))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
553 (r3 |= ((r1 & #x3F) << 6))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
554 (r3 |= (r2 & #x3F))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
555
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
556 (if (r3 < #x800) ; `overlong sequence'
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
557 ((call ccl-mule-utf-untrans)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
558 (r0 = r1)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
559 (call ccl-mule-utf-untrans)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
560 (r0 = r2)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
561 (call ccl-mule-utf-untrans)
56056
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
562 (r6 = ,(charset-id 'latin-iso8859-1))
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
563 (read r0)
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
564 (repeat)))
49598
0d8b17d428b5 Trailing whitepace deleted.
Juanma Barranquero <lekktu@gmail.com>
parents: 49028
diff changeset
565
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
566 (if (r3 < #x2500)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
567 ;; mule-unicode-0100-24ff (>= 0800)
57737
e425df7605c9 (ccl-decode-mule-utf-8): Check utf-subst-table-for-decode for more
Kenichi Handa <handa@m17n.org>
parents: 57727
diff changeset
568 ((r0 = r3)
e425df7605c9 (ccl-decode-mule-utf-8): Check utf-subst-table-for-decode for more
Kenichi Handa <handa@m17n.org>
parents: 57727
diff changeset
569 (lookup-integer utf-subst-table-for-decode r0 r1)
e425df7605c9 (ccl-decode-mule-utf-8): Check utf-subst-table-for-decode for more
Kenichi Handa <handa@m17n.org>
parents: 57727
diff changeset
570 (if (r7 == 0)
e425df7605c9 (ccl-decode-mule-utf-8): Check utf-subst-table-for-decode for more
Kenichi Handa <handa@m17n.org>
parents: 57727
diff changeset
571 ((r0 = ,(charset-id 'mule-unicode-0100-24ff))
e425df7605c9 (ccl-decode-mule-utf-8): Check utf-subst-table-for-decode for more
Kenichi Handa <handa@m17n.org>
parents: 57727
diff changeset
572 (r3 -= #x0100)
e425df7605c9 (ccl-decode-mule-utf-8): Check utf-subst-table-for-decode for more
Kenichi Handa <handa@m17n.org>
parents: 57727
diff changeset
573 (r3 //= 96)
e425df7605c9 (ccl-decode-mule-utf-8): Check utf-subst-table-for-decode for more
Kenichi Handa <handa@m17n.org>
parents: 57727
diff changeset
574 (r1 = (r7 + 32))
e425df7605c9 (ccl-decode-mule-utf-8): Check utf-subst-table-for-decode for more
Kenichi Handa <handa@m17n.org>
parents: 57727
diff changeset
575 (r1 += ((r3 + 32) << 7))
e425df7605c9 (ccl-decode-mule-utf-8): Check utf-subst-table-for-decode for more
Kenichi Handa <handa@m17n.org>
parents: 57727
diff changeset
576 (translate-character
e425df7605c9 (ccl-decode-mule-utf-8): Check utf-subst-table-for-decode for more
Kenichi Handa <handa@m17n.org>
parents: 57727
diff changeset
577 utf-translation-table-for-decode r0 r1)))
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
578 (write-multibyte-character r0 r1)
56056
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
579 (read r0)
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
580 (repeat)))
49598
0d8b17d428b5 Trailing whitepace deleted.
Juanma Barranquero <lekktu@gmail.com>
parents: 49028
diff changeset
581
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
582 (if (r3 < #x3400)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
583 ;; mule-unicode-2500-33ff
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
584 ((r0 = r3) ; don't zap r3
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
585 (lookup-integer utf-subst-table-for-decode r0 r1)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
586 (if (r7 == 0)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
587 ((r0 = ,(charset-id 'mule-unicode-2500-33ff))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
588 (r3 -= #x2500)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
589 (r3 //= 96)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
590 (r1 = (r7 + 32))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
591 (r1 += ((r3 + 32) << 7))))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
592 (write-multibyte-character r0 r1)
56056
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
593 (read r0)
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
594 (repeat)))
46496
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
595
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
596 (if (r3 < #xE000)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
597 ;; Try to convert to CJK chars, else
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
598 ;; keep them as eight-bit-{control|graphic}.
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
599 ((r0 = r3)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
600 (lookup-integer utf-subst-table-for-decode r3 r1)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
601 (if r7
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
602 ;; got a translation
56056
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
603 ((write-multibyte-character r3 r1)
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
604 (read r0)
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
605 (repeat))
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
606 ((call ccl-mule-utf-untrans)
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
607 (r6 = ,(charset-id 'latin-iso8859-1))
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
608 (read r0)
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
609 (repeat)))))
46496
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
610
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
611 ;; mule-unicode-e000-ffff
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
612 ;; Fixme: fffe and ffff are invalid.
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
613 (r0 = r3) ; don't zap r3
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
614 (lookup-integer utf-subst-table-for-decode r0 r1)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
615 (if (r7 == 0)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
616 ((r0 = ,(charset-id 'mule-unicode-e000-ffff))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
617 (r3 -= #xe000)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
618 (r3 //= 96)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
619 (r1 = (r7 + 32))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
620 (r1 += ((r3 + 32) << 7))))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
621 (write-multibyte-character r0 r1)
56056
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
622 (read r0)
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
623 (repeat)))
49598
0d8b17d428b5 Trailing whitepace deleted.
Juanma Barranquero <lekktu@gmail.com>
parents: 49028
diff changeset
624
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
625 ;; Read the 4th byte.
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
626 (read r3)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
627 (if ((r3 & #b11000000) != #b10000000) ; Invalid 4th byte
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
628 ((call ccl-mule-utf-untrans)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
629 (r0 = r1)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
630 (call ccl-mule-utf-untrans)
56056
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
631 (r0 = r2)
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
632 (call ccl-mule-utf-untrans)
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
633 (r6 = ,(charset-id 'latin-iso8859-1))
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
634 ;; Handle it in the next loop.
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
635 (r0 = r3)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
636 (repeat)))
35542
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
637
56056
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
638 (if (r0 < #xF8)
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
639 ;; 4-byte encoding:
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
640 ;; wwwzzzzzzyyyyyyxxxxxx = 11110www 10zzzzzz 10yyyyyy 10xxxxxx
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
641 ;; keep those bytes as eight-bit-{control|graphic}
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
642 ;; Fixme: allow lookup in utf-subst-table-for-decode.
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
643 ((r4 = ((r0 & #x7) << 18))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
644 (r4 |= ((r1 & #x3F) << 12))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
645 (r4 |= ((r2 & #x3F) << 6))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
646 (r4 |= (r3 & #x3F))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
647
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
648 (if (r4 < #x10000) ; `overlong sequence'
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
649 ((call ccl-mule-utf-untrans)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
650 (r0 = r1)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
651 (call ccl-mule-utf-untrans)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
652 (r0 = r2)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
653 (call ccl-mule-utf-untrans)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
654 (r0 = r3)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
655 (call ccl-mule-utf-untrans))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
656 ((r0 = r4)
56056
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
657 (call ccl-mule-utf-untrans))))
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
658
56056
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
659 ;; Unsupported sequence.
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
660 ((call ccl-mule-utf-untrans)
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
661 (r0 = r1)
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
662 (call ccl-mule-utf-untrans)
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
663 (r0 = r2)
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
664 (call ccl-mule-utf-untrans)
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
665 (r0 = r3)
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
666 (call ccl-mule-utf-untrans)))
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
667 (r6 = ,(charset-id 'latin-iso8859-1))
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
668 (read r0)
50085
575609f03daa (ccl-decode-mule-utf-8): Don't loose
Kenichi Handa <handa@m17n.org>
parents: 49598
diff changeset
669 (repeat)))
575609f03daa (ccl-decode-mule-utf-8): Don't loose
Kenichi Handa <handa@m17n.org>
parents: 49598
diff changeset
670
56056
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
671
50085
575609f03daa (ccl-decode-mule-utf-8): Don't loose
Kenichi Handa <handa@m17n.org>
parents: 49598
diff changeset
672 ;; At EOF...
575609f03daa (ccl-decode-mule-utf-8): Don't loose
Kenichi Handa <handa@m17n.org>
parents: 49598
diff changeset
673 (if (r0 >= 0)
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
674 ;; r0 >= #x80
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
675 ((call ccl-mule-utf-untrans)
50085
575609f03daa (ccl-decode-mule-utf-8): Don't loose
Kenichi Handa <handa@m17n.org>
parents: 49598
diff changeset
676 (if (r1 >= 0)
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
677 ((r0 = r1)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
678 (call ccl-mule-utf-untrans)
50085
575609f03daa (ccl-decode-mule-utf-8): Don't loose
Kenichi Handa <handa@m17n.org>
parents: 49598
diff changeset
679 (if (r2 >= 0)
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
680 ((r0 = r2)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
681 (call ccl-mule-utf-untrans)
50085
575609f03daa (ccl-decode-mule-utf-8): Don't loose
Kenichi Handa <handa@m17n.org>
parents: 49598
diff changeset
682 (if (r3 >= 0)
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
683 ((r0 = r3)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
684 (call ccl-mule-utf-untrans))))))))))
35542
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
685
36243
a05ae5420f85 Doc and commentary fixes.
Dave Love <fx@gnu.org>
parents: 35542
diff changeset
686 "CCL program to decode UTF-8.
36465
f968e313e8ad Doc fixes.
Dave Love <fx@gnu.org>
parents: 36423
diff changeset
687 Basic decoding is done into the charsets ascii, latin-iso8859-1 and
47703
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
688 mule-unicode-*, but see also `utf-fragmentation-table' and
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
689 `ucs-mule-cjk-to-unicode'.
46496
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
690 Encodings of un-representable Unicode characters are decoded as is into
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
691 eight-bit-control and eight-bit-graphic characters.")
35542
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
692
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
693 (define-ccl-program ccl-mule-utf-8-encode-untrans
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
694 ;; UTF-8 decoder generates a UTF-8 sequence represented by a
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
695 ;; sequence of eight-bit-control/graphic chars for an untranslatable
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
696 ;; character and an invalid byte.
56562
9274a15c1400 (utf-translate-cjk-mode): Doc fix.
Luc Teirlinck <teirllm@auburn.edu>
parents: 56095
diff changeset
697 ;;
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
698 ;; This CCL parses that sequence (the first byte is already in r1),
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
699 ;; writes out the original bytes of that sequence, and sets r5 to
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
700 ;; -1.
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
701 ;;
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
702 ;; If the eight-bit-control/graphic sequence is shorter than what r1
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
703 ;; suggests, it sets r5 and r6 to the last character read that
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
704 ;; should be handled by the next loop of a caller.
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
705 ;;
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
706 ;; Note: For UTF-8 validation, we only check if a character is
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
707 ;; eight-bit-control/graphic or not. It may result in incorrect
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
708 ;; handling of random binary data, but such data can't be encoded
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
709 ;; by UTF-8 anyway. At least, UTF-8 decoders don't generate such
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
710 ;; a sequence even if a source contains an invalid byte-sequence.
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
711 `(0
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
712 (;; Read the 2nd byte.
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
713 (read-multibyte-character r5 r6)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
714 (r0 = (r5 != ,(charset-id 'eight-bit-control)))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
715 (if ((r5 != ,(charset-id 'eight-bit-graphic)) & r0)
56562
9274a15c1400 (utf-translate-cjk-mode): Doc fix.
Luc Teirlinck <teirllm@auburn.edu>
parents: 56095
diff changeset
716 ((write r1) ; invalid UTF-8
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
717 (r1 = -1)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
718 (end)))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
719
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
720 (if (r1 <= #xC3)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
721 ;; 2-byte sequence for an originally invalid byte.
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
722 ((r6 &= #x3F)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
723 (r6 |= ((r1 & #x1F) << 6))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
724 (write r6)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
725 (r5 = -1)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
726 (end)))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
727
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
728 (write r1 r6)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
729 (r2 = r1)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
730 (r1 = -1)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
731 ;; Read the 3rd byte.
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
732 (read-multibyte-character r5 r6)
56562
9274a15c1400 (utf-translate-cjk-mode): Doc fix.
Luc Teirlinck <teirllm@auburn.edu>
parents: 56095
diff changeset
733 (r0 = (r5 != ,(charset-id 'eight-bit-control)))
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
734 (if ((r5 != ,(charset-id 'eight-bit-graphic)) & r0)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
735 (end)) ; invalid UTF-8
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
736 (write r6)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
737 (if (r2 < #xF0)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
738 ;; 3-byte sequence for an untranslated character.
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
739 ((r5 = -1)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
740 (end)))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
741 ;; Read the 4th byte.
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
742 (read-multibyte-character r5 r6)
56562
9274a15c1400 (utf-translate-cjk-mode): Doc fix.
Luc Teirlinck <teirllm@auburn.edu>
parents: 56095
diff changeset
743 (r0 = (r5 != ,(charset-id 'eight-bit-control)))
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
744 (if ((r5 != ,(charset-id 'eight-bit-graphic)) & r0)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
745 (end)) ; invalid UTF-8
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
746 ;; 4-byte sequence for an untranslated character.
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
747 (write r6)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
748 (r5 = -1)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
749 (end))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
750
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
751 ;; At EOF...
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
752 ((r5 = -1)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
753 (if (r1 >= 0)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
754 (write r1)))))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
755
35542
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
756 (define-ccl-program ccl-encode-mule-utf-8
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
757 `(1
37097
b095952a8678 (ccl-encode-mule-utf-8): Fix handling of eight-bit-control chars.
Kenichi Handa <handa@m17n.org>
parents: 36522
diff changeset
758 ((r5 = -1)
b095952a8678 (ccl-encode-mule-utf-8): Fix handling of eight-bit-control chars.
Kenichi Handa <handa@m17n.org>
parents: 36522
diff changeset
759 (loop
b095952a8678 (ccl-encode-mule-utf-8): Fix handling of eight-bit-control chars.
Kenichi Handa <handa@m17n.org>
parents: 36522
diff changeset
760 (if (r5 < 0)
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
761 (read-multibyte-character r0 r1)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
762 ;; Pre-read character is in r5 (charset-ID) and r6 (code-point).
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
763 ((r0 = r5)
37097
b095952a8678 (ccl-encode-mule-utf-8): Fix handling of eight-bit-control chars.
Kenichi Handa <handa@m17n.org>
parents: 36522
diff changeset
764 (r1 = r6)
b095952a8678 (ccl-encode-mule-utf-8): Fix handling of eight-bit-control chars.
Kenichi Handa <handa@m17n.org>
parents: 36522
diff changeset
765 (r5 = -1)))
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
766 (translate-character utf-translation-table-for-encode r0 r1)
35542
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
767
37097
b095952a8678 (ccl-encode-mule-utf-8): Fix handling of eight-bit-control chars.
Kenichi Handa <handa@m17n.org>
parents: 36522
diff changeset
768 (if (r0 == ,(charset-id 'ascii))
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
769 (write-repeat r1))
37097
b095952a8678 (ccl-encode-mule-utf-8): Fix handling of eight-bit-control chars.
Kenichi Handa <handa@m17n.org>
parents: 36522
diff changeset
770
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
771 (if (r0 == ,(charset-id 'latin-iso8859-1))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
772 ;; r1 scalar utf-8
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
773 ;; 0000 0yyy yyxx xxxx 110y yyyy 10xx xxxx
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
774 ;; 20 0000 0000 1010 0000 1100 0010 1010 0000
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
775 ;; 7f 0000 0000 1111 1111 1100 0011 1011 1111
56095
4ec2da03a87c (ccl-encode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56056
diff changeset
776 ((write ((r1 >> 6) | #xc2))
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
777 (r1 &= #x3f)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
778 (r1 |= #x80)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
779 (write-repeat r1)))
35542
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
780
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
781 (if (r0 == ,(charset-id 'mule-unicode-0100-24ff))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
782 ((r0 = ((((r1 & #x3f80) >> 7) - 32) * 96))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
783 ;; #x3f80 == (0011 1111 1000 0000)b
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
784 (r1 &= #x7f)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
785 (r1 += (r0 + 224)) ; 224 == -32 + #x0100
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
786 ;; now r1 holds scalar value
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
787 (if (r1 < #x0800)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
788 ;; 2byte encoding
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
789 ((write ((r1 >> 6) | #xC0))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
790 (r1 &= #x3F)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
791 (r1 |= #x80)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
792 (write-repeat r1))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
793 ;; 3byte encoding
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
794 ((write ((r1 >> 12) | #xE0))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
795 (write (((r1 & #x0FC0) >> 6) | #x80))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
796 (r1 &= #x3F)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
797 (r1 |= #x80)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
798 (write-repeat r1)))))
37097
b095952a8678 (ccl-encode-mule-utf-8): Fix handling of eight-bit-control chars.
Kenichi Handa <handa@m17n.org>
parents: 36522
diff changeset
799
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
800 (if (r0 == ,(charset-id 'mule-unicode-2500-33ff))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
801 ((r0 = ((((r1 & #x3f80) >> 7) - 32) * 96))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
802 (r1 &= #x7f)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
803 (r1 += (r0 + 9440)) ; 9440 == -32 + #x2500
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
804 ;; now r1 holds scalar value
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
805 (write ((r1 >> 12) | #xE0))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
806 (write (((r1 & #x0FC0) >> 6) | #x80))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
807 (r1 &= #x3F)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
808 (r1 |= #x80)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
809 (write-repeat r1)))
35542
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
810
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
811 (if (r0 == ,(charset-id 'mule-unicode-e000-ffff))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
812 ((r0 = ((((r1 & #x3f80) >> 7) - 32) * 96))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
813 (r1 &= #x7f)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
814 (r1 += (r0 + 57312)) ; 57312 == -32 + #xe000
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
815 ;; now r1 holds scalar value
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
816 (write ((r1 >> 12) | #xE0))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
817 (write (((r1 & #x0FC0) >> 6) | #x80))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
818 (r1 &= #x3F)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
819 (r1 |= #x80)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
820 (write-repeat r1)))
37097
b095952a8678 (ccl-encode-mule-utf-8): Fix handling of eight-bit-control chars.
Kenichi Handa <handa@m17n.org>
parents: 36522
diff changeset
821
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
822 (if (r0 == ,(charset-id 'eight-bit-control))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
823 ;; r1 scalar utf-8
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
824 ;; 0000 0yyy yyxx xxxx 110y yyyy 10xx xxxx
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
825 ;; 80 0000 0000 1000 0000 1100 0010 1000 0000
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
826 ;; 9f 0000 0000 1001 1111 1100 0010 1001 1111
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
827 ((write #xC2)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
828 (write-repeat r1)))
35542
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
829
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
830 (if (r0 == ,(charset-id 'eight-bit-graphic))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
831 ;; r1 scalar utf-8
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
832 ;; 0000 0yyy yyxx xxxx 110y yyyy 10xx xxxx
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
833 ;; a0 0000 0000 1010 0000 1100 0010 1010 0000
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
834 ;; ff 0000 0000 1111 1111 1101 1111 1011 1111
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
835 ((r0 = (r1 >= #xC0))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
836 (r0 &= (r1 <= #xC3))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
837 (r4 = (r1 >= #xE1))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
838 (r4 &= (r1 <= #xF7))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
839 (r0 |= r4)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
840 (if r0
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
841 ((call ccl-mule-utf-8-encode-untrans)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
842 (repeat))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
843 (write-repeat r1))))
35542
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
844
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
845 (lookup-character utf-subst-table-for-encode r0 r1)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
846 (if r7 ; lookup succeeded
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
847 (if (r0 < #x800)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
848 ;; 2byte encoding
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
849 ((write ((r0 >> 6) | #xC0))
56095
4ec2da03a87c (ccl-encode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56056
diff changeset
850 (r0 = ((r0 & #x3F) | #x80))
4ec2da03a87c (ccl-encode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56056
diff changeset
851 (write-repeat r0))
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
852 ;; 3byte encoding
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
853 ((write ((r0 >> 12) | #xE0))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
854 (write (((r0 & #x0FC0) >> 6) | #x80))
56095
4ec2da03a87c (ccl-encode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56056
diff changeset
855 (r0 = ((r0 & #x3F) | #x80))
4ec2da03a87c (ccl-encode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56056
diff changeset
856 (write-repeat r0))))
35542
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
857
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
858 ;; Unsupported character.
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
859 ;; Output U+FFFD, which is `ef bf bd' in UTF-8.
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
860 (write #xef)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
861 (write #xbf)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
862 (write-repeat #xbd))))
46496
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
863 "CCL program to encode into UTF-8.")
35542
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
864
41873
16ee1ffbef65 (ucs-mule-to-mule-unicode): New
Dave Love <fx@gnu.org>
parents: 38436
diff changeset
865
46496
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
866 (define-ccl-program ccl-untranslated-to-ucs
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
867 `(0
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
868 (if (r1 == 0)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
869 nil
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
870 (if (r0 <= #xC3) ; 2-byte encoding
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
871 ((r0 = ((r0 & #x3) << 6))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
872 (r0 |= (r1 & #x3F))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
873 (r1 = 2))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
874 (if (r2 == 0)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
875 (r1 = 0)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
876 (if (r0 < #xF0) ; 3-byte encoding, as above
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
877 ((r0 = ((r0 & #xF) << 12))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
878 (r0 |= ((r1 & #x3F) << 6))
56056
4575a565f45d (ccl-decode-mule-utf-8): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 56037
diff changeset
879 (r0 |= (r2 & #x3F))
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
880 (r1 = 3))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
881 (if (r3 == 0)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
882 (r1 = 0)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
883 ((r0 = ((r0 & #x7) << 18))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
884 (r0 |= ((r1 & #x3F) << 12))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
885 (r0 |= ((r2 & #x3F) << 6))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
886 (r0 |= (r3 & #x3F))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
887 (r1 = 4))))))))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
888 "Decode 2-, 3-, or 4-byte sequences in r0, r1, r2 [,r3] to unicodes in r0.
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
889 Set r1 to the byte length. r1 == 0 for invalid sequence.")
46496
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
890
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
891 (defvar utf-8-ccl-regs (make-vector 8 0))
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
892
41873
16ee1ffbef65 (ucs-mule-to-mule-unicode): New
Dave Love <fx@gnu.org>
parents: 38436
diff changeset
893 (defsubst utf-8-untranslated-to-ucs ()
46496
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
894 "Return the UCS code for an untranslated sequence of raw bytes at point.
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
895 Handles 2-, 3-, or 4-byte sequences."
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
896 (aset utf-8-ccl-regs 0 (or (char-after) 0))
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
897 (aset utf-8-ccl-regs 1 (or (char-after (1+ (point))) 0))
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
898 (aset utf-8-ccl-regs 2 (or (char-after (+ 2 (point))) 0))
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
899 (aset utf-8-ccl-regs 3 (or (char-after (+ 3 (point))) 0))
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
900 (ccl-execute 'ccl-untranslated-to-ucs utf-8-ccl-regs))
41873
16ee1ffbef65 (ucs-mule-to-mule-unicode): New
Dave Love <fx@gnu.org>
parents: 38436
diff changeset
901
16ee1ffbef65 (ucs-mule-to-mule-unicode): New
Dave Love <fx@gnu.org>
parents: 38436
diff changeset
902 (defun utf-8-help-echo (window object position)
16ee1ffbef65 (ucs-mule-to-mule-unicode): New
Dave Love <fx@gnu.org>
parents: 38436
diff changeset
903 (format "Untranslated Unicode U+%04X"
16ee1ffbef65 (ucs-mule-to-mule-unicode): New
Dave Love <fx@gnu.org>
parents: 38436
diff changeset
904 (get-char-property position 'untranslated-utf-8 object)))
16ee1ffbef65 (ucs-mule-to-mule-unicode): New
Dave Love <fx@gnu.org>
parents: 38436
diff changeset
905
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
906 ;; We compose the untranslatable sequences into a single character,
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
907 ;; and move point to the next character.
41873
16ee1ffbef65 (ucs-mule-to-mule-unicode): New
Dave Love <fx@gnu.org>
parents: 38436
diff changeset
908 ;; This is infelicitous for editing, because there's currently no
16ee1ffbef65 (ucs-mule-to-mule-unicode): New
Dave Love <fx@gnu.org>
parents: 38436
diff changeset
909 ;; mechanism for treating compositions as atomic, but is OK for
46496
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
910 ;; display. They are composed to U+FFFD with help-echo which
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
911 ;; indicates the unicodes they represent. This function GCs too much.
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
912
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
913 ;; If utf-translate-cjk-mode is non-nil, this function is called with
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
914 ;; HASH-TABLE which translates CJK characters into some of the CJK
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
915 ;; charsets.
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
916
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
917 (defsubst utf-8-compose (hash-table)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
918 "Put a suitable composition on an untranslatable sequence at point.
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
919 If HASH-TABLE is non-nil, first try to translate CJK characters with it.
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
920 Move point to the end of the sequence."
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
921 (utf-8-untranslated-to-ucs)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
922 (let ((l (aref utf-8-ccl-regs 1))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
923 ch)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
924 (if (> l 0)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
925 (if (and hash-table
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
926 (setq ch (gethash (aref utf-8-ccl-regs 0) hash-table)))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
927 (progn
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
928 (insert ch)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
929 (delete-region (point) (min (point-max) (+ l (point)))))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
930 (setq ch (aref utf-8-ccl-regs 0))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
931 (put-text-property (point) (min (point-max) (+ l (point)))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
932 'untranslated-utf-8 ch)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
933 (put-text-property (point) (min (point-max) (+ l (point)))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
934 'help-echo 'utf-8-help-echo)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
935 (if (= l 2)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
936 (put-text-property (point) (min (point-max) (+ l (point)))
66062
b23c01e98a4b (utf-8-compose): Display an invalid UTF-8 byte with `escape-glyph'
Kenichi Handa <handa@m17n.org>
parents: 64085
diff changeset
937 'display (propertize (format "\\%03o" ch)
b23c01e98a4b (utf-8-compose): Display an invalid UTF-8 byte with `escape-glyph'
Kenichi Handa <handa@m17n.org>
parents: 64085
diff changeset
938 'face 'escape-glyph))
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
939 (compose-region (point) (+ l (point)) ?$,3u=(B))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
940 (forward-char l))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
941 (forward-char 1))))
41873
16ee1ffbef65 (ucs-mule-to-mule-unicode): New
Dave Love <fx@gnu.org>
parents: 38436
diff changeset
942
16ee1ffbef65 (ucs-mule-to-mule-unicode): New
Dave Love <fx@gnu.org>
parents: 38436
diff changeset
943 (defcustom utf-8-compose-scripts nil
46496
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
944 "*Non-nil means compose various scripts on decoding utf-8 text."
41873
16ee1ffbef65 (ucs-mule-to-mule-unicode): New
Dave Love <fx@gnu.org>
parents: 38436
diff changeset
945 :group 'mule
59996
aac0a33f5772 Change release version from 21.4 to 22.1 throughout.
Kim F. Storm <storm@cua.dk>
parents: 59096
diff changeset
946 :version "22.1"
46496
395e5c46761b (utf-8-subst-table)
Dave Love <fx@gnu.org>
parents: 44411
diff changeset
947 :type 'boolean)
41873
16ee1ffbef65 (ucs-mule-to-mule-unicode): New
Dave Love <fx@gnu.org>
parents: 38436
diff changeset
948
16ee1ffbef65 (ucs-mule-to-mule-unicode): New
Dave Love <fx@gnu.org>
parents: 38436
diff changeset
949 (defun utf-8-post-read-conversion (length)
16ee1ffbef65 (ucs-mule-to-mule-unicode): New
Dave Love <fx@gnu.org>
parents: 38436
diff changeset
950 "Compose untranslated utf-8 sequences into single characters.
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
951 If `utf-translate-cjk-mode' is non-nil, tries to translate CJK characters.
41873
16ee1ffbef65 (ucs-mule-to-mule-unicode): New
Dave Love <fx@gnu.org>
parents: 38436
diff changeset
952 Also compose particular scripts if `utf-8-compose-scripts' is non-nil."
16ee1ffbef65 (ucs-mule-to-mule-unicode): New
Dave Love <fx@gnu.org>
parents: 38436
diff changeset
953 (save-excursion
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
954 (save-restriction
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
955 (narrow-to-region (point) (+ (point) length))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
956 ;; Can't do eval-when-compile to insert a multibyte constant
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
957 ;; version of the string in the loop, since it's always loaded as
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
958 ;; unibyte from a byte-compiled file.
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
959 (let ((range (string-as-multibyte "^\xc0-\xc3\xe1-\xf7"))
56800
752ef76fcc08 (utf-8-post-read-conversion): If the
Kenichi Handa <handa@m17n.org>
parents: 56562
diff changeset
960 (buffer-multibyte enable-multibyte-characters)
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
961 hash-table ch)
56800
752ef76fcc08 (utf-8-post-read-conversion): If the
Kenichi Handa <handa@m17n.org>
parents: 56562
diff changeset
962 (set-buffer-multibyte t)
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
963 (when utf-translate-cjk-mode
57727
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
964 (unless utf-translate-cjk-lang-env
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
965 ;; Check these characters in utf-translate-cjk-range.
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
966 ;; We may have to translate them to CJK charsets.
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
967 (skip-chars-forward
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
968 (concat range utf-translate-cjk-unicode-range-string))
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
969 (unless (eobp)
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
970 (utf-translate-cjk-load-tables)
c3945be39e09 (utf-translate-cjk-unicode-range-string):
Kenichi Handa <handa@m17n.org>
parents: 56800
diff changeset
971 (setq range
57737
e425df7605c9 (ccl-decode-mule-utf-8): Check utf-subst-table-for-decode for more
Kenichi Handa <handa@m17n.org>
parents: 57727
diff changeset
972 (concat range utf-translate-cjk-unicode-range-string)))
e425df7605c9 (ccl-decode-mule-utf-8): Check utf-subst-table-for-decode for more
Kenichi Handa <handa@m17n.org>
parents: 57727
diff changeset
973 (setq hash-table (get 'utf-subst-table-for-decode
e425df7605c9 (ccl-decode-mule-utf-8): Check utf-subst-table-for-decode for more
Kenichi Handa <handa@m17n.org>
parents: 57727
diff changeset
974 'translation-hash-table))))
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
975 (while (and (skip-chars-forward range)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
976 (not (eobp)))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
977 (setq ch (following-char))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
978 (if (< ch 256)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
979 (utf-8-compose hash-table)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
980 (if (and hash-table
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
981 (setq ch (gethash (encode-char ch 'ucs) hash-table)))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
982 (progn
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
983 (insert ch)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
984 (delete-char 1))
56800
752ef76fcc08 (utf-8-post-read-conversion): If the
Kenichi Handa <handa@m17n.org>
parents: 56562
diff changeset
985 (forward-char 1))))
752ef76fcc08 (utf-8-post-read-conversion): If the
Kenichi Handa <handa@m17n.org>
parents: 56562
diff changeset
986 (or buffer-multibyte
752ef76fcc08 (utf-8-post-read-conversion): If the
Kenichi Handa <handa@m17n.org>
parents: 56562
diff changeset
987 (set-buffer-multibyte nil)))
41873
16ee1ffbef65 (ucs-mule-to-mule-unicode): New
Dave Love <fx@gnu.org>
parents: 38436
diff changeset
988
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
989 (when (and utf-8-compose-scripts (> length 1))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
990 ;; These currently have definitions which cover the relevant
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
991 ;; unicodes. We could avoid loading thai-util &c by checking
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
992 ;; whether the region contains any characters with the appropriate
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
993 ;; categories. There aren't yet Unicode-based rules for Tibetan.
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
994 (diacritic-compose-region (point-max) (point-min))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
995 (thai-compose-region (point-max) (point-min))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
996 (lao-compose-region (point-max) (point-min))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
997 (devanagari-compose-region (point-max) (point-min))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
998 (malayalam-compose-region (point-max) (point-min))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
999 (tamil-compose-region (point-max) (point-min)))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
1000 (- (point-max) (point-min)))))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
1001
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
1002 (defun utf-8-pre-write-conversion (beg end)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
1003 "Prepare for `utf-translate-cjk-mode' to encode text between BEG and END.
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
1004 This is used as a pre-write-conversion of utf-8 coding system."
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
1005 (if (and utf-translate-cjk-mode
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
1006 (not utf-translate-cjk-lang-env)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
1007 (save-excursion
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
1008 (goto-char beg)
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
1009 (re-search-forward "\\cc\\|\\cj\\|\\ch" end t)))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
1010 (utf-translate-cjk-load-tables))
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
1011 nil)
41873
16ee1ffbef65 (ucs-mule-to-mule-unicode): New
Dave Love <fx@gnu.org>
parents: 38436
diff changeset
1012
35542
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
1013 (make-coding-system
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
1014 'mule-utf-8 4 ?u
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
1015 "UTF-8 encoding for Emacs-supported Unicode characters.
47703
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
1016 It supports Unicode characters of these ranges:
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
1017 U+0000..U+33FF, U+E000..U+FFFF.
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
1018 They correspond to these Emacs character sets:
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
1019 ascii, latin-iso8859-1, mule-unicode-0100-24ff,
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
1020 mule-unicode-2500-33ff, mule-unicode-e000-ffff
35542
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
1021
47703
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
1022 On decoding (e.g. reading a file), Unicode characters not in the above
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
1023 ranges are decoded into sequences of eight-bit-control and
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
1024 eight-bit-graphic characters to preserve their byte sequences. The
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
1025 byte sequence is preserved on i/o for valid utf-8, but not necessarily
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
1026 for invalid utf-8.
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
1027
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
1028 On encoding (e.g. writing a file), Emacs characters not belonging to
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
1029 any of the character sets listed above are encoded into the UTF-8 byte
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
1030 sequence representing U+FFFD (REPLACEMENT CHARACTER)."
35542
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
1031
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
1032 '(ccl-decode-mule-utf-8 . ccl-encode-mule-utf-8)
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
1033 `((safe-charsets
35542
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
1034 ascii
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
1035 eight-bit-control
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
1036 eight-bit-graphic
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
1037 latin-iso8859-1
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
1038 mule-unicode-0100-24ff
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
1039 mule-unicode-2500-33ff
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
1040 mule-unicode-e000-ffff
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
1041 ,@(if utf-translate-cjk-mode
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
1042 utf-translate-cjk-charsets))
36371
f6bb3ed752b4 (mule-utf-8): Set correct value for valid-codes property.
Kenichi Handa <handa@m17n.org>
parents: 36243
diff changeset
1043 (mime-charset . utf-8)
36423
aa776838b660 (mule-utf-8): Set coding-category property to coding-category-utf-8.
Kenichi Handa <handa@m17n.org>
parents: 36371
diff changeset
1044 (coding-category . coding-category-utf-8)
41873
16ee1ffbef65 (ucs-mule-to-mule-unicode): New
Dave Love <fx@gnu.org>
parents: 38436
diff changeset
1045 (valid-codes (0 . 255))
56037
81dbb510a1db (utf-translate-cjk-charsets): New
Kenichi Handa <handa@m17n.org>
parents: 55437
diff changeset
1046 (pre-write-conversion . utf-8-pre-write-conversion)
47703
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
1047 (post-read-conversion . utf-8-post-read-conversion)
50766
fc9cb527333d (utf-translate-cjk-mode): Update the
Kenichi Handa <handa@m17n.org>
parents: 50549
diff changeset
1048 (translation-table-for-encode . utf-translation-table-for-encode)
47703
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
1049 (dependency unify-8859-on-encoding-mode
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
1050 unify-8859-on-decoding-mode
6d4430dfeafc (ucs-mule-to-mule-unicode): Don't define
Kenichi Handa <handa@m17n.org>
parents: 47409
diff changeset
1051 utf-fragment-on-decoding
55437
6e677a935fe9 Fix references to utf-translate-cjk into utf-translate-cjk-mode.
Andreas Schwab <schwab@suse.de>
parents: 54304
diff changeset
1052 utf-translate-cjk-mode)))
35542
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
1053
e4a75e66ee46 new file
Kenichi Handa <handa@m17n.org>
parents:
diff changeset
1054 (define-coding-system-alias 'utf-8 'mule-utf-8)
38436
b174db545cfd Some fixes to follow coding conventions.
Pavel Janík <Pavel@Janik.cz>
parents: 37934
diff changeset
1055
41873
16ee1ffbef65 (ucs-mule-to-mule-unicode): New
Dave Love <fx@gnu.org>
parents: 38436
diff changeset
1056 ;; I think this needs special private charsets defined for the
16ee1ffbef65 (ucs-mule-to-mule-unicode): New
Dave Love <fx@gnu.org>
parents: 38436
diff changeset
1057 ;; untranslated sequences, if it's going to work well.
16ee1ffbef65 (ucs-mule-to-mule-unicode): New
Dave Love <fx@gnu.org>
parents: 38436
diff changeset
1058
16ee1ffbef65 (ucs-mule-to-mule-unicode): New
Dave Love <fx@gnu.org>
parents: 38436
diff changeset
1059 ;;; (defun utf-8-compose-function (pos to pattern &optional string)
16ee1ffbef65 (ucs-mule-to-mule-unicode): New
Dave Love <fx@gnu.org>
parents: 38436
diff changeset
1060 ;;; (let* ((prop (get-char-property pos 'composition string))
16ee1ffbef65 (ucs-mule-to-mule-unicode): New
Dave Love <fx@gnu.org>
parents: 38436
diff changeset
1061 ;;; (l (and prop (- (cadr prop) (car prop)))))
16ee1ffbef65 (ucs-mule-to-mule-unicode): New
Dave Love <fx@gnu.org>
parents: 38436
diff changeset
1062 ;;; (cond ((and l (> l (- to pos)))
16ee1ffbef65 (ucs-mule-to-mule-unicode): New
Dave Love <fx@gnu.org>
parents: 38436
diff changeset
1063 ;;; (delete-region pos to))
16ee1ffbef65 (ucs-mule-to-mule-unicode): New
Dave Love <fx@gnu.org>
parents: 38436
diff changeset
1064 ;;; ((and (> (char-after pos) 224)
16ee1ffbef65 (ucs-mule-to-mule-unicode): New
Dave Love <fx@gnu.org>
parents: 38436
diff changeset
1065 ;;; (< (char-after pos) 256)
16ee1ffbef65 (ucs-mule-to-mule-unicode): New
Dave Love <fx@gnu.org>
parents: 38436
diff changeset
1066 ;;; (save-restriction
16ee1ffbef65 (ucs-mule-to-mule-unicode): New
Dave Love <fx@gnu.org>
parents: 38436
diff changeset
1067 ;;; (narrow-to-region pos to)
16ee1ffbef65 (ucs-mule-to-mule-unicode): New
Dave Love <fx@gnu.org>
parents: 38436
diff changeset
1068 ;;; (utf-8-compose)))
16ee1ffbef65 (ucs-mule-to-mule-unicode): New
Dave Love <fx@gnu.org>
parents: 38436
diff changeset
1069 ;;; t))))
16ee1ffbef65 (ucs-mule-to-mule-unicode): New
Dave Love <fx@gnu.org>
parents: 38436
diff changeset
1070
16ee1ffbef65 (ucs-mule-to-mule-unicode): New
Dave Love <fx@gnu.org>
parents: 38436
diff changeset
1071 ;;; (dotimes (i 96)
16ee1ffbef65 (ucs-mule-to-mule-unicode): New
Dave Love <fx@gnu.org>
parents: 38436
diff changeset
1072 ;;; (aset composition-function-table
16ee1ffbef65 (ucs-mule-to-mule-unicode): New
Dave Love <fx@gnu.org>
parents: 38436
diff changeset
1073 ;;; (+ 128 i)
16ee1ffbef65 (ucs-mule-to-mule-unicode): New
Dave Love <fx@gnu.org>
parents: 38436
diff changeset
1074 ;;; `((,(string-as-multibyte "[\200-\237\240-\377]")
16ee1ffbef65 (ucs-mule-to-mule-unicode): New
Dave Love <fx@gnu.org>
parents: 38436
diff changeset
1075 ;;; . utf-8-compose-function))))
16ee1ffbef65 (ucs-mule-to-mule-unicode): New
Dave Love <fx@gnu.org>
parents: 38436
diff changeset
1076
52401
695cf19ef79e Add arch taglines
Miles Bader <miles@gnu.org>
parents: 52284
diff changeset
1077 ;;; arch-tag: b08735b7-753b-4ae6-b754-0f3efe4515c5
38436
b174db545cfd Some fixes to follow coding conventions.
Pavel Janík <Pavel@Janik.cz>
parents: 37934
diff changeset
1078 ;;; utf-8.el ends here