annotate src/charset.h @ 38588:08a4c6c00af0

(init_from_display_pos): If POS is in an overlay string, deal with the first overlay string having an image `display' property. (try_window_reusing_current_matrix, compute_line_metrics): Fix computation of row's visible height for the case that part of the row is invisible above and part of the row is at the same time invisible below the window.
author Gerd Moellmann <gerd@gnu.org>
date Fri, 27 Jul 2001 15:29:16 +0000
parents b7a9187751b2
children ab30bd62a6a8
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
26843
0aadeca4a4a7 In this entry, just `Modified' means that codes for a
Kenichi Handa <handa@m17n.org>
parents: 25637
diff changeset
1 /* Header for multibyte character handler.
20708
ed9ed828415e Update copyright year.
Richard M. Stallman <rms@gnu.org>
parents: 20589
diff changeset
2 Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
18341
33e78cc7f058 Change copyright notices.
Richard M. Stallman <rms@gnu.org>
parents: 17833
diff changeset
3 Licensed to the Free Software Foundation.
38395
b7a9187751b2 (CHAR_STRING_NO_SIGNAL): New macro.
Gerd Moellmann <gerd@gnu.org>
parents: 35949
diff changeset
4 Copyright (C) 2001 Free Software Foundation, Inc.
17052
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
5
17071
70194012fb3a Fix FSF address in comment.
Karl Heuer <kwzh@gnu.org>
parents: 17052
diff changeset
6 This file is part of GNU Emacs.
70194012fb3a Fix FSF address in comment.
Karl Heuer <kwzh@gnu.org>
parents: 17052
diff changeset
7
70194012fb3a Fix FSF address in comment.
Karl Heuer <kwzh@gnu.org>
parents: 17052
diff changeset
8 GNU Emacs is free software; you can redistribute it and/or modify
70194012fb3a Fix FSF address in comment.
Karl Heuer <kwzh@gnu.org>
parents: 17052
diff changeset
9 it under the terms of the GNU General Public License as published by
70194012fb3a Fix FSF address in comment.
Karl Heuer <kwzh@gnu.org>
parents: 17052
diff changeset
10 the Free Software Foundation; either version 2, or (at your option)
70194012fb3a Fix FSF address in comment.
Karl Heuer <kwzh@gnu.org>
parents: 17052
diff changeset
11 any later version.
17052
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
12
17071
70194012fb3a Fix FSF address in comment.
Karl Heuer <kwzh@gnu.org>
parents: 17052
diff changeset
13 GNU Emacs is distributed in the hope that it will be useful,
70194012fb3a Fix FSF address in comment.
Karl Heuer <kwzh@gnu.org>
parents: 17052
diff changeset
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
70194012fb3a Fix FSF address in comment.
Karl Heuer <kwzh@gnu.org>
parents: 17052
diff changeset
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
70194012fb3a Fix FSF address in comment.
Karl Heuer <kwzh@gnu.org>
parents: 17052
diff changeset
16 GNU General Public License for more details.
17052
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
17
17071
70194012fb3a Fix FSF address in comment.
Karl Heuer <kwzh@gnu.org>
parents: 17052
diff changeset
18 You should have received a copy of the GNU General Public License
70194012fb3a Fix FSF address in comment.
Karl Heuer <kwzh@gnu.org>
parents: 17052
diff changeset
19 along with GNU Emacs; see the file COPYING. If not, write to
70194012fb3a Fix FSF address in comment.
Karl Heuer <kwzh@gnu.org>
parents: 17052
diff changeset
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
70194012fb3a Fix FSF address in comment.
Karl Heuer <kwzh@gnu.org>
parents: 17052
diff changeset
21 Boston, MA 02111-1307, USA. */
17052
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
22
29570
825505ff211e (EMACS_CHARSET_H): Renamed from _CHARSET_H.
Kenichi Handa <handa@m17n.org>
parents: 29539
diff changeset
23 #ifndef EMACS_CHARSET_H
825505ff211e (EMACS_CHARSET_H): Renamed from _CHARSET_H.
Kenichi Handa <handa@m17n.org>
parents: 29539
diff changeset
24 #define EMACS_CHARSET_H
17052
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
25
29004
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
26 /* #define BYTE_COMBINING_DEBUG */
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
27
17052
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
28 /*** GENERAL NOTE on CHARACTER SET (CHARSET) ***
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
29
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
30 A character set ("charset" hereafter) is a meaningful collection
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
31 (i.e. language, culture, functionality, etc.) of characters. Emacs
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
32 handles multiple charsets at once. Each charset corresponds to one
35478
340a9e4aeb29 comment fixes
Dave Love <fx@gnu.org>
parents: 35408
diff changeset
33 of the ISO charsets. Emacs identifies a charset by a unique
26843
0aadeca4a4a7 In this entry, just `Modified' means that codes for a
Kenichi Handa <handa@m17n.org>
parents: 25637
diff changeset
34 identification number, whereas ISO identifies a charset by a triplet
0aadeca4a4a7 In this entry, just `Modified' means that codes for a
Kenichi Handa <handa@m17n.org>
parents: 25637
diff changeset
35 of DIMENSION, CHARS and FINAL-CHAR. So, hereafter, just saying
0aadeca4a4a7 In this entry, just `Modified' means that codes for a
Kenichi Handa <handa@m17n.org>
parents: 25637
diff changeset
36 "charset" means an identification number (integer value).
17052
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
37
35478
340a9e4aeb29 comment fixes
Dave Love <fx@gnu.org>
parents: 35408
diff changeset
38 The value range of charsets is 0x00, 0x80..0xFE. There are four
17052
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
39 kinds of charset depending on DIMENSION (1 or 2) and CHARS (94 or
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
40 96). For instance, a charset of DIMENSION2_CHARS94 contains 94x94
26843
0aadeca4a4a7 In this entry, just `Modified' means that codes for a
Kenichi Handa <handa@m17n.org>
parents: 25637
diff changeset
41 characters.
17052
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
42
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
43 Within Emacs Lisp, a charset is treated as a symbol which has a
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
44 property `charset'. The property value is a vector containing
35478
340a9e4aeb29 comment fixes
Dave Love <fx@gnu.org>
parents: 35408
diff changeset
45 various information about the charset. For readability of C code,
26843
0aadeca4a4a7 In this entry, just `Modified' means that codes for a
Kenichi Handa <handa@m17n.org>
parents: 25637
diff changeset
46 we use the following convention for C variable names:
17052
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
47 charset_symbol: Emacs Lisp symbol of a charset
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
48 charset_id: Emacs Lisp integer of an identification number of a charset
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
49 charset: C integer of an identification number of a charset
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
50
29004
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
51 Each charset (except for ascii) is assigned a base leading-code
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
52 (range 0x80..0x9E). In addition, a charset of 0xA0 or greater
17052
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
53 (whose base leading-code is 0x9A..0x9D) is assigned an extended
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
54 leading-code (range 0xA0..0xFE). In this case, each base
35478
340a9e4aeb29 comment fixes
Dave Love <fx@gnu.org>
parents: 35408
diff changeset
55 leading-code specifies the allowable range of extended leading-code
340a9e4aeb29 comment fixes
Dave Love <fx@gnu.org>
parents: 35408
diff changeset
56 as shown in the table below. A leading-code is used to represent a
17052
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
57 character in Emacs' buffer and string.
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
58
35478
340a9e4aeb29 comment fixes
Dave Love <fx@gnu.org>
parents: 35408
diff changeset
59 We call a charset which has extended leading-code a "private
26843
0aadeca4a4a7 In this entry, just `Modified' means that codes for a
Kenichi Handa <handa@m17n.org>
parents: 25637
diff changeset
60 charset" because those are mainly for a charset which is not yet
17052
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
61 registered by ISO. In contrast, we call a charset which does
35478
340a9e4aeb29 comment fixes
Dave Love <fx@gnu.org>
parents: 35408
diff changeset
62 not have extended leading-code an "official charset".
17052
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
63
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
64 ---------------------------------------------------------------------------
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
65 charset dimension base leading-code extended leading-code
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
66 ---------------------------------------------------------------------------
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
67 0x00 official dim1 -- none -- -- none --
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
68 (ASCII)
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
69 0x01..0x7F --never used--
29539
a6f1d75744d8 (CHARSET_8_BIT_GRAPHIC): Define as 0x80.
Kenichi Handa <handa@m17n.org>
parents: 29004
diff changeset
70 0x80 official dim1 -- none -- -- none --
a6f1d75744d8 (CHARSET_8_BIT_GRAPHIC): Define as 0x80.
Kenichi Handa <handa@m17n.org>
parents: 29004
diff changeset
71 (eight-bit-graphic)
17052
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
72 0x81..0x8F official dim1 same as charset -- none --
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
73 0x90..0x99 official dim2 same as charset -- none --
29004
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
74 0x9A..0x9D --never used--
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
75 0x9E official dim1 same as charset -- none --
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
76 (eight-bit-control)
29539
a6f1d75744d8 (CHARSET_8_BIT_GRAPHIC): Define as 0x80.
Kenichi Handa <handa@m17n.org>
parents: 29004
diff changeset
77 0x9F --never used--
17052
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
78 0xA0..0xDF private dim1 0x9A same as charset
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
79 of 1-column width
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
80 0xE0..0xEF private dim1 0x9B same as charset
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
81 of 2-column width
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
82 0xF0..0xF4 private dim2 0x9C same as charset
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
83 of 1-column width
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
84 0xF5..0xFE private dim2 0x9D same as charset
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
85 of 2-column width
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
86 0xFF --never used--
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
87 ---------------------------------------------------------------------------
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
88
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
89 */
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
90
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
91 /* Definition of special leading-codes. */
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
92 /* Leading-code followed by extended leading-code. */
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
93 #define LEADING_CODE_PRIVATE_11 0x9A /* for private DIMENSION1 of 1-column */
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
94 #define LEADING_CODE_PRIVATE_12 0x9B /* for private DIMENSION1 of 2-column */
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
95 #define LEADING_CODE_PRIVATE_21 0x9C /* for private DIMENSION2 of 1-column */
19643
1defeafe575b (LEADING_CODE_PRIVATE_22): Comment fixed.
Kenichi Handa <handa@m17n.org>
parents: 19319
diff changeset
96 #define LEADING_CODE_PRIVATE_22 0x9D /* for private DIMENSION2 of 2-column */
17052
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
97
29004
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
98 #define LEADING_CODE_8_BIT_CONTROL 0x9E /* for `eight-bit-control' */
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
99
17052
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
100 /* Extended leading-code. */
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
101 /* Start of each range of extended leading-codes. */
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
102 #define LEADING_CODE_EXT_11 0xA0 /* follows LEADING_CODE_PRIVATE_11 */
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
103 #define LEADING_CODE_EXT_12 0xE0 /* follows LEADING_CODE_PRIVATE_12 */
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
104 #define LEADING_CODE_EXT_21 0xF0 /* follows LEADING_CODE_PRIVATE_21 */
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
105 #define LEADING_CODE_EXT_22 0xF5 /* follows LEADING_CODE_PRIVATE_22 */
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
106 /* Maximum value of extended leading-codes. */
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
107 #define LEADING_CODE_EXT_MAX 0xFE
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
108
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
109 /* Definition of minimum/maximum charset of each DIMENSION. */
31460
30806f88340f (MIN_CHARSET_OFFICIAL_DIMENSION1): Define it as 0x80, not 0x81.
Kenichi Handa <handa@m17n.org>
parents: 31434
diff changeset
110 #define MIN_CHARSET_OFFICIAL_DIMENSION1 0x80
17052
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
111 #define MAX_CHARSET_OFFICIAL_DIMENSION1 0x8F
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
112 #define MIN_CHARSET_OFFICIAL_DIMENSION2 0x90
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
113 #define MAX_CHARSET_OFFICIAL_DIMENSION2 0x99
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
114 #define MIN_CHARSET_PRIVATE_DIMENSION1 LEADING_CODE_EXT_11
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
115 #define MIN_CHARSET_PRIVATE_DIMENSION2 LEADING_CODE_EXT_21
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
116
17185
0d5a1bae9d73 (MAX_CHARSET): Definition changed to the actual
Kenichi Handa <handa@m17n.org>
parents: 17120
diff changeset
117 /* Maximum value of overall charset identification number. */
0d5a1bae9d73 (MAX_CHARSET): Definition changed to the actual
Kenichi Handa <handa@m17n.org>
parents: 17120
diff changeset
118 #define MAX_CHARSET 0xFE
0d5a1bae9d73 (MAX_CHARSET): Definition changed to the actual
Kenichi Handa <handa@m17n.org>
parents: 17120
diff changeset
119
17052
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
120 /* Definition of special charsets. */
29004
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
121 #define CHARSET_ASCII 0 /* 0x00..0x7F */
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
122 #define CHARSET_8_BIT_CONTROL 0x9E /* 0x80..0x9F */
29539
a6f1d75744d8 (CHARSET_8_BIT_GRAPHIC): Define as 0x80.
Kenichi Handa <handa@m17n.org>
parents: 29004
diff changeset
123 #define CHARSET_8_BIT_GRAPHIC 0x80 /* 0xA0..0xFF */
17052
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
124
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
125 extern int charset_latin_iso8859_1; /* ISO8859-1 (Latin-1) */
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
126 extern int charset_jisx0208_1978; /* JISX0208.1978 (Japanese Kanji old set) */
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
127 extern int charset_jisx0208; /* JISX0208.1983 (Japanese Kanji) */
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
128 extern int charset_katakana_jisx0201; /* JISX0201.Kana (Japanese Katakana) */
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
129 extern int charset_latin_jisx0201; /* JISX0201.Roman (Japanese Roman) */
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
130 extern int charset_big5_1; /* Big5 Level 1 (Chinese Traditional) */
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
131 extern int charset_big5_2; /* Big5 Level 2 (Chinese Traditional) */
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
132
29004
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
133 /* Check if CH is an ASCII character or a base leading-code.
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
134 Nowadays, any byte can be the first byte of a character in a
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
135 multibyte buffer/string. So this macro name is not appropriate. */
20531
f019e056ad9a (CHAR_HEAD_P): Take char, not pointer, as arg.
Richard M. Stallman <rms@gnu.org>
parents: 20352
diff changeset
136 #define CHAR_HEAD_P(ch) ((unsigned char) (ch) < 0xA0)
17052
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
137
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
138 /*** GENERAL NOTE on CHARACTER REPRESENTATION ***
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
139
35478
340a9e4aeb29 comment fixes
Dave Love <fx@gnu.org>
parents: 35408
diff changeset
140 Firstly, the term "character" or "char" is used for a multilingual
340a9e4aeb29 comment fixes
Dave Love <fx@gnu.org>
parents: 35408
diff changeset
141 character (of course, including ASCII characters), not for a byte in
17052
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
142 computer memory. We use the term "code" or "byte" for the latter
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
143 case.
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
144
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
145 A character is identified by charset and one or two POSITION-CODEs.
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
146 POSITION-CODE is the position of the character in the charset. A
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
147 character of DIMENSION1 charset has one POSITION-CODE: POSITION-CODE-1.
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
148 A character of DIMENSION2 charset has two POSITION-CODEs:
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
149 POSITION-CODE-1 and POSITION-CODE-2. The code range of
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
150 POSITION-CODE is 0x20..0x7F.
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
151
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
152 Emacs has two kinds of representation of a character: multi-byte
35478
340a9e4aeb29 comment fixes
Dave Love <fx@gnu.org>
parents: 35408
diff changeset
153 form (for buffers and strings) and single-word form (for character
340a9e4aeb29 comment fixes
Dave Love <fx@gnu.org>
parents: 35408
diff changeset
154 objects in Emacs Lisp). The latter is called "character code"
340a9e4aeb29 comment fixes
Dave Love <fx@gnu.org>
parents: 35408
diff changeset
155 hereafter. Both representations encode the information of charset
340a9e4aeb29 comment fixes
Dave Love <fx@gnu.org>
parents: 35408
diff changeset
156 and POSITION-CODE but in a different way (for instance, the MSB of
17052
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
157 POSITION-CODE is set in multi-byte form).
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
158
35478
340a9e4aeb29 comment fixes
Dave Love <fx@gnu.org>
parents: 35408
diff changeset
159 For details of the multi-byte form, see the section "2. Emacs
340a9e4aeb29 comment fixes
Dave Love <fx@gnu.org>
parents: 35408
diff changeset
160 internal format handlers" of `coding.c'.
17052
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
161
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
162 Emacs uses 19 bits for a character code. The bits are divided into
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
163 3 fields: FIELD1(5bits):FIELD2(7bits):FIELD3(7bits).
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
164
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
165 A character code of DIMENSION1 character uses FIELD2 to hold charset
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
166 and FIELD3 to hold POSITION-CODE-1. A character code of DIMENSION2
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
167 character uses FIELD1 to hold charset, FIELD2 and FIELD3 to hold
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
168 POSITION-CODE-1 and POSITION-CODE-2 respectively.
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
169
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
170 More precisely...
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
171
29004
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
172 FIELD2 of DIMENSION1 character (except for ascii, eight-bit-control,
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
173 and eight-bit-graphic) is "charset - 0x70". This is to make all
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
174 character codes except for ASCII and 8-bit codes greater than 256.
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
175 So, the range of FIELD2 of DIMENSION1 character is 0, 1, or
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
176 0x11..0x7F.
17052
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
177
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
178 FIELD1 of DIMENSION2 character is "charset - 0x8F" for official
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
179 charset and "charset - 0xE0" for private charset. So, the range of
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
180 FIELD1 of DIMENSION2 character is 0x01..0x1E.
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
181
29004
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
182 -----------------------------------------------------------------------------
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
183 charset FIELD1 (5-bit) FIELD2 (7-bit) FIELD3 (7-bit)
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
184 -----------------------------------------------------------------------------
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
185 ascii 0 0 0x00..0x7F
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
186 eight-bit-control 0 1 0x00..0x1F
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
187 eight-bit-graphic 0 1 0x20..0x7F
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
188 DIMENSION1 0 charset - 0x70 POSITION-CODE-1
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
189 DIMENSION2(o) charset - 0x8F POSITION-CODE-1 POSITION-CODE-2
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
190 DIMENSION2(p) charset - 0xE0 POSITION-CODE-1 POSITION-CODE-2
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
191 -----------------------------------------------------------------------------
17052
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
192 "(o)": official, "(p)": private
29004
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
193 -----------------------------------------------------------------------------
17052
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
194 */
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
195
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
196 /* Masks of each field of character code. */
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
197 #define CHAR_FIELD1_MASK (0x1F << 14)
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
198 #define CHAR_FIELD2_MASK (0x7F << 7)
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
199 #define CHAR_FIELD3_MASK 0x7F
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
200
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
201 /* Macros to access each field of character C. */
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
202 #define CHAR_FIELD1(c) (((c) & CHAR_FIELD1_MASK) >> 14)
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
203 #define CHAR_FIELD2(c) (((c) & CHAR_FIELD2_MASK) >> 7)
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
204 #define CHAR_FIELD3(c) ((c) & CHAR_FIELD3_MASK)
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
205
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
206 /* Minimum character code of character of each DIMENSION. */
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
207 #define MIN_CHAR_OFFICIAL_DIMENSION1 \
31460
30806f88340f (MIN_CHARSET_OFFICIAL_DIMENSION1): Define it as 0x80, not 0x81.
Kenichi Handa <handa@m17n.org>
parents: 31434
diff changeset
208 ((0x81 - 0x70) << 7)
17052
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
209 #define MIN_CHAR_PRIVATE_DIMENSION1 \
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
210 ((MIN_CHARSET_PRIVATE_DIMENSION1 - 0x70) << 7)
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
211 #define MIN_CHAR_OFFICIAL_DIMENSION2 \
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
212 ((MIN_CHARSET_OFFICIAL_DIMENSION2 - 0x8F) << 14)
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
213 #define MIN_CHAR_PRIVATE_DIMENSION2 \
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
214 ((MIN_CHARSET_PRIVATE_DIMENSION2 - 0xE0) << 14)
26843
0aadeca4a4a7 In this entry, just `Modified' means that codes for a
Kenichi Handa <handa@m17n.org>
parents: 25637
diff changeset
215 /* Maximum character code currently used plus 1. */
0aadeca4a4a7 In this entry, just `Modified' means that codes for a
Kenichi Handa <handa@m17n.org>
parents: 25637
diff changeset
216 #define MAX_CHAR (0x1F << 14)
17052
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
217
29004
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
218 /* 1 if C is a single byte character, else 0. */
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
219 #define SINGLE_BYTE_CHAR_P(c) ((unsigned) (c) < 0x100)
17052
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
220
29004
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
221 /* 1 if BYTE is an ASCII character in itself, in multibyte mode. */
20257
7ba68c0e1bee (ASCII_BYTE_P): New macro.
Karl Heuer <kwzh@gnu.org>
parents: 20151
diff changeset
222 #define ASCII_BYTE_P(byte) ((byte) < 0x80)
7ba68c0e1bee (ASCII_BYTE_P): New macro.
Karl Heuer <kwzh@gnu.org>
parents: 20151
diff changeset
223
35478
340a9e4aeb29 comment fixes
Dave Love <fx@gnu.org>
parents: 35408
diff changeset
224 /* A char-table containing information on each character set.
17052
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
225
35478
340a9e4aeb29 comment fixes
Dave Love <fx@gnu.org>
parents: 35408
diff changeset
226 Unlike ordinary char-tables, this doesn't contain any nested tables.
17052
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
227 Only the top level elements are used. Each element is a vector of
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
228 the following information:
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
229 CHARSET-ID, BYTES, DIMENSION, CHARS, WIDTH, DIRECTION,
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
230 LEADING-CODE-BASE, LEADING-CODE-EXT,
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
231 ISO-FINAL-CHAR, ISO-GRAPHIC-PLANE,
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
232 REVERSE-CHARSET, SHORT-NAME, LONG-NAME, DESCRIPTION,
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
233 PLIST.
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
234
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
235 CHARSET-ID (integer) is the identification number of the charset.
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
236
35478
340a9e4aeb29 comment fixes
Dave Love <fx@gnu.org>
parents: 35408
diff changeset
237 BYTES (integer) is the length of the multi-byte form of a character
340a9e4aeb29 comment fixes
Dave Love <fx@gnu.org>
parents: 35408
diff changeset
238 in the charset: one of 1, 2, 3, and 4.
17052
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
239
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
240 DIMENSION (integer) is the number of bytes to represent a character: 1 or 2.
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
241
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
242 CHARS (integer) is the number of characters in a dimension: 94 or 96.
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
243
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
244 WIDTH (integer) is the number of columns a character in the charset
29004
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
245 occupies on the screen: one of 0, 1, and 2.
17052
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
246
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
247 DIRECTION (integer) is the rendering direction of characters in the
24777
d9d0dd9ebcb5 (Vcharset_table): Comment fixed.
Kenichi Handa <handa@m17n.org>
parents: 23882
diff changeset
248 charset. If 0, render from left to right, else
d9d0dd9ebcb5 (Vcharset_table): Comment fixed.
Kenichi Handa <handa@m17n.org>
parents: 23882
diff changeset
249 render from right to left.
17052
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
250
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
251 LEADING-CODE-BASE (integer) is the base leading-code for the
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
252 charset.
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
253
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
254 LEADING-CODE-EXT (integer) is the extended leading-code for the
35478
340a9e4aeb29 comment fixes
Dave Love <fx@gnu.org>
parents: 35408
diff changeset
255 charset. All charsets whose ID is less than 0xA0 have the value 0.
17052
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
256
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
257 ISO-FINAL-CHAR (character) is the final character of the
29004
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
258 corresponding ISO 2022 charset. It is -1 for a charset
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
259 that is used only internally (e.g. `eight-bit-control').
17052
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
260
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
261 ISO-GRAPHIC-PLANE (integer) is the graphic plane to be invoked
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
262 while encoding to variants of ISO 2022 coding system, one of the
29004
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
263 following: 0/graphic-plane-left(GL), 1/graphic-plane-right(GR). It
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
264 is -1 for a charset that is used only internally
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
265 (e.g. `eight-bit-control').
17052
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
266
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
267 REVERSE-CHARSET (integer) is the charset which differs only in
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
268 DIRECTION value from the charset. If there's no such
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
269 charset, the value is -1.
35478
340a9e4aeb29 comment fixes
Dave Love <fx@gnu.org>
parents: 35408
diff changeset
270
17052
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
271 SHORT-NAME (string) is the short name to refer to the charset.
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
272
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
273 LONG-NAME (string) is the long name to refer to the charset.
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
274
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
275 DESCRIPTION (string) is the description string of the charset.
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
276
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
277 PLIST (property list) may contain any type of information a user
35478
340a9e4aeb29 comment fixes
Dave Love <fx@gnu.org>
parents: 35408
diff changeset
278 wants to put and get by functions `put-charset-property' and
17052
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
279 `get-charset-property' respectively. */
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
280 extern Lisp_Object Vcharset_table;
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
281
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
282 /* Macros to access various information of CHARSET in Vcharset_table.
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
283 We provide these macros for efficiency. No range check of CHARSET. */
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
284
28513
1fec001e68c5 (CHARSET_TABLE_ENTRY): Fix comment -- argument is a C int, not a lisp integer.
Ken Raeburn <raeburn@raeburn.org>
parents: 28473
diff changeset
285 /* Return entry of CHARSET (C integer) in Vcharset_table. */
17321
9f837bea89e3 (CHARSET_TABLE_ENTRY): Handle ASCII charset correctly.
Kenichi Handa <handa@m17n.org>
parents: 17185
diff changeset
286 #define CHARSET_TABLE_ENTRY(charset) \
9f837bea89e3 (CHARSET_TABLE_ENTRY): Handle ASCII charset correctly.
Kenichi Handa <handa@m17n.org>
parents: 17185
diff changeset
287 XCHAR_TABLE (Vcharset_table)->contents[((charset) == CHARSET_ASCII \
9f837bea89e3 (CHARSET_TABLE_ENTRY): Handle ASCII charset correctly.
Kenichi Handa <handa@m17n.org>
parents: 17185
diff changeset
288 ? 0 : (charset) + 128)]
17052
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
289
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
290 /* Return information INFO-IDX of CHARSET. */
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
291 #define CHARSET_TABLE_INFO(charset, info_idx) \
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
292 XVECTOR (CHARSET_TABLE_ENTRY (charset))->contents[info_idx]
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
293
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
294 #define CHARSET_ID_IDX (0)
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
295 #define CHARSET_BYTES_IDX (1)
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
296 #define CHARSET_DIMENSION_IDX (2)
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
297 #define CHARSET_CHARS_IDX (3)
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
298 #define CHARSET_WIDTH_IDX (4)
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
299 #define CHARSET_DIRECTION_IDX (5)
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
300 #define CHARSET_LEADING_CODE_BASE_IDX (6)
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
301 #define CHARSET_LEADING_CODE_EXT_IDX (7)
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
302 #define CHARSET_ISO_FINAL_CHAR_IDX (8)
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
303 #define CHARSET_ISO_GRAPHIC_PLANE_IDX (9)
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
304 #define CHARSET_REVERSE_CHARSET_IDX (10)
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
305 #define CHARSET_SHORT_NAME_IDX (11)
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
306 #define CHARSET_LONG_NAME_IDX (12)
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
307 #define CHARSET_DESCRIPTION_IDX (13)
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
308 #define CHARSET_PLIST_IDX (14)
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
309 /* Size of a vector of each entry of Vcharset_table. */
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
310 #define CHARSET_MAX_IDX (15)
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
311
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
312 /* And several more macros to be used frequently. */
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
313 #define CHARSET_BYTES(charset) \
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
314 XFASTINT (CHARSET_TABLE_INFO (charset, CHARSET_BYTES_IDX))
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
315 #define CHARSET_DIMENSION(charset) \
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
316 XFASTINT (CHARSET_TABLE_INFO (charset, CHARSET_DIMENSION_IDX))
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
317 #define CHARSET_CHARS(charset) \
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
318 XFASTINT (CHARSET_TABLE_INFO (charset, CHARSET_CHARS_IDX))
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
319 #define CHARSET_WIDTH(charset) \
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
320 XFASTINT (CHARSET_TABLE_INFO (charset, CHARSET_WIDTH_IDX))
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
321 #define CHARSET_DIRECTION(charset) \
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
322 XFASTINT (CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX))
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
323 #define CHARSET_LEADING_CODE_BASE(charset) \
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
324 XFASTINT (CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_BASE_IDX))
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
325 #define CHARSET_LEADING_CODE_EXT(charset) \
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
326 XFASTINT (CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_EXT_IDX))
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
327 #define CHARSET_ISO_FINAL_CHAR(charset) \
29004
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
328 XINT (CHARSET_TABLE_INFO (charset, CHARSET_ISO_FINAL_CHAR_IDX))
17052
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
329 #define CHARSET_ISO_GRAPHIC_PLANE(charset) \
29004
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
330 XINT (CHARSET_TABLE_INFO (charset, CHARSET_ISO_GRAPHIC_PLANE_IDX))
17052
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
331 #define CHARSET_REVERSE_CHARSET(charset) \
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
332 XINT (CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX))
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
333
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
334 /* Macros to specify direction of a charset. */
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
335 #define CHARSET_DIRECTION_LEFT_TO_RIGHT 0
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
336 #define CHARSET_DIRECTION_RIGHT_TO_LEFT 1
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
337
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
338 /* A vector of charset symbols indexed by charset-id. This is used
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
339 only for returning a charset symbol from C functions. */
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
340 extern Lisp_Object Vcharset_symbol_table;
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
341
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
342 /* Return symbol of CHARSET. */
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
343 #define CHARSET_SYMBOL(charset) \
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
344 XVECTOR (Vcharset_symbol_table)->contents[charset]
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
345
25505
4d5f87073d63 (MAKE_NON_ASCII_CHAR): Handle the case that C1 or C2
Kenichi Handa <handa@m17n.org>
parents: 25234
diff changeset
346 /* 1 if CHARSET is in valid value range, else 0. */
17052
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
347 #define CHARSET_VALID_P(charset) \
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
348 ((charset) == 0 \
26843
0aadeca4a4a7 In this entry, just `Modified' means that codes for a
Kenichi Handa <handa@m17n.org>
parents: 25637
diff changeset
349 || ((charset) > 0x80 && (charset) <= MAX_CHARSET_OFFICIAL_DIMENSION2) \
29004
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
350 || ((charset) >= MIN_CHARSET_PRIVATE_DIMENSION1 \
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
351 && (charset) <= MAX_CHARSET) \
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
352 || ((charset) == CHARSET_8_BIT_CONTROL) \
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
353 || ((charset) == CHARSET_8_BIT_GRAPHIC))
17052
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
354
26843
0aadeca4a4a7 In this entry, just `Modified' means that codes for a
Kenichi Handa <handa@m17n.org>
parents: 25637
diff changeset
355 /* 1 if CHARSET is already defined, else 0. */
17052
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
356 #define CHARSET_DEFINED_P(charset) \
17185
0d5a1bae9d73 (MAX_CHARSET): Definition changed to the actual
Kenichi Handa <handa@m17n.org>
parents: 17120
diff changeset
357 (((charset) >= 0) && ((charset) <= MAX_CHARSET) \
17052
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
358 && !NILP (CHARSET_TABLE_ENTRY (charset)))
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
359
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
360 /* Since the information CHARSET-BYTES and CHARSET-WIDTH of
29004
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
361 Vcharset_table can be retrieved only by the first byte of
17052
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
362 multi-byte form (an ASCII code or a base leading-code), we provide
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
363 here tables to be used by macros BYTES_BY_CHAR_HEAD and
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
364 WIDTH_BY_CHAR_HEAD for faster information retrieval. */
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
365 extern int bytes_by_char_head[256];
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
366 extern int width_by_char_head[256];
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
367
29004
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
368 #define BYTES_BY_CHAR_HEAD(char_head) \
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
369 (ASCII_BYTE_P (char_head) ? 1 : bytes_by_char_head[char_head])
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
370 #define WIDTH_BY_CHAR_HEAD(char_head) \
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
371 (ASCII_BYTE_P (char_head) ? 1 : width_by_char_head[char_head])
17052
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
372
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
373 /* Charset of the character C. */
29004
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
374 #define CHAR_CHARSET(c) \
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
375 (SINGLE_BYTE_CHAR_P (c) \
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
376 ? (ASCII_BYTE_P (c) \
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
377 ? CHARSET_ASCII \
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
378 : (c) < 0xA0 ? CHARSET_8_BIT_CONTROL : CHARSET_8_BIT_GRAPHIC) \
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
379 : ((c) < MIN_CHAR_OFFICIAL_DIMENSION2 \
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
380 ? CHAR_FIELD2 (c) + 0x70 \
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
381 : ((c) < MIN_CHAR_PRIVATE_DIMENSION2 \
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
382 ? CHAR_FIELD1 (c) + 0x8F \
26843
0aadeca4a4a7 In this entry, just `Modified' means that codes for a
Kenichi Handa <handa@m17n.org>
parents: 25637
diff changeset
383 : CHAR_FIELD1 (c) + 0xE0)))
17052
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
384
26843
0aadeca4a4a7 In this entry, just `Modified' means that codes for a
Kenichi Handa <handa@m17n.org>
parents: 25637
diff changeset
385 /* Check if two characters C1 and C2 belong to the same charset. */
29004
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
386 #define SAME_CHARSET_P(c1, c2) \
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
387 (c1 < MIN_CHAR_OFFICIAL_DIMENSION2 \
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
388 ? (c1 & CHAR_FIELD2_MASK) == (c2 & CHAR_FIELD2_MASK) \
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
389 : (c1 & CHAR_FIELD1_MASK) == (c2 & CHAR_FIELD1_MASK))
17052
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
390
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
391 /* Return a character whose charset is CHARSET and whose position-codes
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
392 are C1 and C2. A DIMENSION1 character ignores C2. */
29004
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
393 #define MAKE_CHAR(charset, c1, c2) \
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
394 ((charset) == CHARSET_ASCII \
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
395 ? (c1) & 0x7F \
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
396 : (((charset) == CHARSET_8_BIT_CONTROL \
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
397 || (charset) == CHARSET_8_BIT_GRAPHIC) \
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
398 ? ((c1) & 0x7F) | 0x80 \
30282
3d210892f575 (MAKE_CHAR): Return reasonable code even if CHARSET is undefined.
Kenichi Handa <handa@m17n.org>
parents: 29570
diff changeset
399 : ((CHARSET_DEFINED_P (charset) \
3d210892f575 (MAKE_CHAR): Return reasonable code even if CHARSET is undefined.
Kenichi Handa <handa@m17n.org>
parents: 29570
diff changeset
400 ? CHARSET_DIMENSION (charset) == 1 \
3d210892f575 (MAKE_CHAR): Return reasonable code even if CHARSET is undefined.
Kenichi Handa <handa@m17n.org>
parents: 29570
diff changeset
401 : (charset) < MIN_CHARSET_PRIVATE_DIMENSION2) \
31434
28f0d4e9cbb2 (MAKE_CHAR): Be sure to set MSB of C1 to 0.
Kenichi Handa <handa@m17n.org>
parents: 30282
diff changeset
402 ? (((charset) - 0x70) << 7) | ((c1) <= 0 ? 0 : ((c1) & 0x7F)) \
29004
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
403 : ((((charset) \
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
404 - ((charset) < MIN_CHARSET_PRIVATE_DIMENSION2 ? 0x8F : 0xE0)) \
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
405 << 14) \
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
406 | ((c2) <= 0 ? 0 : ((c2) & 0x7F)) \
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
407 | ((c1) <= 0 ? 0 : (((c1) & 0x7F) << 7))))))
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
408
17052
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
409
20151
82ca32c55fa3 (CHAR_VALID_P): Renamed from VALID_CHAR_P, new
Kenichi Handa <handa@m17n.org>
parents: 19643
diff changeset
410 /* If GENERICP is nonzero, return nonzero iff C is a valid normal or
82ca32c55fa3 (CHAR_VALID_P): Renamed from VALID_CHAR_P, new
Kenichi Handa <handa@m17n.org>
parents: 19643
diff changeset
411 generic character. If GENERICP is zero, return nonzero iff C is a
82ca32c55fa3 (CHAR_VALID_P): Renamed from VALID_CHAR_P, new
Kenichi Handa <handa@m17n.org>
parents: 19643
diff changeset
412 valid normal character. */
82ca32c55fa3 (CHAR_VALID_P): Renamed from VALID_CHAR_P, new
Kenichi Handa <handa@m17n.org>
parents: 19643
diff changeset
413 #define CHAR_VALID_P(c, genericp) \
82ca32c55fa3 (CHAR_VALID_P): Renamed from VALID_CHAR_P, new
Kenichi Handa <handa@m17n.org>
parents: 19643
diff changeset
414 ((c) >= 0 \
82ca32c55fa3 (CHAR_VALID_P): Renamed from VALID_CHAR_P, new
Kenichi Handa <handa@m17n.org>
parents: 19643
diff changeset
415 && (SINGLE_BYTE_CHAR_P (c) || char_valid_p (c, genericp)))
17833
59aa4a0772f6 (VALID_CHAR_P): New macro.
Kenichi Handa <handa@m17n.org>
parents: 17726
diff changeset
416
22184
003ac1231096 (STRING_CHAR_AND_CHAR_LENGTH): New macro.
Kenichi Handa <handa@m17n.org>
parents: 22167
diff changeset
417 /* This default value is used when nonascii-translation-table or
21033
9f32198e0d9f (NONASCII_INSERT_OFFSET): New macro.
Kenichi Handa <handa@m17n.org>
parents: 20932
diff changeset
418 nonascii-insert-offset fails to convert a unibyte character to a valid
9f32198e0d9f (NONASCII_INSERT_OFFSET): New macro.
Kenichi Handa <handa@m17n.org>
parents: 20932
diff changeset
419 multibyte character. This makes a Latin-1 character. */
9f32198e0d9f (NONASCII_INSERT_OFFSET): New macro.
Kenichi Handa <handa@m17n.org>
parents: 20932
diff changeset
420
9f32198e0d9f (NONASCII_INSERT_OFFSET): New macro.
Kenichi Handa <handa@m17n.org>
parents: 20932
diff changeset
421 #define DEFAULT_NONASCII_INSERT_OFFSET 0x800
9f32198e0d9f (NONASCII_INSERT_OFFSET): New macro.
Kenichi Handa <handa@m17n.org>
parents: 20932
diff changeset
422
29004
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
423 /* Parse multibyte string STR of length LENGTH and set BYTES to the
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
424 byte length of a character at STR. */
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
425
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
426 #ifdef BYTE_COMBINING_DEBUG
25505
4d5f87073d63 (MAKE_NON_ASCII_CHAR): Handle the case that C1 or C2
Kenichi Handa <handa@m17n.org>
parents: 25234
diff changeset
427
4d5f87073d63 (MAKE_NON_ASCII_CHAR): Handle the case that C1 or C2
Kenichi Handa <handa@m17n.org>
parents: 25234
diff changeset
428 #define PARSE_MULTIBYTE_SEQ(str, length, bytes) \
4d5f87073d63 (MAKE_NON_ASCII_CHAR): Handle the case that C1 or C2
Kenichi Handa <handa@m17n.org>
parents: 25234
diff changeset
429 do { \
4d5f87073d63 (MAKE_NON_ASCII_CHAR): Handle the case that C1 or C2
Kenichi Handa <handa@m17n.org>
parents: 25234
diff changeset
430 int i = 1; \
26843
0aadeca4a4a7 In this entry, just `Modified' means that codes for a
Kenichi Handa <handa@m17n.org>
parents: 25637
diff changeset
431 while (i < (length) && ! CHAR_HEAD_P ((str)[i])) i++; \
29004
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
432 (bytes) = BYTES_BY_CHAR_HEAD ((str)[0]); \
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
433 if ((bytes) > i) \
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
434 abort (); \
25505
4d5f87073d63 (MAKE_NON_ASCII_CHAR): Handle the case that C1 or C2
Kenichi Handa <handa@m17n.org>
parents: 25234
diff changeset
435 } while (0)
21033
9f32198e0d9f (NONASCII_INSERT_OFFSET): New macro.
Kenichi Handa <handa@m17n.org>
parents: 20932
diff changeset
436
29004
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
437 #else /* not BYTE_COMBINING_DEBUG */
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
438
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
439 #define PARSE_MULTIBYTE_SEQ(str, length, bytes) \
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
440 (bytes) = BYTES_BY_CHAR_HEAD ((str)[0])
19319
9ea3c87ea61d Comment changes.
Richard M. Stallman <rms@gnu.org>
parents: 18341
diff changeset
441
29004
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
442 #endif /* not BYTE_COMBINING_DEBUG */
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
443
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
444 /* Return 1 iff the byte sequence at unibyte string STR (LENGTH bytes)
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
445 is valid as a multibyte form. If valid, by a side effect, BYTES is
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
446 set to the byte length of the multibyte form. */
19319
9ea3c87ea61d Comment changes.
Richard M. Stallman <rms@gnu.org>
parents: 18341
diff changeset
447
29004
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
448 #define UNIBYTE_STR_AS_MULTIBYTE_P(str, length, bytes) \
29539
a6f1d75744d8 (CHARSET_8_BIT_GRAPHIC): Define as 0x80.
Kenichi Handa <handa@m17n.org>
parents: 29004
diff changeset
449 (((str)[0] < 0x80 || (str)[0] >= 0xA0) \
a6f1d75744d8 (CHARSET_8_BIT_GRAPHIC): Define as 0x80.
Kenichi Handa <handa@m17n.org>
parents: 29004
diff changeset
450 ? (bytes) = 1 \
a6f1d75744d8 (CHARSET_8_BIT_GRAPHIC): Define as 0x80.
Kenichi Handa <handa@m17n.org>
parents: 29004
diff changeset
451 : (((bytes) = BYTES_BY_CHAR_HEAD ((str)[0])), \
31703
b5664da02625 (UNIBYTE_STR_AS_MULTIBYTE_P): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 31460
diff changeset
452 ((bytes) > 1 && (bytes) <= (length) \
b5664da02625 (UNIBYTE_STR_AS_MULTIBYTE_P): Fix previous change.
Kenichi Handa <handa@m17n.org>
parents: 31460
diff changeset
453 && (str)[0] != LEADING_CODE_8_BIT_CONTROL \
29004
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
454 && !CHAR_HEAD_P ((str)[1]) \
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
455 && ((bytes) == 2 \
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
456 || (!CHAR_HEAD_P ((str)[2]) \
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
457 && ((bytes) == 3 \
29539
a6f1d75744d8 (CHARSET_8_BIT_GRAPHIC): Define as 0x80.
Kenichi Handa <handa@m17n.org>
parents: 29004
diff changeset
458 || !CHAR_HEAD_P ((str)[3])))))))
29004
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
459
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
460 /* Return 1 iff the byte sequence at multibyte string STR is valid as
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
461 a unibyte form. By a side effect, BYTES is set to the byte length
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
462 of one character at STR. */
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
463
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
464 #define MULTIBYTE_STR_AS_UNIBYTE_P(str, bytes) \
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
465 ((bytes) = BYTES_BY_CHAR_HEAD ((str)[0]), \
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
466 (str)[0] != LEADING_CODE_8_BIT_CONTROL)
17052
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
467
19319
9ea3c87ea61d Comment changes.
Richard M. Stallman <rms@gnu.org>
parents: 18341
diff changeset
468 /* The charset of character C is stored in CHARSET, and the
9ea3c87ea61d Comment changes.
Richard M. Stallman <rms@gnu.org>
parents: 18341
diff changeset
469 position-codes of C are stored in C1 and C2.
25583
2c0d3a9d33e8 Lots of comments fixed.
Kenichi Handa <handa@m17n.org>
parents: 25505
diff changeset
470 We store -1 in C2 if the dimension of the charset is 1. */
19319
9ea3c87ea61d Comment changes.
Richard M. Stallman <rms@gnu.org>
parents: 18341
diff changeset
471
30282
3d210892f575 (MAKE_CHAR): Return reasonable code even if CHARSET is undefined.
Kenichi Handa <handa@m17n.org>
parents: 29570
diff changeset
472 #define SPLIT_CHAR(c, charset, c1, c2) \
3d210892f575 (MAKE_CHAR): Return reasonable code even if CHARSET is undefined.
Kenichi Handa <handa@m17n.org>
parents: 29570
diff changeset
473 (SINGLE_BYTE_CHAR_P (c) \
3d210892f575 (MAKE_CHAR): Return reasonable code even if CHARSET is undefined.
Kenichi Handa <handa@m17n.org>
parents: 29570
diff changeset
474 ? ((charset \
3d210892f575 (MAKE_CHAR): Return reasonable code even if CHARSET is undefined.
Kenichi Handa <handa@m17n.org>
parents: 29570
diff changeset
475 = (ASCII_BYTE_P (c) \
3d210892f575 (MAKE_CHAR): Return reasonable code even if CHARSET is undefined.
Kenichi Handa <handa@m17n.org>
parents: 29570
diff changeset
476 ? CHARSET_ASCII \
3d210892f575 (MAKE_CHAR): Return reasonable code even if CHARSET is undefined.
Kenichi Handa <handa@m17n.org>
parents: 29570
diff changeset
477 : ((c) < 0xA0 ? CHARSET_8_BIT_CONTROL : CHARSET_8_BIT_GRAPHIC))), \
3d210892f575 (MAKE_CHAR): Return reasonable code even if CHARSET is undefined.
Kenichi Handa <handa@m17n.org>
parents: 29570
diff changeset
478 c1 = (c), c2 = -1) \
3d210892f575 (MAKE_CHAR): Return reasonable code even if CHARSET is undefined.
Kenichi Handa <handa@m17n.org>
parents: 29570
diff changeset
479 : ((c) & CHAR_FIELD1_MASK \
3d210892f575 (MAKE_CHAR): Return reasonable code even if CHARSET is undefined.
Kenichi Handa <handa@m17n.org>
parents: 29570
diff changeset
480 ? (charset = (CHAR_FIELD1 (c) \
3d210892f575 (MAKE_CHAR): Return reasonable code even if CHARSET is undefined.
Kenichi Handa <handa@m17n.org>
parents: 29570
diff changeset
481 + ((c) < MIN_CHAR_PRIVATE_DIMENSION2 ? 0x8F : 0xE0)), \
3d210892f575 (MAKE_CHAR): Return reasonable code even if CHARSET is undefined.
Kenichi Handa <handa@m17n.org>
parents: 29570
diff changeset
482 c1 = CHAR_FIELD2 (c), \
3d210892f575 (MAKE_CHAR): Return reasonable code even if CHARSET is undefined.
Kenichi Handa <handa@m17n.org>
parents: 29570
diff changeset
483 c2 = CHAR_FIELD3 (c)) \
3d210892f575 (MAKE_CHAR): Return reasonable code even if CHARSET is undefined.
Kenichi Handa <handa@m17n.org>
parents: 29570
diff changeset
484 : (charset = CHAR_FIELD2 (c) + 0x70, \
3d210892f575 (MAKE_CHAR): Return reasonable code even if CHARSET is undefined.
Kenichi Handa <handa@m17n.org>
parents: 29570
diff changeset
485 c1 = CHAR_FIELD3 (c), \
29004
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
486 c2 = -1)))
17052
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
487
25505
4d5f87073d63 (MAKE_NON_ASCII_CHAR): Handle the case that C1 or C2
Kenichi Handa <handa@m17n.org>
parents: 25234
diff changeset
488 /* Return 1 iff character C has a valid printable glyph. */
29004
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
489 #define CHAR_PRINTABLE_P(c) (ASCII_BYTE_P (c) || char_printable_p (c))
25505
4d5f87073d63 (MAKE_NON_ASCII_CHAR): Handle the case that C1 or C2
Kenichi Handa <handa@m17n.org>
parents: 25234
diff changeset
490
19319
9ea3c87ea61d Comment changes.
Richard M. Stallman <rms@gnu.org>
parents: 18341
diff changeset
491 /* The charset of the character at STR is stored in CHARSET, and the
9ea3c87ea61d Comment changes.
Richard M. Stallman <rms@gnu.org>
parents: 18341
diff changeset
492 position-codes are stored in C1 and C2.
26843
0aadeca4a4a7 In this entry, just `Modified' means that codes for a
Kenichi Handa <handa@m17n.org>
parents: 25637
diff changeset
493 We store -1 in C2 if the character is just 2 bytes. */
19319
9ea3c87ea61d Comment changes.
Richard M. Stallman <rms@gnu.org>
parents: 18341
diff changeset
494
26843
0aadeca4a4a7 In this entry, just `Modified' means that codes for a
Kenichi Handa <handa@m17n.org>
parents: 25637
diff changeset
495 #define SPLIT_STRING(str, len, charset, c1, c2) \
0aadeca4a4a7 In this entry, just `Modified' means that codes for a
Kenichi Handa <handa@m17n.org>
parents: 25637
diff changeset
496 ((BYTES_BY_CHAR_HEAD ((unsigned char) *(str)) < 2 \
0aadeca4a4a7 In this entry, just `Modified' means that codes for a
Kenichi Handa <handa@m17n.org>
parents: 25637
diff changeset
497 || BYTES_BY_CHAR_HEAD ((unsigned char) *(str)) > len \
0aadeca4a4a7 In this entry, just `Modified' means that codes for a
Kenichi Handa <handa@m17n.org>
parents: 25637
diff changeset
498 || split_string (str, len, &charset, &c1, &c2) < 0) \
0aadeca4a4a7 In this entry, just `Modified' means that codes for a
Kenichi Handa <handa@m17n.org>
parents: 25637
diff changeset
499 ? c1 = *(str), charset = CHARSET_ASCII \
17052
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
500 : charset)
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
501
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
502 /* Mapping table from ISO2022's charset (specified by DIMENSION,
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
503 CHARS, and FINAL_CHAR) to Emacs' charset. Should be accessed by
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
504 macro ISO_CHARSET_TABLE (DIMENSION, CHARS, FINAL_CHAR). */
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
505 extern int iso_charset_table[2][2][128];
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
506
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
507 #define ISO_CHARSET_TABLE(dimension, chars, final_char) \
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
508 iso_charset_table[XINT (dimension) - 1][XINT (chars) > 94][XINT (final_char)]
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
509
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
510 #define BASE_LEADING_CODE_P(c) (BYTES_BY_CHAR_HEAD ((unsigned char) (c)) > 1)
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
511
23130
f2cee3bcef78 (CHAR_BYTES): New macro.
Kenichi Handa <handa@m17n.org>
parents: 22702
diff changeset
512 /* Return how many bytes C will occupy in a multibyte buffer. */
29004
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
513 #define CHAR_BYTES(c) \
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
514 (SINGLE_BYTE_CHAR_P (c) \
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
515 ? ((ASCII_BYTE_P (c) || (c) >= 0xA0) ? 1 : 2) \
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
516 : char_bytes (c))
23130
f2cee3bcef78 (CHAR_BYTES): New macro.
Kenichi Handa <handa@m17n.org>
parents: 22702
diff changeset
517
17052
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
518 /* The following two macros CHAR_STRING and STRING_CHAR are the main
35478
340a9e4aeb29 comment fixes
Dave Love <fx@gnu.org>
parents: 35408
diff changeset
519 entry points to convert between Emacs's two types of character
17052
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
520 representations: multi-byte form and single-word form (character
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
521 code). */
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
522
26843
0aadeca4a4a7 In this entry, just `Modified' means that codes for a
Kenichi Handa <handa@m17n.org>
parents: 25637
diff changeset
523 /* Store multi-byte form of the character C in STR. The caller should
29004
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
524 allocate an area of at least MAX_MULTIBYTE_LENGTH bytes at STR in
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
525 advance. Returns the length of the multi-byte form. If C is an
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
526 invalid character code, signal an error. */
17052
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
527
32353
803ca8bc08b2 (CHAR_STRING): Optimized for single byte characters.
Kenichi Handa <handa@m17n.org>
parents: 31703
diff changeset
528 #define CHAR_STRING(c, str) \
803ca8bc08b2 (CHAR_STRING): Optimized for single byte characters.
Kenichi Handa <handa@m17n.org>
parents: 31703
diff changeset
529 (SINGLE_BYTE_CHAR_P (c) \
803ca8bc08b2 (CHAR_STRING): Optimized for single byte characters.
Kenichi Handa <handa@m17n.org>
parents: 31703
diff changeset
530 ? ((ASCII_BYTE_P (c) || c >= 0xA0) \
803ca8bc08b2 (CHAR_STRING): Optimized for single byte characters.
Kenichi Handa <handa@m17n.org>
parents: 31703
diff changeset
531 ? (*(str) = (unsigned char)(c), 1) \
803ca8bc08b2 (CHAR_STRING): Optimized for single byte characters.
Kenichi Handa <handa@m17n.org>
parents: 31703
diff changeset
532 : (*(str) = LEADING_CODE_8_BIT_CONTROL, *((str)+ 1) = c + 0x20, 2)) \
29004
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
533 : char_to_string (c, (unsigned char *) str))
17052
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
534
38395
b7a9187751b2 (CHAR_STRING_NO_SIGNAL): New macro.
Gerd Moellmann <gerd@gnu.org>
parents: 35949
diff changeset
535 /* Like CHAR_STRING but don't signal an error if C is invalid.
b7a9187751b2 (CHAR_STRING_NO_SIGNAL): New macro.
Gerd Moellmann <gerd@gnu.org>
parents: 35949
diff changeset
536 Value is -1 in this case. */
b7a9187751b2 (CHAR_STRING_NO_SIGNAL): New macro.
Gerd Moellmann <gerd@gnu.org>
parents: 35949
diff changeset
537
b7a9187751b2 (CHAR_STRING_NO_SIGNAL): New macro.
Gerd Moellmann <gerd@gnu.org>
parents: 35949
diff changeset
538 #define CHAR_STRING_NO_SIGNAL(c, str) \
b7a9187751b2 (CHAR_STRING_NO_SIGNAL): New macro.
Gerd Moellmann <gerd@gnu.org>
parents: 35949
diff changeset
539 (SINGLE_BYTE_CHAR_P (c) \
b7a9187751b2 (CHAR_STRING_NO_SIGNAL): New macro.
Gerd Moellmann <gerd@gnu.org>
parents: 35949
diff changeset
540 ? ((ASCII_BYTE_P (c) || c >= 0xA0) \
b7a9187751b2 (CHAR_STRING_NO_SIGNAL): New macro.
Gerd Moellmann <gerd@gnu.org>
parents: 35949
diff changeset
541 ? (*(str) = (unsigned char)(c), 1) \
b7a9187751b2 (CHAR_STRING_NO_SIGNAL): New macro.
Gerd Moellmann <gerd@gnu.org>
parents: 35949
diff changeset
542 : (*(str) = LEADING_CODE_8_BIT_CONTROL, *((str)+ 1) = c + 0x20, 2)) \
b7a9187751b2 (CHAR_STRING_NO_SIGNAL): New macro.
Gerd Moellmann <gerd@gnu.org>
parents: 35949
diff changeset
543 : char_to_string_1 (c, (unsigned char *) str))
b7a9187751b2 (CHAR_STRING_NO_SIGNAL): New macro.
Gerd Moellmann <gerd@gnu.org>
parents: 35949
diff changeset
544
17052
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
545 /* Return the character code of the character whose multi-byte form
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
546 is at STR and the length is LEN. If STR doesn't contain a valid
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
547 multi-byte form, only the first byte in STR is returned. */
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
548
23653
67907ea2c6ac (SPLIT_NON_ASCII_CHAR): Check dimension of an invalid
Kenichi Handa <handa@m17n.org>
parents: 23648
diff changeset
549 #define STRING_CHAR(str, len) \
67907ea2c6ac (SPLIT_NON_ASCII_CHAR): Check dimension of an invalid
Kenichi Handa <handa@m17n.org>
parents: 23648
diff changeset
550 (BYTES_BY_CHAR_HEAD ((unsigned char) *(str)) == 1 \
67907ea2c6ac (SPLIT_NON_ASCII_CHAR): Check dimension of an invalid
Kenichi Handa <handa@m17n.org>
parents: 23648
diff changeset
551 ? (unsigned char) *(str) \
26843
0aadeca4a4a7 In this entry, just `Modified' means that codes for a
Kenichi Handa <handa@m17n.org>
parents: 25637
diff changeset
552 : string_to_char (str, len, 0))
17052
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
553
25505
4d5f87073d63 (MAKE_NON_ASCII_CHAR): Handle the case that C1 or C2
Kenichi Handa <handa@m17n.org>
parents: 25234
diff changeset
554 /* This is like STRING_CHAR but the third arg ACTUAL_LEN is set to the
4d5f87073d63 (MAKE_NON_ASCII_CHAR): Handle the case that C1 or C2
Kenichi Handa <handa@m17n.org>
parents: 25234
diff changeset
555 length of the multi-byte form. To get only the length, use
17052
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
556 MULTIBYTE_FORM_LENGTH. */
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
557
25505
4d5f87073d63 (MAKE_NON_ASCII_CHAR): Handle the case that C1 or C2
Kenichi Handa <handa@m17n.org>
parents: 25234
diff changeset
558 #define STRING_CHAR_AND_LENGTH(str, len, actual_len) \
4d5f87073d63 (MAKE_NON_ASCII_CHAR): Handle the case that C1 or C2
Kenichi Handa <handa@m17n.org>
parents: 25234
diff changeset
559 (BYTES_BY_CHAR_HEAD ((unsigned char) *(str)) == 1 \
4d5f87073d63 (MAKE_NON_ASCII_CHAR): Handle the case that C1 or C2
Kenichi Handa <handa@m17n.org>
parents: 25234
diff changeset
560 ? ((actual_len) = 1), (unsigned char) *(str) \
26843
0aadeca4a4a7 In this entry, just `Modified' means that codes for a
Kenichi Handa <handa@m17n.org>
parents: 25637
diff changeset
561 : string_to_char (str, len, &(actual_len)))
22184
003ac1231096 (STRING_CHAR_AND_CHAR_LENGTH): New macro.
Kenichi Handa <handa@m17n.org>
parents: 22167
diff changeset
562
29004
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
563 /* Fetch the "next" character from Lisp string STRING at byte position
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
564 BYTEIDX, character position CHARIDX. Store it into OUTPUT.
20589
3acb053e757e (FETCH_STRING_CHAR_ADVANCE): New macro.
Richard M. Stallman <rms@gnu.org>
parents: 20531
diff changeset
565
3acb053e757e (FETCH_STRING_CHAR_ADVANCE): New macro.
Richard M. Stallman <rms@gnu.org>
parents: 20531
diff changeset
566 All the args must be side-effect-free.
3acb053e757e (FETCH_STRING_CHAR_ADVANCE): New macro.
Richard M. Stallman <rms@gnu.org>
parents: 20531
diff changeset
567 BYTEIDX and CHARIDX must be lvalues;
3acb053e757e (FETCH_STRING_CHAR_ADVANCE): New macro.
Richard M. Stallman <rms@gnu.org>
parents: 20531
diff changeset
568 we increment them past the character fetched. */
3acb053e757e (FETCH_STRING_CHAR_ADVANCE): New macro.
Richard M. Stallman <rms@gnu.org>
parents: 20531
diff changeset
569
29004
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
570 #define FETCH_STRING_CHAR_ADVANCE(OUTPUT, STRING, CHARIDX, BYTEIDX) \
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
571 if (1) \
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
572 { \
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
573 CHARIDX++; \
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
574 if (STRING_MULTIBYTE (STRING)) \
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
575 { \
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
576 unsigned char *ptr = &XSTRING (STRING)->data[BYTEIDX]; \
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
577 int space_left = XSTRING (STRING)->size_byte - BYTEIDX; \
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
578 int actual_len; \
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
579 \
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
580 OUTPUT = STRING_CHAR_AND_LENGTH (ptr, space_left, actual_len); \
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
581 BYTEIDX += actual_len; \
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
582 } \
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
583 else \
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
584 OUTPUT = XSTRING (STRING)->data[BYTEIDX++]; \
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
585 } \
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
586 else
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
587
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
588 /* Like FETCH_STRING_CHAR_ADVANCE but assume STRING is multibyte. */
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
589
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
590 #define FETCH_STRING_CHAR_ADVANCE_NO_CHECK(OUTPUT, STRING, CHARIDX, BYTEIDX) \
20589
3acb053e757e (FETCH_STRING_CHAR_ADVANCE): New macro.
Richard M. Stallman <rms@gnu.org>
parents: 20531
diff changeset
591 if (1) \
3acb053e757e (FETCH_STRING_CHAR_ADVANCE): New macro.
Richard M. Stallman <rms@gnu.org>
parents: 20531
diff changeset
592 { \
3acb053e757e (FETCH_STRING_CHAR_ADVANCE): New macro.
Richard M. Stallman <rms@gnu.org>
parents: 20531
diff changeset
593 unsigned char *fetch_string_char_ptr = &XSTRING (STRING)->data[BYTEIDX]; \
3acb053e757e (FETCH_STRING_CHAR_ADVANCE): New macro.
Richard M. Stallman <rms@gnu.org>
parents: 20531
diff changeset
594 int fetch_string_char_space_left = XSTRING (STRING)->size_byte - BYTEIDX; \
3acb053e757e (FETCH_STRING_CHAR_ADVANCE): New macro.
Richard M. Stallman <rms@gnu.org>
parents: 20531
diff changeset
595 int actual_len; \
3acb053e757e (FETCH_STRING_CHAR_ADVANCE): New macro.
Richard M. Stallman <rms@gnu.org>
parents: 20531
diff changeset
596 \
3acb053e757e (FETCH_STRING_CHAR_ADVANCE): New macro.
Richard M. Stallman <rms@gnu.org>
parents: 20531
diff changeset
597 OUTPUT \
3acb053e757e (FETCH_STRING_CHAR_ADVANCE): New macro.
Richard M. Stallman <rms@gnu.org>
parents: 20531
diff changeset
598 = STRING_CHAR_AND_LENGTH (fetch_string_char_ptr, \
3acb053e757e (FETCH_STRING_CHAR_ADVANCE): New macro.
Richard M. Stallman <rms@gnu.org>
parents: 20531
diff changeset
599 fetch_string_char_space_left, actual_len); \
3acb053e757e (FETCH_STRING_CHAR_ADVANCE): New macro.
Richard M. Stallman <rms@gnu.org>
parents: 20531
diff changeset
600 \
3acb053e757e (FETCH_STRING_CHAR_ADVANCE): New macro.
Richard M. Stallman <rms@gnu.org>
parents: 20531
diff changeset
601 BYTEIDX += actual_len; \
3acb053e757e (FETCH_STRING_CHAR_ADVANCE): New macro.
Richard M. Stallman <rms@gnu.org>
parents: 20531
diff changeset
602 CHARIDX++; \
3acb053e757e (FETCH_STRING_CHAR_ADVANCE): New macro.
Richard M. Stallman <rms@gnu.org>
parents: 20531
diff changeset
603 } \
3acb053e757e (FETCH_STRING_CHAR_ADVANCE): New macro.
Richard M. Stallman <rms@gnu.org>
parents: 20531
diff changeset
604 else
3acb053e757e (FETCH_STRING_CHAR_ADVANCE): New macro.
Richard M. Stallman <rms@gnu.org>
parents: 20531
diff changeset
605
29004
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
606 /* Like FETCH_STRING_CHAR_ADVANCE but fetch character from the current
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
607 buffer. */
26843
0aadeca4a4a7 In this entry, just `Modified' means that codes for a
Kenichi Handa <handa@m17n.org>
parents: 25637
diff changeset
608
0aadeca4a4a7 In this entry, just `Modified' means that codes for a
Kenichi Handa <handa@m17n.org>
parents: 25637
diff changeset
609 #define FETCH_CHAR_ADVANCE(OUTPUT, CHARIDX, BYTEIDX) \
0aadeca4a4a7 In this entry, just `Modified' means that codes for a
Kenichi Handa <handa@m17n.org>
parents: 25637
diff changeset
610 if (1) \
0aadeca4a4a7 In this entry, just `Modified' means that codes for a
Kenichi Handa <handa@m17n.org>
parents: 25637
diff changeset
611 { \
0aadeca4a4a7 In this entry, just `Modified' means that codes for a
Kenichi Handa <handa@m17n.org>
parents: 25637
diff changeset
612 CHARIDX++; \
29004
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
613 if (!NILP (current_buffer->enable_multibyte_characters)) \
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
614 { \
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
615 unsigned char *ptr = BYTE_POS_ADDR (BYTEIDX); \
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
616 int space_left = ((CHARIDX < GPT ? GPT_BYTE : Z_BYTE) - BYTEIDX); \
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
617 int actual_len; \
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
618 \
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
619 OUTPUT= STRING_CHAR_AND_LENGTH (ptr, space_left, actual_len); \
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
620 BYTEIDX += actual_len; \
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
621 } \
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
622 else \
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
623 { \
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
624 OUTPUT = *(BYTE_POS_ADDR (BYTEIDX)); \
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
625 BYTEIDX++; \
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
626 } \
26843
0aadeca4a4a7 In this entry, just `Modified' means that codes for a
Kenichi Handa <handa@m17n.org>
parents: 25637
diff changeset
627 } \
0aadeca4a4a7 In this entry, just `Modified' means that codes for a
Kenichi Handa <handa@m17n.org>
parents: 25637
diff changeset
628 else
0aadeca4a4a7 In this entry, just `Modified' means that codes for a
Kenichi Handa <handa@m17n.org>
parents: 25637
diff changeset
629
17052
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
630 /* Return the length of the multi-byte form at string STR of length LEN. */
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
631
21444
0cba6f211d7c (MULTIBYTE_FORM_LENGTH): Don't check LEN here.
Kenichi Handa <handa@m17n.org>
parents: 21419
diff changeset
632 #define MULTIBYTE_FORM_LENGTH(str, len) \
0cba6f211d7c (MULTIBYTE_FORM_LENGTH): Don't check LEN here.
Kenichi Handa <handa@m17n.org>
parents: 21419
diff changeset
633 (BYTES_BY_CHAR_HEAD (*(unsigned char *)(str)) == 1 \
0cba6f211d7c (MULTIBYTE_FORM_LENGTH): Don't check LEN here.
Kenichi Handa <handa@m17n.org>
parents: 21419
diff changeset
634 ? 1 \
17052
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
635 : multibyte_form_length (str, len))
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
636
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
637 #ifdef emacs
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
638
25505
4d5f87073d63 (MAKE_NON_ASCII_CHAR): Handle the case that C1 or C2
Kenichi Handa <handa@m17n.org>
parents: 25234
diff changeset
639 /* Increase the buffer byte position POS_BYTE of the current buffer to
4d5f87073d63 (MAKE_NON_ASCII_CHAR): Handle the case that C1 or C2
Kenichi Handa <handa@m17n.org>
parents: 25234
diff changeset
640 the next character boundary. This macro relies on the fact that
4d5f87073d63 (MAKE_NON_ASCII_CHAR): Handle the case that C1 or C2
Kenichi Handa <handa@m17n.org>
parents: 25234
diff changeset
641 *GPT_ADDR and *Z_ADDR are always accessible and the values are
4d5f87073d63 (MAKE_NON_ASCII_CHAR): Handle the case that C1 or C2
Kenichi Handa <handa@m17n.org>
parents: 25234
diff changeset
642 '\0'. No range checking of POS_BYTE is done. */
29004
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
643
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
644 #ifdef BYTE_COMBINING_DEBUG
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
645
25505
4d5f87073d63 (MAKE_NON_ASCII_CHAR): Handle the case that C1 or C2
Kenichi Handa <handa@m17n.org>
parents: 25234
diff changeset
/* BYTE_COMBINING_DEBUG variant of INC_POS.  If the byte found at
   POS_BYTE satisfies BASE_LEADING_CODE_P, the sequence starting there
   is measured with PARSE_MULTIBYTE_SEQ, which is given
   Z_BYTE - POS_BYTE as the available length, and POS_BYTE advances by
   the measured byte count.  Any other byte advances POS_BYTE by one.  */
#define INC_POS(pos_byte)						\
  do {									\
    unsigned char *inc_head = BYTE_POS_ADDR (pos_byte);			\
    if (! BASE_LEADING_CODE_P (*inc_head))				\
      pos_byte++;							\
    else								\
      {									\
	int inc_avail, inc_parsed;					\
	inc_avail = Z_BYTE - pos_byte;					\
	PARSE_MULTIBYTE_SEQ (inc_head, inc_avail, inc_parsed);		\
	pos_byte += inc_parsed;						\
      }									\
  } while (0)
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
659
29004
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
660 #else /* not BYTE_COMBINING_DEBUG */
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
661
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
/* Normal (non-debug) variant of INC_POS: advance POS_BYTE by the byte
   count that BYTES_BY_CHAR_HEAD reports for the byte found at
   POS_BYTE.  */
#define INC_POS(pos_byte)						\
  do {									\
    unsigned char *inc_head = BYTE_POS_ADDR (pos_byte);			\
    pos_byte += BYTES_BY_CHAR_HEAD (*inc_head);				\
  } while (0)
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
667
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
668 #endif /* not BYTE_COMBINING_DEBUG */
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
669
25505
4d5f87073d63 (MAKE_NON_ASCII_CHAR): Handle the case that C1 or C2
Kenichi Handa <handa@m17n.org>
parents: 25234
diff changeset
/* Move the byte position POS_BYTE of the current buffer back to the
   previous character boundary.  No range checking of POS_BYTE.

   POS_BYTE is first decremented by one.  The byte at
   BEG_ADDR + POS_BYTE - 1 (offset by GAP_SIZE when POS_BYTE is not
   below GPT_BYTE) is then examined.  If it is not a character head and
   lies above the lower bound of the scan (BEG_ADDR, or GAP_END_ADDR
   past the gap), the macro walks backward to the nearest character
   head or to that bound, parses forward from there with
   PARSE_MULTIBYTE_SEQ, and moves POS_BYTE back to the start of the
   scan only when the parsed length covers exactly the bytes walked
   over.  Otherwise POS_BYTE keeps the single-byte decrement.  */
#define DEC_POS(pos_byte)						\
  do {									\
    unsigned char *dec_cur, *dec_floor;					\
									\
    pos_byte--;								\
    if (pos_byte < GPT_BYTE)						\
      {									\
	dec_cur = BEG_ADDR + pos_byte - 1;				\
	dec_floor = BEG_ADDR;						\
      }									\
    else								\
      {									\
	dec_cur = BEG_ADDR + GAP_SIZE + pos_byte - 1;			\
	dec_floor = GAP_END_ADDR;					\
      }									\
    if (dec_cur > dec_floor && !CHAR_HEAD_P (*dec_cur))			\
      {									\
	unsigned char *dec_last = dec_cur;				\
	int dec_span, dec_parsed;					\
	do								\
	  dec_cur--;							\
	while (dec_cur > dec_floor && !CHAR_HEAD_P (*dec_cur));		\
	dec_span = dec_last + 1 - dec_cur;				\
	PARSE_MULTIBYTE_SEQ (dec_cur, dec_span, dec_parsed);		\
	if (dec_parsed == dec_span)					\
	  pos_byte -= dec_span - 1;					\
      }									\
  } while (0)
f019e056ad9a (CHAR_HEAD_P): Take char, not pointer, as arg.
Richard M. Stallman <rms@gnu.org>
parents: 20352
diff changeset
692
f019e056ad9a (CHAR_HEAD_P): Take char, not pointer, as arg.
Richard M. Stallman <rms@gnu.org>
parents: 20352
diff changeset
/* Step CHARPOS and BYTEPOS forward together.  CHARPOS always grows by
   one.  BYTEPOS grows by one byte when the current buffer's
   enable_multibyte_characters is nil, and is advanced by INC_POS
   otherwise.  */

#define INC_BOTH(charpos, bytepos)					\
  do {									\
    (charpos)++;							\
    if (! NILP (current_buffer->enable_multibyte_characters))		\
      INC_POS ((bytepos));						\
    else								\
      (bytepos)++;							\
  } while (0)
f019e056ad9a (CHAR_HEAD_P): Take char, not pointer, as arg.
Richard M. Stallman <rms@gnu.org>
parents: 20352
diff changeset
705
f019e056ad9a (CHAR_HEAD_P): Take char, not pointer, as arg.
Richard M. Stallman <rms@gnu.org>
parents: 20352
diff changeset
/* Step CHARPOS and BYTEPOS backward together.  CHARPOS always shrinks
   by one.  BYTEPOS shrinks by one byte when the current buffer's
   enable_multibyte_characters is nil, and is moved back by DEC_POS
   otherwise.  */

#define DEC_BOTH(charpos, bytepos)					\
  do {									\
    (charpos)--;							\
    if (! NILP (current_buffer->enable_multibyte_characters))		\
      DEC_POS ((bytepos));						\
    else								\
      (bytepos)--;							\
  } while (0)
f019e056ad9a (CHAR_HEAD_P): Take char, not pointer, as arg.
Richard M. Stallman <rms@gnu.org>
parents: 20352
diff changeset
718
25505
4d5f87073d63 (MAKE_NON_ASCII_CHAR): Handle the case that C1 or C2
Kenichi Handa <handa@m17n.org>
parents: 25234
diff changeset
719 /* Increase the buffer byte position POS_BYTE of the buffer BUF to
4d5f87073d63 (MAKE_NON_ASCII_CHAR): Handle the case that C1 or C2
Kenichi Handa <handa@m17n.org>
parents: 25234
diff changeset
720 the next character boundary. This macro relies on the fact that
4d5f87073d63 (MAKE_NON_ASCII_CHAR): Handle the case that C1 or C2
Kenichi Handa <handa@m17n.org>
parents: 25234
diff changeset
721 *GPT_ADDR and *Z_ADDR are always accessible and the values are
4d5f87073d63 (MAKE_NON_ASCII_CHAR): Handle the case that C1 or C2
Kenichi Handa <handa@m17n.org>
parents: 25234
diff changeset
722 '\0'. No range checking of POS_BYTE. */
29004
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
723
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
724 #ifdef BYTE_COMBINING_DEBUG
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
725
25505
4d5f87073d63 (MAKE_NON_ASCII_CHAR): Handle the case that C1 or C2
Kenichi Handa <handa@m17n.org>
parents: 25234
diff changeset
/* BYTE_COMBINING_DEBUG variant of BUF_INC_POS.  If the byte of BUF
   found at POS_BYTE satisfies BASE_LEADING_CODE_P, the sequence
   starting there is measured with PARSE_MULTIBYTE_SEQ, which is given
   BUF_Z_BYTE (BUF) - POS_BYTE as the available length, and POS_BYTE
   advances by the measured byte count.  Any other byte advances
   POS_BYTE by one.  */
#define BUF_INC_POS(buf, pos_byte)					\
  do {									\
    unsigned char *binc_head = BUF_BYTE_ADDRESS (buf, pos_byte);	\
    if (! BASE_LEADING_CODE_P (*binc_head))				\
      pos_byte++;							\
    else								\
      {									\
	int binc_avail, binc_parsed;					\
	binc_avail = BUF_Z_BYTE (buf) - pos_byte;			\
	PARSE_MULTIBYTE_SEQ (binc_head, binc_avail, binc_parsed);	\
	pos_byte += binc_parsed;					\
      }									\
  } while (0)
f019e056ad9a (CHAR_HEAD_P): Take char, not pointer, as arg.
Richard M. Stallman <rms@gnu.org>
parents: 20352
diff changeset
739
29004
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
740 #else /* not BYTE_COMBINING_DEBUG */
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
741
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
/* Normal (non-debug) variant of BUF_INC_POS: advance POS_BYTE by the
   byte count that BYTES_BY_CHAR_HEAD reports for the byte of BUF
   found at POS_BYTE.  */
#define BUF_INC_POS(buf, pos_byte)					\
  do {									\
    unsigned char *binc_head = BUF_BYTE_ADDRESS (buf, pos_byte);	\
    pos_byte += BYTES_BY_CHAR_HEAD (*binc_head);			\
  } while (0)
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
747
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
748 #endif /* not BYTE_COMBINING_DEBUG */
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
749
25505
4d5f87073d63 (MAKE_NON_ASCII_CHAR): Handle the case that C1 or C2
Kenichi Handa <handa@m17n.org>
parents: 25234
diff changeset
/* Move the byte position POS_BYTE of the buffer BUF back to the
   previous character boundary.  No range checking of POS_BYTE.

   POS_BYTE is first decremented by one.  The byte at
   BUF_BEG_ADDR (BUF) + POS_BYTE - 1 (offset by BUF_GAP_SIZE (BUF) when
   POS_BYTE is not below BUF_GPT_BYTE (BUF)) is then examined.  If it
   is not a character head and lies above the lower bound of the scan
   (BUF_BEG_ADDR (BUF), or BUF_GAP_END_ADDR (BUF) past the gap), the
   macro walks backward to the nearest character head or to that
   bound, parses forward from there with PARSE_MULTIBYTE_SEQ, and
   moves POS_BYTE back to the start of the scan only when the parsed
   length covers exactly the bytes walked over.  Otherwise POS_BYTE
   keeps the single-byte decrement.  */
#define BUF_DEC_POS(buf, pos_byte)					\
  do {									\
    unsigned char *bdec_cur, *bdec_floor;				\
									\
    pos_byte--;								\
    if (pos_byte >= BUF_GPT_BYTE (buf))					\
      {									\
	bdec_cur = BUF_BEG_ADDR (buf) + BUF_GAP_SIZE (buf) + pos_byte - 1; \
	bdec_floor = BUF_GAP_END_ADDR (buf);				\
      }									\
    else								\
      {									\
	bdec_cur = BUF_BEG_ADDR (buf) + pos_byte - 1;			\
	bdec_floor = BUF_BEG_ADDR (buf);				\
      }									\
    if (bdec_cur > bdec_floor && !CHAR_HEAD_P (*bdec_cur))		\
      {									\
	unsigned char *bdec_last = bdec_cur;				\
	int bdec_span, bdec_parsed;					\
	do								\
	  bdec_cur--;							\
	while (bdec_cur > bdec_floor && !CHAR_HEAD_P (*bdec_cur));	\
	bdec_span = bdec_last + 1 - bdec_cur;				\
	PARSE_MULTIBYTE_SEQ (bdec_cur, bdec_span, bdec_parsed);		\
	if (bdec_parsed == bdec_span)					\
	  pos_byte -= bdec_span - 1;					\
      }									\
  } while (0)
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
777
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
778 #endif /* emacs */
d0d7b244b1d0 Initial revision
Karl Heuer <kwzh@gnu.org>
parents:
diff changeset
779
26843
0aadeca4a4a7 In this entry, just `Modified' means that codes for a
Kenichi Handa <handa@m17n.org>
parents: 25637
diff changeset
/* Largest number of bytes that one multibyte sequence can occupy.  */
#define MAX_MULTIBYTE_LENGTH 4
17185
0d5a1bae9d73 (MAX_CHARSET): Definition changed to the actual
Kenichi Handa <handa@m17n.org>
parents: 17120
diff changeset
782
20932
3c2c8431c51d (INC_POS): Use macro BASE_LEADING_CODE_P.
Kenichi Handa <handa@m17n.org>
parents: 20904
diff changeset
783 extern void invalid_character P_ ((int));
3c2c8431c51d (INC_POS): Use macro BASE_LEADING_CODE_P.
Kenichi Handa <handa@m17n.org>
parents: 20904
diff changeset
784
22120
90f77c401689 Change terms unify/unification to
Kenichi Handa <handa@m17n.org>
parents: 21444
diff changeset
785 extern int translate_char P_ ((Lisp_Object, int, int, int, int));
26843
0aadeca4a4a7 In this entry, just `Modified' means that codes for a
Kenichi Handa <handa@m17n.org>
parents: 25637
diff changeset
786 extern int split_string P_ ((const unsigned char *, int, int *,
20308
8d520e3dcb86 Add more prototypes and function declarations.
Andreas Schwab <schwab@suse.de>
parents: 20257
diff changeset
787 unsigned char *, unsigned char *));
26843
0aadeca4a4a7 In this entry, just `Modified' means that codes for a
Kenichi Handa <handa@m17n.org>
parents: 25637
diff changeset
788 extern int char_to_string P_ ((int, unsigned char *));
38395
b7a9187751b2 (CHAR_STRING_NO_SIGNAL): New macro.
Gerd Moellmann <gerd@gnu.org>
parents: 35949
diff changeset
789 extern int char_to_string_1 P_ ((int, unsigned char *));
26843
0aadeca4a4a7 In this entry, just `Modified' means that codes for a
Kenichi Handa <handa@m17n.org>
parents: 25637
diff changeset
790 extern int string_to_char P_ ((const unsigned char *, int, int *));
25505
4d5f87073d63 (MAKE_NON_ASCII_CHAR): Handle the case that C1 or C2
Kenichi Handa <handa@m17n.org>
parents: 25234
diff changeset
791 extern int char_printable_p P_ ((int c));
21419
95aae2ff5fcd (string_to_non_ascii_char, multibyte_form_length)
Karl Heuer <kwzh@gnu.org>
parents: 21416
diff changeset
792 extern int multibyte_form_length P_ ((const unsigned char *, int));
29004
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
793 extern void parse_str_as_multibyte P_ ((unsigned char *, int, int *, int *));
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
794 extern int str_as_multibyte P_ ((unsigned char *, int, int, int *));
35949
6f52e8c1039f (parse_str_to_multibyte): Extern it.
Kenichi Handa <handa@m17n.org>
parents: 35478
diff changeset
795 extern int parse_str_to_multibyte P_ ((unsigned char *, int));
29004
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
796 extern int str_to_multibyte P_ ((unsigned char *, int, int));
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
797 extern int str_as_unibyte P_ ((unsigned char *, int));
20308
8d520e3dcb86 Add more prototypes and function declarations.
Andreas Schwab <schwab@suse.de>
parents: 20257
diff changeset
798 extern int get_charset_id P_ ((Lisp_Object));
29004
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
799 extern int find_charset_in_text P_ ((unsigned char *, int, int, int *,
383e4e21306a (LEADING_CODE_8_BIT_CONTROL, CHARSET_8_BIT_CONTROL,
Kenichi Handa <handa@m17n.org>
parents: 28513
diff changeset
800 Lisp_Object));
20308
8d520e3dcb86 Add more prototypes and function declarations.
Andreas Schwab <schwab@suse.de>
parents: 20257
diff changeset
801 extern int strwidth P_ ((unsigned char *, int));
35408
9a2cf1e0032c (c_string_width, lisp_string_width): Extern them.
Kenichi Handa <handa@m17n.org>
parents: 32353
diff changeset
802 extern int c_string_width P_ ((unsigned char *, int, int, int *, int *));
9a2cf1e0032c (c_string_width, lisp_string_width): Extern them.
Kenichi Handa <handa@m17n.org>
parents: 32353
diff changeset
803 extern int lisp_string_width P_ ((Lisp_Object, int, int *, int *));
23130
f2cee3bcef78 (CHAR_BYTES): New macro.
Kenichi Handa <handa@m17n.org>
parents: 22702
diff changeset
804 extern int char_bytes P_ ((int));
23810
e473bf3419a9 Declare char_valid_p.
Andreas Schwab <schwab@suse.de>
parents: 23766
diff changeset
805 extern int char_valid_p P_ ((int, int));
17726
b5f2a7d6a2d0 (unify_char): Extern it.
Kenichi Handa <handa@m17n.org>
parents: 17321
diff changeset
806
22184
003ac1231096 (STRING_CHAR_AND_CHAR_LENGTH): New macro.
Kenichi Handa <handa@m17n.org>
parents: 22167
diff changeset
807 extern Lisp_Object Vtranslation_table_vector;
22120
90f77c401689 Change terms unify/unification to
Kenichi Handa <handa@m17n.org>
parents: 21444
diff changeset
808
22184
003ac1231096 (STRING_CHAR_AND_CHAR_LENGTH): New macro.
Kenichi Handa <handa@m17n.org>
parents: 22167
diff changeset
809 /* Return a translation table of id number ID. */
22120
90f77c401689 Change terms unify/unification to
Kenichi Handa <handa@m17n.org>
parents: 21444
diff changeset
810 #define GET_TRANSLATION_TABLE(id) \
25637
2e3ab6dece24 (GET_TRANSLATION_TABLE): Use XCDR.
Ken Raeburn <raeburn@raeburn.org>
parents: 25583
diff changeset
811 (XCDR(XVECTOR(Vtranslation_table_vector)->contents[(id)]))
20719
78d95f2a9d92 (BCOPY_SHORT): New macro.
Kenichi Handa <handa@m17n.org>
parents: 20708
diff changeset
812
23488
958ab288116d (Vauto_fill_chars): Extern it.
Kenichi Handa <handa@m17n.org>
parents: 23202
diff changeset
813 /* A char-table for characters which may invoke auto-filling. */
958ab288116d (Vauto_fill_chars): Extern it.
Kenichi Handa <handa@m17n.org>
parents: 23202
diff changeset
814 extern Lisp_Object Vauto_fill_chars;
958ab288116d (Vauto_fill_chars): Extern it.
Kenichi Handa <handa@m17n.org>
parents: 23202
diff changeset
815
20719
78d95f2a9d92 (BCOPY_SHORT): New macro.
Kenichi Handa <handa@m17n.org>
parents: 20708
diff changeset
/* Copy LEN bytes from FROM to TO with a plain forward byte loop.
   This macro should be used only when the caller knows that LEN is
   short, so that the loop is cheaper than calling bcopy with its call
   overhead.  Copying the byte sequence of one multibyte character is
   the typical case.

   FROM must be usable as a pointer to (possibly const) unsigned char
   and TO as a pointer to unsigned char.  Nothing is copied when LEN
   is zero or negative.

   Each argument is evaluated exactly once, in the order LEN, FROM,
   TO.  The temporaries carry a `bcopy_short_' prefix so that argument
   expressions mentioning ordinary names such as `i' refer to the
   caller's variables rather than to the macro's own locals.  */

#define BCOPY_SHORT(from, to, len)					\
  do {									\
    int bcopy_short_cnt_ = (len);					\
    const unsigned char *bcopy_short_src_ = (from);			\
    unsigned char *bcopy_short_dst_ = (to);				\
    for (; bcopy_short_cnt_ > 0; bcopy_short_cnt_--)			\
      *bcopy_short_dst_++ = *bcopy_short_src_++;			\
  } while (0)
78d95f2a9d92 (BCOPY_SHORT): New macro.
Kenichi Handa <handa@m17n.org>
parents: 20708
diff changeset
827
29570
825505ff211e (EMACS_CHARSET_H): Renamed from _CHARSET_H.
Kenichi Handa <handa@m17n.org>
parents: 29539
diff changeset
828 #endif /* EMACS_CHARSET_H */