comparison src/coding.c @ 22616:c493ce6a31e4

(setup_raw_text_coding_system): New function. (decode_coding_sjis_big5): Bug for handling invalid code fixed.
author Kenichi Handa <handa@m17n.org>
date Fri, 26 Jun 1998 03:29:15 +0000
parents d9eac134a41b
children 3e3949ac4cfb
comparison
equal deleted inserted replaced
22615:bad8b17b0524 22616:c493ce6a31e4
1988 1988
1989 --- CODE RANGE of SJIS --- 1989 --- CODE RANGE of SJIS ---
1990 (character set) (range) 1990 (character set) (range)
1991 ASCII 0x00 .. 0x7F 1991 ASCII 0x00 .. 0x7F
1992 KATAKANA-JISX0201 0xA0 .. 0xDF 1992 KATAKANA-JISX0201 0xA0 .. 0xDF
1993 JISX0208 (1st byte) 0x80 .. 0x9F and 0xE0 .. 0xFF 1993 JISX0208 (1st byte) 0x80 .. 0x9F and 0xE0 .. 0xEF
1994 (2nd byte) 0x40 .. 0xFF 1994 (2nd byte) 0x40 .. 0xFF
1995 ------------------------------- 1995 -------------------------------
1996 1996
1997 */ 1997 */
1998 1998
2234 *dst++ = c1; 2234 *dst++ = c1;
2235 coding->produced_char++; 2235 coding->produced_char++;
2236 } 2236 }
2237 else if (c1 < 0x80) 2237 else if (c1 < 0x80)
2238 DECODE_SJIS_BIG5_CHARACTER (charset_ascii, c1, /* dummy */ c2); 2238 DECODE_SJIS_BIG5_CHARACTER (charset_ascii, c1, /* dummy */ c2);
2239 else if (c1 < 0xA0) 2239 else
2240 { 2240 {
2241 /* SJIS -> JISX0208 */
2242 if (sjis_p) 2241 if (sjis_p)
2243 { 2242 {
2244 ONE_MORE_BYTE (c2); 2243 if (c1 < 0xA0 || (c1 >= 0xE0 && c1 < 0xF0))
2245 if (c2 >= 0x40)
2246 { 2244 {
2247 DECODE_SJIS (c1, c2, c3, c4); 2245 /* SJIS -> JISX0208 */
2248 DECODE_SJIS_BIG5_CHARACTER (charset_jisx0208, c3, c4); 2246 ONE_MORE_BYTE (c2);
2247 if (c2 >= 0x40)
2248 {
2249 DECODE_SJIS (c1, c2, c3, c4);
2250 DECODE_SJIS_BIG5_CHARACTER (charset_jisx0208, c3, c4);
2251 }
2252 else
2253 goto label_invalid_code_2;
2249 } 2254 }
2255 else if (c1 < 0xE0)
2256 /* SJIS -> JISX0201-Kana */
2257 DECODE_SJIS_BIG5_CHARACTER (charset_katakana_jisx0201, c1,
2258 /* dummy */ c2);
2250 else 2259 else
2251 goto label_invalid_code_2; 2260 goto label_invalid_code_1;
2252 }
2253 else
2254 goto label_invalid_code_1;
2255 }
2256 else if (c1 < 0xE0)
2257 {
2258 /* SJIS -> JISX0201-Kana, BIG5 -> Big5 */
2259 if (sjis_p)
2260 DECODE_SJIS_BIG5_CHARACTER (charset_katakana_jisx0201, c1,
2261 /* dummy */ c2);
2262 else
2263 {
2264 int charset;
2265
2266 ONE_MORE_BYTE (c2);
2267 if ((c2 >= 0x40 && c2 <= 0x7E) || (c2 >= 0xA1 && c2 <= 0xFE))
2268 {
2269 DECODE_BIG5 (c1, c2, charset, c3, c4);
2270 DECODE_SJIS_BIG5_CHARACTER (charset, c3, c4);
2271 }
2272 else
2273 goto label_invalid_code_2;
2274 }
2275 }
2276 else /* C1 >= 0xE0 */
2277 {
2278 /* SJIS -> JISX0208, BIG5 -> Big5 */
2279 if (sjis_p)
2280 {
2281 ONE_MORE_BYTE (c2);
2282 if (c2 >= 0x40)
2283 {
2284 DECODE_SJIS (c1, c2, c3, c4);
2285 DECODE_SJIS_BIG5_CHARACTER (charset_jisx0208, c3, c4);
2286 }
2287 else
2288 goto label_invalid_code_2;
2289 } 2261 }
2290 else 2262 else
2291 { 2263 {
2292 int charset; 2264 /* BIG5 -> Big5 */
2293 2265 if (c1 >= 0xA1 && c1 <= 0xFE)
2294 ONE_MORE_BYTE (c2);
2295 if ((c2 >= 0x40 && c2 <= 0x7E) || (c2 >= 0xA1 && c2 <= 0xFE))
2296 { 2266 {
2297 DECODE_BIG5 (c1, c2, charset, c3, c4); 2267 ONE_MORE_BYTE (c2);
2298 DECODE_SJIS_BIG5_CHARACTER (charset, c3, c4); 2268 if ((c2 >= 0x40 && c2 <= 0x7E) || (c2 >= 0xA1 && c2 <= 0xFE))
2269 {
2270 int charset;
2271
2272 DECODE_BIG5 (c1, c2, charset, c3, c4);
2273 DECODE_SJIS_BIG5_CHARACTER (charset, c3, c4);
2274 }
2275 else
2276 goto label_invalid_code_2;
2299 } 2277 }
2300 else 2278 else
2301 goto label_invalid_code_2; 2279 goto label_invalid_code_1;
2302 } 2280 }
2303 } 2281 }
2304 continue; 2282 continue;
2305 2283
2306 label_invalid_code_1: 2284 label_invalid_code_1:
3083 coding->category_idx = CODING_CATEGORY_IDX_BINARY; 3061 coding->category_idx = CODING_CATEGORY_IDX_BINARY;
3084 coding->common_flags = 0; 3062 coding->common_flags = 0;
3085 coding->eol_type = CODING_EOL_LF; 3063 coding->eol_type = CODING_EOL_LF;
3086 coding->pre_write_conversion = coding->post_read_conversion = Qnil; 3064 coding->pre_write_conversion = coding->post_read_conversion = Qnil;
3087 return -1; 3065 return -1;
3066 }
3067
3068 /* Set up raw-text or one of its subsidiaries in the structure
3069 coding_system CODING according to the value of eol_type already set
3070 in CODING. CODING should have been set up for some coding system in
3071 advance. */
3072
3073 void
3074 setup_raw_text_coding_system (coding)
3075 struct coding_system *coding;
3076 {
3077 if (coding->type != coding_type_raw_text)
3078 {
3079 coding->symbol = Qraw_text;
3080 coding->type = coding_type_raw_text;
3081 if (coding->eol_type != CODING_EOL_UNDECIDED)
3082 {
3083 Lisp_Object subsidiaries = Fget (Qraw_text, Qeol_type);
3084
3085 if (VECTORP (subsidiaries)
3086 && XVECTOR (subsidiaries)->size == 3)
3087 coding->symbol
3088 = XVECTOR (subsidiaries)->contents[coding->eol_type];
3089 }
3090 }
3091 return;
3088 } 3092 }
3089 3093
3090 /* Emacs has a mechanism to automatically detect a coding system if it 3094 /* Emacs has a mechanism to automatically detect a coding system if it
3091 is one of Emacs' internal format, ISO2022, SJIS, and BIG5. But, 3095 is one of Emacs' internal format, ISO2022, SJIS, and BIG5. But,
3092 it's impossible to distinguish some coding systems accurately 3096 it's impossible to distinguish some coding systems accurately