Mercurial > emacs
comparison src/coding.c @ 22616:c493ce6a31e4
(setup_raw_text_coding_system): New function.
(decode_coding_sjis_big5): Bug for handling invalid code fixed.
author | Kenichi Handa <handa@m17n.org> |
---|---|
date | Fri, 26 Jun 1998 03:29:15 +0000 |
parents | d9eac134a41b |
children | 3e3949ac4cfb |
comparison
equal
deleted
inserted
replaced
22615:bad8b17b0524 | 22616:c493ce6a31e4 |
---|---|
1988 | 1988 |
1989 --- CODE RANGE of SJIS --- | 1989 --- CODE RANGE of SJIS --- |
1990 (character set) (range) | 1990 (character set) (range) |
1991 ASCII 0x00 .. 0x7F | 1991 ASCII 0x00 .. 0x7F |
1992 KATAKANA-JISX0201 0xA0 .. 0xDF | 1992 KATAKANA-JISX0201 0xA0 .. 0xDF |
1993 JISX0208 (1st byte) 0x80 .. 0x9F and 0xE0 .. 0xFF | 1993 JISX0208 (1st byte) 0x80 .. 0x9F and 0xE0 .. 0xEF |
1994 (2nd byte) 0x40 .. 0xFF | 1994 (2nd byte) 0x40 .. 0xFF |
1995 ------------------------------- | 1995 ------------------------------- |
1996 | 1996 |
1997 */ | 1997 */ |
1998 | 1998 |
2234 *dst++ = c1; | 2234 *dst++ = c1; |
2235 coding->produced_char++; | 2235 coding->produced_char++; |
2236 } | 2236 } |
2237 else if (c1 < 0x80) | 2237 else if (c1 < 0x80) |
2238 DECODE_SJIS_BIG5_CHARACTER (charset_ascii, c1, /* dummy */ c2); | 2238 DECODE_SJIS_BIG5_CHARACTER (charset_ascii, c1, /* dummy */ c2); |
2239 else if (c1 < 0xA0) | 2239 else |
2240 { | 2240 { |
2241 /* SJIS -> JISX0208 */ | |
2242 if (sjis_p) | 2241 if (sjis_p) |
2243 { | 2242 { |
2244 ONE_MORE_BYTE (c2); | 2243 if (c1 < 0xA0 || (c1 >= 0xE0 && c1 < 0xF0)) |
2245 if (c2 >= 0x40) | |
2246 { | 2244 { |
2247 DECODE_SJIS (c1, c2, c3, c4); | 2245 /* SJIS -> JISX0208 */ |
2248 DECODE_SJIS_BIG5_CHARACTER (charset_jisx0208, c3, c4); | 2246 ONE_MORE_BYTE (c2); |
2247 if (c2 >= 0x40) | |
2248 { | |
2249 DECODE_SJIS (c1, c2, c3, c4); | |
2250 DECODE_SJIS_BIG5_CHARACTER (charset_jisx0208, c3, c4); | |
2251 } | |
2252 else | |
2253 goto label_invalid_code_2; | |
2249 } | 2254 } |
2255 else if (c1 < 0xE0) | |
2256 /* SJIS -> JISX0201-Kana */ | |
2257 DECODE_SJIS_BIG5_CHARACTER (charset_katakana_jisx0201, c1, | |
2258 /* dummy */ c2); | |
2250 else | 2259 else |
2251 goto label_invalid_code_2; | 2260 goto label_invalid_code_1; |
2252 } | |
2253 else | |
2254 goto label_invalid_code_1; | |
2255 } | |
2256 else if (c1 < 0xE0) | |
2257 { | |
2258 /* SJIS -> JISX0201-Kana, BIG5 -> Big5 */ | |
2259 if (sjis_p) | |
2260 DECODE_SJIS_BIG5_CHARACTER (charset_katakana_jisx0201, c1, | |
2261 /* dummy */ c2); | |
2262 else | |
2263 { | |
2264 int charset; | |
2265 | |
2266 ONE_MORE_BYTE (c2); | |
2267 if ((c2 >= 0x40 && c2 <= 0x7E) || (c2 >= 0xA1 && c2 <= 0xFE)) | |
2268 { | |
2269 DECODE_BIG5 (c1, c2, charset, c3, c4); | |
2270 DECODE_SJIS_BIG5_CHARACTER (charset, c3, c4); | |
2271 } | |
2272 else | |
2273 goto label_invalid_code_2; | |
2274 } | |
2275 } | |
2276 else /* C1 >= 0xE0 */ | |
2277 { | |
2278 /* SJIS -> JISX0208, BIG5 -> Big5 */ | |
2279 if (sjis_p) | |
2280 { | |
2281 ONE_MORE_BYTE (c2); | |
2282 if (c2 >= 0x40) | |
2283 { | |
2284 DECODE_SJIS (c1, c2, c3, c4); | |
2285 DECODE_SJIS_BIG5_CHARACTER (charset_jisx0208, c3, c4); | |
2286 } | |
2287 else | |
2288 goto label_invalid_code_2; | |
2289 } | 2261 } |
2290 else | 2262 else |
2291 { | 2263 { |
2292 int charset; | 2264 /* BIG5 -> Big5 */ |
2293 | 2265 if (c1 >= 0xA1 && c1 <= 0xFE) |
2294 ONE_MORE_BYTE (c2); | |
2295 if ((c2 >= 0x40 && c2 <= 0x7E) || (c2 >= 0xA1 && c2 <= 0xFE)) | |
2296 { | 2266 { |
2297 DECODE_BIG5 (c1, c2, charset, c3, c4); | 2267 ONE_MORE_BYTE (c2); |
2298 DECODE_SJIS_BIG5_CHARACTER (charset, c3, c4); | 2268 if ((c2 >= 0x40 && c2 <= 0x7E) || (c2 >= 0xA1 && c2 <= 0xFE)) |
2269 { | |
2270 int charset; | |
2271 | |
2272 DECODE_BIG5 (c1, c2, charset, c3, c4); | |
2273 DECODE_SJIS_BIG5_CHARACTER (charset, c3, c4); | |
2274 } | |
2275 else | |
2276 goto label_invalid_code_2; | |
2299 } | 2277 } |
2300 else | 2278 else |
2301 goto label_invalid_code_2; | 2279 goto label_invalid_code_1; |
2302 } | 2280 } |
2303 } | 2281 } |
2304 continue; | 2282 continue; |
2305 | 2283 |
2306 label_invalid_code_1: | 2284 label_invalid_code_1: |
3083 coding->category_idx = CODING_CATEGORY_IDX_BINARY; | 3061 coding->category_idx = CODING_CATEGORY_IDX_BINARY; |
3084 coding->common_flags = 0; | 3062 coding->common_flags = 0; |
3085 coding->eol_type = CODING_EOL_LF; | 3063 coding->eol_type = CODING_EOL_LF; |
3086 coding->pre_write_conversion = coding->post_read_conversion = Qnil; | 3064 coding->pre_write_conversion = coding->post_read_conversion = Qnil; |
3087 return -1; | 3065 return -1; |
3066 } | |
3067 | |
3068 /* Set up raw-text or one of its subsidiaries in the structure | |
3069 coding_system CODING according to the eol_type value already set | |
3070 in CODING. CODING should be set up for some coding system in | |
3071 advance. */ | |
3072 | |
3073 void | |
3074 setup_raw_text_coding_system (coding) | |
3075 struct coding_system *coding; | |
3076 { | |
3077 if (coding->type != coding_type_raw_text) | |
3078 { | |
3079 coding->symbol = Qraw_text; | |
3080 coding->type = coding_type_raw_text; | |
3081 if (coding->eol_type != CODING_EOL_UNDECIDED) | |
3082 { | |
3083 Lisp_Object subsidiaries = Fget (Qraw_text, Qeol_type); | |
3084 | |
3085 if (VECTORP (subsidiaries) | |
3086 && XVECTOR (subsidiaries)->size == 3) | |
3087 coding->symbol | |
3088 = XVECTOR (subsidiaries)->contents[coding->eol_type]; | |
3089 } | |
3090 } | |
3091 return; | |
3088 } | 3092 } |
3089 | 3093 |
3090 /* Emacs has a mechanism to automatically detect a coding system if it | 3094 /* Emacs has a mechanism to automatically detect a coding system if it |
3091 is one of Emacs' internal format, ISO2022, SJIS, and BIG5. But, | 3095 is one of Emacs' internal format, ISO2022, SJIS, and BIG5. But, |
3092 it's impossible to distinguish some coding systems accurately | 3096 it's impossible to distinguish some coding systems accurately |