comparison src/coding.c @ 89225:32058afc72e2

(detect_coding_charset): If only ASCII bytes are found and the coding system is ASCII compatible, return 0. (detect_coding_system): Fix previous change. (Fdefine_coding_system_internal): Set up CODING_ATTR_ASCII_COMPAT (attrs) correctly.
author Kenichi Handa <handa@m17n.org>
date Wed, 16 Oct 2002 05:03:55 +0000
parents e6779a6185ca
children 101ee928c088
comparison
equal deleted inserted replaced
89224:d77928dacd87 89225:32058afc72e2
4368 unsigned char *src = coding->source, *src_base = src; 4368 unsigned char *src = coding->source, *src_base = src;
4369 unsigned char *src_end = coding->source + coding->src_bytes; 4369 unsigned char *src_end = coding->source + coding->src_bytes;
4370 int multibytep = coding->src_multibyte; 4370 int multibytep = coding->src_multibyte;
4371 int consumed_chars = 0; 4371 int consumed_chars = 0;
4372 Lisp_Object attrs, valids; 4372 Lisp_Object attrs, valids;
4373 int found = 0;
4373 4374
4374 coding = &coding_categories[coding_category_charset]; 4375 coding = &coding_categories[coding_category_charset];
4375 attrs = CODING_ID_ATTRS (coding->id); 4376 attrs = CODING_ID_ATTRS (coding->id);
4376 valids = AREF (attrs, coding_attr_charset_valids); 4377 valids = AREF (attrs, coding_attr_charset_valids);
4377 4378
4383 int c; 4384 int c;
4384 4385
4385 ONE_MORE_BYTE (c); 4386 ONE_MORE_BYTE (c);
4386 if (NILP (AREF (valids, c))) 4387 if (NILP (AREF (valids, c)))
4387 break; 4388 break;
4389 if (c >= 0x80)
4390 found = 1;
4388 } 4391 }
4389 *mask &= ~CATEGORY_MASK_CHARSET; 4392 *mask &= ~CATEGORY_MASK_CHARSET;
4390 return 0; 4393 return 0;
4391 4394
4392 no_more_source: 4395 no_more_source:
4393 return 1; 4396 return (found || NILP (CODING_ATTR_ASCII_COMPAT (attrs)));
4394 } 4397 }
4395 4398
4396 static void 4399 static void
4397 decode_coding_charset (coding) 4400 decode_coding_charset (coding)
4398 struct coding_system *coding; 4401 struct coding_system *coding;
6321 { 6324 {
6322 c = *src; 6325 c = *src;
6323 if (c & 0x80 6326 if (c & 0x80
6324 || (c < 0x20 && (c == ISO_CODE_ESC 6327 || (c < 0x20 && (c == ISO_CODE_ESC
6325 || c == ISO_CODE_SI 6328 || c == ISO_CODE_SI
6326 || c == ISO_CODE_SO 6329 || c == ISO_CODE_SO)))
6327 /* Most UTF-16 text contains '\0'. */
6328 || !c)))
6329 break; 6330 break;
6330 } 6331 }
6331 coding.head_ascii = src - coding.source; 6332 coding.head_ascii = src - coding.source;
6332 6333
6333 if (src < src_end) 6334 if (src < src_end)
7469 make_number (255)); 7470 make_number (255));
7470 for (tail = charset_list; CONSP (tail); tail = XCDR (tail)) 7471 for (tail = charset_list; CONSP (tail); tail = XCDR (tail))
7471 XSTRING (safe_charsets)->data[XFASTINT (XCAR (tail))] = 0; 7472 XSTRING (safe_charsets)->data[XFASTINT (XCAR (tail))] = 0;
7472 CODING_ATTR_SAFE_CHARSETS (attrs) = safe_charsets; 7473 CODING_ATTR_SAFE_CHARSETS (attrs) = safe_charsets;
7473 7474
7475 CODING_ATTR_ASCII_COMPAT (attrs) = args[coding_arg_ascii_compatible_p];
7476
7474 val = args[coding_arg_decode_translation_table]; 7477 val = args[coding_arg_decode_translation_table];
7475 if (! NILP (val)) 7478 if (! NILP (val))
7476 CHECK_CHAR_TABLE (val); 7479 CHECK_CHAR_TABLE (val);
7477 CODING_ATTR_DECODE_TBL (attrs) = val; 7480 CODING_ATTR_DECODE_TBL (attrs) = val;
7478 7481
7523 { 7526 {
7524 struct charset *charset = CHARSET_FROM_ID (XFASTINT (XCAR (tail))); 7527 struct charset *charset = CHARSET_FROM_ID (XFASTINT (XCAR (tail)));
7525 int dim = CHARSET_DIMENSION (charset); 7528 int dim = CHARSET_DIMENSION (charset);
7526 int idx = (dim - 1) * 4; 7529 int idx = (dim - 1) * 4;
7527 7530
7531 if (CHARSET_ASCII_COMPATIBLE_P (charset))
7532 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
7533
7528 for (i = charset->code_space[idx]; 7534 for (i = charset->code_space[idx];
7529 i <= charset->code_space[idx + 1]; i++) 7535 i <= charset->code_space[idx + 1]; i++)
7530 { 7536 {
7531 Lisp_Object tmp, tmp2; 7537 Lisp_Object tmp, tmp2;
7532 int dim2; 7538 int dim2;
7609 } 7615 }
7610 else if (EQ (coding_type, Qutf_16)) 7616 else if (EQ (coding_type, Qutf_16))
7611 { 7617 {
7612 Lisp_Object bom, endian; 7618 Lisp_Object bom, endian;
7613 7619
7620 CODING_ATTR_ASCII_COMPAT (attrs) = Qnil;
7621
7614 if (nargs < coding_arg_utf16_max) 7622 if (nargs < coding_arg_utf16_max)
7615 goto short_args; 7623 goto short_args;
7616 7624
7617 bom = args[coding_arg_utf16_bom]; 7625 bom = args[coding_arg_utf16_bom];
7618 if (! NILP (bom) && ! EQ (bom, Qt)) 7626 if (! NILP (bom) && ! EQ (bom, Qt))
7649 for (i = 0; i < 4; i++) 7657 for (i = 0; i < 4; i++)
7650 { 7658 {
7651 val = Faref (initial, make_number (i)); 7659 val = Faref (initial, make_number (i));
7652 if (! NILP (val)) 7660 if (! NILP (val))
7653 { 7661 {
7654 CHECK_CHARSET_GET_ID (val, id); 7662 struct charset *charset;
7655 ASET (initial, i, make_number (id)); 7663
7664 CHECK_CHARSET_GET_CHARSET (val, charset);
7665 ASET (initial, i, make_number (CHARSET_ID (charset)));
7666 if (i == 0 && CHARSET_ASCII_COMPATIBLE_P (charset))
7667 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
7656 } 7668 }
7657 else 7669 else
7658 ASET (initial, i, make_number (-1)); 7670 ASET (initial, i, make_number (-1));
7659 } 7671 }
7660 7672
7711 } 7723 }
7712 else if (EQ (coding_type, Qemacs_mule)) 7724 else if (EQ (coding_type, Qemacs_mule))
7713 { 7725 {
7714 if (EQ (args[coding_arg_charset_list], Qemacs_mule)) 7726 if (EQ (args[coding_arg_charset_list], Qemacs_mule))
7715 ASET (attrs, coding_attr_emacs_mule_full, Qt); 7727 ASET (attrs, coding_attr_emacs_mule_full, Qt);
7716 7728 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
7717 category = coding_category_emacs_mule; 7729 category = coding_category_emacs_mule;
7718 } 7730 }
7719 else if (EQ (coding_type, Qshift_jis)) 7731 else if (EQ (coding_type, Qshift_jis))
7720 { 7732 {
7721 7733
7726 7738
7727 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list))); 7739 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
7728 if (CHARSET_DIMENSION (charset) != 1) 7740 if (CHARSET_DIMENSION (charset) != 1)
7729 error ("Dimension of charset %s is not one", 7741 error ("Dimension of charset %s is not one",
7730 XSYMBOL (CHARSET_NAME (charset))->name->data); 7742 XSYMBOL (CHARSET_NAME (charset))->name->data);
7743 if (CHARSET_ASCII_COMPATIBLE_P (charset))
7744 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
7731 7745
7732 charset_list = XCDR (charset_list); 7746 charset_list = XCDR (charset_list);
7733 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list))); 7747 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
7734 if (CHARSET_DIMENSION (charset) != 1) 7748 if (CHARSET_DIMENSION (charset) != 1)
7735 error ("Dimension of charset %s is not one", 7749 error ("Dimension of charset %s is not one",
7753 7767
7754 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list))); 7768 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
7755 if (CHARSET_DIMENSION (charset) != 1) 7769 if (CHARSET_DIMENSION (charset) != 1)
7756 error ("Dimension of charset %s is not one", 7770 error ("Dimension of charset %s is not one",
7757 XSYMBOL (CHARSET_NAME (charset))->name->data); 7771 XSYMBOL (CHARSET_NAME (charset))->name->data);
7772 if (CHARSET_ASCII_COMPATIBLE_P (charset))
7773 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
7758 7774
7759 charset_list = XCDR (charset_list); 7775 charset_list = XCDR (charset_list);
7760 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list))); 7776 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
7761 if (CHARSET_DIMENSION (charset) != 2) 7777 if (CHARSET_DIMENSION (charset) != 2)
7762 error ("Dimension of charset %s is not two", 7778 error ("Dimension of charset %s is not two",
7764 7780
7765 category = coding_category_big5; 7781 category = coding_category_big5;
7766 Vbig5_coding_system = name; 7782 Vbig5_coding_system = name;
7767 } 7783 }
7768 else if (EQ (coding_type, Qraw_text)) 7784 else if (EQ (coding_type, Qraw_text))
7769 category = coding_category_raw_text; 7785 {
7786 category = coding_category_raw_text;
7787 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
7788 }
7770 else if (EQ (coding_type, Qutf_8)) 7789 else if (EQ (coding_type, Qutf_8))
7771 category = coding_category_utf_8; 7790 {
7791 category = coding_category_utf_8;
7792 CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
7793 }
7772 else if (EQ (coding_type, Qundecided)) 7794 else if (EQ (coding_type, Qundecided))
7773 category = coding_category_undecided; 7795 category = coding_category_undecided;
7774 else 7796 else
7775 error ("Invalid coding system type: %s", 7797 error ("Invalid coding system type: %s",
7776 XSYMBOL (coding_type)->name->data); 7798 XSYMBOL (coding_type)->name->data);