comparison src/coding.c @ 89732:8acde12aba89

(get_translation_table): New function. (translate_chars): Fix the bug of skipping annotation data. (decode_coding): Utilize get_translation_table. (encode_coding): Likewise. (char_encodable_p): Translate char if necessary. (Funencodable_char_position): Likewise. (Ffind_coding_systems_region_internal): Setup translation table for encode in a coding system attribute vector in advance. (Fcheck_coding_systems_region): Likewise. (Fdefine_coding_system_internal): Allow a symbol as translation table. For shift-jis type coding system, allow 4th charset.
author Kenichi Handa <handa@m17n.org>
date Sun, 25 Jan 2004 07:30:47 +0000
parents d8fcefca5cf6
children 293c9235be3f
comparison
equal deleted inserted replaced
89731:e4b184cef172 89732:8acde12aba89
5482 coding->produced -= n; 5482 coding->produced -= n;
5483 coding->produced_char -= n; 5483 coding->produced_char -= n;
5484 } 5484 }
5485 } 5485 }
5486 5486
5487
5488 /* Return a translation table from coding system attribute vector ATTRS
5489 for encoding (ENCODEP is nonzero) or decoding (ENCODEP is zero).

     The table is looked up in this order:
       1. the encode or decode table stored in ATTRS; if that value is a
          non-nil symbol, it is replaced by the symbol's
          Qtranslation_table property (fetched with Fget);
       2. if the result is nil, the matching standard table
          (Vstandard_translation_table_for_encode or
          Vstandard_translation_table_for_decode).
     If the final value is not a char-table, Qnil is returned instead.

     NOTE(review): no return type and no parameter types are declared
     below, so they default to int, although the function returns a
     Lisp_Object and ATTRS is used as one -- confirm and declare them
     explicitly.  */
5490
5491 static INLINE
5492 get_translation_table (attrs, encodep)
5493 {
5494 Lisp_Object standard, translation_table;
5495
5496 if (encodep)
5497 translation_table = CODING_ATTR_ENCODE_TBL (attrs),
5498 standard = Vstandard_translation_table_for_encode;
5499 else
5500 translation_table = CODING_ATTR_DECODE_TBL (attrs),
5501 standard = Vstandard_translation_table_for_decode;
5502 if (! NILP (translation_table) && SYMBOLP (translation_table))
5503 translation_table = Fget (translation_table, Qtranslation_table);
5504 if (NILP (translation_table))
5505 translation_table = standard;
5506 if (! CHAR_TABLE_P (translation_table))
5507 translation_table = Qnil;
5508 return translation_table;
5509 }
5510
5511
5487 static void 5512 static void
5488 translate_chars (coding, table) 5513 translate_chars (coding, table)
5489 struct coding_system *coding; 5514 struct coding_system *coding;
5490 Lisp_Object table; 5515 Lisp_Object table;
5491 { 5516 {
5498 5523
5499 while (charbuf < charbuf_end) 5524 while (charbuf < charbuf_end)
5500 { 5525 {
5501 c = *charbuf; 5526 c = *charbuf;
5502 if (c < 0) 5527 if (c < 0)
5503 charbuf += c; 5528 charbuf += -c;
5504 else 5529 else
5505 *charbuf++ = translate_char (table, c); 5530 *charbuf++ = translate_char (table, c);
5506 } 5531 }
5507 } 5532 }
5508 5533
5838 decode_coding (coding) 5863 decode_coding (coding)
5839 struct coding_system *coding; 5864 struct coding_system *coding;
5840 { 5865 {
5841 Lisp_Object attrs; 5866 Lisp_Object attrs;
5842 Lisp_Object undo_list; 5867 Lisp_Object undo_list;
5868 Lisp_Object translation_table;
5843 5869
5844 if (BUFFERP (coding->src_object) 5870 if (BUFFERP (coding->src_object)
5845 && coding->src_pos > 0 5871 && coding->src_pos > 0
5846 && coding->src_pos < GPT 5872 && coding->src_pos < GPT
5847 && coding->src_pos + coding->src_chars > GPT) 5873 && coding->src_pos + coding->src_chars > GPT)
5865 coding->errors = 0; 5891 coding->errors = 0;
5866 5892
5867 ALLOC_CONVERSION_WORK_AREA (coding); 5893 ALLOC_CONVERSION_WORK_AREA (coding);
5868 5894
5869 attrs = CODING_ID_ATTRS (coding->id); 5895 attrs = CODING_ID_ATTRS (coding->id);
5896 translation_table = get_translation_table (attrs, 1);
5870 5897
5871 do 5898 do
5872 { 5899 {
5873 coding_set_source (coding); 5900 coding_set_source (coding);
5874 coding->annotated = 0; 5901 coding->annotated = 0;
5875 (*(coding->decoder)) (coding); 5902 (*(coding->decoder)) (coding);
5876 if (!NILP (CODING_ATTR_DECODE_TBL (attrs))) 5903 if (!NILP (translation_table))
5877 translate_chars (coding, CODING_ATTR_DECODE_TBL (attrs)); 5904 translate_chars (coding, translation_table);
5878 else if (!NILP (Vstandard_translation_table_for_decode))
5879 translate_chars (coding, Vstandard_translation_table_for_decode);
5880 coding_set_destination (coding); 5905 coding_set_destination (coding);
5881 produce_chars (coding); 5906 produce_chars (coding);
5882 if (coding->annotated) 5907 if (coding->annotated)
5883 produce_annotation (coding); 5908 produce_annotation (coding);
5884 } 5909 }
6165 static int 6190 static int
6166 encode_coding (coding) 6191 encode_coding (coding)
6167 struct coding_system *coding; 6192 struct coding_system *coding;
6168 { 6193 {
6169 Lisp_Object attrs; 6194 Lisp_Object attrs;
6195 Lisp_Object translation_table;
6170 6196
6171 attrs = CODING_ID_ATTRS (coding->id); 6197 attrs = CODING_ID_ATTRS (coding->id);
6198 translation_table = get_translation_table (attrs, 1);
6172 6199
6173 if (BUFFERP (coding->dst_object)) 6200 if (BUFFERP (coding->dst_object))
6174 { 6201 {
6175 set_buffer_internal (XBUFFER (coding->dst_object)); 6202 set_buffer_internal (XBUFFER (coding->dst_object));
6176 coding->dst_multibyte 6203 coding->dst_multibyte
6186 6213
6187 do { 6214 do {
6188 coding_set_source (coding); 6215 coding_set_source (coding);
6189 consume_chars (coding); 6216 consume_chars (coding);
6190 6217
6191 if (!NILP (CODING_ATTR_ENCODE_TBL (attrs))) 6218 if (!NILP (translation_table))
6192 translate_chars (coding, CODING_ATTR_ENCODE_TBL (attrs)); 6219 translate_chars (coding, translation_table);
6193 else if (!NILP (Vstandard_translation_table_for_encode))
6194 translate_chars (coding, Vstandard_translation_table_for_encode);
6195 6220
6196 coding_set_destination (coding); 6221 coding_set_destination (coding);
6197 (*(coding->encoder)) (coding); 6222 (*(coding->encoder)) (coding);
6198 } while (coding->consumed_char < coding->src_chars); 6223 } while (coding->consumed_char < coding->src_chars);
6199 6224
7070 int c; 7095 int c;
7071 Lisp_Object attrs; 7096 Lisp_Object attrs;
7072 { 7097 {
7073 Lisp_Object tail; 7098 Lisp_Object tail;
7074 struct charset *charset; 7099 struct charset *charset;
7075 7100 Lisp_Object translation_table;
7101
7102 translation_table = CODING_ATTR_TRANS_TBL (attrs);
7103 if (CHAR_TABLE_P (translation_table))
7104 c = translate_char (translation_table, c);
7076 for (tail = CODING_ATTR_CHARSET_LIST (attrs); 7105 for (tail = CODING_ATTR_CHARSET_LIST (attrs);
7077 CONSP (tail); tail = XCDR (tail)) 7106 CONSP (tail); tail = XCDR (tail))
7078 { 7107 {
7079 charset = CHARSET_FROM_ID (XINT (XCAR (tail))); 7108 charset = CHARSET_FROM_ID (XINT (XCAR (tail)));
7080 if (CHAR_CHARSET_P (c, charset)) 7109 if (CHAR_CHARSET_P (c, charset))
7141 Lisp_Object attrs; 7170 Lisp_Object attrs;
7142 7171
7143 attrs = AREF (CODING_SYSTEM_SPEC (XCAR (tail)), 0); 7172 attrs = AREF (CODING_SYSTEM_SPEC (XCAR (tail)), 0);
7144 if (EQ (XCAR (tail), CODING_ATTR_BASE_NAME (attrs)) 7173 if (EQ (XCAR (tail), CODING_ATTR_BASE_NAME (attrs))
7145 && ! EQ (CODING_ATTR_TYPE (attrs), Qundecided)) 7174 && ! EQ (CODING_ATTR_TYPE (attrs), Qundecided))
7146 coding_attrs_list = Fcons (attrs, coding_attrs_list); 7175 {
7176 ASET (attrs, coding_attr_trans_tbl,
7177 get_translation_table (attrs, 1));
7178 coding_attrs_list = Fcons (attrs, coding_attrs_list);
7179 }
7147 } 7180 }
7148 7181
7149 if (STRINGP (start)) 7182 if (STRINGP (start))
7150 p = pbeg = SDATA (start); 7183 p = pbeg = SDATA (start);
7151 else 7184 else
7222 (start, end, coding_system, count, string) 7255 (start, end, coding_system, count, string)
7223 Lisp_Object start, end, coding_system, count, string; 7256 Lisp_Object start, end, coding_system, count, string;
7224 { 7257 {
7225 int n; 7258 int n;
7226 struct coding_system coding; 7259 struct coding_system coding;
7227 Lisp_Object attrs, charset_list; 7260 Lisp_Object attrs, charset_list, translation_table;
7228 Lisp_Object positions; 7261 Lisp_Object positions;
7229 int from, to; 7262 int from, to;
7230 const unsigned char *p, *stop, *pend; 7263 const unsigned char *p, *stop, *pend;
7231 int ascii_compatible; 7264 int ascii_compatible;
7232 7265
7234 attrs = CODING_ID_ATTRS (coding.id); 7267 attrs = CODING_ID_ATTRS (coding.id);
7235 if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text)) 7268 if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text))
7236 return Qnil; 7269 return Qnil;
7237 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)); 7270 ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs));
7238 charset_list = CODING_ATTR_CHARSET_LIST (attrs); 7271 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
7272 translation_table = get_translation_table (attrs, 1);
7239 7273
7240 if (NILP (string)) 7274 if (NILP (string))
7241 { 7275 {
7242 validate_region (&start, &end); 7276 validate_region (&start, &end);
7243 from = XINT (start); 7277 from = XINT (start);
7295 p = GAP_END_ADDR; 7329 p = GAP_END_ADDR;
7296 } 7330 }
7297 7331
7298 c = STRING_CHAR_ADVANCE (p); 7332 c = STRING_CHAR_ADVANCE (p);
7299 if (! (ASCII_CHAR_P (c) && ascii_compatible) 7333 if (! (ASCII_CHAR_P (c) && ascii_compatible)
7300 && ! char_charset (c, charset_list, NULL)) 7334 && ! char_charset (translate_char (translation_table, c),
7335 charset_list, NULL))
7301 { 7336 {
7302 positions = Fcons (make_number (from), positions); 7337 positions = Fcons (make_number (from), positions);
7303 n--; 7338 n--;
7304 if (n == 0) 7339 if (n == 0)
7305 break; 7340 break;
7336 Lisp_Object list; 7371 Lisp_Object list;
7337 EMACS_INT start_byte, end_byte; 7372 EMACS_INT start_byte, end_byte;
7338 int pos; 7373 int pos;
7339 const unsigned char *p, *pbeg, *pend; 7374 const unsigned char *p, *pbeg, *pend;
7340 int c; 7375 int c;
7341 Lisp_Object tail, elt; 7376 Lisp_Object tail, elt, attrs;
7342 7377
7343 if (STRINGP (start)) 7378 if (STRINGP (start))
7344 { 7379 {
7345 if (!STRING_MULTIBYTE (start) 7380 if (!STRING_MULTIBYTE (start)
7346 && SCHARS (start) != SBYTES (start)) 7381 && SCHARS (start) != SBYTES (start))
7374 7409
7375 list = Qnil; 7410 list = Qnil;
7376 for (tail = coding_system_list; CONSP (tail); tail = XCDR (tail)) 7411 for (tail = coding_system_list; CONSP (tail); tail = XCDR (tail))
7377 { 7412 {
7378 elt = XCAR (tail); 7413 elt = XCAR (tail);
7379 list = Fcons (Fcons (elt, Fcons (AREF (CODING_SYSTEM_SPEC (elt), 0), 7414 attrs = AREF (CODING_SYSTEM_SPEC (elt), 0);
7380 Qnil)), 7415 ASET (attrs, coding_attr_trans_tbl, get_translation_table (attrs, 1));
7381 list); 7416 list = Fcons (Fcons (elt, Fcons (attrs, Qnil)), list);
7382 } 7417 }
7383 7418
7384 if (STRINGP (start)) 7419 if (STRINGP (start))
7385 p = pbeg = SDATA (start); 7420 p = pbeg = SDATA (start);
7386 else 7421 else
8131 CODING_ATTR_SAFE_CHARSETS (attrs) = safe_charsets; 8166 CODING_ATTR_SAFE_CHARSETS (attrs) = safe_charsets;
8132 8167
8133 CODING_ATTR_ASCII_COMPAT (attrs) = args[coding_arg_ascii_compatible_p]; 8168 CODING_ATTR_ASCII_COMPAT (attrs) = args[coding_arg_ascii_compatible_p];
8134 8169
8135 val = args[coding_arg_decode_translation_table]; 8170 val = args[coding_arg_decode_translation_table];
8136 if (! NILP (val)) 8171 if (! CHAR_TABLE_P (val))
8137 CHECK_CHAR_TABLE (val); 8172 CHECK_SYMBOL (val);
8138 CODING_ATTR_DECODE_TBL (attrs) = val; 8173 CODING_ATTR_DECODE_TBL (attrs) = val;
8139 8174
8140 val = args[coding_arg_encode_translation_table]; 8175 val = args[coding_arg_encode_translation_table];
8141 if (! NILP (val)) 8176 if (! CHAR_TABLE_P (val))
8142 CHECK_CHAR_TABLE (val); 8177 CHECK_SYMBOL (val);
8143 CODING_ATTR_ENCODE_TBL (attrs) = val; 8178 CODING_ATTR_ENCODE_TBL (attrs) = val;
8144 8179
8145 val = args[coding_arg_post_read_conversion]; 8180 val = args[coding_arg_post_read_conversion];
8146 CHECK_SYMBOL (val); 8181 CHECK_SYMBOL (val);
8147 CODING_ATTR_POST_READ (attrs) = val; 8182 CODING_ATTR_POST_READ (attrs) = val;
8413 else if (EQ (coding_type, Qshift_jis)) 8448 else if (EQ (coding_type, Qshift_jis))
8414 { 8449 {
8415 8450
8416 struct charset *charset; 8451 struct charset *charset;
8417 8452
8418 if (XINT (Flength (charset_list)) != 3) 8453 if (XINT (Flength (charset_list)) != 3
8419 error ("There should be just three charsets"); 8454 || XINT (Flength (charset_list)) != 4)
8455 error ("There should be three or four charsets");
8420 8456
8421 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list))); 8457 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8422 if (CHARSET_DIMENSION (charset) != 1) 8458 if (CHARSET_DIMENSION (charset) != 1)
8423 error ("Dimension of charset %s is not one", 8459 error ("Dimension of charset %s is not one",
8424 SDATA (SYMBOL_NAME (CHARSET_NAME (charset)))); 8460 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
8427 8463
8428 charset_list = XCDR (charset_list); 8464 charset_list = XCDR (charset_list);
8429 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list))); 8465 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8430 if (CHARSET_DIMENSION (charset) != 1) 8466 if (CHARSET_DIMENSION (charset) != 1)
8431 error ("Dimension of charset %s is not one", 8467 error ("Dimension of charset %s is not one",
8468 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
8469
8470 charset_list = XCDR (charset_list);
8471 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8472 if (CHARSET_DIMENSION (charset) != 2)
8473 error ("Dimension of charset %s is not two",
8432 SDATA (SYMBOL_NAME (CHARSET_NAME (charset)))); 8474 SDATA (SYMBOL_NAME (CHARSET_NAME (charset))));
8433 8475
8434 charset_list = XCDR (charset_list); 8476 charset_list = XCDR (charset_list);
8435 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list))); 8477 charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
8436 if (CHARSET_DIMENSION (charset) != 2) 8478 if (CHARSET_DIMENSION (charset) != 2)