Mercurial > emacs
comparison src/coding.c @ 88597:74b74f59bc09
(decode_coding_charset): Adjusted for the change of
Fdefine_coding_system_internal.
(Fdefine_coding_system_internal): For a coding system of
`charset' type, store a list of charset IDs in
`coding_attr_charset_valids' element of coding attributes.
author | Kenichi Handa <handa@m17n.org> |
---|---|
date | Wed, 22 May 2002 11:14:45 +0000 |
parents | 11186ff7ea0d |
children | b88195f69856 |
comparison
equal
deleted
inserted
replaced
88596:235ee9d10206 | 88597:74b74f59bc09 |
---|---|
4318 break; | 4318 break; |
4319 | 4319 |
4320 ONE_MORE_BYTE (c); | 4320 ONE_MORE_BYTE (c); |
4321 if (c == '\r') | 4321 if (c == '\r') |
4322 { | 4322 { |
4323 /* Here we assume that no charset maps '\r' to something | |
4324 else. */ | |
4323 if (EQ (eol_type, Qdos)) | 4325 if (EQ (eol_type, Qdos)) |
4324 { | 4326 { |
4325 if (src < src_end | 4327 if (src < src_end |
4326 && *src == '\n') | 4328 && *src == '\n') |
4327 ONE_MORE_BYTE (c); | 4329 ONE_MORE_BYTE (c); |
4331 } | 4333 } |
4332 else | 4334 else |
4333 { | 4335 { |
4334 Lisp_Object val; | 4336 Lisp_Object val; |
4335 struct charset *charset; | 4337 struct charset *charset; |
4338 int dim; | |
4339 unsigned code; | |
4336 int c1; | 4340 int c1; |
4337 | 4341 |
4338 val = AREF (valids, c); | 4342 val = AREF (valids, c); |
4339 if (NILP (val)) | 4343 if (NILP (val)) |
4340 goto invalid_code; | 4344 goto invalid_code; |
4341 charset = CHARSET_FROM_ID (XFASTINT (val)); | 4345 if (INTEGERP (val)) |
4342 if (CHARSET_DIMENSION (charset) > 1) | |
4343 { | 4346 { |
4344 ONE_MORE_BYTE (c1); | 4347 charset = CHARSET_FROM_ID (XFASTINT (val)); |
4345 c = (c << 8) | c1; | 4348 dim = CHARSET_DIMENSION (charset); |
4346 if (CHARSET_DIMENSION (charset) > 2) | 4349 code = c; |
4350 if (dim > 1) | |
4347 { | 4351 { |
4348 ONE_MORE_BYTE (c1); | 4352 ONE_MORE_BYTE (c1); |
4349 c = (c << 8) | c1; | 4353 code = (code << 8) | c1; |
4350 if (CHARSET_DIMENSION (charset) > 3) | 4354 if (dim > 2) |
4351 { | 4355 { |
4352 ONE_MORE_BYTE (c1); | 4356 ONE_MORE_BYTE (c1); |
4353 c = (c << 8) | c1; | 4357 code = (code << 8) | c1; |
4358 if (dim > 3) | |
4359 { | |
4360 ONE_MORE_BYTE (c1); | |
4361 code = (c << 8) | c1; | |
4362 } | |
4354 } | 4363 } |
4355 } | 4364 } |
4365 CODING_DECODE_CHAR (coding, src, src_base, src_end, | |
4366 charset, code, c); | |
4356 } | 4367 } |
4357 CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c); | 4368 else |
4369 { | |
4370 /* VAL is a list of charset IDs. It is assured that the | |
4371 list is sorted by charset dimensions (smaller one | |
4372 comes first). */ | |
4373 int b[4]; | |
4374 int len = 1; | |
4375 | |
4376 b[0] = c; | |
4377 /* VAL is a list of charset IDs. */ | |
4378 while (CONSP (val)) | |
4379 { | |
4380 charset = CHARSET_FROM_ID (XFASTINT (XCAR (val))); | |
4381 dim = CHARSET_DIMENSION (charset); | |
4382 while (len < dim) | |
4383 { | |
4384 ONE_MORE_BYTE (c1); | |
4385 b[len++] = c1; | |
4386 } | |
4387 if (dim == 1) | |
4388 code = b[0]; | |
4389 else if (dim == 2) | |
4390 code = (b[0] << 8) | b[1]; | |
4391 else if (dim == 3) | |
4392 code = (b[0] << 16) | (b[1] << 8) | b[2]; | |
4393 else | |
4394 code = (b[0] << 24) | (b[1] << 16) | (b[2] << 8) | b[3]; | |
4395 CODING_DECODE_CHAR (coding, src, src_base, | |
4396 src_end, charset, code, c); | |
4397 if (c >= 0) | |
4398 break; | |
4399 val = XCDR (val); | |
4400 } | |
4401 } | |
4358 if (c < 0) | 4402 if (c < 0) |
4359 goto invalid_code; | 4403 goto invalid_code; |
4360 } | 4404 } |
4361 *charbuf++ = c; | 4405 *charbuf++ = c; |
4362 continue; | 4406 continue; |
7364 CHECK_LIST (val); | 7408 CHECK_LIST (val); |
7365 CODING_ATTR_PLIST (attrs) = val; | 7409 CODING_ATTR_PLIST (attrs) = val; |
7366 | 7410 |
7367 if (EQ (coding_type, Qcharset)) | 7411 if (EQ (coding_type, Qcharset)) |
7368 { | 7412 { |
7413 /* Generate a lisp vector of 256 elements. Each element is nil, | |
7414 integer, or a list of charset IDs. | |
7415 | |
7416 If Nth element is nil, the byte code N is invalid in this | |
7417 coding system. | |
7418 | |
7419 If Nth element is a number NUM, N is the first byte of a | |
7420 charset whose ID is NUM. | |
7421 | |
7422 If Nth element is a list of charset IDs, N is the first byte | |
7423 of one of them. The list is sorted by dimensions of the | |
7424 charsets. A charset of smaller dimension comes first. | |
7425 */ | |
7369 val = Fmake_vector (make_number (256), Qnil); | 7426 val = Fmake_vector (make_number (256), Qnil); |
7370 | 7427 |
7371 for (tail = charset_list; CONSP (tail); tail = XCDR (tail)) | 7428 for (tail = charset_list; CONSP (tail); tail = XCDR (tail)) |
7372 { | 7429 { |
7373 struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (tail))); | 7430 struct charset *charset = CHARSET_FROM_ID (XFASTINT (XCAR (tail))); |
7374 int idx = (CHARSET_DIMENSION (charset) - 1) * 4; | 7431 int dim = CHARSET_DIMENSION (charset); |
7375 | 7432 int idx = (dim - 1) * 4; |
7433 | |
7376 for (i = charset->code_space[idx]; | 7434 for (i = charset->code_space[idx]; |
7377 i <= charset->code_space[idx + 1]; i++) | 7435 i <= charset->code_space[idx + 1]; i++) |
7378 { | 7436 { |
7379 if (NILP (AREF (val, i))) | 7437 Lisp_Object tmp, tmp2; |
7380 ASET (val, i, XCAR (tail)); | 7438 int dim2; |
7439 | |
7440 tmp = AREF (val, i); | |
7441 if (NILP (tmp)) | |
7442 tmp = XCAR (tail); | |
7443 else if (NUMBERP (tmp)) | |
7444 { | |
7445 dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (tmp))); | |
7446 if (dim < dim2) | |
7447 tmp = Fcons (tmp, Fcons (XCAR (tail), Qnil)); | |
7448 else | |
7449 tmp = Fcons (XCAR (tail), Fcons (tmp, Qnil)); | |
7450 } | |
7381 else | 7451 else |
7382 error ("Charsets conflicts in the first byte"); | 7452 { |
7453 for (tmp2 = tmp; CONSP (tmp2); tmp2 = XCDR (tmp2)) | |
7454 { | |
7455 dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (XCAR (tmp2)))); | |
7456 if (dim < dim2) | |
7457 break; | |
7458 } | |
7459 if (NILP (tmp2)) | |
7460 tmp = nconc2 (tmp, Fcons (XCAR (tail), Qnil)); | |
7461 else | |
7462 { | |
7463 XSETCDR (tmp2, Fcons (XCAR (tmp2), XCDR (tmp2))); | |
7464 XSETCAR (tmp2, XCAR (tail)); | |
7465 } | |
7466 } | |
7467 ASET (val, i, tmp); | |
7383 } | 7468 } |
7384 } | 7469 } |
7385 ASET (attrs, coding_attr_charset_valids, val); | 7470 ASET (attrs, coding_attr_charset_valids, val); |
7386 category = coding_category_charset; | 7471 category = coding_category_charset; |
7387 } | 7472 } |