comparison src/coding.c @ 46859:a26dd8891732

(unencodable_char_position): New function. (Funencodable_char_position): New function. (syms_of_coding): Defsubr Funencodable_char_position.
author Kenichi Handa <handa@m17n.org>
date Sun, 11 Aug 2002 01:06:42 +0000
parents 4acadb428f41
children f88c982e1f79
comparison
equal deleted inserted replaced
46858:62112f8664f7 46859:a26dd8891732
6496 Fcons (Qno_conversion, safe_codings))); 6496 Fcons (Qno_conversion, safe_codings)));
6497 return safe_codings; 6497 return safe_codings;
6498 } 6498 }
6499 6499
6500 6500
6501 /* Search from position POS for such characters that are unencodable
6502 accoding to SAFE_CHARS, and return a list of their positions. P
6503 points where in the memory the character at POS exists. Limit the
6504 search at PEND or when Nth unencodable characters are found.
6505
6506 If SAFE_CHARS is a char table, an element for an unencodable
6507 character is nil.
6508
6509 If SAFE_CHARS is nil, all non-ASCII characters are unencodable.
6510
6511 Otherwise, SAFE_CHARS is t, and only eight-bit-contrl and
6512 eight-bit-graphic characters are unencodable. */
6513
6514 static Lisp_Object
6515 unencodable_char_position (safe_chars, pos, p, pend, n)
6516 Lisp_Object safe_chars;
6517 int pos;
6518 unsigned char *p, *pend;
6519 int n;
6520 {
6521 Lisp_Object pos_list;
6522
6523 pos_list = Qnil;
6524 while (p < pend)
6525 {
6526 int len;
6527 int c = STRING_CHAR_AND_LENGTH (p, MAX_MULTIBYTE_LENGTH, len);
6528
6529 if (c >= 128
6530 && (CHAR_TABLE_P (safe_chars)
6531 ? NILP (CHAR_TABLE_REF (safe_chars, c))
6532 : (NILP (safe_chars) || c < 256)))
6533 {
6534 pos_list = Fcons (make_number (pos), pos_list);
6535 if (--n <= 0)
6536 break;
6537 }
6538 pos++;
6539 p += len;
6540 }
6541 return Fnreverse (pos_list);
6542 }
6543
6544
6545 DEFUN ("unencodable-char-position", Funencodable_char_position,
6546 Sunencodable_char_position, 3, 5, 0,
6547 doc: /*
6548 Return position of first un-encodable character in a region.
6549 START and END specfiy the region and CODING-SYSTEM specifies the
6550 encoding to check. Return nil if CODING-SYSTEM does encode the region.
6551
6552 If optional 4th argument COUNT is non-nil, it specifies at most how
6553 many un-encodable characters to search. In this case, the value is a
6554 list of positions.
6555
6556 If optional 5th argument STRING is non-nil, it is a string to search
6557 for un-encodable characters. In that case, START and END are indexes
6558 to the string. */)
6559 (start, end, coding_system, count, string)
6560 Lisp_Object start, end, coding_system, count, string;
6561 {
6562 int n;
6563 Lisp_Object safe_chars;
6564 struct coding_system coding;
6565 Lisp_Object positions;
6566 int from, to;
6567 unsigned char *p, *pend;
6568
6569 if (NILP (string))
6570 {
6571 validate_region (&start, &end);
6572 from = XINT (start);
6573 to = XINT (end);
6574 if (NILP (current_buffer->enable_multibyte_characters))
6575 return Qnil;
6576 p = CHAR_POS_ADDR (from);
6577 pend = CHAR_POS_ADDR (to);
6578 }
6579 else
6580 {
6581 CHECK_STRING (string);
6582 CHECK_NATNUM (start);
6583 CHECK_NATNUM (end);
6584 from = XINT (start);
6585 to = XINT (end);
6586 if (from > to
6587 || to > SCHARS (string))
6588 args_out_of_range_3 (string, start, end);
6589 if (! STRING_MULTIBYTE (string))
6590 return Qnil;
6591 p = SDATA (string) + string_char_to_byte (string, from);
6592 pend = SDATA (string) + string_char_to_byte (string, to);
6593 }
6594
6595 setup_coding_system (Fcheck_coding_system (coding_system), &coding);
6596
6597 if (NILP (count))
6598 n = 1;
6599 else
6600 {
6601 CHECK_NATNUM (count);
6602 n = XINT (count);
6603 }
6604
6605 if (coding.type == coding_type_no_conversion
6606 || coding.type == coding_type_raw_text)
6607 return Qnil;
6608
6609 if (coding.type == coding_type_undecided)
6610 safe_chars = Qnil;
6611 else
6612 safe_chars = coding_safe_chars (&coding);
6613
6614 if (STRINGP (string)
6615 || from >= GPT || to <= GPT)
6616 positions = unencodable_char_position (safe_chars, from, p, pend, n);
6617 else
6618 {
6619 Lisp_Object args[2];
6620
6621 args[0] = unencodable_char_position (safe_chars, from, p, GPT_ADDR, n);
6622 n -= Flength (args[0]);
6623 if (n <= 0)
6624 positions = args[0];
6625 else
6626 {
6627 args[1] = unencodable_char_position (safe_chars, GPT, GAP_END_ADDR,
6628 pend, n);
6629 positions = Fappend (2, args);
6630 }
6631 }
6632
6633 return (NILP (count) ? Fcar (positions) : positions);
6634 }
6635
6636
6501 Lisp_Object 6637 Lisp_Object
6502 code_convert_region1 (start, end, coding_system, encodep) 6638 code_convert_region1 (start, end, coding_system, encodep)
6503 Lisp_Object start, end, coding_system; 6639 Lisp_Object start, end, coding_system;
6504 int encodep; 6640 int encodep;
6505 { 6641 {
7187 defsubr (&Sread_non_nil_coding_system); 7323 defsubr (&Sread_non_nil_coding_system);
7188 defsubr (&Scheck_coding_system); 7324 defsubr (&Scheck_coding_system);
7189 defsubr (&Sdetect_coding_region); 7325 defsubr (&Sdetect_coding_region);
7190 defsubr (&Sdetect_coding_string); 7326 defsubr (&Sdetect_coding_string);
7191 defsubr (&Sfind_coding_systems_region_internal); 7327 defsubr (&Sfind_coding_systems_region_internal);
7328 defsubr (&Sunencodable_char_position);
7192 defsubr (&Sdecode_coding_region); 7329 defsubr (&Sdecode_coding_region);
7193 defsubr (&Sencode_coding_region); 7330 defsubr (&Sencode_coding_region);
7194 defsubr (&Sdecode_coding_string); 7331 defsubr (&Sdecode_coding_string);
7195 defsubr (&Sencode_coding_string); 7332 defsubr (&Sencode_coding_string);
7196 defsubr (&Sdecode_sjis_char); 7333 defsubr (&Sdecode_sjis_char);