comparison src/category.c @ 99016:3092df1e1b8a

(word_boundary_p): Check scripts instead of charset. Handle nil value in word-separating-categories and word-combining-categories. (syms_of_category): Fix docstrings of word-separating-categories and word-combining-categories.
author Kenichi Handa <handa@m17n.org>
date Wed, 22 Oct 2008 05:23:47 +0000
parents 8971ddf55736
children 2810c8ec19d6
comparison
equal deleted inserted replaced
99015:0941429c2093 99016:3092df1e1b8a
395 { 395 {
396 Lisp_Object category_set1, category_set2; 396 Lisp_Object category_set1, category_set2;
397 Lisp_Object tail; 397 Lisp_Object tail;
398 int default_result; 398 int default_result;
399 399
400 if (CHAR_CHARSET (c1) == CHAR_CHARSET (c2)) 400 if (EQ (CHAR_TABLE_REF (Vchar_script_table, c1),
401 CHAR_TABLE_REF (Vchar_script_table, c2)))
401 { 402 {
402 tail = Vword_separating_categories; 403 tail = Vword_separating_categories;
403 default_result = 0; 404 default_result = 0;
404 } 405 }
405 else 406 else
418 for (; CONSP (tail); tail = XCDR (tail)) 419 for (; CONSP (tail); tail = XCDR (tail))
419 { 420 {
420 Lisp_Object elt = XCAR (tail); 421 Lisp_Object elt = XCAR (tail);
421 422
422 if (CONSP (elt) 423 if (CONSP (elt)
423 && CATEGORYP (XCAR (elt)) 424 && (NILP (XCAR (elt))
424 && CATEGORYP (XCDR (elt)) 425 || (CATEGORYP (XCAR (elt))
425 && CATEGORY_MEMBER (XFASTINT (XCAR (elt)), category_set1) 426 && CATEGORY_MEMBER (XFASTINT (XCAR (elt)), category_set1)))
426 && CATEGORY_MEMBER (XFASTINT (XCDR (elt)), category_set2)) 427 && (NILP (XCDR (elt))
428 || (CATEGORYP (XCDR (elt))
429 && CATEGORY_MEMBER (XFASTINT (XCDR (elt)), category_set2))))
427 return !default_result; 430 return !default_result;
428 } 431 }
429 return default_result; 432 return default_result;
430 } 433 }
431 434
466 DEFVAR_LISP ("word-combining-categories", &Vword_combining_categories, 469 DEFVAR_LISP ("word-combining-categories", &Vword_combining_categories,
467 doc: /* List of pair (cons) of categories to determine word boundary. 470 doc: /* List of pair (cons) of categories to determine word boundary.
468 471
469 Emacs treats a sequence of word constituent characters as a single 472 Emacs treats a sequence of word constituent characters as a single
470 word (i.e. finds no word boundary between them) only if they belong to 473 word (i.e. finds no word boundary between them) only if they belong to
471 the same charset. But, exceptions are allowed in the following cases. 474 the same script. But, exceptions are allowed in the following cases.
472 475
473 \(1) The case that characters are in different charsets is controlled 476 \(1) The case that characters are in different scripts is controlled
474 by the variable `word-combining-categories'. 477 by the variable `word-combining-categories'.
475 478
476 Emacs finds no word boundary between characters of different charsets 479 Emacs finds no word boundary between characters of different scripts
477 if they have categories matching some element of this list. 480 if they have categories matching some element of this list.
478 481
479 More precisely, if an element of this list is a cons of category CAT1 482 More precisely, if an element of this list is a cons of category CAT1
480 and CAT2, and a multibyte character C1 which has CAT1 is followed by 483 and CAT2, and a multibyte character C1 which has CAT1 is followed by
481 C2 which has CAT2, there's no word boundary between C1 and C2. 484 C2 which has CAT2, there's no word boundary between C1 and C2.
482 485
483 For instance, to tell that ASCII characters and Latin-1 characters can 486 For instance, to tell that Han characters followed by Hiragana
484 form a single word, the element `(?l . ?l)' should be in this list 487 characters can form a single word, the element `(?C . ?H)' should be
485 because both characters have the category `l' (Latin characters). 488 in this list.
486 489
487 \(2) The case that character are in the same charset is controlled by 490 \(2) The case that characters are in the same script is controlled by
488 the variable `word-separating-categories'. 491 the variable `word-separating-categories'.
489 492
490 Emacs find a word boundary between characters of the same charset 493 Emacs finds a word boundary between characters of the same script
491 if they have categories matching some element of this list. 494 if they have categories matching some element of this list.
492 495
493 More precisely, if an element of this list is a cons of category CAT1 496 More precisely, if an element of this list is a cons of category CAT1
494 and CAT2, and a multibyte character C1 which has CAT1 is followed by 497 and CAT2, and a multibyte character C1 which has CAT1 is followed by
495 C2 which has CAT2, there's a word boundary between C1 and C2. 498 C2 which has CAT2, there's a word boundary between C1 and C2.
496 499
497 For instance, to tell that there's a word boundary between Japanese 500 For instance, to tell that there's a word boundary between Hiragana
498 Hiragana and Japanese Kanji (both are in the same charset), the 501 and Katakana (both are in the same script `kana'),
499 element `(?H . ?C) should be in this list. */); 502 the element `(?H . ?K)' should be in this list. */);
500 503
501 Vword_combining_categories = Qnil; 504 Vword_combining_categories = Qnil;
502 505
503 DEFVAR_LISP ("word-separating-categories", &Vword_separating_categories, 506 DEFVAR_LISP ("word-separating-categories", &Vword_separating_categories,
504 doc: /* List of pair (cons) of categories to determine word boundary. 507 doc: /* List of pair (cons) of categories to determine word boundary.