Mercurial > emacs
comparison src/category.c @ 99016:3092df1e1b8a
(word_boundary_p): Check scripts instead of charset.
Handle nil value in word-separating-categories and
word-combining-categories.
(syms_of_category): Fix docstrings of word-separating-categories
and word-combining-categories.
author | Kenichi Handa <handa@m17n.org> |
---|---|
date | Wed, 22 Oct 2008 05:23:47 +0000 |
parents | 8971ddf55736 |
children | 2810c8ec19d6 |
comparison
equal
deleted
inserted
replaced
99015:0941429c2093 | 99016:3092df1e1b8a |
---|---|
395 { | 395 { |
396 Lisp_Object category_set1, category_set2; | 396 Lisp_Object category_set1, category_set2; |
397 Lisp_Object tail; | 397 Lisp_Object tail; |
398 int default_result; | 398 int default_result; |
399 | 399 |
400 if (CHAR_CHARSET (c1) == CHAR_CHARSET (c2)) | 400 if (EQ (CHAR_TABLE_REF (Vchar_script_table, c1), |
401 CHAR_TABLE_REF (Vchar_script_table, c2))) | |
401 { | 402 { |
402 tail = Vword_separating_categories; | 403 tail = Vword_separating_categories; |
403 default_result = 0; | 404 default_result = 0; |
404 } | 405 } |
405 else | 406 else |
418 for (; CONSP (tail); tail = XCDR (tail)) | 419 for (; CONSP (tail); tail = XCDR (tail)) |
419 { | 420 { |
420 Lisp_Object elt = XCAR (tail); | 421 Lisp_Object elt = XCAR (tail); |
421 | 422 |
422 if (CONSP (elt) | 423 if (CONSP (elt) |
423 && CATEGORYP (XCAR (elt)) | 424 && (NILP (XCAR (elt)) |
424 && CATEGORYP (XCDR (elt)) | 425 || (CATEGORYP (XCAR (elt)) |
425 && CATEGORY_MEMBER (XFASTINT (XCAR (elt)), category_set1) | 426 && CATEGORY_MEMBER (XFASTINT (XCAR (elt)), category_set1))) |
426 && CATEGORY_MEMBER (XFASTINT (XCDR (elt)), category_set2)) | 427 && (NILP (XCDR (elt)) |
428 || (CATEGORYP (XCDR (elt)) | |
429 && CATEGORY_MEMBER (XFASTINT (XCDR (elt)), category_set2)))) | |
427 return !default_result; | 430 return !default_result; |
428 } | 431 } |
429 return default_result; | 432 return default_result; |
430 } | 433 } |
431 | 434 |
466 DEFVAR_LISP ("word-combining-categories", &Vword_combining_categories, | 469 DEFVAR_LISP ("word-combining-categories", &Vword_combining_categories, |
467 doc: /* List of pair (cons) of categories to determine word boundary. | 470 doc: /* List of pair (cons) of categories to determine word boundary. |
468 | 471 |
469 Emacs treats a sequence of word constituent characters as a single | 472 Emacs treats a sequence of word constituent characters as a single |
470 word (i.e. finds no word boundary between them) only if they belong to | 473 word (i.e. finds no word boundary between them) only if they belong to |
471 the same charset. But, exceptions are allowed in the following cases. | 474 the same script. But, exceptions are allowed in the following cases. |
472 | 475 |
473 \(1) The case that characters are in different charsets is controlled | 476 \(1) The case that characters are in different scripts is controlled |
474 by the variable `word-combining-categories'. | 477 by the variable `word-combining-categories'. |
475 | 478 |
476 Emacs finds no word boundary between characters of different charsets | 479 Emacs finds no word boundary between characters of different scripts |
477 if they have categories matching some element of this list. | 480 if they have categories matching some element of this list. |
478 | 481 |
479 More precisely, if an element of this list is a cons of category CAT1 | 482 More precisely, if an element of this list is a cons of category CAT1 |
480 and CAT2, and a multibyte character C1 which has CAT1 is followed by | 483 and CAT2, and a multibyte character C1 which has CAT1 is followed by |
481 C2 which has CAT2, there's no word boundary between C1 and C2. | 484 C2 which has CAT2, there's no word boundary between C1 and C2. |
482 | 485 |
483 For instance, to tell that ASCII characters and Latin-1 characters can | 486 For instance, to tell that Han characters followed by Hiragana |
484 form a single word, the element `(?l . ?l)' should be in this list | 487 characters can form a single word, the element `(?C . ?H)' should be |
485 because both characters have the category `l' (Latin characters). | 488 in this list. |
486 | 489 |
487 \(2) The case that characters are in the same charset is controlled by | 490 \(2) The case that characters are in the same script is controlled by |
488 the variable `word-separating-categories'. | 491 the variable `word-separating-categories'. |
489 | 492 |
490 Emacs finds a word boundary between characters of the same charset | 493 Emacs finds a word boundary between characters of the same script |
491 if they have categories matching some element of this list. | 494 if they have categories matching some element of this list. |
492 | 495 |
493 More precisely, if an element of this list is a cons of category CAT1 | 496 More precisely, if an element of this list is a cons of category CAT1 |
494 and CAT2, and a multibyte character C1 which has CAT1 is followed by | 497 and CAT2, and a multibyte character C1 which has CAT1 is followed by |
495 C2 which has CAT2, there's a word boundary between C1 and C2. | 498 C2 which has CAT2, there's a word boundary between C1 and C2. |
496 | 499 |
497 For instance, to tell that there's a word boundary between Japanese | 500 For instance, to tell that there's a word boundary between Hiragana |
498 Hiragana and Japanese Kanji (both are in the same charset), the | 501 and Katakana (both are in the same script `kana'), |
499 element `(?H . ?C)' should be in this list. */); | 502 the element `(?H . ?K)' should be in this list. */); |
500 | 503 |
501 Vword_combining_categories = Qnil; | 504 Vword_combining_categories = Qnil; |
502 | 505 |
503 DEFVAR_LISP ("word-separating-categories", &Vword_separating_categories, | 506 DEFVAR_LISP ("word-separating-categories", &Vword_separating_categories, |
504 doc: /* List of pair (cons) of categories to determine word boundary. | 507 doc: /* List of pair (cons) of categories to determine word boundary. |