Mercurial > emacs
comparison src/coding.c @ 34888:b469d29c0815
(SAFE_ONE_MORE_BYTE): New macro.
(DECODE_EMACS_MULE_COMPOSITION_CHAR): New macro.
(DECODE_EMACS_MULE_COMPOSITION_RULE): New macro.
(decode_composition_emacs_mule): New function.
(decode_coding_emacs_mule): Decode composition sequence by calling
decode_composition_emacs_mule.
(ENCODE_COMPOSITION_EMACS_MULE): New macro.
(encode_coding_emacs_mule): Changed from macro to function. If
a text contains compositions, encode them correctly.
(setup_coding_system): Set coding->common_flags for emacs-mule so
that decoding and encoding are required.
author | Kenichi Handa <handa@m17n.org> |
---|---|
date | Thu, 28 Dec 2000 01:05:02 +0000 |
parents | e112f39ea5b6 |
children | 3868f2e7355a |
comparison
equal
deleted
inserted
replaced
34887:cf361d741e2c | 34888:b469d29c0815 |
---|---|
511 (EQ (safe_chars, Qt) || !NILP (CHAR_TABLE_REF (safe_chars, c))) | 511 (EQ (safe_chars, Qt) || !NILP (CHAR_TABLE_REF (safe_chars, c))) |
512 | 512 |
513 | 513 |
514 /*** 2. Emacs internal format (emacs-mule) handlers ***/ | 514 /*** 2. Emacs internal format (emacs-mule) handlers ***/ |
515 | 515 |
516 /* Emacs' internal format for encoding multiple character sets is a | 516 /* Emacs' internal format for representation of multiple character |
517 kind of multi-byte encoding, i.e. characters are encoded by | 517 sets is a kind of multi-byte encoding, i.e. characters are |
518 variable-length sequences of one-byte codes. | 518 represented by variable-length sequences of one-byte codes. |
519 | 519 |
520 ASCII characters and control characters (e.g. `tab', `newline') are | 520 ASCII characters and control characters (e.g. `tab', `newline') are |
521 represented by one-byte sequences which are their ASCII codes, in | 521 represented by one-byte sequences which are their ASCII codes, in |
522 the range 0x00 through 0x7F. | 522 the range 0x00 through 0x7F. |
523 | 523 |
529 one-byte sequences which are their 8-bit code. | 529 one-byte sequences which are their 8-bit code. |
530 | 530 |
531 The other characters are represented by a sequence of `base | 531 The other characters are represented by a sequence of `base |
532 leading-code', optional `extended leading-code', and one or two | 532 leading-code', optional `extended leading-code', and one or two |
533 `position-code's. The length of the sequence is determined by the | 533 `position-code's. The length of the sequence is determined by the |
534 base leading-code. Leading-code takes the range 0x80 through 0x9F, | 534 base leading-code. Leading-code takes the range 0x81 through 0x9D, |
535 whereas extended leading-code and position-code take the range 0xA0 | 535 whereas extended leading-code and position-code take the range 0xA0 |
536 through 0xFF. See `charset.h' for more details about leading-code | 536 through 0xFF. See `charset.h' for more details about leading-code |
537 and position-code. | 537 and position-code. |
538 | 538 |
539 --- CODE RANGE of Emacs' internal format --- | 539 --- CODE RANGE of Emacs' internal format --- |
540 character set range | 540 character set range |
541 ------------- ----- | 541 ------------- ----- |
542 ascii 0x00..0x7F | 542 ascii 0x00..0x7F |
543 eight-bit-control LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF | 543 eight-bit-control LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF |
544 eight-bit-graphic 0xA0..0xBF | 544 eight-bit-graphic 0xA0..0xBF |
545 ELSE 0x81..0x9F + [0xA0..0xFF]+ | 545 ELSE 0x81..0x9D + [0xA0..0xFF]+ |
546 --------------------------------------------- | 546 --------------------------------------------- |
547 | 547 |
548 As this is the internal character representation, the format is | |
549 usually not used externally (i.e. in a file or in a data sent to a | |
550 process). But, it is possible to have a text externally in this | |
551 format (i.e. by encoding by the coding system `emacs-mule'). | |
552 | |
553 In that case, a sequence of one-byte codes has a slightly different | |
554 form. | |
555 | |
556 At first, all characters in eight-bit-control are represented by | |
557 one-byte sequences which are their 8-bit code. | |
558 | |
559 Next, character composition data are represented by the byte | |
560 sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ..., | |
561 where, | |
562 METHOD is 0xF0 plus one of composition method (enum | |
563 composition_method), | |
564 | |
565 BYTES is 0xA0 plus a byte length of this composition data, | |
566 | |
567 CHARS is 0xA0 plus a number of characters composed by this | |
568 data, | |
569 | |
570 COMPONENTs are characters of multibyte form or composition | |
571 rules encoded by two bytes of ASCII codes. | |
572 | |
573 In addition, for backward compatibility, the following formats are | |
574 also recognized as composition data on decoding. | |
575 | |
576 0x80 MSEQ ... | |
577 0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ | |
578 | |
579 Here, | |
580 MSEQ is a multibyte form but in this special format: | |
581 ASCII: 0xA0 ASCII_CODE+0x80, | |
582 other: LEADING_CODE+0x20 FOLLOWING-BYTE ..., | |
583 RULE is a one byte code of the range 0xA0..0xF0 that | |
584 represents a composition rule. | |
548 */ | 585 */ |
549 | 586 |
550 enum emacs_code_class_type emacs_code_class[256]; | 587 enum emacs_code_class_type emacs_code_class[256]; |
551 | 588 |
552 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". | 589 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". |
605 } | 642 } |
606 label_end_of_loop: | 643 label_end_of_loop: |
607 return CODING_CATEGORY_MASK_EMACS_MULE; | 644 return CODING_CATEGORY_MASK_EMACS_MULE; |
608 } | 645 } |
609 | 646 |
647 | |
648 /* Record the starting position START and METHOD of one composition. */ | |
649 | |
650 #define CODING_ADD_COMPOSITION_START(coding, start, method) \ | |
651 do { \ | |
652 struct composition_data *cmp_data = coding->cmp_data; \ | |
653 int *data = cmp_data->data + cmp_data->used; \ | |
654 coding->cmp_data_start = cmp_data->used; \ | |
655 data[0] = -1; \ | |
656 data[1] = cmp_data->char_offset + start; \ | |
657 data[3] = (int) method; \ | |
658 cmp_data->used += 4; \ | |
659 } while (0) | |
660 | |
661 /* Record the ending position END of the current composition. */ | |
662 | |
663 #define CODING_ADD_COMPOSITION_END(coding, end) \ | |
664 do { \ | |
665 struct composition_data *cmp_data = coding->cmp_data; \ | |
666 int *data = cmp_data->data + coding->cmp_data_start; \ | |
667 data[0] = cmp_data->used - coding->cmp_data_start; \ | |
668 data[2] = cmp_data->char_offset + end; \ | |
669 } while (0) | |
670 | |
671 /* Record one COMPONENT (alternate character or composition rule). */ | |
672 | |
673 #define CODING_ADD_COMPOSITION_COMPONENT(coding, component) \ | |
674 (coding->cmp_data->data[coding->cmp_data->used++] = component) | |
675 | |
676 | |
677 /* Get one byte from the data pointed to by SRC and increment SRC. If SRC | |
678 is not less than SRC_END, return -1 without incrementing SRC. */ | |
679 | |
680 #define SAFE_ONE_MORE_BYTE() (src >= src_end ? -1 : *src++) | |
681 | |
682 | |
683 /* Decode a character represented as a component of composition | |
684 sequence of Emacs 20 style at SRC. Set C to that character, store | |
685 its multibyte form sequence at P, and set P to the end of that | |
686 sequence. If no valid character is found, set C to -1. */ | |
687 | |
688 #define DECODE_EMACS_MULE_COMPOSITION_CHAR(c, p) \ | |
689 do { \ | |
690 int bytes; \ | |
691 \ | |
692 c = SAFE_ONE_MORE_BYTE (); \ | |
693 if (c < 0) \ | |
694 break; \ | |
695 if (CHAR_HEAD_P (c)) \ | |
696 c = -1; \ | |
697 else if (c == 0xA0) \ | |
698 { \ | |
699 c = SAFE_ONE_MORE_BYTE (); \ | |
700 if (c < 0xA0) \ | |
701 c = -1; \ | |
702 else \ | |
703 { \ | |
704 c -= 0xA0; \ | |
705 *p++ = c; \ | |
706 } \ | |
707 } \ | |
708 else if (BASE_LEADING_CODE_P (c - 0x20)) \ | |
709 { \ | |
710 unsigned char *p0 = p; \ | |
711 \ | |
712 c -= 0x20; \ | |
713 *p++ = c; \ | |
714 bytes = BYTES_BY_CHAR_HEAD (c); \ | |
715 while (--bytes) \ | |
716 { \ | |
717 c = SAFE_ONE_MORE_BYTE (); \ | |
718 if (c < 0) \ | |
719 break; \ | |
720 *p++ = c; \ | |
721 } \ | |
722 if (UNIBYTE_STR_AS_MULTIBYTE_P (p0, p - p0, bytes)) \ | |
723 c = STRING_CHAR (p0, bytes); \ | |
724 else \ | |
725 c = -1; \ | |
726 } \ | |
727 else \ | |
728 c = -1; \ | |
729 } while (0) | |
730 | |
731 | |
732 /* Decode a composition rule represented as a component of composition | |
733 sequence of Emacs 20 style at SRC. Set C to the rule. If no | |
734 valid rule is found, set C to -1. */ | |
735 | |
736 #define DECODE_EMACS_MULE_COMPOSITION_RULE(c) \ | |
737 do { \ | |
738 c = SAFE_ONE_MORE_BYTE (); \ | |
739 c -= 0xA0; \ | |
740 if (c < 0 || c >= 81) \ | |
741 c = -1; \ | |
742 else \ | |
743 { \ | |
744 gref = c / 9, nref = c % 9; \ | |
745 c = COMPOSITION_ENCODE_RULE (gref, nref); \ | |
746 } \ | |
747 } while (0) | |
748 | |
749 | |
750 /* Decode composition sequence encoded by `emacs-mule' at the source | |
751 pointed by SRC. SRC_END is the end of source. Store information | |
752 of the composition in CODING->cmp_data. | |
753 | |
754 For backward compatibility, decode also a composition sequence of | |
755 Emacs 20 style. In that case, the composition sequence contains | |
756 characters that should be extracted into a buffer or string. Store | |
757 those characters at *DESTINATION in multibyte form. | |
758 | |
759 If we encounter an invalid byte sequence, return 0. | |
760 If we encounter an insufficient source or destination, or | |
761 insufficient space in CODING->cmp_data, return -1. | |
762 Otherwise, return consumed bytes in the source. | |
763 | |
764 */ | |
765 static INLINE int | |
766 decode_composition_emacs_mule (coding, src, src_end, | |
767 destination, dst_end, dst_bytes) | |
768 struct coding_system *coding; | |
769 unsigned char *src, *src_end, **destination, *dst_end; | |
770 int dst_bytes; | |
771 { | |
772 unsigned char *dst = *destination; | |
773 int method, data_len, nchars; | |
774 unsigned char *src_base = src++; | |
775 /* Store components of composition. */ | |
776 int component[COMPOSITION_DATA_MAX_BUNCH_LENGTH]; | |
777 int ncomponent; | |
778 /* Store multibyte form of characters to be composed. This is for | |
779 Emacs 20 style composition sequence. */ | |
780 unsigned char buf[MAX_COMPOSITION_COMPONENTS * MAX_MULTIBYTE_LENGTH]; | |
781 unsigned char *bufp = buf; | |
782 int c, i, gref, nref; | |
783 | |
784 if (coding->cmp_data->used + COMPOSITION_DATA_MAX_BUNCH_LENGTH | |
785 >= COMPOSITION_DATA_SIZE) | |
786 { | |
787 coding->result = CODING_FINISH_INSUFFICIENT_CMP; | |
788 return -1; | |
789 } | |
790 | |
791 ONE_MORE_BYTE (c); | |
792 if (c - 0xF0 >= COMPOSITION_RELATIVE | |
793 && c - 0xF0 <= COMPOSITION_WITH_RULE_ALTCHARS) | |
794 { | |
795 int with_rule; | |
796 | |
797 method = c - 0xF0; | |
798 with_rule = (method == COMPOSITION_WITH_RULE | |
799 || method == COMPOSITION_WITH_RULE_ALTCHARS); | |
800 ONE_MORE_BYTE (c); | |
801 data_len = c - 0xA0; | |
802 if (data_len < 4 | |
803 || src_base + data_len > src_end) | |
804 return 0; | |
805 ONE_MORE_BYTE (c); | |
806 nchars = c - 0xA0; | |
807 if (c < 1) | |
808 return 0; | |
809 for (ncomponent = 0; src < src_base + data_len; ncomponent++) | |
810 { | |
811 if (ncomponent % 2 && with_rule) | |
812 { | |
813 ONE_MORE_BYTE (gref); | |
814 gref -= 32; | |
815 ONE_MORE_BYTE (nref); | |
816 nref -= 32; | |
817 c = COMPOSITION_ENCODE_RULE (gref, nref); | |
818 } | |
819 else | |
820 { | |
821 int bytes; | |
822 if (UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes)) | |
823 c = STRING_CHAR (src, bytes); | |
824 else | |
825 c = *src, bytes = 1; | |
826 src += bytes; | |
827 } | |
828 component[ncomponent] = c; | |
829 } | |
830 } | |
831 else | |
832 { | |
833 /* This may be an old Emacs 20 style format. See the comment at | |
834 the section 2 of this file. */ | |
835 while (src < src_end && !CHAR_HEAD_P (*src)) src++; | |
836 if (src == src_end | |
837 && !(coding->mode & CODING_MODE_LAST_BLOCK)) | |
838 goto label_end_of_loop; | |
839 | |
840 src_end = src; | |
841 src = src_base + 1; | |
842 if (c < 0xC0) | |
843 { | |
844 method = COMPOSITION_RELATIVE; | |
845 for (ncomponent = 0; ncomponent < MAX_COMPOSITION_COMPONENTS;) | |
846 { | |
847 DECODE_EMACS_MULE_COMPOSITION_CHAR (c, bufp); | |
848 if (c < 0) | |
849 break; | |
850 component[ncomponent++] = c; | |
851 } | |
852 if (ncomponent < 2) | |
853 return 0; | |
854 nchars = ncomponent; | |
855 } | |
856 else if (c == 0xFF) | |
857 { | |
858 method = COMPOSITION_WITH_RULE; | |
859 src++; | |
860 DECODE_EMACS_MULE_COMPOSITION_CHAR (c, bufp); | |
861 if (c < 0) | |
862 return 0; | |
863 component[0] = c; | |
864 for (ncomponent = 1; | |
865 ncomponent < MAX_COMPOSITION_COMPONENTS * 2 - 1;) | |
866 { | |
867 DECODE_EMACS_MULE_COMPOSITION_RULE (c); | |
868 if (c < 0) | |
869 break; | |
870 component[ncomponent++] = c; | |
871 DECODE_EMACS_MULE_COMPOSITION_CHAR (c, bufp); | |
872 if (c < 0) | |
873 break; | |
874 component[ncomponent++] = c; | |
875 } | |
876 if (ncomponent < 3) | |
877 return 0; | |
878 nchars = (ncomponent + 1) / 2; | |
879 } | |
880 else | |
881 return 0; | |
882 } | |
883 | |
884 if (buf == bufp || dst + (bufp - buf) <= (dst_bytes ? dst_end : src)) | |
885 { | |
886 CODING_ADD_COMPOSITION_START (coding, coding->produced_char, method); | |
887 for (i = 0; i < ncomponent; i++) | |
888 CODING_ADD_COMPOSITION_COMPONENT (coding, component[i]); | |
889 CODING_ADD_COMPOSITION_END (coding, coding->produced_char + nchars); | |
890 if (buf < bufp) | |
891 { | |
892 unsigned char *p = buf; | |
893 EMIT_BYTES (p, bufp); | |
894 *destination += bufp - buf; | |
895 coding->produced_char += nchars; | |
896 } | |
897 return (src - src_base); | |
898 } | |
899 label_end_of_loop: | |
900 return -1; | |
901 } | |
610 | 902 |
611 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */ | 903 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */ |
612 | 904 |
613 static void | 905 static void |
614 decode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes) | 906 decode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes) |
667 } | 959 } |
668 *dst++ = *src++; | 960 *dst++ = *src++; |
669 coding->produced_char++; | 961 coding->produced_char++; |
670 continue; | 962 continue; |
671 } | 963 } |
672 else if (UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes)) | 964 else if (*src == 0x80) |
673 { | 965 { |
674 p = src; | 966 /* Start of composition data. */ |
675 src += bytes; | 967 int consumed = decode_composition_emacs_mule (coding, src, src_end, |
676 } | 968 &dst, dst_end, |
677 else | 969 dst_bytes); |
678 { | 970 if (consumed < 0) |
971 goto label_end_of_loop; | |
972 else if (consumed > 0) | |
973 { | |
974 src += consumed; | |
975 continue; | |
976 } | |
679 bytes = CHAR_STRING (*src, tmp); | 977 bytes = CHAR_STRING (*src, tmp); |
680 p = tmp; | 978 p = tmp; |
681 src++; | 979 src++; |
682 } | 980 } |
981 else if (UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes)) | |
982 { | |
983 p = src; | |
984 src += bytes; | |
985 } | |
986 else | |
987 { | |
988 bytes = CHAR_STRING (*src, tmp); | |
989 p = tmp; | |
990 src++; | |
991 } | |
683 if (dst + bytes >= (dst_bytes ? dst_end : src)) | 992 if (dst + bytes >= (dst_bytes ? dst_end : src)) |
684 { | 993 { |
685 coding->result = CODING_FINISH_INSUFFICIENT_DST; | 994 coding->result = CODING_FINISH_INSUFFICIENT_DST; |
686 break; | 995 break; |
687 } | 996 } |
691 label_end_of_loop: | 1000 label_end_of_loop: |
692 coding->consumed = coding->consumed_char = src_base - source; | 1001 coding->consumed = coding->consumed_char = src_base - source; |
693 coding->produced = dst - destination; | 1002 coding->produced = dst - destination; |
694 } | 1003 } |
695 | 1004 |
696 #define encode_coding_emacs_mule(coding, source, destination, src_bytes, dst_bytes) \ | 1005 |
697 encode_eol (coding, source, destination, src_bytes, dst_bytes) | 1006 /* Encode composition data stored at DATA into a special byte sequence |
698 | 1007 starting by 0x80. Update CODING->cmp_data_start and maybe |
1008 CODING->cmp_data for the next call. */ | |
1009 | |
1010 #define ENCODE_COMPOSITION_EMACS_MULE(coding, data) \ | |
1011 do { \ | |
1012 unsigned char buf[1024], *p0 = buf, *p; \ | |
1013 int len = data[0]; \ | |
1014 int i; \ | |
1015 \ | |
1016 buf[0] = 0x80; \ | |
1017 buf[1] = 0xF0 + data[3]; /* METHOD */ \ | |
1018 buf[3] = 0xA0 + (data[2] - data[1]); /* COMPOSED-CHARS */ \ | |
1019 p = buf + 4; \ | |
1020 if (data[3] == COMPOSITION_WITH_RULE \ | |
1021 || data[3] == COMPOSITION_WITH_RULE_ALTCHARS) \ | |
1022 { \ | |
1023 p += CHAR_STRING (data[4], p); \ | |
1024 for (i = 5; i < len; i += 2) \ | |
1025 { \ | |
1026 int gref, nref; \ | |
1027 COMPOSITION_DECODE_RULE (data[i], gref, nref); \ | |
1028 *p++ = 0x20 + gref; \ | |
1029 *p++ = 0x20 + nref; \ | |
1030 p += CHAR_STRING (data[i + 1], p); \ | |
1031 } \ | |
1032 } \ | |
1033 else \ | |
1034 { \ | |
1035 for (i = 4; i < len; i++) \ | |
1036 p += CHAR_STRING (data[i], p); \ | |
1037 } \ | |
1038 buf[2] = 0xA0 + (p - buf); /* COMPONENTS-BYTES */ \ | |
1039 \ | |
1040 if (dst + (p - buf) + 4 > (dst_bytes ? dst_end : src)) \ | |
1041 { \ | |
1042 coding->result = CODING_FINISH_INSUFFICIENT_DST; \ | |
1043 goto label_end_of_loop; \ | |
1044 } \ | |
1045 while (p0 < p) \ | |
1046 *dst++ = *p0++; \ | |
1047 coding->cmp_data_start += data[0]; \ | |
1048 if (coding->cmp_data_start == coding->cmp_data->used \ | |
1049 && coding->cmp_data->next) \ | |
1050 { \ | |
1051 coding->cmp_data = coding->cmp_data->next; \ | |
1052 coding->cmp_data_start = 0; \ | |
1053 } \ | |
1054 } while (0) | |
1055 | |
1056 | |
1057 static void encode_eol P_ ((struct coding_system *, unsigned char *, | |
1058 unsigned char *, int, int)); | |
1059 | |
1060 static void | |
1061 encode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes) | |
1062 struct coding_system *coding; | |
1063 unsigned char *source, *destination; | |
1064 int src_bytes, dst_bytes; | |
1065 { | |
1066 unsigned char *src = source; | |
1067 unsigned char *src_end = source + src_bytes; | |
1068 unsigned char *dst = destination; | |
1069 unsigned char *dst_end = destination + dst_bytes; | |
1070 unsigned char *src_base; | |
1071 int c; | |
1072 int char_offset; | |
1073 int *data; | |
1074 | |
1075 Lisp_Object translation_table; | |
1076 | |
1077 translation_table = Qnil; | |
1078 | |
1079 /* Optimization for the case that there's no composition. */ | |
1080 if (!coding->cmp_data || coding->cmp_data->used == 0) | |
1081 { | |
1082 encode_eol (coding, source, destination, src_bytes, dst_bytes); | |
1083 return; | |
1084 } | |
1085 | |
1086 char_offset = coding->cmp_data->char_offset; | |
1087 data = coding->cmp_data->data + coding->cmp_data_start; | |
1088 while (1) | |
1089 { | |
1090 src_base = src; | |
1091 | |
1092 /* If SRC starts a composition, encode the information about the | |
1093 composition in advance. */ | |
1094 if (coding->cmp_data_start < coding->cmp_data->used | |
1095 && char_offset + coding->consumed_char == data[1]) | |
1096 { | |
1097 ENCODE_COMPOSITION_EMACS_MULE (coding, data); | |
1098 char_offset = coding->cmp_data->char_offset; | |
1099 data = coding->cmp_data->data + coding->cmp_data_start; | |
1100 } | |
1101 | |
1102 ONE_MORE_CHAR (c); | |
1103 if (c == '\n' && (coding->eol_type == CODING_EOL_CRLF | |
1104 || coding->eol_type == CODING_EOL_CR)) | |
1105 { | |
1106 if (coding->eol_type == CODING_EOL_CRLF) | |
1107 EMIT_TWO_BYTES ('\r', c); | |
1108 else | |
1109 EMIT_ONE_BYTE ('\r'); | |
1110 } | |
1111 else if (SINGLE_BYTE_CHAR_P (c)) | |
1112 EMIT_ONE_BYTE (c); | |
1113 else | |
1114 EMIT_BYTES (src_base, src); | |
1115 coding->consumed_char++; | |
1116 } | |
1117 label_end_of_loop: | |
1118 coding->consumed = src_base - source; | |
1119 coding->produced = coding->produced_char = dst - destination; | |
1120 return; | |
1121 } | |
699 | 1122 |
700 | 1123 |
701 /*** 3. ISO2022 handlers ***/ | 1124 /*** 3. ISO2022 handlers ***/ |
702 | 1125 |
703 /* The following note describes the coding system ISO2022 briefly. | 1126 /* The following note describes the coding system ISO2022 briefly. |
1178 coding->cmp_data->next = cmp_data; | 1601 coding->cmp_data->next = cmp_data; |
1179 coding->cmp_data = cmp_data; | 1602 coding->cmp_data = cmp_data; |
1180 coding->cmp_data_start = 0; | 1603 coding->cmp_data_start = 0; |
1181 } | 1604 } |
1182 | 1605 |
1183 /* Record the starting position START and METHOD of one composition. */ | 1606 /* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4. |
1184 | 1607 ESC 0 : relative composition : ESC 0 CHAR ... ESC 1 |
1185 #define CODING_ADD_COMPOSITION_START(coding, start, method) \ | 1608 ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1 |
1186 do { \ | 1609 ESC 3 : altchar composition : ESC 3 ALT ... ESC 0 CHAR ... ESC 1 |
1187 struct composition_data *cmp_data = coding->cmp_data; \ | 1610 ESC 4 : alt&rule composition : ESC 4 ALT RULE .. ALT ESC 0 CHAR ... ESC 1 |
1188 int *data = cmp_data->data + cmp_data->used; \ | 1611 */ |
1189 coding->cmp_data_start = cmp_data->used; \ | |
1190 data[0] = -1; \ | |
1191 data[1] = cmp_data->char_offset + start; \ | |
1192 data[3] = (int) method; \ | |
1193 cmp_data->used += 4; \ | |
1194 } while (0) | |
1195 | |
1196 /* Record the ending position END of the current composition. */ | |
1197 | |
1198 #define CODING_ADD_COMPOSITION_END(coding, end) \ | |
1199 do { \ | |
1200 struct composition_data *cmp_data = coding->cmp_data; \ | |
1201 int *data = cmp_data->data + coding->cmp_data_start; \ | |
1202 data[0] = cmp_data->used - coding->cmp_data_start; \ | |
1203 data[2] = cmp_data->char_offset + end; \ | |
1204 } while (0) | |
1205 | |
1206 /* Record one COMPONENT (alternate character or composition rule). */ | |
1207 | |
1208 #define CODING_ADD_COMPOSITION_COMPONENT(coding, component) \ | |
1209 (coding->cmp_data->data[coding->cmp_data->used++] = component) | |
1210 | |
1211 /* Handle compositoin start sequence ESC 0, ESC 2, ESC 3, or ESC 4. */ | |
1212 | 1612 |
1213 #define DECODE_COMPOSITION_START(c1) \ | 1613 #define DECODE_COMPOSITION_START(c1) \ |
1214 do { \ | 1614 do { \ |
1215 if (coding->composing == COMPOSITION_DISABLED) \ | 1615 if (coding->composing == COMPOSITION_DISABLED) \ |
1216 { \ | 1616 { \ |
3086 | 3486 |
3087 switch (XFASTINT (coding_type)) | 3487 switch (XFASTINT (coding_type)) |
3088 { | 3488 { |
3089 case 0: | 3489 case 0: |
3090 coding->type = coding_type_emacs_mule; | 3490 coding->type = coding_type_emacs_mule; |
3491 coding->common_flags | |
3492 |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK; | |
3493 coding->composing = COMPOSITION_NO; | |
3091 if (!NILP (coding->post_read_conversion)) | 3494 if (!NILP (coding->post_read_conversion)) |
3092 coding->common_flags |= CODING_REQUIRE_DECODING_MASK; | 3495 coding->common_flags |= CODING_REQUIRE_DECODING_MASK; |
3093 if (!NILP (coding->pre_write_conversion)) | 3496 if (!NILP (coding->pre_write_conversion)) |
3094 coding->common_flags |= CODING_REQUIRE_ENCODING_MASK; | 3497 coding->common_flags |= CODING_REQUIRE_ENCODING_MASK; |
3095 break; | 3498 break; |