Mercurial > emacs
changeset 88361:1ce6f7577173
Include "character.h".
(Qccl, Qcclp): New variables.
(CCL_WRITE_CHAR): Alway treat the arg CH as a character even if
it's less than 256.
(CCL_WRITE_MULTIBYTE_CHAR): Deleted.
(CCL_WRITE_STRING, CCL_READ_CHAR): Adjusted for the change of SRC
and DST type.
(ccl_driver): Types of arguments changed. Code adjusted for that.
(Fccl_execute, Fccl_execute_on_string): Adjusted for the change of
ccl_driver.
(syms_of_ccl): Intern and staticpro Qccl and Qcclp.
author | Kenichi Handa <handa@m17n.org> |
---|---|
date | Fri, 01 Mar 2002 01:14:09 +0000 |
parents | 18ae5eac157c |
children | 8ced48c5d267 |
files | src/ccl.c |
diffstat | 1 files changed, 181 insertions(+), 331 deletions(-) [+] |
line wrap: on
line diff
--- a/src/ccl.c Fri Mar 01 01:13:46 2002 +0000 +++ b/src/ccl.c Fri Mar 01 01:14:09 2002 +0000 @@ -2,6 +2,9 @@ Copyright (C) 1995, 1997 Electrotechnical Laboratory, JAPAN. Copyright (C) 2001 Free Software Foundation, Inc. Licensed to the Free Software Foundation. + Copyright (C) 2001, 2002 + National Institute of Advanced Industrial Science and Technology (AIST) + Registration Number H13PRO009 This file is part of GNU Emacs. @@ -29,6 +32,7 @@ #ifdef emacs #include "lisp.h" +#include "character.h" #include "charset.h" #include "ccl.h" #include "coding.h" @@ -39,6 +43,8 @@ #endif /* not emacs */ +Lisp_Object Qccl, Qcclp; + /* This contains all code conversion map available to CCL. */ Lisp_Object Vcode_conversion_map_vector; @@ -711,56 +717,24 @@ /* Encode one character CH to multibyte form and write to the current output buffer. If CH is less than 256, CH is written as is. */ -#define CCL_WRITE_CHAR(ch) \ - do { \ - int bytes = SINGLE_BYTE_CHAR_P (ch) ? 1: CHAR_BYTES (ch); \ - if (!dst) \ - CCL_INVALID_CMD; \ - else if (dst + bytes + extra_bytes < (dst_bytes ? dst_end : src)) \ - { \ - if (bytes == 1) \ - { \ - *dst++ = (ch); \ - if ((ch) >= 0x80 && (ch) < 0xA0) \ - /* We may have to convert this eight-bit char to \ - multibyte form later. */ \ - extra_bytes++; \ - } \ - else if (CHAR_VALID_P (ch, 0)) \ - dst += CHAR_STRING (ch, dst); \ - else \ - CCL_INVALID_CMD; \ - } \ - else \ - CCL_SUSPEND (CCL_STAT_SUSPEND_BY_DST); \ - } while (0) - -/* Encode one character CH to multibyte form and write to the current - output buffer. The output bytes always forms a valid multibyte - sequence. */ -#define CCL_WRITE_MULTIBYTE_CHAR(ch) \ - do { \ - int bytes = CHAR_BYTES (ch); \ - if (!dst) \ - CCL_INVALID_CMD; \ - else if (dst + bytes + extra_bytes < (dst_bytes ? dst_end : src)) \ - { \ - if (CHAR_VALID_P ((ch), 0)) \ - dst += CHAR_STRING ((ch), dst); \ - else \ - CCL_INVALID_CMD; \ - } \ - else \ - CCL_SUSPEND (CCL_STAT_SUSPEND_BY_DST); \ +#define CCL_WRITE_CHAR(ch) \ + do { \ + if (! dst) \ + CCL_INVALID_CMD; \ + else if (dst < dst_end) \ + *dst++ = (ch); \ + else \ + CCL_SUSPEND (CCL_STAT_SUSPEND_BY_DST); \ } while (0) /* Write a string at ccl_prog[IC] of length LEN to the current output buffer. */ #define CCL_WRITE_STRING(len) \ do { \ + int i; \ if (!dst) \ CCL_INVALID_CMD; \ - else if (dst + len <= (dst_bytes ? dst_end : src)) \ + else if (dst + len <= dst_end) \ for (i = 0; i < len; i++) \ *dst++ = ((XFASTINT (ccl_prog[ic + (i / 3)])) \ >> ((2 - (i % 3)) * 8)) & 0xFF; \ @@ -768,78 +742,30 @@ CCL_SUSPEND (CCL_STAT_SUSPEND_BY_DST); \ } while (0) -/* Read one byte from the current input buffer into REGth register. */ -#define CCL_READ_CHAR(REG) \ - do { \ - if (!src) \ - CCL_INVALID_CMD; \ - else if (src < src_end) \ - { \ - REG = *src++; \ - if (REG == '\n' \ - && ccl->eol_type != CODING_EOL_LF) \ - { \ - /* We are encoding. */ \ - if (ccl->eol_type == CODING_EOL_CRLF) \ - { \ - if (ccl->cr_consumed) \ - ccl->cr_consumed = 0; \ - else \ - { \ - ccl->cr_consumed = 1; \ - REG = '\r'; \ - src--; \ - } \ - } \ - else \ - REG = '\r'; \ - } \ - if (REG == LEADING_CODE_8_BIT_CONTROL \ - && ccl->multibyte) \ - REG = *src++ - 0x20; \ - } \ - else if (ccl->last_block) \ - { \ - ic = ccl->eof_ic; \ - goto ccl_repeat; \ - } \ - else \ - CCL_SUSPEND (CCL_STAT_SUSPEND_BY_SRC); \ - } while (0) +/* Read one byte from the current input buffer into Rth register. */ +#define CCL_READ_CHAR(r) \ + do { \ + if (! src) \ + CCL_INVALID_CMD; \ + else if (src < src_end) \ + r = *src++; \ + else if (ccl->last_block) \ + { \ + ic = ccl->eof_ic; \ + goto ccl_repeat; \ + } \ + else \ + CCL_SUSPEND (CCL_STAT_SUSPEND_BY_SRC); \ + } while (0) -/* Set C to the character code made from CHARSET and CODE. This is - like MAKE_CHAR but check the validity of CHARSET and CODE. If they - are not valid, set C to (CODE & 0xFF) because that is usually the - case that CCL_ReadMultibyteChar2 read an invalid code and it set - CODE to that invalid byte. */ - -#define CCL_MAKE_CHAR(charset, code, c) \ - do { \ - if (charset == CHARSET_ASCII) \ - c = code & 0xFF; \ - else if (CHARSET_DEFINED_P (charset) \ - && (code & 0x7F) >= 32 \ - && (code < 256 || ((code >> 7) & 0x7F) >= 32)) \ - { \ - int c1 = code & 0x7F, c2 = 0; \ - \ - if (code >= 256) \ - c2 = c1, c1 = (code >> 7) & 0x7F; \ - c = MAKE_CHAR (charset, c1, c2); \ - } \ - else \ - c = code & 0xFF; \ - } while (0) - - -/* Execute CCL code on SRC_BYTES length text at SOURCE. The resulting - text goes to a place pointed by DESTINATION, the length of which - should not exceed DST_BYTES. The bytes actually processed is - returned as *CONSUMED. The return value is the length of the - resulting text. As a side effect, the contents of CCL registers - are updated. If SOURCE or DESTINATION is NULL, only operations on - registers are permitted. */ +/* Execute CCL code on characters at SOURCE (length SRC_SIZE). The + resulting text goes to a place pointed by DESTINATION, the length + of which should not exceed DST_SIZE. As a side effect, how many + characters are consumed and produced are recorded in CCL->consumed + and CCL->produced, and the contents of CCL registers are updated. + If SOURCE or DESTINATION is NULL, only operations on registers are + permitted. */ #ifdef CCL_DEBUG #define CCL_DEBUG_BACKTRACE_LEN 256 @@ -856,34 +782,29 @@ /* For the moment, we only support depth 256 of stack. */ static struct ccl_prog_stack ccl_prog_stack_struct[256]; -int -ccl_driver (ccl, source, destination, src_bytes, dst_bytes, consumed) +void +ccl_driver (ccl, source, destination, src_size, dst_size) struct ccl_program *ccl; - unsigned char *source, *destination; - int src_bytes, dst_bytes; - int *consumed; + int *source, *destination; + int src_size, dst_size; { register int *reg = ccl->reg; register int ic = ccl->ic; register int code = 0, field1, field2; register Lisp_Object *ccl_prog = ccl->prog; - unsigned char *src = source, *src_end = src + src_bytes; - unsigned char *dst = destination, *dst_end = dst + dst_bytes; + int *src = source, *src_end = src + src_size; + int *dst = destination, *dst_end = dst + dst_size; int jump_address; int i = 0, j, op; int stack_idx = ccl->stack_idx; /* Instruction counter of the current CCL code. */ int this_ic = 0; - /* CCL_WRITE_CHAR will produce 8-bit code of range 0x80..0x9F. But, - each of them will be converted to multibyte form of 2-byte - sequence. For that conversion, we remember how many more bytes - we must keep in DESTINATION in this variable. */ - int extra_bytes = 0; + struct charset *charset; if (ic >= ccl->eof_ic) ic = CCL_HEADER_MAIN; - if (ccl->buf_magnification == 0) /* We can't produce any bytes. */ + if (ccl->buf_magnification == 0) /* We can't read/produce any bytes. */ dst = NULL; /* Set mapping stack pointer. */ @@ -908,8 +829,8 @@ /* We can't just signal Qquit, instead break the loop as if the whole data is processed. Don't reset Vquit_flag, it must be handled later at a safer place. */ - if (consumed) - src = source + src_bytes; + if (src) + src = source + src_size; ccl->status = CCL_STAT_QUIT; break; } @@ -1224,8 +1145,22 @@ case CCL_LE: reg[rrr] = i <= j; break; case CCL_GE: reg[rrr] = i >= j; break; case CCL_NE: reg[rrr] = i != j; break; - case CCL_DECODE_SJIS: DECODE_SJIS (i, j, reg[rrr], reg[7]); break; - case CCL_ENCODE_SJIS: ENCODE_SJIS (i, j, reg[rrr], reg[7]); break; + case CCL_DECODE_SJIS: + { + i = (i << 8) | j; + SJIS_TO_JIS (i); + reg[rrr] = i >> 8; + reg[7] = i & 0xFF; + break; + } + case CCL_ENCODE_SJIS: + { + i = (i << 8) | j; + JIS_TO_SJIS (i); + reg[rrr] = i >> 8; + reg[7] = i & 0xFF; + break; + } default: CCL_INVALID_CMD; } code &= 0x1F; @@ -1245,165 +1180,38 @@ case CCL_ReadMultibyteChar2: if (!src) CCL_INVALID_CMD; - - if (src >= src_end) - { - src++; - goto ccl_read_multibyte_character_suspend; - } - - if (!ccl->multibyte) - { - int bytes; - if (!UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes)) - { - reg[RRR] = CHARSET_8_BIT_CONTROL; - reg[rrr] = *src++; - break; - } - } - i = *src++; - if (i == '\n' && ccl->eol_type != CODING_EOL_LF) - { - /* We are encoding. */ - if (ccl->eol_type == CODING_EOL_CRLF) - { - if (ccl->cr_consumed) - ccl->cr_consumed = 0; - else - { - ccl->cr_consumed = 1; - i = '\r'; - src--; - } - } - else - i = '\r'; - reg[rrr] = i; - reg[RRR] = CHARSET_ASCII; - } - else if (i < 0x80) - { - /* ASCII */ - reg[rrr] = i; - reg[RRR] = CHARSET_ASCII; - } - else if (i <= MAX_CHARSET_OFFICIAL_DIMENSION2) - { - int dimension = BYTES_BY_CHAR_HEAD (i) - 1; - - if (dimension == 0) - { - /* `i' is a leading code for an undefined charset. */ - reg[RRR] = CHARSET_8_BIT_GRAPHIC; - reg[rrr] = i; - } - else if (src + dimension > src_end) - goto ccl_read_multibyte_character_suspend; - else - { - reg[RRR] = i; - i = (*src++ & 0x7F); - if (dimension == 1) - reg[rrr] = i; - else - reg[rrr] = ((i << 7) | (*src++ & 0x7F)); - } - } - else if ((i == LEADING_CODE_PRIVATE_11) - || (i == LEADING_CODE_PRIVATE_12)) - { - if ((src + 1) >= src_end) - goto ccl_read_multibyte_character_suspend; - reg[RRR] = *src++; - reg[rrr] = (*src++ & 0x7F); - } - else if ((i == LEADING_CODE_PRIVATE_21) - || (i == LEADING_CODE_PRIVATE_22)) - { - if ((src + 2) >= src_end) - goto ccl_read_multibyte_character_suspend; - reg[RRR] = *src++; - i = (*src++ & 0x7F); - reg[rrr] = ((i << 7) | (*src & 0x7F)); - src++; - } - else if (i == LEADING_CODE_8_BIT_CONTROL) - { - if (src >= src_end) - goto ccl_read_multibyte_character_suspend; - reg[RRR] = CHARSET_8_BIT_CONTROL; - reg[rrr] = (*src++ - 0x20); - } - else if (i >= 0xA0) - { - reg[RRR] = CHARSET_8_BIT_GRAPHIC; - reg[rrr] = i; - } - else - { - /* INVALID CODE. Return a single byte character. */ - reg[RRR] = CHARSET_ASCII; - reg[rrr] = i; - } - break; - - ccl_read_multibyte_character_suspend: - if (src <= src_end && !ccl->multibyte && ccl->last_block) - { - reg[RRR] = CHARSET_8_BIT_CONTROL; - reg[rrr] = i; - break; - } - src--; - if (ccl->last_block) - { - ic = ccl->eof_ic; - goto ccl_repeat; - } - else - CCL_SUSPEND (CCL_STAT_SUSPEND_BY_SRC); - + CCL_READ_CHAR (i); + charset = CHAR_CHARSET (i); + reg[rrr] = CHARSET_ID (charset); + reg[RRR] = ENCODE_CHAR (charset, i); break; case CCL_WriteMultibyteChar2: - i = reg[RRR]; /* charset */ - if (i == CHARSET_ASCII - || i == CHARSET_8_BIT_CONTROL - || i == CHARSET_8_BIT_GRAPHIC) - i = reg[rrr] & 0xFF; - else if (CHARSET_DIMENSION (i) == 1) - i = ((i - 0x70) << 7) | (reg[rrr] & 0x7F); - else if (i < MIN_CHARSET_PRIVATE_DIMENSION2) - i = ((i - 0x8F) << 14) | reg[rrr]; - else - i = ((i - 0xE0) << 14) | reg[rrr]; - - CCL_WRITE_MULTIBYTE_CHAR (i); - + if (! dst) + CCL_INVALID_CMD; + charset = CHARSET_FROM_ID (reg[RRR]); + i = DECODE_CHAR (charset, reg[rrr]); + CCL_WRITE_CHAR (i); break; case CCL_TranslateCharacter: - CCL_MAKE_CHAR (reg[RRR], reg[rrr], i); - op = translate_char (GET_TRANSLATION_TABLE (reg[Rrr]), - i, -1, 0, 0); - SPLIT_CHAR (op, reg[RRR], i, j); - if (j != -1) - i = (i << 7) | j; - - reg[rrr] = i; + charset = CHARSET_FROM_ID (reg[RRR]); + i = DECODE_CHAR (charset, reg[rrr]); + op = translate_char (GET_TRANSLATION_TABLE (reg[Rrr]), i); + charset = CHAR_CHARSET (op); + reg[RRR] = CHARSET_ID (charset); + reg[rrr] = ENCODE_CHAR (charset, op); break; case CCL_TranslateCharacterConstTbl: op = XINT (ccl_prog[ic]); /* table */ ic++; - CCL_MAKE_CHAR (reg[RRR], reg[rrr], i); - op = translate_char (GET_TRANSLATION_TABLE (op), i, -1, 0, 0); - SPLIT_CHAR (op, reg[RRR], i, j); - if (j != -1) - i = (i << 7) | j; - - reg[rrr] = i; + charset = CHARSET_FROM_ID (reg[RRR]); + i = DECODE_CHAR (charset, reg[rrr]); + op = translate_char (GET_TRANSLATION_TABLE (op), i); + charset = CHAR_CHARSET (op); + reg[RRR] = CHARSET_ID (charset); + reg[rrr] = ENCODE_CHAR (charset, op); break; case CCL_IterateMultipleMap: @@ -1821,28 +1629,10 @@ } msglen = strlen (msg); - if (dst + msglen <= (dst_bytes ? dst_end : src)) - { - bcopy (msg, dst, msglen); - dst += msglen; - } - - if (ccl->status == CCL_STAT_INVALID_CMD) + if (dst + msglen <= dst_end) { -#if 0 /* If the remaining bytes contain 0x80..0x9F, copying them - results in an invalid multibyte sequence. */ - - /* Copy the remaining source data. */ - int i = src_end - src; - if (dst_bytes && (dst_end - dst) < i) - i = dst_end - dst; - bcopy (src, dst, i); - src += i; - dst += i; -#else - /* Signal that we've consumed everything. */ - src = src_end; -#endif + for (i = 0; i < msglen; i++) + *dst++ = msg[i]; } } @@ -1850,10 +1640,8 @@ ccl->ic = ic; ccl->stack_idx = stack_idx; ccl->prog = ccl_prog; - ccl->eight_bit_control = (extra_bytes > 0); - if (consumed) - *consumed = src - source; - return (dst ? dst - destination : 0); + ccl->consumed = src - source; + ccl->produced = dst - destination; } /* Resolve symbols in the specified CCL code (Lisp vector). This @@ -2003,7 +1791,6 @@ ccl->private_state = 0; ccl->status = 0; ccl->stack_idx = 0; - ccl->eol_type = CODING_EOL_LF; ccl->suppress_error = 0; return 0; } @@ -2066,7 +1853,7 @@ ? XINT (AREF (reg, i)) : 0); - ccl_driver (&ccl, (unsigned char *)0, (unsigned char *)0, 0, 0, (int *)0); + ccl_driver (&ccl, NULL, NULL, 0, 0); QUIT; if (ccl.status != CCL_STAT_SUCCESS) error ("Error in CCL program at %dth code", ccl.ic); @@ -2107,10 +1894,13 @@ { Lisp_Object val; struct ccl_program ccl; - int i, produced; + int i; int outbufsize; - char *outbuf; - struct gcpro gcpro1, gcpro2; + unsigned char *outbuf, *outp; + int str_chars, str_bytes; +#define CCL_EXECUTE_BUF_SIZE 1024 + int source[CCL_EXECUTE_BUF_SIZE], destination[CCL_EXECUTE_BUF_SIZE]; + int consumed_chars, consumed_bytes, produced_chars; if (setup_ccl_program (&ccl, ccl_prog) < 0) error ("Invalid CCL program"); @@ -2119,8 +1909,8 @@ if (ASIZE (status) != 9) error ("Length of vector STATUS is not 9"); CHECK_STRING (str); - - GCPRO2 (status, str); + str_chars = XSTRING (str)->size; + str_bytes = STRING_BYTES (XSTRING (str)); for (i = 0; i < 8; i++) { @@ -2135,33 +1925,87 @@ if (ccl.ic < i && i < ccl.size) ccl.ic = i; } - outbufsize = STRING_BYTES (XSTRING (str)) * ccl.buf_magnification + 256; - outbuf = (char *) xmalloc (outbufsize); - ccl.last_block = NILP (contin); - ccl.multibyte = STRING_MULTIBYTE (str); - produced = ccl_driver (&ccl, XSTRING (str)->data, outbuf, - STRING_BYTES (XSTRING (str)), outbufsize, (int *) 0); - for (i = 0; i < 8; i++) - XSET (AREF (status, i), Lisp_Int, ccl.reg[i]); - XSETINT (AREF (status, 8), ccl.ic); - UNGCPRO; + + outbufsize = (ccl.buf_magnification + ? str_bytes * ccl.buf_magnification + 256 + : str_bytes + 256); + outp = outbuf = (unsigned char *) xmalloc (outbufsize); + + consumed_chars = consumed_bytes = 0; + produced_chars = 0; + while (consumed_bytes < str_bytes) + { + unsigned char *p = XSTRING (str)->data + consumed_bytes; + unsigned char *endp = XSTRING (str)->data + str_bytes; + int i = 0; + int *src, src_size; + + if (endp - p == str_chars - consumed_chars) + while (i < CCL_EXECUTE_BUF_SIZE && p < endp) + source[i++] = *p++; + else + while (i < CCL_EXECUTE_BUF_SIZE && p < endp) + source[i++] = STRING_CHAR_ADVANCE (p); + consumed_chars += i; + consumed_bytes = p - XSTRING (str)->data; - if (NILP (unibyte_p)) - { - int nchars; + if (consumed_bytes == str_bytes) + ccl.last_block = NILP (contin); + src = source; + src_size = i; + while (1) + { + ccl_driver (&ccl, src, destination, src_size, CCL_EXECUTE_BUF_SIZE); + if (ccl.status != CCL_STAT_SUSPEND_BY_DST) + break; + produced_chars += ccl.produced; + if (NILP (unibyte_p)) + { + if (outp - outbuf + MAX_MULTIBYTE_LENGTH * ccl.produced + > outbufsize) + { + int offset = outp - outbuf; + outbufsize += MAX_MULTIBYTE_LENGTH * ccl.produced; + outbuf = (unsigned char *) xrealloc (outbuf, outbufsize); + outp = outbuf + offset; + } + for (i = 0; i < ccl.produced; i++) + CHAR_STRING_ADVANCE (destination[i], outp); + } + else + { + if (outp - outbuf + ccl.produced > outbufsize) + { + int offset = outp - outbuf; + outbufsize += ccl.produced; + outbuf = (unsigned char *) xrealloc (outbuf, outbufsize); + outp = outbuf + offset; + } + for (i = 0; i < ccl.produced; i++) + *outp++ = destination[i]; + } + src += ccl.consumed; + src_size -= ccl.consumed; + } - produced = str_as_multibyte (outbuf, outbufsize, produced, &nchars); - val = make_multibyte_string (outbuf, nchars, produced); - } - else - val = make_unibyte_string (outbuf, produced); - xfree (outbuf); - QUIT; - if (ccl.status == CCL_STAT_SUSPEND_BY_DST) - error ("Output buffer for the CCL programs overflow"); + if (ccl.status != CCL_STAT_SUSPEND_BY_SRC) + break; + } + if (ccl.status != CCL_STAT_SUCCESS && ccl.status != CCL_STAT_SUSPEND_BY_SRC) error ("Error in CCL program at %dth code", ccl.ic); + + for (i = 0; i < 8; i++) + XSET (XVECTOR (status)->contents[i], Lisp_Int, ccl.reg[i]); + XSETINT (XVECTOR (status)->contents[8], ccl.ic); + + if (NILP (unibyte_p)) + val = make_multibyte_string ((char *) outbuf, produced_chars, + outp - outbuf); + else + val = make_unibyte_string ((char *) outbuf, produced_chars); + xfree (outbuf); return val; } @@ -2307,6 +2151,12 @@ staticpro (&Vccl_program_table); Vccl_program_table = Fmake_vector (make_number (32), Qnil); + Qccl = intern ("ccl"); + staticpro (&Qccl); + + Qcclp = intern ("cclp"); + staticpro (&Qcclp); + Qccl_program = intern ("ccl-program"); staticpro (&Qccl_program);