Mercurial > emacs
changeset 71299:767eeffaf27a
(read_escape): Provide a Unicode character escape syntax; \u followed by
exactly four or \U followed by exactly eight hex digits in a character or
string literal is read as the Unicode character with that code point.
author | Eli Zaretskii <eliz@gnu.org> |
---|---|
date | Fri, 09 Jun 2006 18:22:30 +0000 |
parents | 8f770ba8e00d |
children | 818392fb6dc3 |
files | src/lread.c |
diffstat | 1 files changed, 49 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- a/src/lread.c Fri Jun 09 18:11:21 2006 +0000 +++ b/src/lread.c Fri Jun 09 18:22:30 2006 +0000 @@ -1764,6 +1764,9 @@ int *byterep; { register int c = READCHAR; + /* \u allows up to four hex digits, \U up to eight. Default to the + behaviour for \u, and change this value in the case that \U is seen. */ + int unicode_hex_count = 4; *byterep = 0; @@ -1928,6 +1931,52 @@ return i; } + case 'U': + /* Post-Unicode-2.0: Up to eight hex chars. */ + unicode_hex_count = 8; + case 'u': + + /* A Unicode escape. We only permit them in strings and characters, + not arbitrarily in the source code, as in some other languages. */ + { + int i = 0; + int count = 0; + Lisp_Object lisp_char; + struct gcpro gcpro1; + + while (++count <= unicode_hex_count) + { + c = READCHAR; + /* isdigit(), isalpha() may be locale-specific, which we don't + want. */ + if (c >= '0' && c <= '9') i = (i << 4) + (c - '0'); + else if (c >= 'a' && c <= 'f') i = (i << 4) + (c - 'a') + 10; + else if (c >= 'A' && c <= 'F') i = (i << 4) + (c - 'A') + 10; + else + { + error ("Non-hex digit used for Unicode escape"); + break; + } + } + + GCPRO1 (readcharfun); + lisp_char = call2(intern("decode-char"), intern("ucs"), + make_number(i)); + UNGCPRO; + + if (EQ(Qnil, lisp_char)) + { + /* This is ugly and horrible and trashes the user's data. */ + XSETFASTINT (i, MAKE_CHAR (charset_katakana_jisx0201, + 34 + 128, 46 + 128)); + return i; + } + else + { + return XFASTINT (lisp_char); + } + } + default: if (BASE_LEADING_CODE_P (c)) c = read_multibyte (c, readcharfun);