Mercurial > audlegacy
changeset 3215:ce2d24746c09 trunk
Add support for Greek and Hebrew character set detection.
author | William Pitcock <nenolod@atheme-project.org> |
---|---|
date | Thu, 02 Aug 2007 02:00:21 -0500 |
parents | faf6daa29d5c |
children | e91acf24afbc |
files | src/libguess/Makefile src/libguess/greek_impl.c src/libguess/guess.c src/libguess/hebrew_impl.c src/libguess/libguess.h |
diffstat | 5 files changed, 53 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- a/src/libguess/Makefile Thu Aug 02 01:38:22 2007 -0500 +++ b/src/libguess/Makefile Thu Aug 02 02:00:21 2007 -0500 @@ -12,6 +12,8 @@ guess.c \ arabic_impl.c \ cjk_impl.c \ + greek_impl.c \ + hebrew_impl.c \ russian_impl.c \ turkish_impl.c
/*
 * src/libguess/greek_impl.c
 *
 * Greek character set guesser: decides between CP1253 and ISO-8859-7 by
 * scanning the input for bytes that are treated as CP1253 evidence.
 */

/*
 * Returns non-zero when byte `c` is one of the values that makes the
 * detector answer "CP1253".
 *
 * NOTE(review): these are presumably code positions that CP1253 assigns
 * but ISO-8859-7 leaves unassigned or uses for C1 controls -- confirm
 * against the published code page tables before changing the list.
 */
static int gr_is_cp1253_marker(unsigned char c)
{
    return c == 0x80 ||
           (c >= 0x82 && c <= 0x87) ||
           c == 0x89 || c == 0x8B ||
           (c >= 0x91 && c <= 0x97) ||
           c == 0x99 || c == 0x9B ||
           c == 0xA4 || c == 0xA5 || c == 0xAE;
}

/*
 * Scans `size` bytes starting at `ptr`.  The first marker byte found ends
 * the scan with "CP1253"; if none is found (including when `size` is zero
 * or negative) the answer is "ISO-8859-7".
 *
 * The helper is file-local and deliberately does not use a leading
 * underscore: such identifiers are reserved at file scope.
 */
static const char *guess_gr_bytes(const unsigned char *ptr, int size)
{
    int i;

    for (i = 0; i < size; i++)
    {
        if (gr_is_cp1253_marker(ptr[i]))
            return "CP1253";
    }

    return "ISO-8859-7";
}

/*
 * Public entry point.
 *
 * ptr  - buffer to inspect; it is read as raw bytes, so the signedness of
 *        plain `char` does not affect the comparisons.
 * size - number of bytes to inspect; values <= 0 inspect nothing.
 *
 * Returns a pointer to a string literal: "CP1253" or "ISO-8859-7".
 * A null `ptr` is treated like an empty buffer instead of being
 * dereferenced.
 */
const char *guess_gr(const char *ptr, int size)
{
    if (!ptr)
        return "ISO-8859-7";

    return guess_gr_bytes((const unsigned char *) ptr, size);
}
--- a/src/libguess/guess.c Thu Aug 02 01:38:22 2007 -0500 +++ b/src/libguess/guess.c Thu Aug 02 02:00:21 2007 -0500 @@ -33,6 +33,8 @@ guess_impl_register(GUESS_REGION_RU, guess_ru); guess_impl_register(GUESS_REGION_AR, guess_ar); guess_impl_register(GUESS_REGION_TR, guess_tr); + guess_impl_register(GUESS_REGION_GR, guess_gr); + guess_impl_register(GUESS_REGION_HW, guess_hw); } const char *guess_encoding(const char *inbuf, int buflen, const char *lang)
/*
 * src/libguess/hebrew_impl.c
 *
 * Hebrew character set guesser: decides between CP1255 and ISO-8859-8-I
 * by scanning the input for bytes that are treated as evidence for one
 * encoding or the other.
 */

/*
 * Returns non-zero when byte `c` is one of the values that makes the
 * detector answer "CP1255".
 *
 * NOTE(review): these are presumably code positions that CP1255 assigns
 * but ISO-8859-8 does not -- confirm against the published code page
 * tables before changing the list.
 */
static int hw_is_cp1255_marker(unsigned char c)
{
    return c == 0x80 ||
           (c >= 0x82 && c <= 0x89) ||
           c == 0x8B ||
           (c >= 0x91 && c <= 0x99) ||
           c == 0x9B || c == 0xA1 ||
           (c >= 0xBF && c <= 0xC9) ||
           (c >= 0xCB && c <= 0xD8);
}

/*
 * Scans `size` bytes starting at `ptr` and stops at the first decisive
 * byte:
 *   - a CP1255 marker byte  -> "CP1255"
 *   - the byte 0xDF         -> "ISO-8859-8-I"
 * If no decisive byte is found (including when `size` is zero or
 * negative) the answer is "ISO-8859-8-I".
 *
 * The helper has internal linkage: it is an implementation detail of
 * guess_hw() and should not be exported.  It also avoids a leading
 * underscore, since such identifiers are reserved at file scope.
 */
static const char *guess_hw_bytes(const unsigned char *ptr, int size)
{
    int i;

    for (i = 0; i < size; i++)
    {
        if (hw_is_cp1255_marker(ptr[i]))
            return "CP1255";

        /*
         * Not redundant with the final return: 0xDF ends the scan at once,
         * so a CP1255 marker appearing later in the buffer is ignored.
         */
        if (ptr[i] == 0xDF)
            return "ISO-8859-8-I";
    }

    return "ISO-8859-8-I";
}

/*
 * Public entry point.
 *
 * ptr  - buffer to inspect; it is read as raw bytes, so the signedness of
 *        plain `char` does not affect the comparisons.
 * size - number of bytes to inspect; values <= 0 inspect nothing.
 *
 * Returns a pointer to a string literal: "CP1255" or "ISO-8859-8-I".
 * A null `ptr` is treated like an empty buffer instead of being
 * dereferenced.
 */
const char *guess_hw(const char *ptr, int size)
{
    if (!ptr)
        return "ISO-8859-8-I";

    return guess_hw_bytes((const unsigned char *) ptr, size);
}
--- a/src/libguess/libguess.h Thu Aug 02 01:38:22 2007 -0500 +++ b/src/libguess/libguess.h Thu Aug 02 02:00:21 2007 -0500 @@ -49,6 +49,8 @@ const char *guess_ru(const char *buf, int buflen); const char *guess_ar(const char *buf, int buflen); const char *guess_tr(const char *buf, int buflen); +const char *guess_gr(const char *buf, int buflen); +const char *guess_hw(const char *buf, int buflen); int dfa_validate_utf8(const char *buf, int buflen); #define GUESS_REGION_JP "japanese" @@ -58,6 +60,8 @@ #define GUESS_REGION_RU "russian" #define GUESS_REGION_AR "arabic" #define GUESS_REGION_TR "turkish" +#define GUESS_REGION_GR "greek" +#define GUESS_REGION_HW "hebrew" const char *guess_encoding(const char *buf, int buflen, const char *lang); void guess_init(void);