# HG changeset patch
# User William Pitcock
# Date 1186038021 18000
# Node ID ce2d24746c091690f79036f15a9127391f8620ac
# Parent faf6daa29d5c4d0d8cc269fd3ceeff029a466c01
Add support for greek and hebrew character set detection.

diff -r faf6daa29d5c -r ce2d24746c09 src/libguess/Makefile
--- a/src/libguess/Makefile	Thu Aug 02 01:38:22 2007 -0500
+++ b/src/libguess/Makefile	Thu Aug 02 02:00:21 2007 -0500
@@ -12,6 +12,8 @@
 	guess.c \
 	arabic_impl.c \
 	cjk_impl.c \
+	greek_impl.c \
+	hebrew_impl.c \
 	russian_impl.c \
 	turkish_impl.c
 
diff -r faf6daa29d5c -r ce2d24746c09 src/libguess/greek_impl.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/libguess/greek_impl.c	Thu Aug 02 02:00:21 2007 -0500
@@ -0,0 +1,39 @@
+/*
+ * Scan `size` bytes at `ptr` and name the Greek encoding they suggest.
+ *
+ * Returns "CP1253" as soon as any of the byte values tested in the loop
+ * is seen, and "ISO-8859-7" when none is found (including size <= 0).
+ * The result points at a string literal; callers must not free it.
+ */
+static const char *_guess_gr(const unsigned char *ptr, int size)
+{
+    int i;
+
+    for (i = 0; i < size; i++)
+    {
+        /*
+         * Byte values treated as proof of CP1253.  Presumably these are
+         * positions ISO-8859-7 leaves as C1 controls or unassigned;
+         * confirm against the code page tables before extending the list.
+         */
+        if (ptr[i] == 0x80 ||
+            (ptr[i] >= 0x82 && ptr[i] <= 0x87) ||
+            ptr[i] == 0x89 || ptr[i] == 0x8B ||
+            (ptr[i] >= 0x91 && ptr[i] <= 0x97) ||
+            ptr[i] == 0x99 || ptr[i] == 0x9B || ptr[i] == 0xA4 ||
+            ptr[i] == 0xA5 || ptr[i] == 0xAE)
+            return "CP1253";
+    }
+
+    return "ISO-8859-7";
+}
+
+/*
+ * Exported detector, registered for GUESS_REGION_GR in guess.c.  The cast
+ * to unsigned char keeps the byte comparisons independent of whether
+ * plain char is signed on the target.
+ */
+const char *guess_gr(const char *ptr, int size)
+{
+    return _guess_gr((const unsigned char *) ptr, size);
+}
diff -r faf6daa29d5c -r ce2d24746c09 src/libguess/guess.c
--- a/src/libguess/guess.c	Thu Aug 02 01:38:22 2007 -0500
+++ b/src/libguess/guess.c	Thu Aug 02 02:00:21 2007 -0500
@@ -33,6 +33,8 @@
     guess_impl_register(GUESS_REGION_RU, guess_ru);
     guess_impl_register(GUESS_REGION_AR, guess_ar);
     guess_impl_register(GUESS_REGION_TR, guess_tr);
+    guess_impl_register(GUESS_REGION_GR, guess_gr);
+    guess_impl_register(GUESS_REGION_HW, guess_hw);
 }
 
 const char *guess_encoding(const char *inbuf, int buflen, const char *lang)
diff -r faf6daa29d5c -r ce2d24746c09 src/libguess/hebrew_impl.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/libguess/hebrew_impl.c	Thu Aug 02 02:00:21 2007 -0500
@@ -0,0 +1,43 @@
+/*
+ * Scan `size` bytes at `ptr` and name the Hebrew encoding they suggest.
+ *
+ * The first decisive byte wins: any of the values tested first in the loop
+ * yields "CP1255", while 0xDF yields "ISO-8859-8-I".  With no decisive
+ * byte (including size <= 0) the result is "ISO-8859-8-I".  The result
+ * points at a string literal; callers must not free it.
+ *
+ * File-local, matching _guess_gr() in greek_impl.c; only guess_hw() gets
+ * a prototype in libguess.h.
+ */
+static const char *_guess_hw(const unsigned char *ptr, int size)
+{
+    int i;
+
+    for (i = 0; i < size; i++)
+    {
+        if (ptr[i] == 0x80 || (ptr[i] >= 0x82 && ptr[i] <= 0x89) || ptr[i] == 0x8B ||
+            (ptr[i] >= 0x91 && ptr[i] <= 0x99) || ptr[i] == 0x9B || ptr[i] == 0xA1 ||
+            (ptr[i] >= 0xBF && ptr[i] <= 0xC9) ||
+            (ptr[i] >= 0xCB && ptr[i] <= 0xD8))
+            return "CP1255";
+
+        /*
+         * Not redundant with the final return: stopping at 0xDF prevents a
+         * later CP1255-looking byte from overriding the verdict.
+         */
+        if (ptr[i] == 0xDF)
+            return "ISO-8859-8-I";
+    }
+
+    return "ISO-8859-8-I";
+}
+
+/*
+ * Exported detector, registered for GUESS_REGION_HW in guess.c.  The cast
+ * to unsigned char keeps the byte comparisons independent of whether
+ * plain char is signed on the target.
+ */
+const char *guess_hw(const char *ptr, int size)
+{
+    return _guess_hw((const unsigned char *) ptr, size);
+}
diff -r faf6daa29d5c -r ce2d24746c09 src/libguess/libguess.h
--- a/src/libguess/libguess.h	Thu Aug 02 01:38:22 2007 -0500
+++ b/src/libguess/libguess.h	Thu Aug 02 02:00:21 2007 -0500
@@ -49,6 +49,8 @@
 const char *guess_ru(const char *buf, int buflen);
 const char *guess_ar(const char *buf, int buflen);
 const char *guess_tr(const char *buf, int buflen);
+const char *guess_gr(const char *buf, int buflen);
+const char *guess_hw(const char *buf, int buflen);
 int dfa_validate_utf8(const char *buf, int buflen);
 
 #define GUESS_REGION_JP "japanese"
@@ -58,6 +60,8 @@
 #define GUESS_REGION_RU "russian"
 #define GUESS_REGION_AR "arabic"
 #define GUESS_REGION_TR "turkish"
+#define GUESS_REGION_GR "greek"
+#define GUESS_REGION_HW "hebrew"
 
 const char *guess_encoding(const char *buf, int buflen, const char *lang);
 void guess_init(void);