# HG changeset patch
# User William Pitcock
# Date 1185973693 18000
# Node ID 6bcfc65617115552806d6f0d98aa64274c19b291
# Parent e9f66c3905ece5c101da034a150817d72f0311e3
Implement support for Arabic and Turkish.

diff -r e9f66c3905ec -r 6bcfc6561711 src/libguess/Makefile
--- a/src/libguess/Makefile	Wed Aug 01 07:48:03 2007 -0500
+++ b/src/libguess/Makefile	Wed Aug 01 08:08:13 2007 -0500
@@ -10,7 +10,9 @@
 
 SOURCES = \
 	guess.c \
-	russian_impl.c
+	arabic_impl.c \
+	russian_impl.c \
+	turkish_impl.c
 
 OBJECTS = ${SOURCES:.c=.o}
 
diff -r e9f66c3905ec -r 6bcfc6561711 src/libguess/arabic_impl.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/libguess/arabic_impl.c	Wed Aug 01 08:08:13 2007 -0500
@@ -0,0 +1,38 @@
+#include "libguess.h"
+
+/*
+ * Return "CP1256" as soon as a byte falls in one of the ranges tested
+ * below; return "ISO-8859-6" when no byte does (or when size <= 0).
+ * NOTE(review): presumably these are the byte values ISO-8859-6 leaves
+ * unassigned -- confirm against the code page tables.
+ */
+static const char *_guess_ar(const unsigned char *ptr, int size)
+{
+    int i;
+
+    for (i = 0; i < size; i++)
+    {
+        if ((ptr[i] >= 0x80 && ptr[i] <= 0x9F) ||
+            ptr[i] == 0xA1 || ptr[i] == 0xA2 || ptr[i] == 0xA3 ||
+            (ptr[i] >= 0xA5 && ptr[i] <= 0xAB) ||
+            (ptr[i] >= 0xAE && ptr[i] <= 0xBA) ||
+            ptr[i] == 0xBC || ptr[i] == 0xBD ||
+            ptr[i] == 0xBE || ptr[i] == 0xC0 ||
+            (ptr[i] >= 0xDB && ptr[i] <= 0xDF) || (ptr[i] >= 0xF3))
+            return "CP1256";
+    }
+
+    return "ISO-8859-6";
+}
+
+/*
+ * Guess the encoding of an Arabic text buffer: "UTF-8" when
+ * dfa_validate_utf8() accepts it, otherwise the result of _guess_ar().
+ */
+const char *guess_ar(const char *ptr, int size)
+{
+    if (dfa_validate_utf8(ptr, size))
+        return "UTF-8";
+
+    return _guess_ar((const unsigned char *)ptr, size);
+}
diff -r e9f66c3905ec -r 6bcfc6561711 src/libguess/guess.c
--- a/src/libguess/guess.c	Wed Aug 01 07:48:03 2007 -0500
+++ b/src/libguess/guess.c	Wed Aug 01 08:08:13 2007 -0500
@@ -458,6 +458,8 @@
     guess_impl_register(GUESS_REGION_CN, guess_cn);
     guess_impl_register(GUESS_REGION_KR, guess_kr);
     guess_impl_register(GUESS_REGION_RU, guess_ru);
+    guess_impl_register(GUESS_REGION_AR, guess_ar);
+    guess_impl_register(GUESS_REGION_TR, guess_tr);
 }
 
 const char *guess_encoding(const char *inbuf, int buflen, const char *lang)
diff -r e9f66c3905ec -r 6bcfc6561711 src/libguess/libguess.h
--- a/src/libguess/libguess.h	Wed Aug 01 07:48:03 2007 -0500
+++ b/src/libguess/libguess.h	Wed Aug 01 08:08:13 2007 -0500
@@ -47,6 +47,8 @@
 const char *guess_cn(const char *buf, int buflen);
 const char *guess_kr(const char *buf, int buflen);
 const char *guess_ru(const char *buf, int buflen);
+const char *guess_ar(const char *buf, int buflen);
+const char *guess_tr(const char *buf, int buflen);
 int dfa_validate_utf8(const char *buf, int buflen);
 
 #define GUESS_REGION_JP "japanese"
@@ -54,6 +56,8 @@
 #define GUESS_REGION_CN "chinese"
 #define GUESS_REGION_KR "korean"
 #define GUESS_REGION_RU "russian"
+#define GUESS_REGION_AR "arabic"
+#define GUESS_REGION_TR "turkish"
 
 const char *guess_encoding(const char *buf, int buflen, const char *lang);
 void guess_init(void);
diff -r e9f66c3905ec -r 6bcfc6561711 src/libguess/turkish_impl.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/libguess/turkish_impl.c	Wed Aug 01 08:08:13 2007 -0500
@@ -0,0 +1,35 @@
+#include "libguess.h"
+
+/*
+ * Return "CP1254" as soon as a byte falls in one of the ranges tested
+ * below; return "ISO-8859-9" when no byte does (or when size <= 0).
+ * NOTE(review): presumably these are bytes CP1254 assigns in the
+ * 0x80-0x9F block, which ISO-8859-9 reserves for C1 controls -- confirm.
+ */
+static const char *_guess_tr(const unsigned char *ptr, int size)
+{
+    int i;
+
+    for (i = 0; i < size; i++)
+    {
+        if (ptr[i] == 0x80 ||
+            (ptr[i] >= 0x82 && ptr[i] <= 0x8C) ||
+            (ptr[i] >= 0x91 && ptr[i] <= 0x9C) ||
+            ptr[i] == 0x9F)
+            return "CP1254";
+    }
+
+    return "ISO-8859-9";
+}
+
+/*
+ * Guess the encoding of a Turkish text buffer: "UTF-8" when
+ * dfa_validate_utf8() accepts it, otherwise the result of _guess_tr().
+ */
+const char *guess_tr(const char *ptr, int size)
+{
+    if (dfa_validate_utf8(ptr, size))
+        return "UTF-8";
+
+    return _guess_tr((const unsigned char *)ptr, size);
+}