view arabic_impl.c @ 2:754a4550c64e

- Added Arabic, Greek, Hebrew and Turkish DFAs; added new UCS-2LE/BE DFAs; arabic_impl.c now uses the Arabic DFAs; the common DFA macros have been moved to dfa.h; minor cleanups.
author Yoshiki Yazawa <yaz@cc.rim.or.jp>
date Wed, 11 Jun 2008 00:11:30 +0900
parents d9b6ff839eab
children 70e2c306231e
line wrap: on
line source

#include "libguess.h"
#include "dfa.h"
#include "guess_tab.c"

/*
 * Candidate DFAs for Arabic text, listed in preference order.
 *
 * The macro expands to the addresses of local variables named utf8,
 * iso8859_6 and cp1256, so it can only be used inside a scope that declares
 * all three (guess_ar() uses it to initialise its candidate array).
 * When several candidates end up with the same score, the one listed
 * first here is the one guess_ar() selects.
 */
#define ORDER_AR &utf8, &iso8859_6, &cp1256

/*
 * Guess the character encoding of a buffer expected to hold Arabic text.
 *
 * Three DFAs (CP1256, ISO-8859-6, UTF-8) are run over the input one byte
 * at a time.  As soon as exactly one of them is still alive, its encoding
 * name is returned without examining the remaining bytes.  If every DFA
 * dies, the input matches none of the candidates and NULL is returned.
 * If the end of the buffer is reached with several DFAs still alive, the
 * live DFA with the highest score wins; on equal scores the one that
 * appears first in ORDER_AR wins.
 *
 * buf    - bytes to examine (read as unsigned char values)
 * buflen - number of bytes in buf; when it is <= 0 no byte is read and
 *          the result is decided purely by the initial DFA scores
 *
 * Returns one of the string literals "CP1256", "ISO-8859-6" or "UTF-8",
 * or NULL when no candidate fits.
 *
 * NOTE(review): the exact state/score update is done by DFA_NEXT from
 * dfa.h, which is not visible here -- confirm its semantics there.
 */
const char *guess_ar(const char *buf, int buflen)
{
    /* These three names must stay as they are: ORDER_AR refers to them. */
    guess_dfa cp1256 = DFA_INIT(guess_cp1256_st, guess_cp1256_ar);
    guess_dfa iso8859_6 = DFA_INIT(guess_iso8859_6_st, guess_iso8859_6_ar);
    guess_dfa utf8 = DFA_INIT(guess_utf8_st, guess_utf8_ar);

    guess_dfa *candidates[] = { ORDER_AR, NULL };
    guess_dfa **cursor;
    guess_dfa *best = NULL;
    int pos = 0;

    while (pos < buflen) {
        int byte = (unsigned char) buf[pos];

        /*
         * For each candidate: if it is the last one standing, report it
         * right away (before it consumes the current byte); otherwise
         * feed it the byte.  The checks are deliberately sequential, so
         * later candidates observe the already-updated state of earlier
         * ones.
         */
        if (DFA_ALIVE(cp1256)) {
            const int sole_survivor =
                !DFA_ALIVE(iso8859_6) && !DFA_ALIVE(utf8);

            if (sole_survivor)
                return "CP1256";
            DFA_NEXT(cp1256, byte);
        }

        if (DFA_ALIVE(iso8859_6)) {
            const int sole_survivor =
                !DFA_ALIVE(cp1256) && !DFA_ALIVE(utf8);

            if (sole_survivor)
                return "ISO-8859-6";
            DFA_NEXT(iso8859_6, byte);
        }

        if (DFA_ALIVE(utf8)) {
            const int sole_survivor =
                !DFA_ALIVE(cp1256) && !DFA_ALIVE(iso8859_6);

            if (sole_survivor)
                return "UTF-8";
            DFA_NEXT(utf8, byte);
        }

        /* Nothing left alive: the input fits none of the candidates. */
        if (!DFA_ALIVE(cp1256) && !DFA_ALIVE(iso8859_6) && !DFA_ALIVE(utf8))
            return NULL;

        pos++;
    }

    /*
     * The input is still ambiguous.  Walk the candidates in preference
     * order and keep the live one with the strictly highest score, so
     * that on a tie the earlier (preferred) entry is retained.
     */
    for (cursor = candidates; *cursor != NULL; cursor++) {
        guess_dfa *dfa = *cursor;

        if (dfa->state < 0)
            continue;           /* dead; same test as DFA_ALIVE() */

        if (best == NULL || dfa->score > best->score)
            best = dfa;
    }

    if (best == &utf8)
        return "UTF-8";
    if (best == &iso8859_6)
        return "ISO-8859-6";
    if (best == &cp1256)
        return "CP1256";
    return NULL;
}