view Wnn/romkan/rk_bltinfn.c @ 18:e7e2aba67cb3

disabled build for cWnn and kWnn by default
author Yoshiki Yazawa <yaz@cc.rim.or.jp>
date Mon, 14 Apr 2008 17:33:53 +0900
parents bbc77ca4def5
children a7ccf412ba02
line wrap: on
line source

/*
 *  $Id: rk_bltinfn.c,v 1.6 2005/04/10 15:26:38 aonoto Exp $
 */

/*
 * FreeWnn is a network-extensible Kana-to-Kanji conversion system.
 * This file is part of FreeWnn.
 * 
 * Copyright Kyoto University Research Institute for Mathematical Sciences
 *                 1987, 1988, 1989, 1990, 1991, 1992
 * Copyright OMRON Corporation. 1987, 1988, 1989, 1990, 1991, 1992, 1999
 * Copyright ASTEC, Inc. 1987, 1988, 1989, 1990, 1991, 1992
 * Copyright FreeWnn Project 1999, 2000, 2002
 *
 * Maintainer:  FreeWnn Project   <freewnn@tomo.gr.jp>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

/***********************************************************************
                        rk_bltinfn.c
                                                87.12. 6  revised and extended

        Defines the more complex of the built-in functions used for
        conversion.  Mainly full-width <-> half-width conversion.
***********************************************************************/
/*  Version 3.0  */

#ifdef HAVE_CONFIG_H
#  include <config.h>
#endif

#if STDC_HEADERS
#  include <string.h>
#elif HAVE_STRINGS_H
#  include <strings.h>
#endif /* STDC_HEADERS */

#include "commonhd.h"
#include "wnn_config.h"
#include "rk_header.h"

 /* Ⱦ³Ñʸ»ú¤Î¥³¡¼¥É¤Îdefine */
#define HKCHOU  (HNKAK1 * 0x100 + 0xB0) /* Ž° */
#define HKDKTN  (HNKAK1 * 0x100 + 0xDE) /* ŽÞ */
#define HKHNDK  (HNKAK1 * 0x100 + 0xDF) /* Žß */
#define HKMARU  (HNKAK1 * 0x100 + 0xA1) /* Ž¡ */
#define HKHRKG  (HNKAK1 * 0x100 + 0xA2) /* Ž¢ */
#define HKTJKG  (HNKAK1 * 0x100 + 0xA3) /* Ž£ */
#define HKTTEN  (HNKAK1 * 0x100 + 0xA4) /* Ž¤ */
#define HKNKPT  (HNKAK1 * 0x100 + 0xA5) /* Ž¥ */

 /* Á´³Ñʸ»ú¤Î¥³¡¼¥É¤Îdefine */
#define CHOUON  (0xA1BC)        /* ¡¼ */
#define DAKUTN  (0xA1AB)        /* ¡« */
#define HNDAKU  (0xA1AC)        /* ¡¬ */
#define MNMARU  (0xA1A3) /* ¡£ */       /* ̾Á°¤Ï MaNMARU¡Ê¤Þ¤ó¤Þ¤ë¡Ë¤Îά */
#define HRKKAG  (0xA1D6)        /* ¡Ö */
#define TJIKAG  (0xA1D7)        /* ¡× */
#define TOUTEN  (0xA1A2)        /* ¡¢ */
#define NKPOTU  (0xA1A6)        /* ¡¦ */

/* Language name consulted by to_zenalpha() and to_zenhira() when choosing
   a conversion table: at most five characters plus the terminating NUL. */
static char _lang[6];

/** Remember LANG as the current language name.
    Only the first (sizeof _lang - 1) characters are kept; the stored name
    is always NUL-terminated.  A null LANG is treated as the empty name
    instead of being handed to strncpy(). */
void
romkan_set_lang (lang)
     char *lang;
{
  if (!lang)
    lang = "";
  /* strncpy() zero-fills the remainder, so no stale characters from a
     previous, longer name survive. */
  strncpy (_lang, lang, sizeof _lang - 1);
  _lang[sizeof _lang - 1] = 0;
}

 /** ASCII character -> full-width character.
     For l in the range ' ' .. '~' the result is built from two consecutive
     bytes of a lookup table, starting at index (l - ' ') * 2, combined as
     (first << 8) + second.  Any other value of l is returned unchanged.
     The table is chosen from _lang: data_cn when CHINESE is compiled in and
     _lang equals WNN_C_LANG or WNN_T_LANG, data_ko when KOREAN is compiled
     in and _lang equals WNN_K_LANG, otherwise data. */
/* *INDENT-OFF* */
letter
to_zenalpha (l)
    letter l;
/* *INDENT-ON* */
{
  letter retval;

  /* Default table: one two-byte character per ASCII code ' ' .. '~'.
     The bytes of these literals are data; do not re-encode them. */
  static uns_chr *data = (uns_chr *) "¡¡¡ª¡É¡ô¡ð¡ó¡õ¡Ç¡Ê¡Ë¡ö¡Ü¡¤¡Ý¡¥¡¿£°£±£²£³£´£µ£¶£·£¸£¹¡§¡¨¡ã¡á¡ä¡©\
¡÷£Á£Â£Ã£Ä£Å£Æ£Ç£È£É£Ê£Ë£Ì£Í£Î£Ï£Ð£Ñ£Ò£Ó£Ô£Õ£Ö£×£Ø£Ù£Ú¡Î¡ï¡Ï¡°¡²¡®£á£â£ã£ä£å\
£æ£ç£è£é£ê£ë£ì£í£î£ï£ð£ñ£ò£ó£ô£õ£ö£÷£ø£ù£ú¡Ð¡Ã¡Ñ¡±";

#ifdef  CHINESE
  /* Table used when _lang is WNN_C_LANG or WNN_T_LANG. */
  static uns_chr *data_cn = (uns_chr *) "¡¡£¡¡±££¡ç£¥£¦£§£¨£©¡ù£«£¬¡ª£®£¯£°£±£²£³£´£µ£¶£·£¸£¹£º£»¡´£½¡µ£¿\
£À£Á£Â£Ã£Ä£Å£Æ£Ç£È£É£Ê£Ë£Ì£Í£Î£Ï£Ð£Ñ£Ò£Ó£Ô£Õ£Ö£×£Ø£Ù£Ú£Û£¤£Ý£Þ£ß£à£á£â£ã£ä£å\
£æ£ç£è£é£ê£ë£ì£í£î£ï£ð£ñ£ò£ó£ô£õ£ö£÷£ø£ù£ú£û£ü£ý£þ";
#endif /* CHINESE */
#ifdef  KOREAN
  /* Table used when _lang is WNN_K_LANG. */
  static uns_chr *data_ko = (uns_chr *) "¡¡£¡£¢£££¤£¥£¦£§£¨£©£ª£«£¬£­£®£¯£°£±£²£³£´£µ£¶£·£¸£¹£º£»£¼£½£¾£¿\
£À£Á£Â£Ã£Ä£Å£Æ£Ç£È£É£Ê£Ë£Ì£Í£Î£Ï£Ð£Ñ£Ò£Ó£Ô£Õ£Ö£×£Ø£Ù£Ú£Û£Ü£Ý£Þ£ß£à£á£â£ã£ä£å\
£æ£ç£è£é£ê£ë£ì£í£î£ï£ð£ñ£ò£ó£ô£õ£ö£÷£ø£ù£ú£û£ü£ý£þ";
#endif /* KOREAN */

  if (' ' <= l && l <= '~')
    {
      /* Each table entry is two bytes wide, so double the offset. */
      l = (l - ' ') << 1;
#ifdef  CHINESE
      if (!strcmp (_lang, WNN_C_LANG) || !strcmp (_lang, WNN_T_LANG))
        {
          retval = data_cn[l] << 8;
          retval += data_cn[++l];
        }
      else
#endif
#ifdef  KOREAN
      if (!strcmp (_lang, WNN_K_LANG))
        {
          retval = data_ko[l] << 8;
          retval += data_ko[++l];
        }
      else
#endif /* KOREAN */
        {
          /* The dangling "else" of each conditional section above chains
             into this default branch. */
          retval = data[l] << 8;
          retval += data[++l];
        }
      return (retval);
    }
  else
    return (l);
}

/* Half-width katakana spellings, one string per kana.  to_hankata()
   indexes this table with (in - KATBGN) or (in - HIRBGN) and reads each
   string as a sequence of two-byte characters; an entry holds one or two
   such characters (a base character optionally followed by a voiced or
   semi-voiced mark).  hank_setup() may rewrite the first byte of the
   half-width characters at start-up.  The bytes of these literals are
   data; do not re-encode them. */
static char *hankdata[] = {
  "Ž§", "Ž±", "Ž¨", "Ž²", "Ž©", "Ž³", "Žª", "Ž´", "Ž«", "Žµ",
  "Ž¶", "Ž¶ŽÞ", "Ž·", "Ž·ŽÞ", "Ž¸", "Ž¸ŽÞ", "Ž¹", "Ž¹ŽÞ", "Žº", "ŽºŽÞ",
  "Ž»", "Ž»ŽÞ", "Ž¼", "Ž¼ŽÞ", "Ž½", "Ž½ŽÞ", "Ž¾", "Ž¾ŽÞ", "Ž¿", "Ž¿ŽÞ",
  "ŽÀ", "ŽÀŽÞ", "ŽÁ", "ŽÁŽÞ", "Ž¯", "ŽÂ", "ŽÂŽÞ", "ŽÃ", "ŽÃŽÞ", "ŽÄ", "ŽÄŽÞ",
  "ŽÅ", "ŽÆ", "ŽÇ", "ŽÈ", "ŽÉ",
  "ŽÊ", "ŽÊŽÞ", "ŽÊŽß", "ŽË", "ŽËŽÞ", "ŽËŽß", "ŽÌ", "ŽÌŽÞ", "ŽÌŽß",
  "ŽÍ", "ŽÍŽÞ", "ŽÍŽß", "ŽÎ", "ŽÎŽÞ", "ŽÎŽß",
  "ŽÏ", "ŽÐ", "ŽÑ", "ŽÒ", "ŽÓ",
  "Ž¬", "ŽÔ", "Ž­", "ŽÕ", "Ž®", "ŽÖ",
  "Ž×", "ŽØ", "ŽÙ", "ŽÚ", "ŽÛ",
  "¥î", "ŽÜ", "¥ð", "¥ñ", "Ž¦", "ŽÝ",
  "Ž³ŽÞ", "¥õ", "¥ö"
};                              /* Note: some entries are full-width characters. */

 /**    If hankdata above does not hold the half-width codes actually in
        use, patch it so that it does.  Call once, at initialization. */
void
hank_setup ()
{
  /* First byte of a half-width character as written in hankdata[]. */
  char first_byte = *hankdata[0];
  char *cp;
  int idx;

  /* When porting to a machine on which only the first byte of half-width
     characters differs, changing the definition of HNKAK1 is enough.
     Converting the half-width characters in the romkan sources (they
     occur only in this file) to suit that machine would be preferable,
     but editing this file can mishandle half-width characters, so this
     function makes things work even without such a conversion: at
     initialization it rewrites hankdata to the real half-width codes.

     NOTE(review): the hankdata entries are initialized from string
     literals, and ISO C leaves writing to a string literal undefined;
     the stores below are only reached when the first byte differs from
     HNKAK1. */
  if (first_byte != (char) HNKAK1)
    {
      for (idx = 0; idx < numberof (hankdata); idx++)
        {
          /* Entries are made of two-byte characters: look at every
             other byte only. */
          cp = hankdata[idx];
          while (*cp)
            {
              if (*cp == first_byte)
                *cp = HNKAK1;
              cp += 2;
            }
        }
    }
}

 /** Append to DST one letter per two-byte character of the NUL-terminated
     byte string SRC, each built as (first byte << 8) + second byte.
     Returns the position just past the last letter written. */
static letter *
put_byte_pairs (src, dst)
     uns_chr *src;
     letter *dst;
{
  while (*src)
    {
      *dst++ = (src[0] << 8) + src[1];
      src += 2;
    }
  return (dst);
}

 /** Kana -> half-width katakana.  The result may be two characters long.
     The converted letters are stored at *outp followed by EOLTTR, and
     *outp is advanced to point at that terminator.  The listed full-width
     punctuation marks map to their half-width codes, katakana and hiragana
     are looked up in hankdata, and anything else is copied unchanged. */
void
to_hankata (in, outp)
     letter in, **outp;
{
  letter *dst = *outp;

  if (in == CHOUON)
    *dst++ = HKCHOU;
  else if (in == DAKUTN)
    *dst++ = HKDKTN;
  else if (in == HNDAKU)
    *dst++ = HKHNDK;
  else if (in == MNMARU)
    *dst++ = HKMARU;
  else if (in == HRKKAG)
    *dst++ = HKHRKG;
  else if (in == TJIKAG)
    *dst++ = HKTJKG;
  else if (in == TOUTEN)
    *dst++ = HKTTEN;
  else if (in == NKPOTU)
    *dst++ = HKNKPT;
  else if (is_kata (in))
    dst = put_byte_pairs ((uns_chr *) hankdata[in - KATBGN], dst);
  else if (is_hira (in))
    dst = put_byte_pairs ((uns_chr *) hankdata[in - HIRBGN], dst);
  else
    *dst++ = in;

  *dst = EOLTTR;
  *outp = dst;
}

 /**    Half-width katakana -> hiragana.  Note that a character followed by
        a dakuten is not merged into a single character.
        For l satisfying is_hankata() the result is built from two
        consecutive bytes of a lookup table, starting at index
        (l - HKKBGN) * 2, combined as (first << 8) + second; any other
        value of l is returned unchanged.  When CHINESE is compiled in and
        _lang equals WNN_C_LANG or WNN_T_LANG the table data_cn is used
        instead of data. */
/* *INDENT-OFF* */
letter
to_zenhira (l)
    letter l;
/* *INDENT-ON* */
{
  letter retval;

  /* Default table: one two-byte character per half-width code, in code
     order starting at HKKBGN.  The bytes of these literals are data; do
     not re-encode them. */
  static uns_chr *data = (uns_chr *) "¡£¡Ö¡×¡¢¡¦¤ò¤¡¤£¤¥¤§¤©¤ã¤å¤ç¤Ã¡¼¤¢¤¤¤¦¤¨¤ª¤«¤­¤¯¤±¤³¤µ¤·¤¹¤»¤½¤¿\
¤Á¤Ä¤Æ¤È¤Ê¤Ë¤Ì¤Í¤Î¤Ï¤Ò¤Õ¤Ø¤Û¤Þ¤ß¤à¤á¤â¤ä¤æ¤è¤é¤ê¤ë¤ì¤í¤ï¤ó¡«¡¬";

#ifdef  CHINESE
  /* Table used when _lang is WNN_C_LANG or WNN_T_LANG. */
  static uns_chr *data_cn = (uns_chr *) "¡£¡¸¡¹¡¢¡¤¤ò¤¡¤£¤¥¤§¤©¤ã¤å¤ç¤Ã¡¼¤¢¤¤¤¦¤¨¤ªÂ𤭤¯¤±¤³¶È¤·¤¹¤»¤½¤¿\
¤Á¤Ä¤Æ¤È¤Ê¤Ë¤Ì¤ÍµÄ¤Ï¤Ò¤Õ¤Ø¤Û¤Þ¤ß¤à¤áÒ²¤ä¤æ¤è¤é¤ê¤ë¤ì¤í¤ï¤ó¡å¡ã";
#endif /* CHINESE */

  if (is_hankata (l))
    {
      /* Each table entry is two bytes wide, so double the offset. */
      l = (l - HKKBGN) << 1;
#ifdef  CHINESE
      if (!strcmp (_lang, WNN_C_LANG) || !strcmp (_lang, WNN_T_LANG))
        {
          retval = data_cn[l] << 8;
          retval += data_cn[++l];
        }
      else
#endif
        {
          retval = data[l] << 8;
          retval += data[++l];
        }
      return (retval);
    }
  else
    return (l);
}

 /**    Half-width katakana -> full-width katakana.  Note that a character
        followed by a dakuten is not merged into a single character.
        Letters that are not half-width katakana are returned unchanged. */
/* *INDENT-OFF* */
letter
to_zenkata (l)
   letter l;
/* *INDENT-ON* */
{
  if (!is_hankata (l))
    return (l);

  /* Go through hiragana first and store the intermediate result in a
     plain variable before handing it to to_kata(), as the original
     expression did (to_kata may be a macro -- TODO confirm). */
  l = to_zenhira (l);
  return (to_kata (l));
}

 /* Build one byte of a bit vector: b0 becomes bit 0 (least significant),
    b7 becomes bit 7.  Every argument is parenthesized so that an
    expression argument cannot be split apart by the cast or the shift. */
#define bitvec(b0, b1, b2, b3, b4, b5, b6, b7) (                         \
        (char) (b0)        | ((char) (b1) << 1) |                        \
        ((char) (b2) << 2) | ((char) (b3) << 3) |                        \
        ((char) (b4) << 4) | ((char) (b5) << 5) |                        \
        ((char) (b6) << 6) | ((char) (b7) << 7)                          \
)

 /** Treat the char array h as a bit vector and test its i-th bit: the
     result is non-zero when the bit is set, zero otherwise. */
#define bitlook(h, i) ((h)[(i) >> 3] & (1 << ((i) & 7)))

/* Two-byte codes of the full-width characters that handakuadd() and
   dakuadd() test against. */
#define KATRPT  0xA1B3          /* katakana iteration mark */
#define HIRRPT  0xA1B5          /* hiragana iteration mark */
#define KATA_U  0xA5A6          /* katakana U */
#define KAT_VU  0xA5F4          /* katakana VU */
#define HIR_KA  0xA4AB          /* hiragana KA */
#define HIR_HO  0xA4DB          /* hiragana HO */
#define KAT_KA  0xA5AB          /* katakana KA */
#define KAT_HO  0xA5DB          /* katakana HO */
#define HIR_HA  0xA4CF          /* hiragana HA */
#define KAT_HA  0xA5CF          /* katakana HA */

 /**    Attach a handakuten (semi-voiced mark) after IN.  The result is one
        or two characters, stored at *outp and followed by EOLTTR; *outp is
        left pointing at that terminator. */
void
handakuadd (in, outp)
     letter in, **outp;
{
  int fuse;

  /* Between HA and HO the codes run in groups of three (plain, voiced,
     semi-voiced), so only every third code, counted from HA, is a plain
     kana that has a semi-voiced form two codes further on. */
  if (HIR_HA <= in && in <= HIR_HO)
    fuse = (0 == (in - HIR_HA) % 3);
  else if (KAT_HA <= in && in <= KAT_HO)
    fuse = (0 == (in - KAT_HA) % 3);
  else
    fuse = 0;

  if (fuse)
    {
      **outp = in + 2;
      ++*outp;
    }
  else
    {
      /* No combined form: emit the character followed by a separate
         full-width handakuten. */
      (*outp)[0] = in;
      (*outp)[1] = HNDAKU;
      *outp += 2;
    }
  **outp = EOLTTR;
}

 /**    Attach a dakuten (voiced mark) after IN.  The result is one or two
        characters, stored at *outp and followed by EOLTTR; *outp is left
        pointing at that terminator. */
void
dakuadd (in, outp)
     letter in, **outp;
{
  /* One bit per kana from KA through HO, in code order; a set bit means
     the voiced form is the next code (in + 1). */
  static char flgbit[] = {
    bitvec (1, 0, 1, 0, 1, 0, 1, 0),    /* ka ga ki gi ku gu ke ge */
    bitvec (1, 0, 1, 0, 1, 0, 1, 0),    /* ko go sa za shi ji su zu */
    bitvec (1, 0, 1, 0, 1, 0, 1, 0),    /* se ze so zo ta da chi di */
    bitvec (0, 1, 0, 1, 0, 1, 0, 0),    /* small-tsu tsu du te de to do na */
    bitvec (0, 0, 0, 0, 1, 0, 0, 1),    /* ni nu ne no ha ba pa hi */
    bitvec (0, 0, 1, 0, 0, 1, 0, 0),    /* bi pi fu bu pu he be pe */
    bitvec (1, 0, 0, 0, 0, 0, 0, 0)     /* ho */
  };
  letter c = 0;
  letter voiced = 0;            /* 0 means "no single-character form" */
  int in_table = 1;

  /* Offset of IN inside the KA..HO range, hiragana or katakana. */
  if (HIR_KA <= in && in <= HIR_HO)
    c = in - HIR_KA;
  else if (KAT_KA <= in && in <= KAT_HO)
    c = in - KAT_KA;
  else
    in_table = 0;

  if (in_table)
    {
      if (bitlook (flgbit, c))
        voiced = in + 1;
    }
  else if (in == KATRPT || in == HIRRPT)
    voiced = in + 1;            /* iteration mark -> the code after it */
  else if (in == KATA_U)
    voiced = KAT_VU;

  if (voiced != 0)
    {
      **outp = voiced;
      ++*outp;
    }
  else
    {
      /* No combined form: emit the character followed by a separate
         full-width dakuten. */
      (*outp)[0] = in;
      (*outp)[1] = DAKUTN;
      *outp += 2;
    }
  **outp = EOLTTR;
}

 /** Turn the code given in IN into digits in base BASE and store them at
     *outp, most significant digit first, followed by EOLTTR; *outp is left
     pointing at that terminator.  Each digit value is passed through
     vtol(), which is defined elsewhere (presumably value -> digit letter;
     confirm against its definition). */
void
to_digit (in, base, outp)
     letter in, base, **outp;
{
  letter vtol ();
  letter higher = in / base;

  /* Emit the more significant digits first. */
  if (higher != 0)
    to_digit (higher, base, outp);

  **outp = vtol (in % base);
  ++*outp;
  **outp = EOLTTR;
}