diff Wnn/romkan/rk_bltinfn.c @ 0:bbc77ca4def5

initial import
author Yoshiki Yazawa <yaz@cc.rim.or.jp>
date Thu, 13 Dec 2007 04:30:14 +0900
parents
children a7ccf412ba02
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Wnn/romkan/rk_bltinfn.c	Thu Dec 13 04:30:14 2007 +0900
@@ -0,0 +1,380 @@
+/*
+ *  $Id: rk_bltinfn.c,v 1.6 2005/04/10 15:26:38 aonoto Exp $
+ */
+
+/*
+ * FreeWnn is a network-extensible Kana-to-Kanji conversion system.
+ * This file is part of FreeWnn.
+ * 
+ * Copyright Kyoto University Research Institute for Mathematical Sciences
+ *                 1987, 1988, 1989, 1990, 1991, 1992
+ * Copyright OMRON Corporation. 1987, 1988, 1989, 1990, 1991, 1992, 1999
+ * Copyright ASTEC, Inc. 1987, 1988, 1989, 1990, 1991, 1992
+ * Copyright FreeWnn Project 1999, 2000, 2002
+ *
+ * Maintainer:  FreeWnn Project   <freewnn@tomo.gr.jp>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+/***********************************************************************
+                        rk_bltinfn.c
+                                                87.12. 6  Äû Êä
+
+        ÊÑ´¹ÍѤÎÁȤ߹þ¤ß´Ø¿ô¤Î¤¦¤ÁÊ£»¨¤Ê¤â¤Î¤òÄêµÁ¤·¤Æ¤¢¤ë¡£
+        Á´³Ñ¢«¢ªÈ¾³Ñ¤ÎÊÑ´¹¤¬¼çÂΡ£
+***********************************************************************/
+/*  Version 3.0  */
+
+#ifdef HAVE_CONFIG_H
+#  include <config.h>
+#endif
+
+#if STDC_HEADERS
+#  include <string.h>
+#elif HAVE_STRINGS_H
+#  include <strings.h>
+#endif /* STDC_HEADERS */
+
+#include "commonhd.h"
+#include "wnn_config.h"
+#include "rk_header.h"
+
+ /* Ⱦ³Ñʸ»ú¤Î¥³¡¼¥É¤Îdefine */
+#define HKCHOU  (HNKAK1 * 0x100 + 0xB0) /* Ž° */
+#define HKDKTN  (HNKAK1 * 0x100 + 0xDE) /* ŽÞ */
+#define HKHNDK  (HNKAK1 * 0x100 + 0xDF) /* Žß */
+#define HKMARU  (HNKAK1 * 0x100 + 0xA1) /* Ž¡ */
+#define HKHRKG  (HNKAK1 * 0x100 + 0xA2) /* Ž¢ */
+#define HKTJKG  (HNKAK1 * 0x100 + 0xA3) /* Ž£ */
+#define HKTTEN  (HNKAK1 * 0x100 + 0xA4) /* Ž¤ */
+#define HKNKPT  (HNKAK1 * 0x100 + 0xA5) /* Ž¥ */
+
+ /* Á´³Ñʸ»ú¤Î¥³¡¼¥É¤Îdefine */
+#define CHOUON  (0xA1BC)        /* ¡¼ */
+#define DAKUTN  (0xA1AB)        /* ¡« */
+#define HNDAKU  (0xA1AC)        /* ¡¬ */
+#define MNMARU  (0xA1A3) /* ¡£ */       /* ̾Á°¤Ï MaNMARU¡Ê¤Þ¤ó¤Þ¤ë¡Ë¤Îά */
+#define HRKKAG  (0xA1D6)        /* ¡Ö */
+#define TJIKAG  (0xA1D7)        /* ¡× */
+#define TOUTEN  (0xA1A2)        /* ¡¢ */
+#define NKPOTU  (0xA1A6)        /* ¡¦ */
+
+static char _lang[6];
+
+void
+romkan_set_lang (lang)
+     char *lang;
+{
+  strncpy (_lang, lang, 5);
+  _lang[5] = 0;
+}
+
+ /** ASCIIʸ»ú¢ªÁ´³Ñ */
+/* *INDENT-OFF* */
+letter
+to_zenalpha (l)
+    letter l;
+/* *INDENT-ON* */
+{
+  letter retval;
+
+  static uns_chr *data = (uns_chr *) "¡¡¡ª¡É¡ô¡ð¡ó¡õ¡Ç¡Ê¡Ë¡ö¡Ü¡¤¡Ý¡¥¡¿£°£±£²£³£´£µ£¶£·£¸£¹¡§¡¨¡ã¡á¡ä¡©\
+¡÷£Á£Â£Ã£Ä£Å£Æ£Ç£È£É£Ê£Ë£Ì£Í£Î£Ï£Ð£Ñ£Ò£Ó£Ô£Õ£Ö£×£Ø£Ù£Ú¡Î¡ï¡Ï¡°¡²¡®£á£â£ã£ä£å\
+£æ£ç£è£é£ê£ë£ì£í£î£ï£ð£ñ£ò£ó£ô£õ£ö£÷£ø£ù£ú¡Ð¡Ã¡Ñ¡±";
+
+#ifdef  CHINESE
+  static uns_chr *data_cn = (uns_chr *) "¡¡£¡¡±££¡ç£¥£¦£§£¨£©¡ù£«£¬¡ª£®£¯£°£±£²£³£´£µ£¶£·£¸£¹£º£»¡´£½¡µ£¿\
+£À£Á£Â£Ã£Ä£Å£Æ£Ç£È£É£Ê£Ë£Ì£Í£Î£Ï£Ð£Ñ£Ò£Ó£Ô£Õ£Ö£×£Ø£Ù£Ú£Û£¤£Ý£Þ£ß£à£á£â£ã£ä£å\
+£æ£ç£è£é£ê£ë£ì£í£î£ï£ð£ñ£ò£ó£ô£õ£ö£÷£ø£ù£ú£û£ü£ý£þ";
+#endif /* CHINESE */
+#ifdef  KOREAN
+  static uns_chr *data_ko = (uns_chr *) "¡¡£¡£¢£££¤£¥£¦£§£¨£©£ª£«£¬£­£®£¯£°£±£²£³£´£µ£¶£·£¸£¹£º£»£¼£½£¾£¿\
+£À£Á£Â£Ã£Ä£Å£Æ£Ç£È£É£Ê£Ë£Ì£Í£Î£Ï£Ð£Ñ£Ò£Ó£Ô£Õ£Ö£×£Ø£Ù£Ú£Û£Ü£Ý£Þ£ß£à£á£â£ã£ä£å\
+£æ£ç£è£é£ê£ë£ì£í£î£ï£ð£ñ£ò£ó£ô£õ£ö£÷£ø£ù£ú£û£ü£ý£þ";
+#endif /* KOREAN */
+
+  if (' ' <= l && l <= '~')
+    {
+      l = (l - ' ') << 1;
+#ifdef  CHINESE
+      if (!strcmp (_lang, WNN_C_LANG) || !strcmp (_lang, WNN_T_LANG))
+        {
+          retval = data_cn[l] << 8;
+          retval += data_cn[++l];
+        }
+      else
+#endif
+#ifdef  KOREAN
+      if (!strcmp (_lang, WNN_K_LANG))
+        {
+          retval = data_ko[l] << 8;
+          retval += data_ko[++l];
+        }
+      else
+#endif /* KOREAN */
+        {
+          retval = data[l] << 8;
+          retval += data[++l];
+        }
+      return (retval);
+    }
+  else
+    return (l);
+}
+
+static char *hankdata[] = {
+  "Ž§", "Ž±", "Ž¨", "Ž²", "Ž©", "Ž³", "Žª", "Ž´", "Ž«", "Žµ",
+  "Ž¶", "Ž¶ŽÞ", "Ž·", "Ž·ŽÞ", "Ž¸", "Ž¸ŽÞ", "Ž¹", "Ž¹ŽÞ", "Žº", "ŽºŽÞ",
+  "Ž»", "Ž»ŽÞ", "Ž¼", "Ž¼ŽÞ", "Ž½", "Ž½ŽÞ", "Ž¾", "Ž¾ŽÞ", "Ž¿", "Ž¿ŽÞ",
+  "ŽÀ", "ŽÀŽÞ", "ŽÁ", "ŽÁŽÞ", "Ž¯", "ŽÂ", "ŽÂŽÞ", "ŽÃ", "ŽÃŽÞ", "ŽÄ", "ŽÄŽÞ",
+  "ŽÅ", "ŽÆ", "ŽÇ", "ŽÈ", "ŽÉ",
+  "ŽÊ", "ŽÊŽÞ", "ŽÊŽß", "ŽË", "ŽËŽÞ", "ŽËŽß", "ŽÌ", "ŽÌŽÞ", "ŽÌŽß",
+  "ŽÍ", "ŽÍŽÞ", "ŽÍŽß", "ŽÎ", "ŽÎŽÞ", "ŽÎŽß",
+  "ŽÏ", "ŽÐ", "ŽÑ", "ŽÒ", "ŽÓ",
+  "Ž¬", "ŽÔ", "Ž­", "ŽÕ", "Ž®", "ŽÖ",
+  "Ž×", "ŽØ", "ŽÙ", "ŽÚ", "ŽÛ",
+  "¥î", "ŽÜ", "¥ð", "¥ñ", "Ž¦", "ŽÝ",
+  "Ž³ŽÞ", "¥õ", "¥ö"
+};                              /* Á´³Ñ¤¬º®¤¸¤Ã¤Æ¤ë¤Î¤ÇÃí°Õ */
+
+ /**    ¾å¤Îhankdata¤¬¡¢¼ÂºÝ¤Ë»È¤¦È¾³Ñ¥³¡¼¥É¤òɽ¤·¤Æ¤¤¤Ê¤¤¤È¤­¡¢¼ÂºÝ¤Î¤â¤Î¤Ë
+        ½¤Àµ¤¹¤ë¡£½é´üÀßÄê»þ¤Ë°ì²ó¤À¤±¸Æ¤Ö */
+void
+hank_setup ()
+{
+  int i;
+  char *s, orig_hnkak1;
+
+  orig_hnkak1 = *hankdata[0];
+  /*     *hankdata[] ¤Ç¤ÎȾ³Ñʸ»ú¤Î£±¥Ð¥¤¥È¤á¡£È¾³Ñʸ»ú¤Î£±¥Ð¥¤¥È¤á¤À¤±¤¬°Û¤Ê¤ë
+     ¤è¤¦¤Ê¾µ¡¼ï¤Ë°Ü¿¢¤¹¤ë¤È¤­¤Ï¡¢HNKAK1¤Îdefine¤òÊѤ¨¤ì¤ÐOK¡£Ã¢¤·romkan¤Î
+     ¥½¡¼¥¹Ãæ¤ÎȾ³Ñʸ»ú¡Ê¤³¤Î¥Õ¥¡¥¤¥ë¤Ë¤Î¤ß¸ºß¡Ë¤â¥³¥ó¥Ð¡¼¥È¤·¤Æ¡¢¤½¤Îµ¡¼ï
+     ¤Ë¹ç¤ï¤»¤ë¤Û¤¦¤¬Ë¾¤Þ¤·¤¤¡£¤·¤«¤·¡¢¥¨¥Ç¥£¥¿¤Ç¤³¤Î¥Õ¥¡¥¤¥ë¤ò½¤Àµ¤·¤¿¤ê
+     ¤¹¤ë¾ì¹ç¤Ë¡¢È¾³Ñʸ»ú¤Î°·¤¤¤¬¤¦¤Þ¤¯¤¤¤«¤Ê¤¤¾ì¹ç¤¬¤¢¤ë¤Î¤Ç¡¢ÆäË
+     ¥³¥ó¥Ð¡¼¥È¤ò¤·¤Ê¤¯¤È¤âÆ°ºî¤¹¤ë¤è¤¦¤Ë½èÃ֤Ϥ·¤Æ¤¢¤ë¡£¤½¤ì¤¬¡¢¤³¤Î
+     hank_setup()¤Ç¤¢¤ë¡£hankdata¤Ï¡¢½é´üÀßÄê»þ¤Ë hank_setup()¤Ë¤è¤Ã¤Æ
+     ¼ÂºÝ¤ÎȾ³Ñ¥³¡¼¥É¤Ëľ¤µ¤ì¤ë¡£ */
+
+  if (orig_hnkak1 == (char) HNKAK1)
+    return;
+  for (i = 0; i < numberof (hankdata); i++)
+    {
+      for (s = hankdata[i]; *s; s += 2)
+        if (*s == orig_hnkak1)
+          *s = HNKAK1;
+    }
+}
+
+ /** ¤«¤Ê¢ªÈ¾³Ñ¥«¥¿¥«¥Ê¡£·ë²Ì¤¬Æóʸ»ú¤Ë¤Ê¤ë¤³¤È¤â¤¢¤ë¡£*/
+void
+to_hankata (in, outp)
+     letter in, **outp;
+{
+  uns_chr *p, c;
+  letter *out;
+
+  out = *outp;
+  switch (in)
+    {
+    case CHOUON:
+      *out++ = HKCHOU;
+      break;
+    case DAKUTN:
+      *out++ = HKDKTN;
+      break;
+    case HNDAKU:
+      *out++ = HKHNDK;
+      break;
+    case MNMARU:
+      *out++ = HKMARU;
+      break;
+    case HRKKAG:
+      *out++ = HKHRKG;
+      break;
+    case TJIKAG:
+      *out++ = HKTJKG;
+      break;
+    case TOUTEN:
+      *out++ = HKTTEN;
+      break;
+    case NKPOTU:
+      *out++ = HKNKPT;
+      break;
+    default:
+      if (is_kata (in))
+        {
+          for (p = (uns_chr *) hankdata[in - KATBGN]; c = *p; p++)
+            *out++ = (c << 8) + *++p;
+        }
+      else if (is_hira (in))
+        {
+          for (p = (uns_chr *) hankdata[in - HIRBGN]; c = *p; p++)
+            *out++ = (c << 8) + *++p;
+        }
+      else
+        {
+          *out++ = in;
+        }
+    }
+  *out = EOLTTR;
+  *outp = out;
+}
+
+ /**    Ⱦ³Ñ¥«¥¿¥«¥Ê¢ª¤Ò¤é¤¬¤Ê¡£Ã¢¤·¡¢ÂùÅÀ¤ò»ý¤Äʸ»ú¤ò°ì¤Ä¤Ë¤Þ¤È¤á¤Æ¤Ï
+        ¤¯¤ì¤Ê¤¤¤Î¤ÇÃí°Õ¡£*/
+/* *INDENT-OFF* */
+letter
+to_zenhira (l)
+    letter l;
+/* *INDENT-ON* */
+{
+  letter retval;
+
+  static uns_chr *data = (uns_chr *) "¡£¡Ö¡×¡¢¡¦¤ò¤¡¤£¤¥¤§¤©¤ã¤å¤ç¤Ã¡¼¤¢¤¤¤¦¤¨¤ª¤«¤­¤¯¤±¤³¤µ¤·¤¹¤»¤½¤¿\
+¤Á¤Ä¤Æ¤È¤Ê¤Ë¤Ì¤Í¤Î¤Ï¤Ò¤Õ¤Ø¤Û¤Þ¤ß¤à¤á¤â¤ä¤æ¤è¤é¤ê¤ë¤ì¤í¤ï¤ó¡«¡¬";
+
+#ifdef  CHINESE
+  static uns_chr *data_cn = (uns_chr *) "¡£¡¸¡¹¡¢¡¤¤ò¤¡¤£¤¥¤§¤©¤ã¤å¤ç¤Ã¡¼¤¢¤¤¤¦¤¨¤ªÂ𤭤¯¤±¤³¶È¤·¤¹¤»¤½¤¿\
+¤Á¤Ä¤Æ¤È¤Ê¤Ë¤Ì¤ÍµÄ¤Ï¤Ò¤Õ¤Ø¤Û¤Þ¤ß¤à¤áÒ²¤ä¤æ¤è¤é¤ê¤ë¤ì¤í¤ï¤ó¡å¡ã";
+#endif /* CHINESE */
+
+  if (is_hankata (l))
+    {
+      l = (l - HKKBGN) << 1;
+#ifdef  CHINESE
+      if (!strcmp (_lang, WNN_C_LANG) || !strcmp (_lang, WNN_T_LANG))
+        {
+          retval = data_cn[l] << 8;
+          retval += data_cn[++l];
+        }
+      else
+#endif
+        {
+          retval = data[l] << 8;
+          retval += data[++l];
+        }
+      return (retval);
+    }
+  else
+    return (l);
+}
+
+ /**    Ⱦ³Ñ¥«¥¿¥«¥Ê¢ªÁ´³Ñ¡£Ã¢¤·¡¢ÂùÅÀ¤ò»ý¤Äʸ»ú¤ò°ì¤Ä¤Ë¤Þ¤È¤á¤Æ¤Ï
+        ¤¯¤ì¤Ê¤¤¤Î¤ÇÃí°Õ¡£*/
+/* *INDENT-OFF* */
+letter
+to_zenkata (l)
+   letter l;
+/* *INDENT-ON* */
+{
+  return (is_hankata (l) ? (l = to_zenhira (l), to_kata (l)) : l);
+}
+
+ /* ¥Ó¥Ã¥È¥Ù¥¯¥¿¤Î¹½À® */
+#define bitvec(b0, b1, b2, b3, b4, b5, b6, b7) (                         \
+        (char)b0 | ((char)b1 << 1) | ((char)b2 << 2) | ((char)b3 << 3) | ((char)b4 << 4) | ((char)b5 << 5) | \
+        ((char)b6 << 6) | ((char)b7 << 7)                                                \
+)
+
+ /** char¤ÎÇÛÎó h ¤ò¥Ó¥Ã¥È¥Ù¥¯¥¿¤È¸«¤Æ¤½¤ÎÂèi¥Ó¥Ã¥È¤ò¥Á¥§¥Ã¥¯¤¹¤ë */
+#define bitlook(h, i) (h[(i) >> 3] & (1 << ((i) & 7)))
+
+#define KATRPT  0xA1B3          /* ¡³ */
+#define HIRRPT  0xA1B5          /* ¡µ */
+#define KATA_U  0xA5A6          /* ¥¦ */
+#define KAT_VU  0xA5F4          /* ¥ô */
+#define HIR_KA  0xA4AB          /* ¤« */
+#define HIR_HO  0xA4DB          /* ¤Û */
+#define KAT_KA  0xA5AB          /* ¥« */
+#define KAT_HO  0xA5DB          /* ¥Û */
+#define HIR_HA  0xA4CF          /* ¤Ï */
+#define KAT_HA  0xA5CF          /* ¥Ï */
+
+ /**    ¸å¤í¤ËȾÂùÅÀ¤ò¤¯¤Ã¤Ä¤±¤ë¡£·ë²Ì¤Ï°ìËô¤ÏÆóʸ»ú¡£*/
+void
+handakuadd (in, outp)
+     letter in, **outp;
+{
+  if ((HIR_HA <= in && in <= HIR_HO) ? 0 == (in - HIR_HA) % 3 : (KAT_HA <= in && in <= KAT_HO && 0 == (in - KAT_HA) % 3))
+    {
+      *(*outp)++ = in + 2;
+    }
+  else
+    {
+      *(*outp)++ = in;
+      *(*outp)++ = HNDAKU;
+    }
+  **outp = EOLTTR;
+}
+
+ /**    ¸å¤í¤ËÂùÅÀ¤ò¤¯¤Ã¤Ä¤±¤ë¡£·ë²Ì¤Ï°ìËô¤ÏÆóʸ»ú¡£*/
+void
+dakuadd (in, outp)
+     letter in, **outp;
+{
+  static char flgbit[] = {
+    bitvec (1, 0, 1, 0, 1, 0, 1, 0),    /* ¤«¤¬¤­¤®¤¯¤°¤±¤² */
+    bitvec (1, 0, 1, 0, 1, 0, 1, 0),    /* ¤³¤´¤µ¤¶¤·¤¸¤¹¤º */
+    bitvec (1, 0, 1, 0, 1, 0, 1, 0),    /* ¤»¤¼¤½¤¾¤¿¤À¤Á¤Â */
+    bitvec (0, 1, 0, 1, 0, 1, 0, 0),    /* ¤Ã¤Ä¤Å¤Æ¤Ç¤È¤É¤Ê */
+    bitvec (0, 0, 0, 0, 1, 0, 0, 1),    /* ¤Ë¤Ì¤Í¤Î¤Ï¤Ð¤Ñ¤Ò */
+    bitvec (0, 0, 1, 0, 0, 1, 0, 0),    /* ¤Ó¤Ô¤Õ¤Ö¤×¤Ø¤Ù¤Ú */
+    bitvec (1, 0, 0, 0, 0, 0, 0, 0)     /* ¤Û */
+  };
+  letter c;
+
+  if ((HIR_KA <= in && in <= HIR_HO) ? (c = in - HIR_KA, 1) : (KAT_KA <= in && in <= KAT_HO && (c = in - KAT_KA, 1)))
+    {
+      if (bitlook (flgbit, c))
+        {
+          *(*outp)++ = in + 1;
+        }
+      else
+        {
+          *(*outp)++ = in;
+          *(*outp)++ = DAKUTN;
+        }
+    }
+  else
+    switch (in)
+      {
+      case KATRPT:
+      case HIRRPT:
+        *(*outp)++ = in + 1;
+        break;
+      case KATA_U:
+        *(*outp)++ = KAT_VU;
+        break;
+      default:
+        *(*outp)++ = in;
+        *(*outp)++ = DAKUTN;
+      }
+  **outp = EOLTTR;
+}
+
+ /** in¤ÇÍ¿¤¨¤é¤ì¤¿¥³¡¼¥É¤òbase¿Ê¤Î¿ô»ú¤Ë¤·¤Æoutp¤ËÆþ¤ì¤ë¡£*/
+void
+to_digit (in, base, outp)
+     letter in, base, **outp;
+{
+  letter c, vtol ();
+
+  if (c = in, c /= base)
+    to_digit (c, base, outp);
+  *(*outp)++ = vtol (in % base);
+  **outp = EOLTTR;
+}