view admin/charsets/cp932.awk @ 98182:19ec1646fe6c

The Rmail/mbox merge has been abandoned in favor of a restart using the current rmail.el file. A comprehensive list of changes will be supplied when pmail.el is morphed back into rmail.el. The current status is that pmail.el supports basic Rmail navigation (no summary support) and shows the current message in a special buffer using buffer-swap-text. No decoding is done yet. That is the next step.
author Paul Reilly <pmr@pajato.com>
date Mon, 15 Sep 2008 20:56:53 +0000
parents eb2d9dfc8486
children ce88a631c161
line wrap: on
line source

# cp932.awk -- Add sort keys and append user defined area to CP932-2BYTE.map.
# Copyright (C) 2004, 2005, 2006, 2007, 2008
#   National Institute of Advanced Industrial Science and Technology (AIST)
#   Registration Number H13PRO009

# This file is part of GNU Emacs.

# GNU Emacs is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

# GNU Emacs is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.

# Commentary:

# Add a sort key 0, 1, 2, 3, or 4 at the tail of each line as a comment
# so that the round trip mapping to Unicode works as described on
# this page:
#	http://support.microsoft.com/default.aspx?scid=kb;EN-US;170559
# The sort keys have the following meanings:
#   0: JISX0208 characters.
#   1: NEC special characters.
#   2: IBM extension characters.
#   3: NEC selection of IBM extension characters.
#   4: User defined area.

BEGIN {
  # Build the lookup table for the hexadecimal letter digits:
  # tohex["A"] = 10 ... tohex["F"] = 15.
  nletters = split("A B C D E F", hexletters, " ");
  for (k = 1; k <= nletters; k++)
    tohex[hexletters[k]] = 9 + k;
}

# Return the numeric value of the hexadecimal digit string STR
# (without any "0x" prefix).  Both uppercase and lowercase letter
# digits are accepted.  A character that is not a hexadecimal digit
# contributes the digit value 0.  An empty STR yields 0.
# VALUE, POS and DIGIT are local variables, not arguments; declaring
# them as extra parameters keeps them out of the global namespace.
function decode_hex(str,    value, pos, digit) {
  value = 0;
  str = toupper(str);
  for (pos = 1; pos <= length(str); pos++)
    {
      # index() is 1-based and returns 0 when the character is absent.
      digit = index("0123456789ABCDEF", substr(str, pos, 1)) - 1;
      if (digit < 0)
	digit = 0;
      value = value * 16 + digit;
    }
  return value;
}

# Convert the two-byte Shift_JIS code CODE (a number) to JIS and
# return the ku (row) number, i.e. the first JIS byte minus 0x20.
# Side effect: the global variables j1 and j2 are set to the first
# and second JIS bytes; they are deliberately global so that the
# caller can print them after the call.
# LEAD and TRAIL are local variables, not arguments.
function sjis_to_jis_ku(code,    lead, trail)
{
  lead = int(code / 256);
  trail = code % 256;

  # Every lead byte covers a pair of JIS rows; start from the second
  # row of the pair and step back below when the trail byte is low.
  if (lead >= 224)		# lead >= 0xE0
    j1 = lead * 2 - 352;	# j1 = lead * 2 - 0x160
  else
    j1 = lead * 2 - 224;	# j1 = lead * 2 - 0xE0

  if (trail >= 159)		# trail >= 0x9F
    j2 = trail - 126;		# j2 = trail - 0x7E
  else
    {
      j1 = j1 - 1;		# first row of the pair
      # Trail byte 0x7F is unused, so bytes from 0x7F upward are
      # shifted by one more than the bytes below it.
      if (trail >= 127)		# trail >= 0x7F
	j2 = trail - 32;
      else
	j2 = trail - 31;
    }
  return j1 - 32;
}

# Lines whose code starts with 0x8, 0x9 or 0xE: tag with sort key
# 1 (row 13), 3 (rows 89 through 92) or 0 (any other row), followed
# by the JIS bytes that sjis_to_jis_ku leaves in j1 and j2.
/^0x[89E]/ {
  # The four characters after the "0x" prefix of the first field are
  # the hexadecimal Shift_JIS code.
  row = sjis_to_jis_ku(decode_hex(substr($1, 3, 4)));
  if (row == 13)
    {
      printf "%s # 1 %02X%02X\n", $0, j1, j2;
      next;
    }
  if (row >= 89 && row <= 92)
    {
      printf "%s # 3 %02X%02X\n", $0, j1, j2;
      next;
    }
  printf "%s # 0 %02X%02X\n", $0, j1, j2;
  next;
}

# Lines whose code starts with 0xF get sort key 2 and no JIS bytes.
$0 ~ /^0xF/ {
  printf("%s # 2\n", $0);
  next;
}

# Every line not handled by an earlier rule is copied through unchanged.
{
  print $0;
}

END {
  # Append the sort key 4 entries: lead bytes 0xF0 through 0xF9 with
  # trail bytes 0x40 through 0xFC (0x7F excluded) are mapped, in
  # order, to consecutive code points starting at 0xE000.
  ucs = 57344;			# 0xE000
  for (lead = 240; lead <= 249; lead++)
    for (trail = 64; trail <= 252; trail++)
      {
	if (trail == 127)	# 0x7F is skipped
	  continue;
	printf "0x%02X%02X 0x%04X # 4\n", lead, trail, ucs++;
      }
}

# arch-tag: 998dc444-759d-43ef-87e3-2ab205011394