Mercurial > emacs
view admin/charsets/mapconv @ 98182:19ec1646fe6c
The Rmail/mbox merge has been abandoned in favor of a restart using
the current rmail.el file. A comprehensive list of changes will be
supplied when pmail.el is morphed back into rmail.el.
The current status is that pmail.el supports basic Rmail navigation
(no summary support) and shows the current message in a special
buffer using buffer-swap-text. No decoding is done yet. That is the
next step.
author | Paul Reilly <pmr@pajato.com> |
---|---|
date | Mon, 15 Sep 2008 20:56:53 +0000 |
parents | eb2d9dfc8486 |
children | ce88a631c161 |
line wrap: on
line source
#!/bin/sh

# Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008
#   National Institute of Advanced Industrial Science and Technology (AIST)
#   Registration Number H13PRO009

# This file is part of GNU Emacs.

# GNU Emacs is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

# GNU Emacs is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.

# Commentary:

# Convert charset map of various format into this:
#   0xXX 0xYYYY
# where,
#   XX is a code point of the charset in hexadecimal,
#   YYYY is the corresponding Unicode character code in hexadecimal.
#
# The result is written to stdout, preceded by a "# Generated from ..."
# header line.  Diagnostics go to stderr and the exit status is 1 on
# any argument error.

# Arguments are:
#   $1: source map file
#   $2: address pattern for sed (optionally with substitution command)
#   $3: format of source map file
#       GLIBC-1 GLIBC-2 GLIBC-2-7 CZYBORRA IANA UNICODE UNICODE2
#       YASUOKA MICROSOFT KANJI-DATABASE
#   $4: awk script (optional; when omitted the converted lines are
#       passed through unchanged)

map_file=$1
sed_address=$2
map_format=$3
awk_script=$4

# Print a diagnostic on stderr and abort with status 1.  Diagnostics
# must not go to stdout, because stdout carries the generated map.
die () {
  printf '%s\n' "$*" >&2
  exit 1
}

# Keep only the lines of stdin selected by the sed address given in $2.
select_lines () {
  sed -n -e "${sed_address} p"
}

# Final filter stage: run the awk script given in $4 over stdin, or
# copy stdin unchanged when no script was given.
postprocess () {
  if [ -n "${awk_script}" ]; then
    gawk -f "${awk_script}"
  else
    cat
  fi
}

# Common pipeline: select lines from stdin, rewrite them with sed using
# the expressions passed as arguments, sort, then post-process.
convert () {
  select_lines | sed "$@" | sort | postprocess
}

base=$(basename "${map_file}")

# Work out where the source map came from (for the header line) and
# reject unknown formats before anything is written to stdout.
case "${map_format}" in
  GLIBC-1 | GLIBC-2 | GLIBC-2-7)
    origin="glibc-2.3.2/localedata/charmaps/${base}" ;;
  GLIBC*)
    # Matches the GLIBC family but is not a variant we can convert.
    die "Invalid arguments" ;;
  CZYBORRA)
    origin="http://czyborra.com/charsets/${base}" ;;
  IANA)
    origin="http://www.iana.org/assignments/charset-reg/${base}" ;;
  UNICODE)
    origin="http://www.unicode.org/Public/MAPPINGS/.../${base}" ;;
  UNICODE2)
    origin="http://www.unicode.org/Public/MAPPINGS/.../${base}" ;;
  YASUOKA)
    origin="http://kanji.zinbun.kyoto-u.ac.jp/~yasuoka/.../${base}" ;;
  MICROSOFT)
    origin="http://www.microsoft.com/globaldev/reference/oem/${base}" ;;
  KANJI-DATABASE)
    origin="data at http://sourceforge.net/cvs/?group_id=26261" ;;
  *)
    die "Unknown file type: ${map_format}" ;;
esac

# Without this check a missing source file would only make one pipeline
# stage fail, and the script would still exit 0 with an empty map.
[ -r "${map_file}" ] || die "Cannot read source map file: ${map_file}"

if [ -n "${awk_script}" ] && [ ! -f "${awk_script}" ]; then
  die "Awk program does not exist: ${awk_script}"
fi

printf '%s\n' "# Generated from ${origin}"

# The dispatch uses `case' rather than `[ ... == ... ]': POSIX test
# only defines `=', and this script runs under /bin/sh.
#
# NOTE(review): the bracket expressions `[ ]' below contain a single
# space in this copy of the file; confirm against upstream whether a
# tab belongs there as well.
case "${map_format}" in
  GLIBC-1)
    # Source format is:
    #   <UYYYY>   /xXX
    convert -e 's,<U\([^>]*\)>[ ]*/x\(..\).*,0x\2 0x\1,' < "${map_file}"
    ;;
  GLIBC-2)
    # Source format is:
    #   <UYYYY>   /xXX/xZZ
    convert -e 's,<U\([^>]*\)>[ ]*/x\(..\)/x\(..\).*,0x\2\3 0x\1,' \
      < "${map_file}"
    ;;
  GLIBC-2-7)
    # Source format is:
    #   <UYYYY>   /xXX/xZZ
    # We must drop MSBs of XX and ZZ
    convert -e 's/xa/x2/g' -e 's/xb/x3/g' -e 's/xc/x4/g' \
            -e 's/xd/x5/g' -e 's/xe/x6/g' -e 's/xf/x7/g' \
            -e 's,<U\([^>]*\)>[ ]*/x\(..\)/x\(..\).*,0x\2\3 0x\1,' \
      < "${map_file}"
    ;;
  CZYBORRA)
    # Source format is:
    #   =XX   U+YYYY
    # The source file is compressed, hence zcat.
    zcat "${map_file}" \
      | convert -e 's/=\(..\)[^U]*U+\([0-9A-F]*\).*/0x\1 0x\2/'
    ;;
  IANA)
    # Source format is:
    #   0xXX   0xYYYY
    convert -e 's/\(0x[0-9A-Fa-f]*\)[^0]*\(0x[0-9A-Fa-f]*\).*/\1 \2/' \
      < "${map_file}"
    ;;
  UNICODE)
    # Source format is:
    #   YYYY   XX
    convert -e 's/\([0-9A-F]*\)[^0-9A-F]*\([0-9A-F]*\).*/0x\2 0x\1/' \
      < "${map_file}"
    ;;
  UNICODE2)
    # Source format is:
    #   0xXXXX   0xYYYY   # ...
    # Unlike the other formats, the awk script runs before the sort,
    # and the sort is numeric on field 4 of the awk output.
    select_lines < "${map_file}" \
      | sed -e 's/\([0-9A-Fx]*\)[^0]*\([0-9A-Fx]*\).*/\1 \2/' \
      | postprocess \
      | sort -n -k 4,4
    ;;
  YASUOKA)
    # Source format is:
    #   YYYY   0-XXXX   (XXXX is a Kuten code)
    convert -e 's/\([0-9A-F]*\)[^0]*0-\([0-9]*\).*/0x\2 0x\1/' \
      < "${map_file}"
    ;;
  MICROSOFT)
    # Source format is:
    #   XX = U+YYYY
    convert -e 's/\([0-9A-F]*\).*U+\([0-9A-F]*\).*/0x\1 0x\2/' \
      < "${map_file}"
    ;;
  KANJI-DATABASE)
    # Source format is:
    #   C?-XXXX   U+YYYYY   .....
    convert -e 's/...\(....\) U+\([0-9A-F]*\).*/0x\1 0x\2/' \
      < "${map_file}"
    ;;
  *)
    # Not reachable after the validation above; kept as a safety net.
    die "Invalid arguments"
    ;;
esac

# arch-tag: c33acb47-7eb6-4872-b871-15e1447e8f0e