Mercurial > emacs
annotate admin/charsets/mapconv @ 98182:19ec1646fe6c
The Rmail/mbox merge has been abandoned in favor of a restart using
the current rmail.el file. A comprehensive list of changes will be
supplied when pmail.el is morphed back into rmail.el.
The current status is that pmail.el supports basic Rmail navigation
(no summary support) and shows the current message in a special
buffer using buffer-swap-text. No decoding is done yet. That is the
next step.
author | Paul Reilly <pmr@pajato.com> |
---|---|
date | Mon, 15 Sep 2008 20:56:53 +0000 |
parents | eb2d9dfc8486 |
children | ce88a631c161 |
rev | line source |
---|---|
88123 | 1 #!/bin/sh |
94832
eb2d9dfc8486
Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents:
91418
diff
changeset
|
2 |
91418
8ae4a64098f2
Update copyright years and GPL version.
Glenn Morris <rgm@gnu.org>
parents:
89916
diff
changeset
|
3 # Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008 |
88123 | 4 # National Institute of Advanced Industrial Science and Technology (AIST) |
5 # Registration Number H13PRO009 | |
94832
eb2d9dfc8486
Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents:
91418
diff
changeset
|
6 |
88123 | 7 # This file is part of GNU Emacs. |
94832
eb2d9dfc8486
Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents:
91418
diff
changeset
|
8 |
eb2d9dfc8486
Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents:
91418
diff
changeset
|
9 # GNU Emacs is free software: you can redistribute it and/or modify |
88123 | 10 # it under the terms of the GNU General Public License as published by |
94832
eb2d9dfc8486
Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents:
91418
diff
changeset
|
11 # the Free Software Foundation, either version 3 of the License, or |
eb2d9dfc8486
Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents:
91418
diff
changeset
|
12 # (at your option) any later version. |
eb2d9dfc8486
Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents:
91418
diff
changeset
|
13 |
88123 | 14 # GNU Emacs is distributed in the hope that it will be useful, |
15 # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
17 # GNU General Public License for more details. | |
94832
eb2d9dfc8486
Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents:
91418
diff
changeset
|
18 |
88123 | 19 # You should have received a copy of the GNU General Public License |
94832
eb2d9dfc8486
Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents:
91418
diff
changeset
|
20 # along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. |
88123 | 21 |
94832
eb2d9dfc8486
Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents:
91418
diff
changeset
|
22 # Commentary: |
eb2d9dfc8486
Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents:
91418
diff
changeset
|
23 |
88123 | 24 # Convert a charset map in one of various formats into this: |
25 # 0xXX 0xYYYY | |
26 # where, | |
27 # XX is a code point of the charset in hexadecimal, | |
28 # YYYY is the corresponding Unicode character code in hexadecimal. | |
29 # Arguments are: | |
30 # $1: source map file | |
31 # $2: address pattern for sed (optionally with substitution command) | |
32 # $3: format of source map file | |
33 # GLIBC-1 GLIBC-2 GLIBC-2-7 CZYBORRA IANA UNICODE UNICODE2 YASUOKA MICROSOFT KANJI-DATABASE | |
34 # $4: awk script file (optional; must exist if given, run with gawk -f) | |
35 | |
36 BASE=`basename $1` | |
37 | |
38 case "$3" in | |
39 GLIBC*) | |
89823 | 40 SOURCE="glibc-2.3.2/localedata/charmaps/${BASE}";; |
88123 | 41 CZYBORRA) |
42 SOURCE="http://czyborra.com/charsets/${BASE}";; | |
43 IANA) | |
44 SOURCE="http://www.iana.org/assignments/charset-reg/${BASE}";; | |
45 UNICODE) | |
46 SOURCE="http://www.unicode.org/Public/MAPPINGS/.../${BASE}";; | |
89742 | 47 UNICODE2) |
48 SOURCE="http://www.unicode.org/Public/MAPPINGS/.../${BASE}";; | |
88123 | 49 YASUOKA) |
50 SOURCE="http://kanji.zinbun.kyoto-u.ac.jp/~yasuoka/.../${BASE}";; | |
51 MICROSOFT) | |
52 SOURCE="http://www.microsoft.com/globaldev/reference/oem/${BASE}";; | |
89815
59bbae51ad73
Add code for handling KANJI-DATABASE format.
Kenichi Handa <handa@m17n.org>
parents:
89746
diff
changeset
|
53 KANJI-DATABASE) |
59bbae51ad73
Add code for handling KANJI-DATABASE format.
Kenichi Handa <handa@m17n.org>
parents:
89746
diff
changeset
|
54 SOURCE="data at http://sourceforge.net/cvs/?group_id=26261";; |
88123 | 55 *) |
56 echo "Unknown file type: $3"; | |
57 exit 1;; | |
58 esac | |
59 | |
60 echo "# Generated from $SOURCE" | |
61 | |
62 if [ -n "$4" ] ; then | |
63 if [ -f "$4" ] ; then | |
64 AWKPROG="gawk -f $4" | |
65 else | |
66 echo "Awk program does not exist: $4" | |
67 exit 1 | |
68 fi | |
69 else | |
70 AWKPROG=cat | |
71 fi | |
72 | |
73 if [ "$3" == "GLIBC-1" ] ; then | |
74 # Source format is: | |
75 # <UYYYY> /xXX | |
76 sed -n -e "$2 p" < $1 \ | |
77 | sed -e 's,<U\([^>]*\)>[ ]*/x\(..\).*,0x\2 0x\1,' \ | |
78 | sort | ${AWKPROG} | |
79 elif [ "$3" == "GLIBC-2" ] ; then | |
80 # Source format is: | |
81 # <UYYYY> /xXX/xZZ | |
82 sed -n -e "$2 p" < $1 \ | |
83 | sed -e 's,<U\([^>]*\)>[ ]*/x\(..\)/x\(..\).*,0x\2\3 0x\1,' \ | |
84 | sort | ${AWKPROG} | |
85 elif [ "$3" == "GLIBC-2-7" ] ; then | |
86 # Source format is: | |
87 # <UYYYY> /xXX/xZZ | |
88 # We must drop MSBs of XX and ZZ | |
89 sed -n -e "$2 p" < $1 \ | |
90 | sed -e 's/xa/x2/g' -e 's/xb/x3/g' -e 's/xc/x4/g' \ | |
91 -e 's/xd/x5/g' -e 's/xe/x6/g' -e 's/xf/x7/g' \ | |
92 -e 's,<U\([^>]*\)>[ ]*/x\(..\)/x\(..\).*,0x\2\3 0x\1,' \ | |
93 | tee temp \ | |
94 | sort | ${AWKPROG} | |
95 elif [ "$3" == "CZYBORRA" ] ; then | |
96 # Source format is: | |
97 # =XX U+YYYY | |
98 zcat $1 | sed -n -e "$2 p" \ | |
99 | sed -e 's/=\(..\)[^U]*U+\([0-9A-F]*\).*/0x\1 0x\2/' \ | |
100 | sort | ${AWKPROG} | |
101 elif [ "$3" == "IANA" ] ; then | |
102 # Source format is: | |
103 # 0xXX 0xYYYY | |
104 sed -n -e "$2 p" < $1 \ | |
105 | sed -e 's/\(0x[0-9A-Fa-f]*\)[^0]*\(0x[0-9A-Fa-f]*\).*/\1 \2/' \ | |
106 | sort | ${AWKPROG} | |
107 elif [ "$3" == "UNICODE" ] ; then | |
108 # Source format is: | |
109 # YYYY XX | |
110 sed -n -e "$2 p" < $1 \ | |
111 | sed -e 's/\([0-9A-F]*\)[^0-9A-F]*\([0-9A-F]*\).*/0x\2 0x\1/' \ | |
112 | sort | ${AWKPROG} | |
89742 | 113 elif [ "$3" == "UNICODE2" ] ; then |
114 # Source format is: | |
115 # 0xXXXX 0xYYYY # ... | |
116 sed -n -e "$2 p" < $1 \ | |
117 | sed -e 's/\([0-9A-Fx]*\)[^0]*\([0-9A-Fx]*\).*/\1 \2/' \ | |
89746
b7afe7c870d3
In UNICODE2 case, sort by 4th field after
Kenichi Handa <handa@m17n.org>
parents:
89742
diff
changeset
|
118 | ${AWKPROG} | sort -n -k 4,4 |
88123 | 119 elif [ "$3" == "YASUOKA" ] ; then |
120 # Source format is: | |
121 # YYYY 0-XXXX (XXXX is a Kuten code) | |
122 sed -n -e "$2 p" < $1 \ | |
123 | sed -e 's/\([0-9A-F]*\)[^0]*0-\([0-9]*\).*/0x\2 0x\1/' \ | |
124 | sort | ${AWKPROG} | |
125 elif [ "$3" == "MICROSOFT" ] ; then | |
126 # Source format is: | |
127 # XX = U+YYYY | |
128 sed -n -e "$2 p" < $1 \ | |
129 | sed -e 's/\([0-9A-F]*\).*U+\([0-9A-F]*\).*/0x\1 0x\2/' \ | |
130 | sort | ${AWKPROG} | |
89815
59bbae51ad73
Add code for handling KANJI-DATABASE format.
Kenichi Handa <handa@m17n.org>
parents:
89746
diff
changeset
|
131 elif [ "$3" == "KANJI-DATABASE" ] ; then |
59bbae51ad73
Add code for handling KANJI-DATABASE format.
Kenichi Handa <handa@m17n.org>
parents:
89746
diff
changeset
|
132 # Source format is: |
59bbae51ad73
Add code for handling KANJI-DATABASE format.
Kenichi Handa <handa@m17n.org>
parents:
89746
diff
changeset
|
133 # C?-XXXX U+YYYYY ..... |
59bbae51ad73
Add code for handling KANJI-DATABASE format.
Kenichi Handa <handa@m17n.org>
parents:
89746
diff
changeset
|
134 sed -n -e "$2 p" < $1 \ |
59bbae51ad73
Add code for handling KANJI-DATABASE format.
Kenichi Handa <handa@m17n.org>
parents:
89746
diff
changeset
|
135 | sed -e 's/...\(....\) U+\([0-9A-F]*\).*/0x\1 0x\2/' \ |
59bbae51ad73
Add code for handling KANJI-DATABASE format.
Kenichi Handa <handa@m17n.org>
parents:
89746
diff
changeset
|
136 | sort | ${AWKPROG} |
88123 | 137 else |
138 echo "Invalid arguments" | |
139 exit 1 | |
140 fi | |
89916
e0e4e6a0599f
Changes from arch/CVS synchronization
Miles Bader <miles@gnu.org>
parents:
89823
diff
changeset
|
141 |
e0e4e6a0599f
Changes from arch/CVS synchronization
Miles Bader <miles@gnu.org>
parents:
89823
diff
changeset
|
142 # arch-tag: c33acb47-7eb6-4872-b871-15e1447e8f0e |