Mercurial > audlegacy
annotate libguess/guess.c @ 1652:62c5bff8a05b trunk
[svn] - more hammering
author | nenolod |
---|---|
date | Thu, 07 Sep 2006 22:00:35 -0700 |
parents | e6cc84e06444 |
children | a9bc621d6b1b |
rev | line source |
---|---|
1105
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
1 /* |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
2 * This code is derivative of guess.c of Gauche-0.8.3. |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
3 * The following is the original copyright notice. |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
4 */ |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
5 |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
6 /* |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
7 * guess.c - guessing character encoding |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
8 * |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
9 * Copyright (c) 2000-2003 Shiro Kawai, All rights reserved. |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
10 * |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
11 * Redistribution and use in source and binary forms, with or without |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
12 * modification, are permitted provided that the following conditions |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
13 * are met: |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
14 * |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
15 * 1. Redistributions of source code must retain the above copyright |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
16 * notice, this list of conditions and the following disclaimer. |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
17 * |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
18 * 2. Redistributions in binary form must reproduce the above copyright |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
19 * notice, this list of conditions and the following disclaimer in the |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
20 * documentation and/or other materials provided with the distribution. |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
21 * |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
22 * 3. Neither the name of the authors nor the names of its contributors |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
23 * may be used to endorse or promote products derived from this |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
24 * software without specific prior written permission. |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
25 * |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
26 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
27 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
28 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
29 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
30 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
31 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
32 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
33 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
34 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
35 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
36 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
37 * |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
38 */ |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
39 |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
40 #include "libguess.h" |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
/*
 * Provide NULL only when no earlier header has already defined it.
 * An unconditional definition risks an incompatible macro redefinition
 * on C libraries that spell NULL differently (e.g. "((void*)0)").
 */
#ifndef NULL
#define NULL ((void *)0)
#endif

/*
 * Tie-break switches.  When PREFER_<ENC> is defined, that encoding wins
 * a comparison in which its score equals the current best score;
 * otherwise the earlier candidate is kept.  All switches are left
 * undefined here.
 * NOTE(review): only PREFER_UTF8 and PREFER_SJIS are consulted in the
 * part of the file reviewed here; confirm the others against the
 * remaining guess_* functions.
 */
#undef PREFER_UTF8
#undef PREFER_SJIS
#undef PREFER_BIG5
#undef PREFER_GB18030
#undef PREFER_JOHAB

/*
 * Workaround: glib's g_convert() cannot properly convert UCS-2BE/LE
 * text that follows a byte order mark.  While WITH_G_CONVERT is
 * defined, both byte orders are reported under the name "UTF-16".
 * Remove the definition below to report "UCS-2BE" / "UCS-2LE" instead.
 */
#define WITH_G_CONVERT 1

#ifdef WITH_G_CONVERT
const char UCS_2BE[] = "UTF-16";
const char UCS_2LE[] = "UTF-16";
#else
const char UCS_2BE[] = "UCS-2BE";
const char UCS_2LE[] = "UCS-2LE";
#endif
e6cc84e06444
[svn] - libguess update: workaround for that g_convert can't convert properly from UCS-2BE/LE trailing after BOM.
yaz
parents:
1105
diff
changeset
|
61 |
1105
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
62 /* data types */ |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
/* One transition of a DFA: the state it leads to and its score factor. */
typedef struct guess_arc_rec {
    unsigned int next;          /* next state */
    double score;               /* factor multiplied into the running score */
} guess_arc;

/* A DFA together with its current run-time position. */
typedef struct guess_dfa_rec {
    signed char (*states)[256]; /* states[state][byte] -> arc index, or < 0 if no arc */
    guess_arc *arcs;            /* arc table indexed by the values found in states */
    int state;                  /* current state; negative once the DFA is dead */
    double score;               /* product of the scores of all arcs taken so far */
} guess_dfa;

/* macros */

/* Initializer for a guess_dfa: start in state 0 with a score of 1.0. */
#define DFA_INIT(st, ar) \
    { (st), (ar), 0, 1.0 }

/*
 * Advance `dfa` (a guess_dfa lvalue) by one input byte `ch`.
 *
 * A dead DFA (state < 0) is left untouched.  Otherwise the arc for
 * (state, ch) is looked up: a negative entry kills the DFA, any other
 * entry moves it to the arc's next state and multiplies the running
 * score by the arc's score.
 *
 * Every argument is parenthesized so that expressions such as `*ptr`
 * can be passed, and `ch` is reduced to unsigned char so that a plain
 * (possibly negative) char can never index outside the 256-entry row.
 * Note that `dfa` is evaluated more than once; pass a side-effect-free
 * lvalue.
 */
#define DFA_NEXT(dfa, ch)                                                   \
    do {                                                                    \
        int guess_arc_idx_;                                                 \
        if ((dfa).state >= 0) {                                             \
            guess_arc_idx_ = (dfa).states[(dfa).state][(unsigned char)(ch)]; \
            if (guess_arc_idx_ < 0) {                                       \
                (dfa).state = -1;                                           \
            } else {                                                        \
                (dfa).state = (dfa).arcs[guess_arc_idx_].next;              \
                (dfa).score *= (dfa).arcs[guess_arc_idx_].score;            \
            }                                                               \
        }                                                                   \
    } while (0)

/* True while `dfa` has not rejected the input seen so far. */
#define DFA_ALIVE(dfa)  ((dfa).state >= 0)
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
94 |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
95 /* include DFA table generated by guess.scm */ |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
96 #include "guess_tab.c" |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
97 |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
98 const char *guess_jp(const char *buf, int buflen) |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
99 { |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
100 int i; |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
101 guess_dfa eucj = DFA_INIT(guess_eucj_st, guess_eucj_ar); |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
102 guess_dfa sjis = DFA_INIT(guess_sjis_st, guess_sjis_ar); |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
103 guess_dfa utf8 = DFA_INIT(guess_utf8_st, guess_utf8_ar); |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
104 guess_dfa *top = NULL; |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
105 |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
106 for (i=0; i<buflen; i++) { |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
107 int c = (unsigned char)buf[i]; |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
108 |
1595
e6cc84e06444
[svn] - libguess update: workaround for that g_convert can't convert properly from UCS-2BE/LE trailing after BOM.
yaz
parents:
1105
diff
changeset
|
109 /* special treatment of iso-2022 escape sequence */ |
1105
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
110 if (c == 0x1b) { |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
111 if (i < buflen-1) { |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
112 c = (unsigned char)buf[++i]; |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
113 if (c == '$' || c == '(') return "ISO-2022-JP"; |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
114 } |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
115 } |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
116 |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
117 /* special treatment of BOM */ |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
118 if (i==0 && c == 0xff) { |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
119 if (i < buflen-1) { |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
120 c = (unsigned char)buf[i+1]; |
1595
e6cc84e06444
[svn] - libguess update: workaround for that g_convert can't convert properly from UCS-2BE/LE trailing after BOM.
yaz
parents:
1105
diff
changeset
|
121 if (c == 0xfe) return UCS_2LE; |
1105
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
122 } |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
123 } |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
124 if (i==0 && c == 0xfe) { |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
125 if (i < buflen-1) { |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
126 c = (unsigned char)buf[i+1]; |
1595
e6cc84e06444
[svn] - libguess update: workaround for that g_convert can't convert properly from UCS-2BE/LE trailing after BOM.
yaz
parents:
1105
diff
changeset
|
127 if (c == 0xff) return UCS_2BE; |
1105
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
128 } |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
129 } |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
130 |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
131 if (DFA_ALIVE(eucj)) { |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
132 if (!DFA_ALIVE(sjis) && !DFA_ALIVE(utf8)) return "EUC-JP"; |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
133 DFA_NEXT(eucj, c); |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
134 } |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
135 if (DFA_ALIVE(sjis)) { |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
136 if (!DFA_ALIVE(eucj) && !DFA_ALIVE(utf8)) return "SJIS"; |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
137 DFA_NEXT(sjis, c); |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
138 } |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
139 if (DFA_ALIVE(utf8)) { |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
140 if (!DFA_ALIVE(sjis) && !DFA_ALIVE(eucj)) return "UTF-8"; |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
141 DFA_NEXT(utf8, c); |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
142 } |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
143 |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
144 if (!DFA_ALIVE(eucj) && !DFA_ALIVE(sjis) && !DFA_ALIVE(utf8)) { |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
145 /* we ran out the possibilities */ |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
146 return NULL; |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
147 } |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
148 } |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
149 |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
150 /* Now, we have ambigous code. Pick the highest score. If more than |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
151 one candidate tie, pick the default encoding. */ |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
152 if (DFA_ALIVE(eucj)) top = &eucj; |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
153 if (DFA_ALIVE(utf8)) { |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
154 if (top) { |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
155 #if defined PREFER_UTF8 |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
156 if (top->score <= utf8.score) top = &utf8; |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
157 #else |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
158 if (top->score < utf8.score) top = &utf8; |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
159 #endif |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
160 } else { |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
161 top = &utf8; |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
162 } |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
163 } |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
164 if (DFA_ALIVE(sjis)) { |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
165 if (top) { |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
166 #if defined PREFER_SJIS |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
167 if (top->score <= sjis.score) top = &sjis; |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
168 #else |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
169 if (top->score < sjis.score) top = &sjis; |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
170 #endif |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
171 } else { |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
172 top = &sjis; |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
173 } |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
174 } |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
175 |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
176 if (top == &eucj) return "EUC-JP"; |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
177 if (top == &utf8) return "UTF-8"; |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
178 if (top == &sjis) return "SJIS"; |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
179 return NULL; |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
180 } |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
181 |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
182 const char *guess_tw(const char *buf, int buflen) |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
183 { |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
184 int i; |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
185 guess_dfa big5 = DFA_INIT(guess_big5_st, guess_big5_ar); |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
186 guess_dfa utf8 = DFA_INIT(guess_utf8_st, guess_utf8_ar); |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
187 guess_dfa *top = NULL; |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
188 |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
189 for (i=0; i<buflen; i++) { |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
190 int c = (unsigned char)buf[i]; |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
191 |
1595
e6cc84e06444
[svn] - libguess update: workaround for that g_convert can't convert properly from UCS-2BE/LE trailing after BOM.
yaz
parents:
1105
diff
changeset
|
192 /* special treatment of iso-2022 escape sequence */ |
1105
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
193 if (c == 0x1b) { |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
194 if (i < buflen-1) { |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
195 c = (unsigned char)buf[++i]; |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
196 if (c == '$' || c == '(') return "ISO-2022-TW"; |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
197 } |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
198 } |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
199 |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
200 /* special treatment of BOM */ |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
201 if (i==0 && c == 0xff) { |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
202 if (i < buflen-1) { |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
203 c = (unsigned char)buf[i+1]; |
1595
e6cc84e06444
[svn] - libguess update: workaround for that g_convert can't convert properly from UCS-2BE/LE trailing after BOM.
yaz
parents:
1105
diff
changeset
|
204 if (c == 0xfe) return UCS_2LE; |
1105
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
205 } |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
206 } |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
207 if (i==0 && c == 0xfe) { |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
208 if (i < buflen-1) { |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
209 c = (unsigned char)buf[i+1]; |
1595
e6cc84e06444
[svn] - libguess update: workaround for that g_convert can't convert properly from UCS-2BE/LE trailing after BOM.
yaz
parents:
1105
diff
changeset
|
210 if (c == 0xff) return UCS_2BE; |
1105
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
211 } |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
212 } |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
213 |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
214 if (DFA_ALIVE(big5)) { |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
215 if (!DFA_ALIVE(utf8)) return "BIG5"; |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
216 DFA_NEXT(big5, c); |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
217 } |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
218 if (DFA_ALIVE(utf8)) { |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
219 if (!DFA_ALIVE(big5)) return "UTF-8"; |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
220 DFA_NEXT(utf8, c); |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
221 } |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
222 |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
223 if (!DFA_ALIVE(big5) && !DFA_ALIVE(utf8)) { |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
224 /* we ran out the possibilities */ |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
225 return NULL; |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
226 } |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
227 } |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
228 |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
229 /* Now, we have ambigous code. Pick the highest score. If more than |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
230 one candidate tie, pick the default encoding. */ |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
231 if (DFA_ALIVE(big5)) top = &big5; |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
232 if (DFA_ALIVE(utf8)) { |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
233 if (top) { |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
234 #if defined PREFER_UTF8 |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
235 if (top->score <= utf8.score) top = &utf8; |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
236 #else |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
237 if (top->score < utf8.score) top = &utf8; |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
238 #endif |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
239 } else { |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
240 top = &utf8; |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
241 } |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
242 } |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
243 |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
244 if (top == &big5) return "BIG5"; |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
245 if (top == &utf8) return "UTF-8"; |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
246 return NULL; |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
247 } |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
248 |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
249 const char *guess_cn(const char *buf, int buflen) |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
250 { |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
251 int i; |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
252 guess_dfa gb2312 = DFA_INIT(guess_gb2312_st, guess_gb2312_ar); |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
253 guess_dfa utf8 = DFA_INIT(guess_utf8_st, guess_utf8_ar); |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
254 guess_dfa gb18030 = DFA_INIT(guess_gb18030_st, guess_gb18030_ar); |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
255 guess_dfa *top = NULL; |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
256 |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
257 for (i=0; i<buflen; i++) { |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
258 int c = (unsigned char)buf[i]; |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
259 int c2; |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
260 |
1595
e6cc84e06444
[svn] - libguess update: workaround for that g_convert can't convert properly from UCS-2BE/LE trailing after BOM.
yaz
parents:
1105
diff
changeset
|
261 /* special treatment of iso-2022 escape sequence */ |
1105
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
262 if (c == 0x1b) { |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
263 if (i < buflen-1) { |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
264 c = (unsigned char)buf[i+1]; |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
265 c2 = (unsigned char)buf[i+2]; |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
266 if (c == '$' && (c2 == ')' || c2 == '+')) return "ISO-2022-CN"; |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
267 } |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
268 } |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
269 |
1595
e6cc84e06444
[svn] - libguess update: workaround for that g_convert can't convert properly from UCS-2BE/LE trailing after BOM.
yaz
parents:
1105
diff
changeset
|
270 /* special treatment of BOM */ |
e6cc84e06444
[svn] - libguess update: workaround for that g_convert can't convert properly from UCS-2BE/LE trailing after BOM.
yaz
parents:
1105
diff
changeset
|
271 if (i==0 && c == 0xff) { |
e6cc84e06444
[svn] - libguess update: workaround for that g_convert can't convert properly from UCS-2BE/LE trailing after BOM.
yaz
parents:
1105
diff
changeset
|
272 if (i < buflen-1) { |
e6cc84e06444
[svn] - libguess update: workaround for that g_convert can't convert properly from UCS-2BE/LE trailing after BOM.
yaz
parents:
1105
diff
changeset
|
273 c = (unsigned char)buf[i+1]; |
e6cc84e06444
[svn] - libguess update: workaround for that g_convert can't convert properly from UCS-2BE/LE trailing after BOM.
yaz
parents:
1105
diff
changeset
|
274 if (c == 0xfe) return UCS_2LE; |
e6cc84e06444
[svn] - libguess update: workaround for that g_convert can't convert properly from UCS-2BE/LE trailing after BOM.
yaz
parents:
1105
diff
changeset
|
275 } |
e6cc84e06444
[svn] - libguess update: workaround for that g_convert can't convert properly from UCS-2BE/LE trailing after BOM.
yaz
parents:
1105
diff
changeset
|
276 } |
e6cc84e06444
[svn] - libguess update: workaround for that g_convert can't convert properly from UCS-2BE/LE trailing after BOM.
yaz
parents:
1105
diff
changeset
|
277 if (i==0 && c == 0xfe) { |
e6cc84e06444
[svn] - libguess update: workaround for that g_convert can't convert properly from UCS-2BE/LE trailing after BOM.
yaz
parents:
1105
diff
changeset
|
278 if (i < buflen-1) { |
e6cc84e06444
[svn] - libguess update: workaround for that g_convert can't convert properly from UCS-2BE/LE trailing after BOM.
yaz
parents:
1105
diff
changeset
|
279 c = (unsigned char)buf[i+1]; |
e6cc84e06444
[svn] - libguess update: workaround for that g_convert can't convert properly from UCS-2BE/LE trailing after BOM.
yaz
parents:
1105
diff
changeset
|
280 if (c == 0xff) return UCS_2BE; |
e6cc84e06444
[svn] - libguess update: workaround for that g_convert can't convert properly from UCS-2BE/LE trailing after BOM.
yaz
parents:
1105
diff
changeset
|
281 } |
e6cc84e06444
[svn] - libguess update: workaround for that g_convert can't convert properly from UCS-2BE/LE trailing after BOM.
yaz
parents:
1105
diff
changeset
|
282 } |
e6cc84e06444
[svn] - libguess update: workaround for that g_convert can't convert properly from UCS-2BE/LE trailing after BOM.
yaz
parents:
1105
diff
changeset
|
283 |
1105
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
284 if (DFA_ALIVE(gb2312)) { |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
285 if (!DFA_ALIVE(utf8) && !DFA_ALIVE(gb18030)) return "GB2312"; |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
286 DFA_NEXT(gb2312, c); |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
287 } |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
288 if (DFA_ALIVE(utf8)) { |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
289 if (!DFA_ALIVE(gb2312) && !DFA_ALIVE(gb18030)) return "UTF-8"; |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
290 DFA_NEXT(utf8, c); |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
291 } |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
292 if (DFA_ALIVE(gb18030)) { |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
293 if (!DFA_ALIVE(utf8) && !DFA_ALIVE(gb2312)) return "GB18030"; |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
294 DFA_NEXT(gb18030, c); |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
295 } |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
296 |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
297 if (!DFA_ALIVE(gb2312) && !DFA_ALIVE(utf8) && !DFA_ALIVE(gb18030)) { |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
298 /* we ran out the possibilities */ |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
299 return NULL; |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
300 } |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
301 } |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
302 |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
303 /* Now, we have ambigous code. Pick the highest score. If more than |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
304 one candidate tie, pick the default encoding. */ |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
305 if (DFA_ALIVE(gb2312)) top = &gb2312; |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
306 if (DFA_ALIVE(utf8)) { |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
307 if (top) { |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
308 #if defined PREFER_UTF8 |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
309 if (top->score <= utf8.score) top = &utf8; |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
310 #else |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
311 if (top->score < utf8.score) top = &utf8; |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
312 #endif |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
313 } else { |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
314 top = &utf8; |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
315 } |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
316 } |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
317 if (DFA_ALIVE(gb18030)) { |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
318 if (top) { |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
319 #if defined PREFER_GB18030 |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
320 if (top->score <= gb18030.score) top = &gb18030; |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
321 #else |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
322 if (top->score < gb18030.score) top = &gb18030; |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
323 #endif |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
324 } else { |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
325 top = &gb18030; |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
326 } |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
327 } |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
328 |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
329 if (top == &gb2312) return "GB2312"; |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
330 if (top == &utf8) return "UTF-8"; |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
331 if (top == &gb18030) return "GB18030"; |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
332 return NULL; |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
333 } |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
334 |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
335 const char *guess_kr(const char *buf, int buflen) |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
336 { |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
337 int i; |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
338 guess_dfa euck = DFA_INIT(guess_euck_st, guess_euck_ar); |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
339 guess_dfa utf8 = DFA_INIT(guess_utf8_st, guess_utf8_ar); |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
340 guess_dfa johab = DFA_INIT(guess_johab_st, guess_johab_ar); |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
341 guess_dfa *top = NULL; |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
342 |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
343 for (i=0; i<buflen; i++) { |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
344 int c = (unsigned char)buf[i]; |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
345 int c2; |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
346 |
1595
e6cc84e06444
[svn] - libguess update: workaround for that g_convert can't convert properly from UCS-2BE/LE trailing after BOM.
yaz
parents:
1105
diff
changeset
|
347 /* special treatment of iso-2022 escape sequence */ |
1105
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
348 if (c == 0x1b) { |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
349 if (i < buflen-1) { |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
350 c = (unsigned char)buf[i+1]; |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
351 c2 = (unsigned char)buf[i+2]; |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
352 if (c == '$' && c2 == ')') return "ISO-2022-KR"; |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
353 } |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
354 } |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
355 |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
356 /* special treatment of BOM */ |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
357 if (i==0 && c == 0xff) { |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
358 if (i < buflen-1) { |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
359 c = (unsigned char)buf[i+1]; |
1595
e6cc84e06444
[svn] - libguess update: workaround for that g_convert can't convert properly from UCS-2BE/LE trailing after BOM.
yaz
parents:
1105
diff
changeset
|
360 if (c == 0xfe) return UCS_2LE; |
1105
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
361 } |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
362 } |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
363 if (i==0 && c == 0xfe) { |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
364 if (i < buflen-1) { |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
365 c = (unsigned char)buf[i+1]; |
1595
e6cc84e06444
[svn] - libguess update: workaround for that g_convert can't convert properly from UCS-2BE/LE trailing after BOM.
yaz
parents:
1105
diff
changeset
|
366 if (c == 0xff) return UCS_2BE; |
1105
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
367 } |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
368 } |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
369 |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
370 if (DFA_ALIVE(euck)) { |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
371 if (!DFA_ALIVE(johab) && !DFA_ALIVE(utf8)) return "EUC-KR"; |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
372 DFA_NEXT(euck, c); |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
373 } |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
374 if (DFA_ALIVE(johab)) { |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
375 if (!DFA_ALIVE(euck) && !DFA_ALIVE(utf8)) return "JOHAB"; |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
376 DFA_NEXT(johab, c); |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
377 } |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
378 if (DFA_ALIVE(utf8)) { |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
379 if (!DFA_ALIVE(euck) && !DFA_ALIVE(johab)) return "UTF-8"; |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
380 DFA_NEXT(utf8, c); |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
381 } |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
382 |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
383 if (!DFA_ALIVE(euck) && !DFA_ALIVE(johab) && !DFA_ALIVE(utf8)) { |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
384 /* we ran out of possibilities */ |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
385 return NULL; |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
386 } |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
387 } |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
388 |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
389 /* Now, we have ambiguous code. Pick the highest score. If more than |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
390 one candidate tie, pick the default encoding. */ |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
391 if (DFA_ALIVE(euck)) top = &euck; |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
392 if (DFA_ALIVE(utf8)) { |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
393 if (top) { |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
394 #if defined PREFER_UTF8 |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
395 if (top->score <= utf8.score) top = &utf8; |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
396 #else |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
397 if (top->score < utf8.score) top = &utf8; |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
398 #endif |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
399 } else { |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
400 top = &utf8; |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
401 } |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
402 } |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
403 if (DFA_ALIVE(johab)) { |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
404 if (top) { |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
405 #if defined PREFER_JOAHB |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
406 if (top->score <= johab.score) top = &johab; |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
407 #else |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
408 if (top->score < johab.score) top = &johab; |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
409 #endif |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
410 } else { |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
411 top = &johab; |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
412 } |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
413 } |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
414 |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
415 if (top == &euck) return "EUC-KR"; |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
416 if (top == &utf8) return "UTF-8"; |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
417 if (top == &johab) return "JOHAB"; |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
418 return NULL; |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
419 } |
4be4d74db123
[svn] automatic character encoding detector for id3 metadata. --enable-chardet enables this feature.
yaz
parents:
diff
changeset
|
420 |