comparison src/libid3tag/utf16.c @ 2503:10692383c103 trunk

[svn] First try at libid3tag integration. This improved libid3tag supports VFS operations and can add an ID3v2 tag to files that have never had one.
author yaz
date Sun, 11 Feb 2007 05:19:07 -0800
parents
children
comparison
equal deleted inserted replaced
2502:b7be0af74307 2503:10692383c103
1 /*
2 * libid3tag - ID3 tag manipulation library
3 * Copyright (C) 2000-2004 Underbit Technologies, Inc.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 *
19 * $Id: utf16.c,v 1.9 2004/01/23 09:41:32 rob Exp $
20 */
21
22 # ifdef HAVE_CONFIG_H
23 # include "config.h"
24 # endif
25
26 # include "global.h"
27
28 # include <stdlib.h>
29
30 # include "id3tag.h"
31 # include "utf16.h"
32 # include "ucs4.h"
33
34 /*
35 * NAME: utf16->length()
36 * DESCRIPTION: return the number of ucs4 chars represented by a utf16 string
37 */
38 id3_length_t id3_utf16_length(id3_utf16_t const *utf16)
39 {
40 id3_length_t length = 0;
41
42 while (*utf16) {
43 if (utf16[0] < 0xd800 || utf16[0] > 0xdfff)
44 ++length;
45 else if (utf16[0] >= 0xd800 && utf16[0] <= 0xdbff &&
46 utf16[1] >= 0xdc00 && utf16[1] <= 0xdfff) {
47 ++length;
48 ++utf16;
49 }
50
51 ++utf16;
52 }
53
54 return length;
55 }
56
57 /*
58 * NAME: utf16->size()
59 * DESCRIPTION: return the encoding size of a utf16 string
60 */
61 id3_length_t id3_utf16_size(id3_utf16_t const *utf16)
62 {
63 id3_utf16_t const *ptr = utf16;
64
65 while (*ptr)
66 ++ptr;
67
68 return ptr - utf16 + 1;
69 }
70
71 /*
72 * NAME: utf16->ucs4duplicate()
73 * DESCRIPTION: duplicate and decode a utf16 string into ucs4
74 */
75 id3_ucs4_t *id3_utf16_ucs4duplicate(id3_utf16_t const *utf16)
76 {
77 id3_ucs4_t *ucs4;
78
79 ucs4 = malloc((id3_utf16_length(utf16) + 1) * sizeof(*ucs4));
80 if (ucs4)
81 id3_utf16_decode(utf16, ucs4);
82
83 return release(ucs4);
84 }
85
86 /*
87 * NAME: utf16->decodechar()
88 * DESCRIPTION: decode a series of utf16 chars into a single ucs4 char
89 */
90 id3_length_t id3_utf16_decodechar(id3_utf16_t const *utf16, id3_ucs4_t *ucs4)
91 {
92 id3_utf16_t const *start = utf16;
93
94 while (1) {
95 if (utf16[0] < 0xd800 || utf16[0] > 0xdfff) {
96 *ucs4 = utf16[0];
97 return utf16 - start + 1;
98 }
99 else if (utf16[0] >= 0xd800 && utf16[0] <= 0xdbff &&
100 utf16[1] >= 0xdc00 && utf16[1] <= 0xdfff) {
101 *ucs4 = (((utf16[0] & 0x03ffL) << 10) |
102 ((utf16[1] & 0x03ffL) << 0)) + 0x00010000L;
103 return utf16 - start + 2;
104 }
105
106 ++utf16;
107 }
108 }
109
110 /*
111 * NAME: utf16->encodechar()
112 * DESCRIPTION: encode a single ucs4 char into a series of up to 2 utf16 chars
113 */
114 id3_length_t id3_utf16_encodechar(id3_utf16_t *utf16, id3_ucs4_t ucs4)
115 {
116 if (ucs4 < 0x00010000L) {
117 utf16[0] = ucs4;
118
119 return 1;
120 }
121 else if (ucs4 < 0x00110000L) {
122 ucs4 -= 0x00010000L;
123
124 utf16[0] = ((ucs4 >> 10) & 0x3ff) | 0xd800;
125 utf16[1] = ((ucs4 >> 0) & 0x3ff) | 0xdc00;
126
127 return 2;
128 }
129
130 /* default */
131
132 return id3_utf16_encodechar(utf16, ID3_UCS4_REPLACEMENTCHAR);
133 }
134
135 /*
136 * NAME: utf16->decode()
137 * DESCRIPTION: decode a complete utf16 string into a ucs4 string
138 */
139 void id3_utf16_decode(id3_utf16_t const *utf16, id3_ucs4_t *ucs4)
140 {
141 do
142 utf16 += id3_utf16_decodechar(utf16, ucs4);
143 while (*ucs4++);
144 }
145
146 /*
147 * NAME: utf16->encode()
148 * DESCRIPTION: encode a complete ucs4 string into a utf16 string
149 */
150 void id3_utf16_encode(id3_utf16_t *utf16, id3_ucs4_t const *ucs4)
151 {
152 do
153 utf16 += id3_utf16_encodechar(utf16, *ucs4);
154 while (*ucs4++);
155 }
156
157 /*
158 * NAME: utf16->put()
159 * DESCRIPTION: serialize a single utf16 character
160 */
161 id3_length_t id3_utf16_put(id3_byte_t **ptr, id3_utf16_t utf16,
162 enum id3_utf16_byteorder byteorder)
163 {
164 if (ptr) {
165 switch (byteorder) {
166 default:
167 case ID3_UTF16_BYTEORDER_BE:
168 (*ptr)[0] = (utf16 >> 8) & 0xff;
169 (*ptr)[1] = (utf16 >> 0) & 0xff;
170 break;
171
172 case ID3_UTF16_BYTEORDER_LE:
173 (*ptr)[0] = (utf16 >> 0) & 0xff;
174 (*ptr)[1] = (utf16 >> 8) & 0xff;
175 break;
176 }
177
178 *ptr += 2;
179 }
180
181 return 2;
182 }
183
184 /*
185 * NAME: utf16->get()
186 * DESCRIPTION: deserialize a single utf16 character
187 */
188 id3_utf16_t id3_utf16_get(id3_byte_t const **ptr,
189 enum id3_utf16_byteorder byteorder)
190 {
191 id3_utf16_t utf16;
192
193 switch (byteorder) {
194 default:
195 case ID3_UTF16_BYTEORDER_BE:
196 utf16 =
197 ((*ptr)[0] << 8) |
198 ((*ptr)[1] << 0);
199 break;
200
201 case ID3_UTF16_BYTEORDER_LE:
202 utf16 =
203 ((*ptr)[0] << 0) |
204 ((*ptr)[1] << 8);
205 break;
206 }
207
208 *ptr += 2;
209
210 return utf16;
211 }
212
213 /*
214 * NAME: utf16->serialize()
215 * DESCRIPTION: serialize a ucs4 string using utf16 encoding
216 */
217 id3_length_t id3_utf16_serialize(id3_byte_t **ptr, id3_ucs4_t const *ucs4,
218 enum id3_utf16_byteorder byteorder,
219 int terminate)
220 {
221 id3_length_t size = 0;
222 id3_utf16_t utf16[2], *out;
223
224 if (byteorder == ID3_UTF16_BYTEORDER_ANY)
225 size += id3_utf16_put(ptr, 0xfeff, byteorder);
226
227 while (*ucs4) {
228 switch (id3_utf16_encodechar(out = utf16, *ucs4++)) {
229 case 2: size += id3_utf16_put(ptr, *out++, byteorder);
230 case 1: size += id3_utf16_put(ptr, *out++, byteorder);
231 case 0: break;
232 }
233 }
234
235 if (terminate)
236 size += id3_utf16_put(ptr, 0, byteorder);
237
238 return size;
239 }
240
241 /*
242 * NAME: utf16->deserialize()
243 * DESCRIPTION: deserialize a ucs4 string using utf16 encoding
244 */
245 id3_ucs4_t *id3_utf16_deserialize(id3_byte_t const **ptr, id3_length_t length,
246 enum id3_utf16_byteorder byteorder)
247 {
248 id3_byte_t const *end;
249 id3_utf16_t *utf16ptr, *utf16;
250 id3_ucs4_t *ucs4;
251
252 end = *ptr + (length & ~1);
253
254 utf16 = malloc((length / 2 + 1) * sizeof(*utf16));
255 if (utf16 == 0)
256 return 0;
257
258 if (byteorder == ID3_UTF16_BYTEORDER_ANY && end - *ptr > 0) {
259 switch (((*ptr)[0] << 8) |
260 ((*ptr)[1] << 0)) {
261 case 0xfeff:
262 byteorder = ID3_UTF16_BYTEORDER_BE;
263 *ptr += 2;
264 break;
265
266 case 0xfffe:
267 byteorder = ID3_UTF16_BYTEORDER_LE;
268 *ptr += 2;
269 break;
270 }
271 }
272
273 utf16ptr = utf16;
274 while (end - *ptr > 0 && (*utf16ptr = id3_utf16_get(ptr, byteorder)))
275 ++utf16ptr;
276
277 *utf16ptr = 0;
278
279 ucs4 = malloc((id3_utf16_length(utf16) + 1) * sizeof(*ucs4));
280 if (ucs4)
281 id3_utf16_decode(utf16, ucs4);
282
283 free(utf16);
284
285 return ucs4;
286 }