Mercurial > pidgin.yaz
comparison libpurple/protocols/gg/lib/encoding.c @ 31860:93b08d43f684
matekm and kkszysiu collaborated on this patch to update our internal libgadu
to version 1.10.1.
author | John Bailey <rekkanoryo@rekkanoryo.org> |
---|---|
date | Thu, 24 Mar 2011 20:53:13 +0000 |
parents | |
children | 3a90a59ddea2 |
comparison
equal
deleted
inserted
replaced
31859:5043fc53f957 | 31860:93b08d43f684 |
---|---|
1 /* | |
2 * (C) Copyright 2008-2009 Jakub Zawadzki <darkjames@darkjames.ath.cx> | |
3 * Wojtek Kaniewski <wojtekka@irc.pl> | |
4 * | |
5 * This program is free software; you can redistribute it and/or modify | |
6 * it under the terms of the GNU Lesser General Public License Version | |
7 * 2.1 as published by the Free Software Foundation. | |
8 * | |
9 * This program is distributed in the hope that it will be useful, | |
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
12 * GNU Lesser General Public License for more details. | |
13 * | |
14 * You should have received a copy of the GNU Lesser General Public | |
15 * License along with this program; if not, write to the Free Software | |
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, | |
17 * USA. | |
18 */ | |
19 | |
20 #include <stdlib.h> | |
21 #include <string.h> | |
22 #include <errno.h> | |
23 | |
24 #include "libgadu.h" | |
25 | |
26 /** | |
27 * \file encoding.c | |
28 * | |
29 * \brief Funkcje konwersji kodowania tekstu | |
30 */ | |
31 | |
/**
 * \internal CP1250 to Unicode conversion table.
 *
 * Entry \c n holds the Unicode code point for the CP1250 byte
 * <tt>0x80 + n</tt>. Some entries hold '?' instead of a code point
 * (presumably the byte values CP1250 leaves unassigned).
 */
static const uint16_t table_cp1250[] =
{
	0x20ac, '?', 0x201a, '?', 0x201e, 0x2026, 0x2020, 0x2021,
	'?', 0x2030, 0x0160, 0x2039, 0x015a, 0x0164, 0x017d, 0x0179,
	'?', 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014,
	'?', 0x2122, 0x0161, 0x203a, 0x015b, 0x0165, 0x017e, 0x017a,
	0x00a0, 0x02c7, 0x02d8, 0x0141, 0x00a4, 0x0104, 0x00a6, 0x00a7,
	0x00a8, 0x00a9, 0x015e, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x017b,
	0x00b0, 0x00b1, 0x02db, 0x0142, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
	0x00b8, 0x0105, 0x015f, 0x00bb, 0x013d, 0x02dd, 0x013e, 0x017c,
	0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7,
	0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e,
	0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7,
	0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df,
	0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7,
	0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f,
	0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7,
	0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9,
};

/**
 * \internal Converts CP1250-encoded text to UTF-8.
 *
 * Input is consumed up to the first NUL byte or \c src_length bytes,
 * whichever comes first. When \c dst_length limits the output, a character
 * whose UTF-8 sequence would not fit completely is dropped together with
 * everything that follows it, so the result never ends in a partial sequence.
 *
 * \param src Source text in CP1250.
 * \param src_length Length of the source text in bytes (never negative).
 * \param dst_length Maximum length of the result in bytes, not counting
 *                   the terminating NUL (-1 means unlimited).
 *
 * \return Newly allocated, NUL-terminated buffer with the UTF-8 text (release
 *         it with free()), or NULL if the allocation failed.
 */
static char *gg_encoding_convert_cp1250_utf8(const char *src, int src_length, int dst_length)
{
	int i, j, len;
	char *result = NULL;

	/* First pass: compute the size of the complete UTF-8 output. The length
	 * is tested before the byte is read so src[src_length] is never touched. */
	for (i = 0, len = 0; (i < src_length) && (src[i] != 0); i++) {
		uint16_t uc;

		if ((unsigned char) src[i] < 0x80)
			uc = (unsigned char) src[i];
		else
			uc = table_cp1250[(unsigned char) src[i] - 128];

		if (uc < 0x80)
			len += 1;
		else if (uc < 0x800)
			len += 2;
		else
			len += 3;
	}

	if ((dst_length != -1) && (len > dst_length))
		len = dst_length;

	result = malloc(len + 1);

	if (result == NULL)
		return NULL;

	/* Second pass: encode. Every branch writes only if the whole sequence
	 * fits, so j never exceeds len and the terminator stays in bounds. */
	for (i = 0, j = 0; (i < src_length) && (src[i] != 0) && (j < len); i++) {
		uint16_t uc;

		if ((unsigned char) src[i] < 0x80)
			uc = (unsigned char) src[i];
		else
			uc = table_cp1250[(unsigned char) src[i] - 128];

		if (uc < 0x80) {
			result[j++] = uc;
		} else if (uc < 0x800) {
			/* two bytes needed */
			if (j + 2 > len)
				break;
			result[j++] = 0xc0 | ((uc >> 6) & 0x1f);
			result[j++] = 0x80 | (uc & 0x3f);
		} else {
			/* three bytes needed */
			if (j + 3 > len)
				break;
			result[j++] = 0xe0 | ((uc >> 12) & 0x0f);
			result[j++] = 0x80 | ((uc >> 6) & 0x3f);
			result[j++] = 0x80 | (uc & 0x3f);
		}
	}

	result[j] = 0;

	return result;
}
121 | |
122 /** | |
123 * \internal Zamienia tekst kodowany UTF-8 na CP1250. | |
124 * | |
125 * \param src Tekst źródłowy w UTF-8. | |
126 * \param src_length Długość ciągu źródłowego (nigdy ujemna). | |
127 * \param dst_length Długość ciągu docelowego (jeśli -1, nieograniczona). | |
128 * | |
129 * \return Zaalokowany bufor z tekstem w CP1250. | |
130 */ | |
131 static char *gg_encoding_convert_utf8_cp1250(const char *src, int src_length, int dst_length) | |
132 { | |
133 char *result; | |
134 int i, j, len, uc_left = 0; | |
135 uint32_t uc = 0, uc_min = 0; | |
136 | |
137 for (i = 0, len = 0; (src[i] != 0) && (i < src_length); i++) { | |
138 if ((src[i] & 0xc0) == 0xc0) { | |
139 len++; | |
140 } else if ((src[i] & 0x80) == 0x00) { | |
141 len++; | |
142 } | |
143 } | |
144 | |
145 if ((dst_length != -1) && (len > dst_length)) | |
146 len = dst_length; | |
147 | |
148 result = malloc(len + 1); | |
149 | |
150 if (result == NULL) | |
151 return NULL; | |
152 | |
153 for (i = 0, j = 0; (src[i] != 0) && (i < src_length) && (j < len); i++) { | |
154 if ((unsigned char) src[i] >= 0xf5) { | |
155 if (uc_left != 0) | |
156 result[j++] = '?'; | |
157 /* Restricted sequences */ | |
158 result[j++] = '?'; | |
159 uc_left = 0; | |
160 } else if ((src[i] & 0xf8) == 0xf0) { | |
161 if (uc_left != 0) | |
162 result[j++] = '?'; | |
163 uc = src[i] & 0x07; | |
164 uc_left = 3; | |
165 uc_min = 0x10000; | |
166 } else if ((src[i] & 0xf0) == 0xe0) { | |
167 if (uc_left != 0) | |
168 result[j++] = '?'; | |
169 uc = src[i] & 0x0f; | |
170 uc_left = 2; | |
171 uc_min = 0x800; | |
172 } else if ((src[i] & 0xe0) == 0xc0) { | |
173 if (uc_left != 0) | |
174 result[j++] = '?'; | |
175 uc = src[i] & 0x1f; | |
176 uc_left = 1; | |
177 uc_min = 0x80; | |
178 } else if ((src[i] & 0xc0) == 0x80) { | |
179 if (uc_left > 0) { | |
180 uc <<= 6; | |
181 uc |= src[i] & 0x3f; | |
182 uc_left--; | |
183 | |
184 if (uc_left == 0) { | |
185 int valid = 0; | |
186 int k; | |
187 | |
188 if (uc >= uc_min) { | |
189 for (k = 0; k < 128; k++) { | |
190 if (uc == table_cp1250[k]) { | |
191 result[j++] = k + 128; | |
192 valid = 1; | |
193 break; | |
194 } | |
195 } | |
196 } | |
197 | |
198 if (!valid && uc != 0xfeff) /* Byte Order Mark */ | |
199 result[j++] = '?'; | |
200 } | |
201 } | |
202 } else { | |
203 if (uc_left != 0) { | |
204 result[j++] = '?'; | |
205 uc_left = 0; | |
206 } | |
207 result[j++] = src[i]; | |
208 } | |
209 } | |
210 | |
211 if ((uc_left != 0) && (src[i] == 0)) | |
212 result[j++] = '?'; | |
213 | |
214 result[j] = 0; | |
215 | |
216 return result; | |
217 } | |
218 | |
219 /** | |
220 * \internal Zamienia kodowanie tekstu. | |
221 * | |
222 * \param src Tekst źródłowy. | |
223 * \param src_encoding Kodowanie tekstu źródłowego. | |
224 * \param dst_encoding Kodowanie tekstu docelowego. | |
225 * \param src_length Długość ciągu źródłowego w bajtach (nigdy ujemna). | |
226 * \param dst_length Długość ciągu docelowego w bajtach (jeśli -1, nieograniczona). | |
227 * | |
228 * \return Zaalokowany bufor z tekstem w kodowaniu docelowym. | |
229 */ | |
230 char *gg_encoding_convert(const char *src, gg_encoding_t src_encoding, gg_encoding_t dst_encoding, int src_length, int dst_length) | |
231 { | |
232 char *result; | |
233 | |
234 if (src == NULL) { | |
235 errno = EINVAL; | |
236 return NULL; | |
237 } | |
238 | |
239 // specjalny przypadek obsługiwany ekspresowo | |
240 if ((dst_encoding == src_encoding) && (dst_length == -1) && (src_length == -1)) | |
241 return strdup(src); | |
242 | |
243 if (src_length == -1) | |
244 src_length = strlen(src); | |
245 | |
246 if (dst_encoding == src_encoding) { | |
247 int len; | |
248 | |
249 if (dst_length == -1) | |
250 len = src_length; | |
251 else | |
252 len = (src_length < dst_length) ? src_length : dst_length; | |
253 | |
254 result = malloc(len + 1); | |
255 | |
256 if (result == NULL) | |
257 return NULL; | |
258 | |
259 strncpy(result, src, len); | |
260 result[len] = 0; | |
261 | |
262 return result; | |
263 } | |
264 | |
265 if (dst_encoding == GG_ENCODING_CP1250 && src_encoding == GG_ENCODING_UTF8) | |
266 return gg_encoding_convert_utf8_cp1250(src, src_length, dst_length); | |
267 | |
268 if (dst_encoding == GG_ENCODING_UTF8 && src_encoding == GG_ENCODING_CP1250) | |
269 return gg_encoding_convert_cp1250_utf8(src, src_length, dst_length); | |
270 | |
271 errno = EINVAL; | |
272 return NULL; | |
273 } | |
274 |