2541
|
1 /* Copyright (C) 1999-2001 Bruno Haible.
|
|
2 This file is not part of the GNU LIBICONV Library.
|
|
3 This file is put into the public domain. */
|
|
4
|
|
5 #ifdef HAVE_CONFIG_H
|
|
6 #include <config.h>
|
|
7 #endif
|
|
8
|
|
9 #ifdef HAVE_ICONV
|
|
10 #include "iconv_string.h"
|
|
11 #include <iconv.h>
|
|
12 #include <errno.h>
|
|
13 #include <stdlib.h>
|
|
14 #include <string.h>
|
|
15
|
|
/* Size in bytes of the scratch buffer used while measuring the output. */
#define tmpbufsize 4096

/*
 * Convert the bytes in [start, end) from encoding `fromcode` to encoding
 * `tocode` using iconv.
 *
 * The conversion runs twice: a first pass converts into a fixed scratch
 * buffer only to learn the exact output size, then the descriptor is reset
 * and a second pass converts into a buffer of exactly that size.
 *
 * Parameters:
 *   tocode, fromcode  Encoding names handed to iconv_open().  If iconv_open()
 *                     rejects them with EINVAL, `fromcode` may also be one of
 *                     the pseudo-encodings "autodetect_utf8", "autodetect_jp"
 *                     or "autodetect_kr"; candidate encodings are then tried
 *                     in a fixed order until one does not fail with EILSEQ.
 *   start, end        Input range; `end` points one past the last input byte.
 *   resultp           If NULL, only the length is computed.  Otherwise
 *                     *resultp must be NULL or a block obtained from
 *                     malloc()/realloc(); it is resized to hold the output and
 *                     the new pointer is stored back.  The output is not
 *                     NUL-terminated.  If the resize fails, *resultp is left
 *                     unchanged so the caller still owns the old block.
 *   lengthp           If non-NULL, receives the output length in bytes.
 *
 * An incomplete multibyte sequence at the very end of the input (iconv
 * reporting EINVAL) is not an error: conversion stops there and everything
 * converted up to that point is returned.
 *
 * Returns 0 on success.  On failure returns -1 with errno set: EINVAL for an
 * unsupported encoding pair, ENOMEM if the buffer cannot be allocated,
 * otherwise the errno left by iconv_open()/iconv() (e.g. EILSEQ).
 */
int iconv_string (const char* tocode, const char* fromcode,
                  const char* start, const char* end,
                  char** resultp, size_t* lengthp)
{
  iconv_t cd = iconv_open(tocode,fromcode);
  size_t length = 0;
  char* result;
  int saved_errno;

  if (cd == (iconv_t)(-1)) {
    /* Try UTF-8 first. There are very few ISO-8859-1 inputs that would
       be valid UTF-8, but many UTF-8 inputs are valid ISO-8859-1. */
    static const char* const utf8_candidates[] =
      { "UTF-8", "ISO-8859-1", NULL };
    /* Try the 7-bit encoding first. If the input contains bytes >= 0x80,
       it will fail.
       Try EUC-JP next. Short SHIFT_JIS inputs may come out wrong. This
       is unavoidable. If SHIFT_JIS were tried first, then some short
       EUC-JP inputs would come out wrong instead.
       Finally try SHIFT_JIS. */
    static const char* const jp_candidates[] =
      { "ISO-2022-JP-2", "EUC-JP", "SHIFT_JIS", NULL };
    /* Try the 7-bit encoding first. If the input contains bytes >= 0x80,
       it will fail. Finally try EUC-KR. */
    static const char* const kr_candidates[] =
      { "ISO-2022-KR", "EUC-KR", NULL };
    const char* const* candidate = NULL;
    int ret = -1;

    if (errno != EINVAL)
      return -1;
    /* Unsupported fromcode or tocode. Check whether the caller requested
       autodetection. */
    if (!strcmp(fromcode,"autodetect_utf8"))
      candidate = utf8_candidates;
    else if (!strcmp(fromcode,"autodetect_jp"))
      candidate = jp_candidates;
    else if (!strcmp(fromcode,"autodetect_kr"))
      candidate = kr_candidates;
    if (candidate == NULL) {
      errno = EINVAL;
      return -1;
    }
    /* Move on to the next candidate only when the input was invalid in the
       current one (EILSEQ); any other outcome, including that of the last
       candidate, is returned to the caller as is. */
    for (; *candidate != NULL; candidate++) {
      ret = iconv_string(tocode,*candidate,start,end,resultp,lengthp);
      if (!(ret < 0 && errno == EILSEQ))
        break;
    }
    return ret;
  }

  /* Determine the length we need. */
  {
    char tmpbuf[tmpbufsize];
    const char* inptr = start;
    size_t insize = end-start;
    while (insize > 0) {
      char* outptr = tmpbuf;
      size_t outsize = tmpbufsize;
      /* The cast through void* lets this compile against both the POSIX
         prototype (char**) and older ones that take const char**. */
      size_t res = iconv(cd,(void*)&inptr,&insize,&outptr,&outsize);
      int incomplete = 0;
      if (res == (size_t)(-1)) {
        if (errno == EINVAL)
          /* Truncated sequence at the end of the input: stop here, but
             still count what this call produced before hitting it. */
          incomplete = 1;
        else if (errno != E2BIG || outptr == tmpbuf)
          /* E2BIG only means the scratch buffer filled up, which is
             expected for long inputs; it is fatal only when no progress
             was made at all, since looping again would never terminate. */
          goto fail;
      }
      length += outptr-tmpbuf;
      if (incomplete)
        break;
    }
    {
      /* Account for any bytes needed to return to the initial shift state. */
      char* outptr = tmpbuf;
      size_t outsize = tmpbufsize;
      size_t res = iconv(cd,NULL,NULL,&outptr,&outsize);
      if (res == (size_t)(-1))
        goto fail;
      length += outptr-tmpbuf;
    }
  }
  if (lengthp != NULL)
    *lengthp = length;
  if (resultp == NULL) {
    iconv_close(cd);
    return 0;
  }
  /* realloc(NULL,n) behaves like malloc(n).  Never request 0 bytes: the
     behaviour of realloc(p,0) differs between implementations.  Store the
     new pointer only on success so that a failure does not leak the
     caller's block. */
  result = realloc(*resultp, length > 0 ? length : 1);
  if (result == NULL) {
    errno = ENOMEM;
    goto fail;
  }
  *resultp = result;
  if (length == 0) {
    iconv_close(cd);
    return 0;
  }
  iconv(cd,NULL,NULL,NULL,NULL); /* return to the initial state */
  /* Do the conversion for real. */
  {
    const char* inptr = start;
    size_t insize = end-start;
    char* outptr = result;
    size_t outsize = length;
    while (insize > 0) {
      size_t res = iconv(cd,(void*)&inptr,&insize,&outptr,&outsize);
      if (res == (size_t)(-1)) {
        if (errno == EINVAL)
          break; /* same truncated tail as in the measuring pass */
        goto fail;
      }
    }
    if (iconv(cd,NULL,NULL,&outptr,&outsize) == (size_t)(-1))
      goto fail;
    /* Both passes must agree on the output size; anything else means the
       converter is not deterministic. */
    if (outsize != 0) abort();
  }
  iconv_close(cd);
  return 0;

 fail:
  /* iconv_close() may clobber errno; report the original failure. */
  saved_errno = errno;
  iconv_close(cd);
  errno = saved_errno;
  return -1;
}
|
|
160
|
|
161 #endif
|