comparison libpurple/util.c @ 27944:cfa78428d8b2

propagate from branch 'im.pidgin.pidgin' (head 8f2b3a7063a7a0e5cae9de55e55f7066c0d202d6) to branch 'im.pidgin.pidgin.yaz' (head f0674cac79a9d60c5a3d998b37fc2cd75ce0a6b2)
author Yoshiki Yazawa <yaz@honeyplanet.jp>
date Sat, 02 May 2009 20:35:36 +0000
parents ab31daf1c1ee 1688f7e15530
children c2ac87c5a035
comparison
equal deleted inserted replaced
26861:d387f1164b3a 27944:cfa78428d8b2
1221 1221
1222 if (check_value != '\0' && *p == check_value) 1222 if (check_value != '\0' && *p == check_value)
1223 return FALSE; 1223 return FALSE;
1224 1224
1225 q = strstr(p, end_token); 1225 q = strstr(p, end_token);
1226 if(q == NULL) //yaz
1227 return FALSE;
1226 1228
1227 /* Trim leading blanks */ 1229 /* Trim leading blanks */
1228 while (*p != '\n' && g_ascii_isspace(*p)) { 1230 while (*p != '\n' && g_ascii_isspace(*p)) {
1229 p += 1; 1231 p += 1;
1230 } 1232 }
1233 while (q > p && g_ascii_isspace(*(q - 1))) { 1235 while (q > p && g_ascii_isspace(*(q - 1))) {
1234 q -= 1; 1236 q -= 1;
1235 } 1237 }
1236 1238
1237 /* Don't bother with null strings */ 1239 /* Don't bother with null strings */
1238 if (p == q) 1240 if (p >= q)
1239 return FALSE; 1241 return FALSE;
1240 1242
1241 if (q != NULL && (!no_value_token || 1243 if (q != NULL && (!no_value_token ||
1242 (no_value_token && strncmp(p, no_value_token, 1244 (no_value_token && strncmp(p, no_value_token,
1243 strlen(no_value_token))))) 1245 strlen(no_value_token)))))
2004 2006
2005 return str2; 2007 return str2;
2006 } 2008 }
2007 2009
2008 static gboolean 2010 static gboolean
2011 is_zenkaku_space(const char *c)
2012 {
2013 gboolean rv = FALSE;
2014 const guchar *u = (guchar *)c;
2015
2016 if(!c || !strcmp(c, "") || strlen(c) < 3)
2017 rv = FALSE;
2018 else if(*u == 0xe3 && *(u+1) == 0x80 && *(u+2) == 0x80)
2019 rv = TRUE;
2020
2021 return rv;
2022 }
2023
2024 static gboolean
2009 badchar(char c) 2025 badchar(char c)
2010 { 2026 {
2011 switch (c) { 2027 switch (c) {
2012 case ' ': 2028 case ' ':
2013 case ',': 2029 case ',':
2041 const char *c, *t, *q = NULL; 2057 const char *c, *t, *q = NULL;
2042 char *tmpurlbuf, *url_buf; 2058 char *tmpurlbuf, *url_buf;
2043 gunichar g; 2059 gunichar g;
2044 gboolean inside_html = FALSE; 2060 gboolean inside_html = FALSE;
2045 int inside_paren = 0; 2061 int inside_paren = 0;
2062 int inside_bracket = 0;
2046 GString *ret; 2063 GString *ret;
2047 2064
2048 if (text == NULL) 2065 if (text == NULL)
2049 return NULL; 2066 return NULL;
2050 2067
2053 c = text; 2070 c = text;
2054 while (*c) { 2071 while (*c) {
2055 2072
2056 if(*c == '(' && !inside_html) { 2073 if(*c == '(' && !inside_html) {
2057 inside_paren++; 2074 inside_paren++;
2075 ret = g_string_append_c(ret, *c);
2076 c++;
2077 }
2078
2079 if(*c == '[' && !inside_html) {
2080 inside_bracket++;
2058 ret = g_string_append_c(ret, *c); 2081 ret = g_string_append_c(ret, *c);
2059 c++; 2082 c++;
2060 } 2083 }
2061 2084
2062 if(inside_html) { 2085 if(inside_html) {
2084 } 2107 }
2085 } else if ((*c=='h') && (!g_ascii_strncasecmp(c, "http://", 7) || 2108 } else if ((*c=='h') && (!g_ascii_strncasecmp(c, "http://", 7) ||
2086 (!g_ascii_strncasecmp(c, "https://", 8)))) { 2109 (!g_ascii_strncasecmp(c, "https://", 8)))) {
2087 t = c; 2110 t = c;
2088 while (1) { 2111 while (1) {
2089 if (badchar(*t) || badentity(t)) { 2112 if (badchar(*t) || badentity(t) || is_zenkaku_space(t)) {
2090 2113
2091 if ((!g_ascii_strncasecmp(c, "http://", 7) && (t - c == 7)) || 2114 if ((!g_ascii_strncasecmp(c, "http://", 7) && (t - c == 7)) ||
2092 (!g_ascii_strncasecmp(c, "https://", 8) && (t - c == 8))) { 2115 (!g_ascii_strncasecmp(c, "https://", 8) && (t - c == 8))) {
2093 break; 2116 break;
2094 } 2117 }
2099 } 2122 }
2100 2123
2101 if (*(t - 1) == '.') 2124 if (*(t - 1) == '.')
2102 t--; 2125 t--;
2103 if ((*(t - 1) == ')' && (inside_paren > 0))) { 2126 if ((*(t - 1) == ')' && (inside_paren > 0))) {
2127 t--;
2128 }
2129 if ((*(t - 1) == ']' && (inside_bracket > 0))) {
2104 t--; 2130 t--;
2105 } 2131 }
2106 2132
2107 url_buf = g_strndup(c, t - c); 2133 url_buf = g_strndup(c, t - c);
2108 tmpurlbuf = purple_unescape_html(url_buf); 2134 tmpurlbuf = purple_unescape_html(url_buf);
2118 } 2144 }
2119 } else if (!g_ascii_strncasecmp(c, "www.", 4) && (c == text || badchar(c[-1]) || badentity(c-1))) { 2145 } else if (!g_ascii_strncasecmp(c, "www.", 4) && (c == text || badchar(c[-1]) || badentity(c-1))) {
2120 if (c[4] != '.') { 2146 if (c[4] != '.') {
2121 t = c; 2147 t = c;
2122 while (1) { 2148 while (1) {
2123 if (badchar(*t) || badentity(t)) { 2149 if (badchar(*t) || badentity(t) || is_zenkaku_space(t)) {
2124 if (t - c == 4) { 2150 if (t - c == 4) {
2125 break; 2151 break;
2126 } 2152 }
2127 2153
2128 if (*(t) == ',' && (*(t + 1) != ' ')) { 2154 if (*(t) == ',' && (*(t + 1) != ' ')) {
2131 } 2157 }
2132 2158
2133 if (*(t - 1) == '.') 2159 if (*(t - 1) == '.')
2134 t--; 2160 t--;
2135 if ((*(t - 1) == ')' && (inside_paren > 0))) { 2161 if ((*(t - 1) == ')' && (inside_paren > 0))) {
2162 t--;
2163 }
2164 if ((*(t - 1) == ']' && (inside_bracket > 0))) {
2136 t--; 2165 t--;
2137 } 2166 }
2138 url_buf = g_strndup(c, t - c); 2167 url_buf = g_strndup(c, t - c);
2139 tmpurlbuf = purple_unescape_html(url_buf); 2168 tmpurlbuf = purple_unescape_html(url_buf);
2140 g_string_append_printf(ret, 2169 g_string_append_printf(ret,
2149 } 2178 }
2150 } 2179 }
2151 } else if (!g_ascii_strncasecmp(c, "ftp://", 6) || !g_ascii_strncasecmp(c, "sftp://", 7)) { 2180 } else if (!g_ascii_strncasecmp(c, "ftp://", 6) || !g_ascii_strncasecmp(c, "sftp://", 7)) {
2152 t = c; 2181 t = c;
2153 while (1) { 2182 while (1) {
2154 if (badchar(*t) || badentity(t)) { 2183 if (badchar(*t) || badentity(t) || is_zenkaku_space(t)) {
2155 2184
2156 if ((!g_ascii_strncasecmp(c, "ftp://", 6) && (t - c == 6)) || 2185 if ((!g_ascii_strncasecmp(c, "ftp://", 6) && (t - c == 6)) ||
2157 (!g_ascii_strncasecmp(c, "sftp://", 7) && (t - c == 7))) { 2186 (!g_ascii_strncasecmp(c, "sftp://", 7) && (t - c == 7))) {
2158 break; 2187 break;
2159 } 2188 }
2160 2189
2161 if (*(t - 1) == '.') 2190 if (*(t - 1) == '.')
2162 t--; 2191 t--;
2163 if ((*(t - 1) == ')' && (inside_paren > 0))) { 2192 if ((*(t - 1) == ')' && (inside_paren > 0))) {
2193 t--;
2194 }
2195 if ((*(t - 1) == ']' && (inside_bracket > 0))) {
2164 t--; 2196 t--;
2165 } 2197 }
2166 url_buf = g_strndup(c, t - c); 2198 url_buf = g_strndup(c, t - c);
2167 tmpurlbuf = purple_unescape_html(url_buf); 2199 tmpurlbuf = purple_unescape_html(url_buf);
2168 g_string_append_printf(ret, "<A HREF=\"%s\">%s</A>", 2200 g_string_append_printf(ret, "<A HREF=\"%s\">%s</A>",
2179 } 2211 }
2180 } else if (!g_ascii_strncasecmp(c, "ftp.", 4) && (c == text || badchar(c[-1]) || badentity(c-1))) { 2212 } else if (!g_ascii_strncasecmp(c, "ftp.", 4) && (c == text || badchar(c[-1]) || badentity(c-1))) {
2181 if (c[4] != '.') { 2213 if (c[4] != '.') {
2182 t = c; 2214 t = c;
2183 while (1) { 2215 while (1) {
2184 if (badchar(*t) || badentity(t)) { 2216 if (badchar(*t) || badentity(t) || is_zenkaku_space(t)) {
2185 if (t - c == 4) { 2217 if (t - c == 4) {
2186 break; 2218 break;
2187 } 2219 }
2188 if (*(t - 1) == '.') 2220 if (*(t - 1) == '.')
2189 t--; 2221 t--;
2190 if ((*(t - 1) == ')' && (inside_paren > 0))) { 2222 if ((*(t - 1) == ')' && (inside_paren > 0))) {
2223 t--;
2224 }
2225 if ((*(t - 1) == ']' && (inside_bracket > 0))) {
2191 t--; 2226 t--;
2192 } 2227 }
2193 url_buf = g_strndup(c, t - c); 2228 url_buf = g_strndup(c, t - c);
2194 tmpurlbuf = purple_unescape_html(url_buf); 2229 tmpurlbuf = purple_unescape_html(url_buf);
2195 g_string_append_printf(ret, 2230 g_string_append_printf(ret,
2206 } 2241 }
2207 } 2242 }
2208 } else if (!g_ascii_strncasecmp(c, "mailto:", 7)) { 2243 } else if (!g_ascii_strncasecmp(c, "mailto:", 7)) {
2209 t = c; 2244 t = c;
2210 while (1) { 2245 while (1) {
2211 if (badchar(*t) || badentity(t)) { 2246 if (badchar(*t) || badentity(t) || is_zenkaku_space(t)) {
2212 char *d; 2247 char *d;
2213 if (t - c == 7) { 2248 if (t - c == 7) {
2214 break; 2249 break;
2215 } 2250 }
2216 if (*(t - 1) == '.') 2251 if (*(t - 1) == '.')
2240 } 2275 }
2241 } else if ((*c=='x') && (!g_ascii_strncasecmp(c, "xmpp:", 5)) && 2276 } else if ((*c=='x') && (!g_ascii_strncasecmp(c, "xmpp:", 5)) &&
2242 (c == text || badchar(c[-1]) || badentity(c-1))) { 2277 (c == text || badchar(c[-1]) || badentity(c-1))) {
2243 t = c; 2278 t = c;
2244 while (1) { 2279 while (1) {
2245 if (badchar(*t) || badentity(t)) { 2280 if (badchar(*t) || badentity(t) || is_zenkaku_space(t)) {
2246 2281
2247 if (t - c == 5) { 2282 if (t - c == 5) {
2248 break; 2283 break;
2249 } 2284 }
2250 2285
2254 } 2289 }
2255 2290
2256 if (*(t - 1) == '.') 2291 if (*(t - 1) == '.')
2257 t--; 2292 t--;
2258 if ((*(t - 1) == ')' && (inside_paren > 0))) { 2293 if ((*(t - 1) == ')' && (inside_paren > 0))) {
2294 t--;
2295 }
2296 if ((*(t - 1) == ']' && (inside_bracket > 0))) {
2259 t--; 2297 t--;
2260 } 2298 }
2261 2299
2262 url_buf = g_strndup(c, t - c); 2300 url_buf = g_strndup(c, t - c);
2263 tmpurlbuf = purple_unescape_html(url_buf); 2301 tmpurlbuf = purple_unescape_html(url_buf);
2340 } 2378 }
2341 } 2379 }
2342 2380
2343 if(*c == ')' && !inside_html) { 2381 if(*c == ')' && !inside_html) {
2344 inside_paren--; 2382 inside_paren--;
2383 ret = g_string_append_c(ret, *c);
2384 c++;
2385 }
2386 if(*c == ']' && !inside_html) {
2387 inside_bracket--;
2345 ret = g_string_append_c(ret, *c); 2388 ret = g_string_append_c(ret, *c);
2346 c++; 2389 c++;
2347 } 2390 }
2348 2391
2349 if (*c == 0) 2392 if (*c == 0)
4847 } else { 4890 } else {
4848 return hostname; 4891 return hostname;
4849 } 4892 }
4850 #endif 4893 #endif
4851 } 4894 }
4895
4896 #ifdef _WIN32
4897 void botch_ucs(gchar *ucs_src, gsize len)
4898 {
4899 /* no operation */
4900 }
4901 #else
4902 void botch_ucs(gchar *ucs_src, gsize len)
4903 {
4904 gint i;
4905 guchar *ucs = (guchar *)ucs_src;
4906
4907 g_return_if_fail(ucs_src != NULL);
4908 g_return_if_fail(len > 0);
4909
4910 for(i=0;i<len;i+=2){
4911 switch(*(ucs+i)){
4912 case 0x00:
4913 switch(*(ucs+i+1)){
4914 case 0xa2: // ¢
4915 *(ucs+i) = 0xff;
4916 *(ucs+i+1) = 0xe0;
4917 break;
4918 case 0xa3: // £
4919 *(ucs+i) = 0xff;
4920 *(ucs+i+1) = 0xe1;
4921 break;
4922 case 0xac: // ¬
4923 *(ucs+i) = 0xff;
4924 *(ucs+i+1) = 0xe2;
4925 break;
4926 }
4927 break;
4928 case 0x20: // ‖
4929 if(*(ucs+i+1) == 0x16){
4930 *(ucs+i) = 0x22;
4931 *(ucs+i+1) = 0x25;
4932 }
4933 break;
4934 case 0x22: // −
4935 if(*(ucs+i+1) == 0x12){
4936 *(ucs+i) = 0xff;
4937 *(ucs+i+1) = 0x0d;
4938 }
4939 break;
4940 case 0x30: // 〜
4941 if(*(ucs+i+1) == 0x1c){
4942 *(ucs+i) = 0xff;
4943 *(ucs+i+1) = 0x5e;
4944 }
4945 break;
4946 }
4947 }
4948
4949 }
4950 #endif
4951
4952 #ifdef _WIN32
4953 void sanitize_ucs(gchar *ucs, gsize len)
4954 {
4955 /* no operation */
4956 }
4957 #else
4958 void sanitize_ucs(gchar *ucs_src, gsize len)
4959 {
4960 gint i;
4961 guchar *ucs = (guchar *)ucs_src;
4962
4963 g_return_if_fail(ucs_src != NULL);
4964 g_return_if_fail(len > 0);
4965
4966 for(i=0;i<len;i+=2){
4967 switch(*(ucs+i)){
4968 case 0x22:
4969 switch(*(ucs+i+1)){
4970 case 0x25: // ‖
4971 *(ucs+i) = 0x20;
4972 *(ucs+i+1) = 0x16;
4973 break;
4974 }
4975 break;
4976 case 0xff:
4977 switch(*(ucs+i+1)){
4978 case 0x0d: // −
4979 *(ucs+i) = 0x22;
4980 *(ucs+i+1) = 0x12;
4981 break;
4982 case 0x5e: // 〜
4983 *(ucs+i) = 0x30;
4984 *(ucs+i+1) = 0x1c;
4985 break;
4986 case 0xe0: // ¢
4987 *(ucs+i) = 0x00;
4988 *(ucs+i+1) = 0xa2;
4989 break;
4990 case 0xe1: // £
4991 *(ucs+i) = 0x00;
4992 *(ucs+i+1) = 0xa3;
4993 break;
4994 case 0xe2: // ¬
4995 *(ucs+i) = 0x00;
4996 *(ucs+i+1) = 0xac;
4997 break;
4998 }
4999 break;
5000 }
5001 }
5002 }
5003 #endif
5004
5005 #ifdef _WIN32
5006 gchar *sanitize_utf(const gchar *msg, gsize len, gsize *newlen)
5007 {
5008 g_return_val_if_fail(msg != NULL, NULL);
5009 if(len == -1)
5010 len = strlen(msg);
5011 g_return_val_if_fail(len > 0, NULL);
5012
5013 if(newlen)
5014 *newlen = len;
5015
5016 return g_strndup(msg, len);
5017 }
5018 #else
5019 gchar *sanitize_utf(const gchar *msg, gsize len, gsize *newlen)
5020 {
5021 gint i;
5022 size_t bytes;
5023 guchar *utf;
5024
5025 g_return_val_if_fail(msg != NULL, NULL);
5026 if(len == -1)
5027 len = strlen(msg);
5028 g_return_val_if_fail(len > 0, NULL);
5029
5030 utf = (guchar *)g_strndup(msg, len);
5031
5032 bytes = len;
5033
5034 for(i=0;i<len;i++){
5035 switch(*(utf+i)){
5036 case 0xe2:
5037 if(*(utf+i+1) == 0x88) {
5038 if(*(utf+i+2) == 0xa5) { // ‖
5039 *(utf+i) = 0xe2;
5040 *(utf+i+1) = 0x80;
5041 *(utf+i+2) = 0x96;
5042 }
5043 }
5044 break;
5045 case 0xef:
5046 switch(*(utf+i+1)){
5047 case 0xbc:
5048 if(*(utf+i+2) == 0x8d) { // −
5049 *(utf+i) = 0xe2;
5050 *(utf+i+1) = 0x88;
5051 *(utf+i+2) = 0x92;
5052 }
5053 break;
5054 case 0xbd:
5055 if(*(utf+i+2) == 0x9e) { // 〜
5056 *(utf+i) = 0xe3;
5057 *(utf+i+1) = 0x80;
5058 *(utf+i+2) = 0x9c;
5059 }
5060 break;
5061 case 0xbf:
5062 switch(*(utf+i+2)){
5063 case 0xa0: // ¢
5064 *(utf+i) = 0xc2;
5065 *(utf+i+1) = 0xa2;
5066 memmove(utf+i+2, utf+i+3,
5067 len-i-3); //shorten by 1byte
5068 bytes--;
5069 break;
5070 case 0xa1: // £
5071 *(utf+i) = 0xc2;
5072 *(utf+i+1) = 0xa3;
5073 memmove(utf+i+2, utf+i+3,
5074 len-i-3); //shorten by 1byte
5075 bytes--;
5076 break;
5077 case 0xa2: // ¬
5078 *(utf+i) = 0xc2;
5079 *(utf+i+1) = 0xac;
5080 memmove(utf+i+2, utf+i+3,
5081 len-i-3); //shorten by 1byte
5082 bytes--;
5083 break;
5084 }
5085 break;
5086 }
5087 break;
5088 }
5089 }
5090 *(utf+bytes)= 0x00; //terminate
5091 if(newlen)
5092 *newlen = bytes;
5093 return (gchar *)utf;
5094 }
5095 #endif
5096
5097 #ifdef _WIN32
5098 gchar *botch_utf(const gchar *msg, gsize len, gsize *newlen)
5099 {
5100 g_return_val_if_fail(msg != NULL, NULL);
5101 if(len == -1)
5102 len = strlen(msg);
5103 g_return_val_if_fail(len > 0, NULL);
5104
5105 if(newlen)
5106 *newlen = len;
5107
5108 return g_strndup(msg, len);
5109 }
5110 #else
5111 gchar *botch_utf(const gchar *msg, gsize len, gsize *newlen)
5112 {
5113 int i,bytes;
5114 unsigned char *utf;
5115
5116 g_return_val_if_fail(msg != NULL, NULL);
5117 if(len == -1)
5118 len = strlen(msg);
5119 g_return_val_if_fail(len > 0, NULL);
5120
5121 bytes = len;
5122
5123 utf = g_malloc0(bytes*3/2+1); /* new length might be 3/2 in the worst case */
5124 memcpy(utf, msg, bytes);
5125
5126 for(i=0;i<bytes;i++){
5127 switch(*(utf+i)){
5128 case 0xc2:
5129 switch(*(utf+i+1)){
5130 case 0xa2: // ¢
5131 *(utf+i) = 0xef;
5132 *(utf+i+1) = 0xbf;
5133 memmove(utf+i+3, utf+i+2, bytes-i-2);
5134 *(utf+i+2) = 0xa0;
5135 bytes++;
5136 break;
5137 case 0xa3: // £
5138 *(utf+i) = 0xef;
5139 *(utf+i+1) = 0xbf;
5140 memmove(utf+i+3, utf+i+2, bytes-i-2);
5141 *(utf+i+2) = 0xa1;
5142 bytes++;
5143 break;
5144 case 0xac: // ¬
5145 *(utf+i) = 0xef;
5146 *(utf+i+1) = 0xbf;
5147 memmove(utf+i+3, utf+i+2, bytes-i-2);
5148 *(utf+i+2) = 0xa2;
5149 bytes++;
5150 break;
5151 }
5152 break;
5153 case 0xe2:
5154 switch(*(utf+i+1)){
5155 case 0x80: // ‖
5156 if(*(utf+i+2) == 0x96){
5157 *(utf+i) = 0xe2;
5158 *(utf+i+1) = 0x88;
5159 *(utf+i+2) = 0xa5;
5160 }
5161 break;
5162 case 0x88: // −
5163 if(*(utf+i+1) == 0x92){
5164 *(utf+i) = 0xef;
5165 *(utf+i+1) = 0xbc;
5166 *(utf+i+2) = 0x8d;
5167 }
5168 break;
5169 }
5170 break;
5171 case 0xe3: // 〜
5172 if(*(utf+i+1) == 0x80){
5173 if(*(utf+i+2) == 0x9c){
5174 *(utf+i) = 0xef;
5175 *(utf+i+1) = 0xbd;
5176 *(utf+i+2) = 0x9e;
5177 }
5178 }
5179 break;
5180 } //switch
5181 }
5182 *(utf+bytes) = 0x00; //terminate
5183 if(newlen)
5184 *newlen = bytes;
5185 return (gchar *)utf;
5186 }
5187 #endif