comparison libpurple/util.c @ 27930:25319f536d93

propagate from branch 'im.pidgin.pidgin' (head 5aca19ea8964e864e3374ac4fd3c1c81cd23a426) to branch 'im.pidgin.pidgin.yaz' (head 908c143511eb03dcaab43319cf4555427004823f)
author Yoshiki Yazawa <yaz@honeyplanet.jp>
date Tue, 24 Feb 2009 10:26:45 +0000
parents 45434c3fd878
children d8e6a2d592a4
comparison
equal deleted inserted replaced
25144:ac8defe6eee4 27930:25319f536d93
1221 1221
1222 if (check_value != '\0' && *p == check_value) 1222 if (check_value != '\0' && *p == check_value)
1223 return FALSE; 1223 return FALSE;
1224 1224
1225 q = strstr(p, end_token); 1225 q = strstr(p, end_token);
1226 if(q == NULL) //yaz
1227 return FALSE;
1226 1228
1227 /* Trim leading blanks */ 1229 /* Trim leading blanks */
1228 while (*p != '\n' && g_ascii_isspace(*p)) { 1230 while (*p != '\n' && g_ascii_isspace(*p)) {
1229 p += 1; 1231 p += 1;
1230 } 1232 }
1233 while (q > p && g_ascii_isspace(*(q - 1))) { 1235 while (q > p && g_ascii_isspace(*(q - 1))) {
1234 q -= 1; 1236 q -= 1;
1235 } 1237 }
1236 1238
1237 /* Don't bother with null strings */ 1239 /* Don't bother with null strings */
1238 if (p == q) 1240 if (p >= q)
1239 return FALSE; 1241 return FALSE;
1240 1242
1241 if (q != NULL && (!no_value_token || 1243 if (q != NULL && (!no_value_token ||
1242 (no_value_token && strncmp(p, no_value_token, 1244 (no_value_token && strncmp(p, no_value_token,
1243 strlen(no_value_token))))) 1245 strlen(no_value_token)))))
2004 2006
2005 return str2; 2007 return str2;
2006 } 2008 }
2007 2009
2008 static gboolean 2010 static gboolean
2011 is_zenkaku_space(const char *c)
2012 {
2013 gboolean rv = FALSE;
2014 const guchar *u = (guchar *)c;
2015
2016 if(!c || !strcmp(c, "") || strlen(c) < 3)
2017 rv = FALSE;
2018 else if(*u == 0xe3 && *(u+1) == 0x80 && *(u+2) == 0x80)
2019 rv = TRUE;
2020
2021 return rv;
2022 }
2023
2024 static gboolean
2009 badchar(char c) 2025 badchar(char c)
2010 { 2026 {
2011 switch (c) { 2027 switch (c) {
2012 case ' ': 2028 case ' ':
2013 case ',': 2029 case ',':
2041 const char *c, *t, *q = NULL; 2057 const char *c, *t, *q = NULL;
2042 char *tmpurlbuf, *url_buf; 2058 char *tmpurlbuf, *url_buf;
2043 gunichar g; 2059 gunichar g;
2044 gboolean inside_html = FALSE; 2060 gboolean inside_html = FALSE;
2045 int inside_paren = 0; 2061 int inside_paren = 0;
2062 int inside_bracket = 0;
2046 GString *ret; 2063 GString *ret;
2047 2064
2048 if (text == NULL) 2065 if (text == NULL)
2049 return NULL; 2066 return NULL;
2050 2067
2053 c = text; 2070 c = text;
2054 while (*c) { 2071 while (*c) {
2055 2072
2056 if(*c == '(' && !inside_html) { 2073 if(*c == '(' && !inside_html) {
2057 inside_paren++; 2074 inside_paren++;
2075 ret = g_string_append_c(ret, *c);
2076 c++;
2077 }
2078
2079 if(*c == '[' && !inside_html) {
2080 inside_bracket++;
2058 ret = g_string_append_c(ret, *c); 2081 ret = g_string_append_c(ret, *c);
2059 c++; 2082 c++;
2060 } 2083 }
2061 2084
2062 if(inside_html) { 2085 if(inside_html) {
2084 } 2107 }
2085 } else if ((*c=='h') && (!g_ascii_strncasecmp(c, "http://", 7) || 2108 } else if ((*c=='h') && (!g_ascii_strncasecmp(c, "http://", 7) ||
2086 (!g_ascii_strncasecmp(c, "https://", 8)))) { 2109 (!g_ascii_strncasecmp(c, "https://", 8)))) {
2087 t = c; 2110 t = c;
2088 while (1) { 2111 while (1) {
2089 if (badchar(*t) || badentity(t)) { 2112 if (badchar(*t) || badentity(t) || is_zenkaku_space(t)) {
2090 2113
2091 if ((!g_ascii_strncasecmp(c, "http://", 7) && (t - c == 7)) || 2114 if ((!g_ascii_strncasecmp(c, "http://", 7) && (t - c == 7)) ||
2092 (!g_ascii_strncasecmp(c, "https://", 8) && (t - c == 8))) { 2115 (!g_ascii_strncasecmp(c, "https://", 8) && (t - c == 8))) {
2093 break; 2116 break;
2094 } 2117 }
2099 } 2122 }
2100 2123
2101 if (*(t - 1) == '.') 2124 if (*(t - 1) == '.')
2102 t--; 2125 t--;
2103 if ((*(t - 1) == ')' && (inside_paren > 0))) { 2126 if ((*(t - 1) == ')' && (inside_paren > 0))) {
2127 t--;
2128 }
2129 if ((*(t - 1) == ']' && (inside_bracket > 0))) {
2104 t--; 2130 t--;
2105 } 2131 }
2106 2132
2107 url_buf = g_strndup(c, t - c); 2133 url_buf = g_strndup(c, t - c);
2108 tmpurlbuf = purple_unescape_html(url_buf); 2134 tmpurlbuf = purple_unescape_html(url_buf);
2118 } 2144 }
2119 } else if (!g_ascii_strncasecmp(c, "www.", 4) && (c == text || badchar(c[-1]) || badentity(c-1))) { 2145 } else if (!g_ascii_strncasecmp(c, "www.", 4) && (c == text || badchar(c[-1]) || badentity(c-1))) {
2120 if (c[4] != '.') { 2146 if (c[4] != '.') {
2121 t = c; 2147 t = c;
2122 while (1) { 2148 while (1) {
2123 if (badchar(*t) || badentity(t)) { 2149 if (badchar(*t) || badentity(t) || is_zenkaku_space(t)) {
2124 if (t - c == 4) { 2150 if (t - c == 4) {
2125 break; 2151 break;
2126 } 2152 }
2127 2153
2128 if (*(t) == ',' && (*(t + 1) != ' ')) { 2154 if (*(t) == ',' && (*(t + 1) != ' ')) {
2131 } 2157 }
2132 2158
2133 if (*(t - 1) == '.') 2159 if (*(t - 1) == '.')
2134 t--; 2160 t--;
2135 if ((*(t - 1) == ')' && (inside_paren > 0))) { 2161 if ((*(t - 1) == ')' && (inside_paren > 0))) {
2162 t--;
2163 }
2164 if ((*(t - 1) == ']' && (inside_bracket > 0))) {
2136 t--; 2165 t--;
2137 } 2166 }
2138 url_buf = g_strndup(c, t - c); 2167 url_buf = g_strndup(c, t - c);
2139 tmpurlbuf = purple_unescape_html(url_buf); 2168 tmpurlbuf = purple_unescape_html(url_buf);
2140 g_string_append_printf(ret, 2169 g_string_append_printf(ret,
2149 } 2178 }
2150 } 2179 }
2151 } else if (!g_ascii_strncasecmp(c, "ftp://", 6) || !g_ascii_strncasecmp(c, "sftp://", 7)) { 2180 } else if (!g_ascii_strncasecmp(c, "ftp://", 6) || !g_ascii_strncasecmp(c, "sftp://", 7)) {
2152 t = c; 2181 t = c;
2153 while (1) { 2182 while (1) {
2154 if (badchar(*t) || badentity(t)) { 2183 if (badchar(*t) || badentity(t) || is_zenkaku_space(t)) {
2155 2184
2156 if ((!g_ascii_strncasecmp(c, "ftp://", 6) && (t - c == 6)) || 2185 if ((!g_ascii_strncasecmp(c, "ftp://", 6) && (t - c == 6)) ||
2157 (!g_ascii_strncasecmp(c, "sftp://", 7) && (t - c == 7))) { 2186 (!g_ascii_strncasecmp(c, "sftp://", 7) && (t - c == 7))) {
2158 break; 2187 break;
2159 } 2188 }
2160 2189
2161 if (*(t - 1) == '.') 2190 if (*(t - 1) == '.')
2162 t--; 2191 t--;
2163 if ((*(t - 1) == ')' && (inside_paren > 0))) { 2192 if ((*(t - 1) == ')' && (inside_paren > 0))) {
2193 t--;
2194 }
2195 if ((*(t - 1) == ']' && (inside_bracket > 0))) {
2164 t--; 2196 t--;
2165 } 2197 }
2166 url_buf = g_strndup(c, t - c); 2198 url_buf = g_strndup(c, t - c);
2167 tmpurlbuf = purple_unescape_html(url_buf); 2199 tmpurlbuf = purple_unescape_html(url_buf);
2168 g_string_append_printf(ret, "<A HREF=\"%s\">%s</A>", 2200 g_string_append_printf(ret, "<A HREF=\"%s\">%s</A>",
2179 } 2211 }
2180 } else if (!g_ascii_strncasecmp(c, "ftp.", 4) && (c == text || badchar(c[-1]) || badentity(c-1))) { 2212 } else if (!g_ascii_strncasecmp(c, "ftp.", 4) && (c == text || badchar(c[-1]) || badentity(c-1))) {
2181 if (c[4] != '.') { 2213 if (c[4] != '.') {
2182 t = c; 2214 t = c;
2183 while (1) { 2215 while (1) {
2184 if (badchar(*t) || badentity(t)) { 2216 if (badchar(*t) || badentity(t) || is_zenkaku_space(t)) {
2185 if (t - c == 4) { 2217 if (t - c == 4) {
2186 break; 2218 break;
2187 } 2219 }
2188 if (*(t - 1) == '.') 2220 if (*(t - 1) == '.')
2189 t--; 2221 t--;
2190 if ((*(t - 1) == ')' && (inside_paren > 0))) { 2222 if ((*(t - 1) == ')' && (inside_paren > 0))) {
2223 t--;
2224 }
2225 if ((*(t - 1) == ']' && (inside_bracket > 0))) {
2191 t--; 2226 t--;
2192 } 2227 }
2193 url_buf = g_strndup(c, t - c); 2228 url_buf = g_strndup(c, t - c);
2194 tmpurlbuf = purple_unescape_html(url_buf); 2229 tmpurlbuf = purple_unescape_html(url_buf);
2195 g_string_append_printf(ret, 2230 g_string_append_printf(ret,
2206 } 2241 }
2207 } 2242 }
2208 } else if (!g_ascii_strncasecmp(c, "mailto:", 7)) { 2243 } else if (!g_ascii_strncasecmp(c, "mailto:", 7)) {
2209 t = c; 2244 t = c;
2210 while (1) { 2245 while (1) {
2211 if (badchar(*t) || badentity(t)) { 2246 if (badchar(*t) || badentity(t) || is_zenkaku_space(t)) {
2212 char *d; 2247 char *d;
2213 if (t - c == 7) { 2248 if (t - c == 7) {
2214 break; 2249 break;
2215 } 2250 }
2216 if (*(t - 1) == '.') 2251 if (*(t - 1) == '.')
2240 } 2275 }
2241 } else if ((*c=='x') && (!g_ascii_strncasecmp(c, "xmpp:", 5)) && 2276 } else if ((*c=='x') && (!g_ascii_strncasecmp(c, "xmpp:", 5)) &&
2242 (c == text || badchar(c[-1]) || badentity(c-1))) { 2277 (c == text || badchar(c[-1]) || badentity(c-1))) {
2243 t = c; 2278 t = c;
2244 while (1) { 2279 while (1) {
2245 if (badchar(*t) || badentity(t)) { 2280 if (badchar(*t) || badentity(t) || is_zenkaku_space(t)) {
2246 2281
2247 if (t - c == 5) { 2282 if (t - c == 5) {
2248 break; 2283 break;
2249 } 2284 }
2250 2285
2254 } 2289 }
2255 2290
2256 if (*(t - 1) == '.') 2291 if (*(t - 1) == '.')
2257 t--; 2292 t--;
2258 if ((*(t - 1) == ')' && (inside_paren > 0))) { 2293 if ((*(t - 1) == ')' && (inside_paren > 0))) {
2294 t--;
2295 }
2296 if ((*(t - 1) == ']' && (inside_bracket > 0))) {
2259 t--; 2297 t--;
2260 } 2298 }
2261 2299
2262 url_buf = g_strndup(c, t - c); 2300 url_buf = g_strndup(c, t - c);
2263 tmpurlbuf = purple_unescape_html(url_buf); 2301 tmpurlbuf = purple_unescape_html(url_buf);
2340 } 2378 }
2341 } 2379 }
2342 2380
2343 if(*c == ')' && !inside_html) { 2381 if(*c == ')' && !inside_html) {
2344 inside_paren--; 2382 inside_paren--;
2383 ret = g_string_append_c(ret, *c);
2384 c++;
2385 }
2386 if(*c == ']' && !inside_html) {
2387 inside_bracket--;
2345 ret = g_string_append_c(ret, *c); 2388 ret = g_string_append_c(ret, *c);
2346 c++; 2389 c++;
2347 } 2390 }
2348 2391
2349 if (*c == 0) 2392 if (*c == 0)
4886 } else { 4929 } else {
4887 return hostname; 4930 return hostname;
4888 } 4931 }
4889 #endif 4932 #endif
4890 } 4933 }
4934
4935 #ifdef _WIN32
4936 void botch_ucs(gchar *ucs_src, gsize len)
4937 {
4938 /* no operation */
4939 }
4940 #else
4941 void botch_ucs(gchar *ucs_src, gsize len)
4942 {
4943 gint i;
4944 guchar *ucs = (guchar *)ucs_src;
4945
4946 g_return_if_fail(ucs_src != NULL);
4947 g_return_if_fail(len > 0);
4948
4949 for(i=0;i<len;i+=2){
4950 switch(*(ucs+i)){
4951 case 0x00:
4952 switch(*(ucs+i+1)){
4953 case 0xa2: // ¢
4954 *(ucs+i) = 0xff;
4955 *(ucs+i+1) = 0xe0;
4956 break;
4957 case 0xa3: // £
4958 *(ucs+i) = 0xff;
4959 *(ucs+i+1) = 0xe1;
4960 break;
4961 case 0xac: // ¬
4962 *(ucs+i) = 0xff;
4963 *(ucs+i+1) = 0xe2;
4964 break;
4965 }
4966 break;
4967 case 0x20: // ‖
4968 if(*(ucs+i+1) == 0x16){
4969 *(ucs+i) = 0x22;
4970 *(ucs+i+1) = 0x25;
4971 }
4972 break;
4973 case 0x22: // −
4974 if(*(ucs+i+1) == 0x12){
4975 *(ucs+i) = 0xff;
4976 *(ucs+i+1) = 0x0d;
4977 }
4978 break;
4979 case 0x30: // 〜
4980 if(*(ucs+i+1) == 0x1c){
4981 *(ucs+i) = 0xff;
4982 *(ucs+i+1) = 0x5e;
4983 }
4984 break;
4985 }
4986 }
4987
4988 }
4989 #endif
4990
4991 #ifdef _WIN32
4992 void sanitize_ucs(gchar *ucs, gsize len)
4993 {
4994 /* no operation */
4995 }
4996 #else
4997 void sanitize_ucs(gchar *ucs_src, gsize len)
4998 {
4999 gint i;
5000 guchar *ucs = (guchar *)ucs_src;
5001
5002 g_return_if_fail(ucs_src != NULL);
5003 g_return_if_fail(len > 0);
5004
5005 for(i=0;i<len;i+=2){
5006 switch(*(ucs+i)){
5007 case 0x22:
5008 switch(*(ucs+i+1)){
5009 case 0x25: // ‖
5010 *(ucs+i) = 0x20;
5011 *(ucs+i+1) = 0x16;
5012 break;
5013 }
5014 break;
5015 case 0xff:
5016 switch(*(ucs+i+1)){
5017 case 0x0d: // −
5018 *(ucs+i) = 0x22;
5019 *(ucs+i+1) = 0x12;
5020 break;
5021 case 0x5e: // 〜
5022 *(ucs+i) = 0x30;
5023 *(ucs+i+1) = 0x1c;
5024 break;
5025 case 0xe0: // ¢
5026 *(ucs+i) = 0x00;
5027 *(ucs+i+1) = 0xa2;
5028 break;
5029 case 0xe1: // £
5030 *(ucs+i) = 0x00;
5031 *(ucs+i+1) = 0xa3;
5032 break;
5033 case 0xe2: // ¬
5034 *(ucs+i) = 0x00;
5035 *(ucs+i+1) = 0xac;
5036 break;
5037 }
5038 break;
5039 }
5040 }
5041 }
5042 #endif
5043
5044 #ifdef _WIN32
5045 gchar *sanitize_utf(const gchar *msg, gsize len, gsize *newlen)
5046 {
5047 g_return_val_if_fail(msg != NULL, NULL);
5048 if(len == -1)
5049 len = strlen(msg);
5050 g_return_val_if_fail(len > 0, NULL);
5051
5052 if(newlen)
5053 *newlen = len;
5054
5055 return g_strndup(msg, len);
5056 }
5057 #else
5058 gchar *sanitize_utf(const gchar *msg, gsize len, gsize *newlen)
5059 {
5060 gint i;
5061 size_t bytes;
5062 guchar *utf;
5063
5064 g_return_val_if_fail(msg != NULL, NULL);
5065 if(len == -1)
5066 len = strlen(msg);
5067 g_return_val_if_fail(len > 0, NULL);
5068
5069 utf = (guchar *)g_strndup(msg, len);
5070
5071 bytes = len;
5072
5073 for(i=0;i<len;i++){
5074 switch(*(utf+i)){
5075 case 0xe2:
5076 if(*(utf+i+1) == 0x88) {
5077 if(*(utf+i+2) == 0xa5) { // ‖
5078 *(utf+i) = 0xe2;
5079 *(utf+i+1) = 0x80;
5080 *(utf+i+2) = 0x96;
5081 }
5082 }
5083 break;
5084 case 0xef:
5085 switch(*(utf+i+1)){
5086 case 0xbc:
5087 if(*(utf+i+2) == 0x8d) { // −
5088 *(utf+i) = 0xe2;
5089 *(utf+i+1) = 0x88;
5090 *(utf+i+2) = 0x92;
5091 }
5092 break;
5093 case 0xbd:
5094 if(*(utf+i+2) == 0x9e) { // 〜
5095 *(utf+i) = 0xe3;
5096 *(utf+i+1) = 0x80;
5097 *(utf+i+2) = 0x9c;
5098 }
5099 break;
5100 case 0xbf:
5101 switch(*(utf+i+2)){
5102 case 0xa0: // ¢
5103 *(utf+i) = 0xc2;
5104 *(utf+i+1) = 0xa2;
5105 memmove(utf+i+2, utf+i+3,
5106 len-i-3); //shorten by 1byte
5107 bytes--;
5108 break;
5109 case 0xa1: // £
5110 *(utf+i) = 0xc2;
5111 *(utf+i+1) = 0xa3;
5112 memmove(utf+i+2, utf+i+3,
5113 len-i-3); //shorten by 1byte
5114 bytes--;
5115 break;
5116 case 0xa2: // ¬
5117 *(utf+i) = 0xc2;
5118 *(utf+i+1) = 0xac;
5119 memmove(utf+i+2, utf+i+3,
5120 len-i-3); //shorten by 1byte
5121 bytes--;
5122 break;
5123 }
5124 break;
5125 }
5126 break;
5127 }
5128 }
5129 *(utf+bytes)= 0x00; //terminate
5130 if(newlen)
5131 *newlen = bytes;
5132 return (gchar *)utf;
5133 }
5134 #endif
5135
5136 #ifdef _WIN32
5137 gchar *botch_utf(const gchar *msg, gsize len, gsize *newlen)
5138 {
5139 g_return_val_if_fail(msg != NULL, NULL);
5140 if(len == -1)
5141 len = strlen(msg);
5142 g_return_val_if_fail(len > 0, NULL);
5143
5144 if(newlen)
5145 *newlen = len;
5146
5147 return g_strndup(msg, len);
5148 }
5149 #else
5150 gchar *botch_utf(const gchar *msg, gsize len, gsize *newlen)
5151 {
5152 int i,bytes;
5153 unsigned char *utf;
5154
5155 g_return_val_if_fail(msg != NULL, NULL);
5156 if(len == -1)
5157 len = strlen(msg);
5158 g_return_val_if_fail(len > 0, NULL);
5159
5160 bytes = len;
5161
5162 utf = g_malloc0(bytes*3/2+1); /* new length might be 3/2 in the worst case */
5163 memcpy(utf, msg, bytes);
5164
5165 for(i=0;i<bytes;i++){
5166 switch(*(utf+i)){
5167 case 0xc2:
5168 switch(*(utf+i+1)){
5169 case 0xa2: // ¢
5170 *(utf+i) = 0xef;
5171 *(utf+i+1) = 0xbf;
5172 memmove(utf+i+3, utf+i+2, bytes-i-2);
5173 *(utf+i+2) = 0xa0;
5174 bytes++;
5175 break;
5176 case 0xa3: // £
5177 *(utf+i) = 0xef;
5178 *(utf+i+1) = 0xbf;
5179 memmove(utf+i+3, utf+i+2, bytes-i-2);
5180 *(utf+i+2) = 0xa1;
5181 bytes++;
5182 break;
5183 case 0xac: // ¬
5184 *(utf+i) = 0xef;
5185 *(utf+i+1) = 0xbf;
5186 memmove(utf+i+3, utf+i+2, bytes-i-2);
5187 *(utf+i+2) = 0xa2;
5188 bytes++;
5189 break;
5190 }
5191 break;
5192 case 0xe2:
5193 switch(*(utf+i+1)){
5194 case 0x80: // ‖
5195 if(*(utf+i+2) == 0x96){
5196 *(utf+i) = 0xe2;
5197 *(utf+i+1) = 0x88;
5198 *(utf+i+2) = 0xa5;
5199 }
5200 break;
5201 case 0x88: // −
5202 if(*(utf+i+1) == 0x92){
5203 *(utf+i) = 0xef;
5204 *(utf+i+1) = 0xbc;
5205 *(utf+i+2) = 0x8d;
5206 }
5207 break;
5208 }
5209 break;
5210 case 0xe3: // 〜
5211 if(*(utf+i+1) == 0x80){
5212 if(*(utf+i+2) == 0x9c){
5213 *(utf+i) = 0xef;
5214 *(utf+i+1) = 0xbd;
5215 *(utf+i+2) = 0x9e;
5216 }
5217 }
5218 break;
5219 } //switch
5220 }
5221 *(utf+bytes) = 0x00; //terminate
5222 if(newlen)
5223 *newlen = bytes;
5224 return (gchar *)utf;
5225 }
5226 #endif