comparison src/util.c @ 9161:c3fa2ad099a2

[gaim-migrate @ 9946] wing added support for yahoo profiles in, well, pretty much every language. Looks pretty impressive to me. Someone may want to double check his src/util.c changes. I think we have some crazy patch writers who know those functions better than me. This also introduces a couple of warnings because wing didn't add his new util.c function to util.h. Rather than adding it myself, I'm going to bug him to add it and document it. committer: Tailor Script <tailor@pidgin.im>
author Tim Ringenbach <marv@pidgin.im>
date Wed, 02 Jun 2004 00:44:51 +0000
parents dabfa4184db8
children 456ef1f4ba19
comparison
equal deleted inserted replaced
9160:dabfa4184db8 9161:c3fa2ad099a2
765 765
766 if (check_value != '\0' && *p == check_value) 766 if (check_value != '\0' && *p == check_value)
767 return FALSE; 767 return FALSE;
768 768
769 q = strstr(p, end_token); 769 q = strstr(p, end_token);
770
771 /* Trim leading blanks */
772 while (*p != '\n' && g_ascii_isspace(*p)) {
773 p += 1;
774 }
775
776 /* Trim trailing blanks */
777 while (q > p && g_ascii_isspace(*(q - 1))) {
778 q -= 1;
779 }
780
781 /* Don't bother with null strings */
782 if (p == q)
783 return FALSE;
770 784
771 if (q != NULL && (!no_value_token || 785 if (q != NULL && (!no_value_token ||
772 (no_value_token && strncmp(p, no_value_token, 786 (no_value_token && strncmp(p, no_value_token,
773 strlen(no_value_token))))) 787 strlen(no_value_token)))))
774 { 788 {
1206 *plain_out = g_strdup(plain->str); 1220 *plain_out = g_strdup(plain->str);
1207 g_string_free(xhtml, TRUE); 1221 g_string_free(xhtml, TRUE);
1208 g_string_free(plain, TRUE); 1222 g_string_free(plain, TRUE);
1209 } 1223 }
1210 1224
1225 /* The following are probably reasonable changes:
1226 * - \n should be converted to a normal space
1227 * - in addition to <br>, <p> and <div> etc. should also be converted into \n
1228 * - We want to turn </td>#whitespace<td> sequences into a single blank
1229 * - We want to turn </tr>#whitespace<tr> sequences into a single \n
1230 * - We should also remove all <script>...</script> blocks etc.; this should be fixed at some point
1231 */
1232
1211 char * 1233 char *
1212 gaim_markup_strip_html(const char *str) 1234 gaim_markup_strip_html(const char *str)
1213 { 1235 {
1214 int i, j, k; 1236 int i, j, k;
1215 gboolean visible = TRUE; 1237 gboolean visible = TRUE;
1238 gboolean closing_td_p = FALSE;
1216 gchar *str2; 1239 gchar *str2;
1217 1240
1218 if(!str) 1241 if(!str)
1219 return NULL; 1242 return NULL;
1220 1243
1222 1245
1223 for (i = 0, j = 0; str2[i]; i++) 1246 for (i = 0, j = 0; str2[i]; i++)
1224 { 1247 {
1225 if (str2[i] == '<') 1248 if (str2[i] == '<')
1226 { 1249 {
1227 if (strncasecmp(str2 + i, "<br>", 4) == 0) 1250 if (strncasecmp(str2 + i, "<td", 3) == 0 && closing_td_p)
1228 { 1251 {
1229 str2[j++] = '\n'; 1252 str2[j++] = ' ';
1230 i = i + 3; 1253 visible = TRUE;
1231 continue; 1254 }
1255 else if (strncasecmp(str2 + i, "</td>", 5) == 0)
1256 {
1257 closing_td_p = TRUE;
1258 visible = FALSE;
1259 }
1260 else
1261 {
1262 closing_td_p = FALSE;
1263 visible = TRUE;
1232 } 1264 }
1233 1265
1234 k = i + 1; 1266 k = i + 1;
1235 1267
1236 if(g_ascii_isspace(str2[k])) 1268 if(g_ascii_isspace(str2[k]))
1237 visible = TRUE; 1269 visible = TRUE;
1238 else 1270 else
1239 { 1271 {
1240 while (str2[k]) 1272 /* Scan until we end the tag either implicitly (closed start
1273 * tag) or explicitly, using a sloppy method (i.e., < or >
1274 * inside quoted attributes will screw us up)
1275 */
1276 while (str2[k] && str2[k] != '<' && str2[k] != '>')
1241 { 1277 {
1242 if (str2[k] == '<')
1243 {
1244 visible = TRUE;
1245 break;
1246 }
1247
1248 if (str2[k] == '>')
1249 {
1250 visible = FALSE;
1251 break;
1252 }
1253
1254 k++; 1278 k++;
1255 } 1279 }
1256 } 1280 /* Check for tags which should be mapped to newline */
1257 } 1281 if (strncasecmp(str2 + i, "<p>", 3) == 0
1258 else if (str2[i] == '>' && !visible) 1282 || strncasecmp(str2 + i, "<tr", 3) == 0
1283 || strncasecmp(str2 + i, "<br", 3) == 0
1284 || strncasecmp(str2 + i, "<li", 3) == 0
1285 || strncasecmp(str2 + i, "<div", 4) == 0
1286 || strncasecmp(str2 + i, "</table>", 8) == 0)
1287 {
1288 str2[j++] = '\n';
1289 }
1290 /* Update the index and continue checking after the tag */
1291 i = (str2[k] == '<')? k - 1: k;
1292 continue;
1293 }
1294 }
1295 else if (!g_ascii_isspace(str2[i]))
1259 { 1296 {
1260 visible = TRUE; 1297 visible = TRUE;
1261 continue;
1262 } 1298 }
1263 1299
1264 if (str2[i] == '&' && strncasecmp(str2 + i, "&quot;", 6) == 0) 1300 if (str2[i] == '&' && strncasecmp(str2 + i, "&quot;", 6) == 0)
1265 { 1301 {
1266 str2[j++] = '\"'; 1302 str2[j++] = '\"';
1288 i = i + 3; 1324 i = i + 3;
1289 continue; 1325 continue;
1290 } 1326 }
1291 1327
1292 if (visible) 1328 if (visible)
1293 str2[j++] = str2[i]; 1329 str2[j++] = g_ascii_isspace(str2[i])? ' ': str2[i];
1294 } 1330 }
1295 1331
1296 str2[j] = '\0'; 1332 str2[j] = '\0';
1297 1333
1298 return str2; 1334 return str2;
2669 } 2705 }
2670 2706
2671 return(NULL); 2707 return(NULL);
2672 } 2708 }
2673 2709
2710 char *
2711 gaim_utf8_ncr_decode(const char *in)
2712 {
2713 GString *out = g_string_new("");
2714 int i;
2715
2716 g_return_val_if_fail(in != NULL, NULL);
2717 g_return_val_if_fail(g_utf8_validate(in, -1, NULL), NULL);
2718
2719 for (i = 0; in[i]; i += 1) {
2720 gboolean ncr_found_p = FALSE;
2721 if (in[i] == '&' && in[i + 1] == '#' && isdigit(in[i + 2])) {
2722 gunichar wc;
2723 int j;
2724 for (wc = 0, j = i + 2; isdigit(in[j]); j += 1) {
2725 wc *= 10;
2726 wc += in[j] - '0';
2727 }
2728 if (in[j] == ';') { /* Technically not completely correct */
2729 g_string_append_unichar(out, wc);
2730 i = j;
2731 ncr_found_p = TRUE;
2732 }
2733 }
2734 if (!ncr_found_p) {
2735 g_string_append_c(out, in[i]);
2736 }
2737 }
2738 return g_string_free(out, FALSE);
2739 }
2740
2674 int 2741 int
2675 gaim_utf8_strcasecmp(const char *a, const char *b) 2742 gaim_utf8_strcasecmp(const char *a, const char *b)
2676 { 2743 {
2677 char *a_norm = NULL; 2744 char *a_norm = NULL;
2678 char *b_norm = NULL; 2745 char *b_norm = NULL;