comparison libpurple/protocols/yahoo/util.c @ 28435:4a2adf413aaa

merged with im.pidgin.pidgin
author Yoshiki Yazawa <yaz@honeyplanet.jp>
date Mon, 24 Aug 2009 17:20:46 +0900
parents 94abbb806273 1d2425f2e4ff
children 7c33fccedea8
comparison
equal deleted inserted replaced
28308:f419bf1e5851 28435:4a2adf413aaa
517 xmlnode *html, *cur; 517 xmlnode *html, *cur;
518 GString *cdata = g_string_new(NULL); 518 GString *cdata = g_string_new(NULL);
519 int i, j; 519 int i, j;
520 gboolean no_more_gt_brackets = FALSE; 520 gboolean no_more_gt_brackets = FALSE;
521 const char *match; 521 const char *match;
522 gchar *xmlstr1, *xmlstr2; 522 gchar *xmlstr1, *xmlstr2, *esc;
523 523
524 x_len = strlen(x); 524 x_len = strlen(x);
525 html = xmlnode_new("html"); 525 html = xmlnode_new("html");
526 526
527 cur = html; 527 cur = html;
557 cur = xmlnode_new_child(cur, "font"); 557 cur = xmlnode_new_child(cur, "font");
558 xmlnode_set_attrib(cur, "color", code); 558 xmlnode_set_attrib(cur, "color", code);
559 #endif /* !USE_CSS_FORMATTING */ 559 #endif /* !USE_CSS_FORMATTING */
560 560
561 } else if ((match = g_hash_table_lookup(esc_codes_ht, code))) { 561 } else if ((match = g_hash_table_lookup(esc_codes_ht, code))) {
562 gboolean is_closing_tag; 562 /* Some tags are in the hash table only because we
563 gchar *tag_name; 563 * want to ignore them */
564 564 if (match[0] != '\0') {
565 tag_name = yahoo_markup_get_tag_name(match, &is_closing_tag); 565 gboolean is_closing_tag;
566 yahoo_codes_to_html_add_tag(&cur, match, is_closing_tag, tag_name, FALSE); 566 gchar *tag_name;
567 g_free(tag_name); 567 tag_name = yahoo_markup_get_tag_name(match, &is_closing_tag);
568 yahoo_codes_to_html_add_tag(&cur, match, is_closing_tag, tag_name, FALSE);
569 g_free(tag_name);
570 }
568 571
569 } else { 572 } else {
570 purple_debug_error("yahoo", 573 purple_debug_error("yahoo",
571 "Ignoring unknown ansi code 'ESC[%sm'.\n", code); 574 "Ignoring unknown ansi code 'ESC[%sm'.\n", code);
572 } 575 }
612 615
613 match = g_hash_table_lookup(tags_ht, tag_name); 616 match = g_hash_table_lookup(tags_ht, tag_name);
614 if (match == NULL) { 617 if (match == NULL) {
615 /* Unknown tag. The user probably typed a less-than sign */ 618 /* Unknown tag. The user probably typed a less-than sign */
616 g_string_append_c(cdata, x[i]); 619 g_string_append_c(cdata, x[i]);
617 no_more_gt_brackets = TRUE;
618 g_free(tag); 620 g_free(tag);
619 g_free(tag_name); 621 g_free(tag_name);
620 break; 622 break;
621 } 623 }
622 624
661 /* This probably isn't necessary, especially if we made the outter HTML 663 /* This probably isn't necessary, especially if we made the outter HTML
662 * node an empty span. But the HTML is simpler this way. */ 664 * node an empty span. But the HTML is simpler this way. */
663 xmlstr2 = g_strndup(xmlstr1 + 6, strlen(xmlstr1) - 13); 665 xmlstr2 = g_strndup(xmlstr1 + 6, strlen(xmlstr1) - 13);
664 g_free(xmlstr1); 666 g_free(xmlstr1);
665 667
666 purple_debug_misc("yahoo", "yahoo_codes_to_html: Returning string: '%s'.\n", xmlstr2); 668 esc = g_strescape(x, NULL);
669 purple_debug_misc("yahoo", "yahoo_codes_to_html(%s)=%s\n", esc, xmlstr2);
670 g_free(esc);
671
667 return xmlstr2; 672 return xmlstr2;
668 } 673 }
669 674
670 /* borrowed from gtkimhtml */ 675 /* borrowed from gtkimhtml */
671 #define MAX_FONT_SIZE 7 676 #define MAX_FONT_SIZE 7
672 #define POINT_SIZE(x) (_point_sizes [MIN ((x > 0 ? x : 1), MAX_FONT_SIZE) - 1]) 677 #define POINT_SIZE(x) (_point_sizes [MIN ((x > 0 ? x : 1), MAX_FONT_SIZE) - 1])
673 static const gint _point_sizes [] = { 8, 10, 12, 14, 20, 30, 40 }; 678 static const gint _point_sizes [] = { 8, 10, 12, 14, 20, 30, 40 };
674 679
675 enum fatype
676 {
677 FATYPE_SIZE,
678 FATYPE_COLOR,
679 FATYPE_FACE,
680 FATYPE_JUNK
681 };
682
683 typedef struct 680 typedef struct
684 { 681 {
685 enum fatype type; 682 gboolean bold;
686 union { 683 gboolean italic;
687 int size; 684 gboolean underline;
688 char *color; 685 gboolean in_link;
689 char *face; 686 int font_size;
690 char *junk; 687 char *font_face;
691 } u; 688 char *font_color;
692 } fontattr; 689 } CurrentMsgState;
693
694 static void fontattr_free(fontattr *f)
695 {
696 if (f->type == FATYPE_COLOR)
697 g_free(f->u.color);
698 else if (f->type == FATYPE_FACE)
699 g_free(f->u.face);
700 g_free(f);
701 }
702 690
703 static void yahoo_htc_list_cleanup(GSList *l) 691 static void yahoo_htc_list_cleanup(GSList *l)
704 { 692 {
705 while (l != NULL) { 693 while (l != NULL) {
706 g_free(l->data); 694 g_free(l->data);
707 l = g_slist_delete_link(l, l); 695 l = g_slist_delete_link(l, l);
708 } 696 }
709 } 697 }
710 698
711 static void _parse_font_tag(const char *src, GString *dest, int *i, int *j, 699 static void parse_font_tag(GString *dest, const char *tag_name, const char *tag,
712 int len, GSList **colors, GSList **tags, GQueue *ftattr) 700 GSList **colors, GSList **tags)
713 { 701 {
714 int m, n, vstart; 702 const char *start;
715 gboolean quote = FALSE, done = FALSE; 703 const char *end;
716 704 GData *attributes;
717 m = *j; 705 const char *attribute;
718 706 gboolean needendtag;
719 while (1) { 707 GString *tmp;
720 m++; 708
721 709 purple_markup_find_tag(tag_name, tag, &start, &end, &attributes);
722 if (m >= len) { 710
723 g_string_append(dest, &src[*i]); 711 needendtag = FALSE;
724 *i = len; 712 tmp = g_string_new(NULL);
725 break; 713
714 attribute = g_datalist_get_data(&attributes, "color");
715 if (attribute != NULL) {
716 g_string_append(tmp, *colors ? (*colors)->data : "\033[#000000m");
717 g_string_append_printf(dest, "\033[%sm", attribute);
718 *colors = g_slist_prepend(*colors,
719 g_strdup_printf("\033[%sm", attribute));
720 } else {
721 /* We need to add a value to the colors stack even if we're not
722 * setting a color because we ALWAYS pop exactly 1 element from
723 * this stack for every </font> tag. If we don't add anything
724 * then we'll pop something that we shouldn't when we hit this
725 * corresponding </font>. */
726 *colors = g_slist_prepend(*colors,
727 *colors ? g_strdup((*colors)->data) : g_strdup("\033[#000000m"));
728 }
729
730 attribute = g_datalist_get_data(&attributes, "face");
731 if (attribute != NULL) {
732 needendtag = TRUE;
733 g_string_append(dest, "<font ");
734 g_string_append_printf(dest, "face=\"%s\" ", attribute);
735 }
736
737 attribute = g_datalist_get_data(&attributes, "size");
738 if (attribute != NULL) {
739 if (!needendtag) {
740 needendtag = TRUE;
741 g_string_append(dest, "<font ");
726 } 742 }
727 743
728 if (src[m] == '=') { 744 g_string_append_printf(dest, "size=\"%d\" ",
729 n = vstart = m; 745 POINT_SIZE(strtol(attribute, NULL, 10)));
730 while (1) { 746 }
731 n++; 747
732 748 if (needendtag) {
733 if (n >= len) { 749 dest->str[dest->len-1] = '>';
734 m = n; 750 *tags = g_slist_prepend(*tags, g_strdup("</font>"));
735 break; 751 g_string_free(tmp, TRUE);
736 } 752 } else {
737 753 *tags = g_slist_prepend(*tags, tmp->str);
738 if (src[n] == '"') { 754 g_string_free(tmp, FALSE);
739 if (!quote) { 755 }
740 quote = TRUE; 756
741 vstart = n; 757 g_datalist_clear(&attributes);
742 continue;
743 } else {
744 done = 1;
745 }
746 }
747
748 if (!quote && ((src[n] == ' ') || (src[n] == '>')))
749 done = TRUE;
750
751 if (done) {
752 if (!g_ascii_strncasecmp(&src[*j+1], "FACE", m - *j - 1)) {
753 fontattr *f;
754
755 f = g_new(fontattr, 1);
756 f->type = FATYPE_FACE;
757 f->u.face = g_strndup(&src[vstart+1], n-vstart-1);
758 if (!ftattr)
759 ftattr = g_queue_new();
760 g_queue_push_tail(ftattr, f);
761 m = n;
762 break;
763 } else if (!g_ascii_strncasecmp(&src[*j+1], "SIZE", m - *j - 1)) {
764 fontattr *f;
765
766 f = g_new(fontattr, 1);
767 f->type = FATYPE_SIZE;
768 f->u.size = POINT_SIZE(strtol(&src[vstart+1], NULL, 10));
769 if (!ftattr)
770 ftattr = g_queue_new();
771 g_queue_push_tail(ftattr, f);
772 m = n;
773 break;
774 } else if (!g_ascii_strncasecmp(&src[*j+1], "COLOR", m - *j - 1)) {
775 fontattr *f;
776
777 f = g_new(fontattr, 1);
778 f->type = FATYPE_COLOR;
779 f->u.color = g_strndup(&src[vstart+1], n-vstart-1);
780 if (!ftattr)
781 ftattr = g_queue_new();
782 g_queue_push_head(ftattr, f);
783 m = n;
784 break;
785 } else {
786 fontattr *f;
787
788 f = g_new(fontattr, 1);
789 f->type = FATYPE_JUNK;
790 f->u.junk = g_strndup(&src[*j+1], n-*j);
791 if (!ftattr)
792 ftattr = g_queue_new();
793 g_queue_push_tail(ftattr, f);
794 m = n;
795 break;
796 }
797
798 }
799 }
800 }
801
802 if (src[m] == ' ')
803 *j = m;
804
805 if (src[m] == '>') {
806 gboolean needendtag = FALSE;
807 fontattr *f;
808 GString *tmp = g_string_new(NULL);
809
810 if (!g_queue_is_empty(ftattr)) {
811 while ((f = g_queue_pop_tail(ftattr))) {
812 switch (f->type) {
813 case FATYPE_SIZE:
814 if (!needendtag) {
815 needendtag = TRUE;
816 g_string_append(dest, "<font ");
817 }
818
819 g_string_append_printf(dest, "size=\"%d\" ", f->u.size);
820 break;
821 case FATYPE_FACE:
822 if (!needendtag) {
823 needendtag = TRUE;
824 g_string_append(dest, "<font ");
825 }
826
827 g_string_append_printf(dest, "face=\"%s\" ", f->u.face);
828 break;
829 case FATYPE_JUNK:
830 if (!needendtag) {
831 needendtag = TRUE;
832 g_string_append(dest, "<font ");
833 }
834
835 g_string_append(dest, f->u.junk);
836 break;
837
838 case FATYPE_COLOR:
839 if (needendtag) {
840 g_string_append(tmp, "</font>");
841 dest->str[dest->len-1] = '>';
842 needendtag = TRUE;
843 }
844
845 g_string_append(tmp, *colors ? (*colors)->data : "\033[#000000m");
846 g_string_append_printf(dest, "\033[%sm", f->u.color);
847 *colors = g_slist_prepend(*colors,
848 g_strdup_printf("\033[%sm", f->u.color));
849 break;
850 }
851 fontattr_free(f);
852 }
853
854 g_queue_free(ftattr);
855 ftattr = NULL;
856
857 if (needendtag) {
858 dest->str[dest->len-1] = '>';
859 *tags = g_slist_prepend(*tags, g_strdup("</font>"));
860 g_string_free(tmp, TRUE);
861 } else {
862 *tags = g_slist_prepend(*tags, tmp->str);
863 g_string_free(tmp, FALSE);
864 }
865 }
866
867 *i = *j = m;
868 break;
869 }
870 }
871 } 758 }
872 759
873 char *yahoo_html_to_codes(const char *src) 760 char *yahoo_html_to_codes(const char *src)
874 { 761 {
875 GSList *colors = NULL; 762 GSList *colors = NULL;
763
764 /**
765 * A stack of char*s where each char* is the string that should be
766 * appended to dest in order to close all the tags that were opened
767 * by a <font> tag.
768 */
876 GSList *tags = NULL; 769 GSList *tags = NULL;
770
877 size_t src_len; 771 size_t src_len;
878 int i, j; 772 int i, j;
879 GString *dest; 773 GString *dest;
880 char *esc; 774 char *esc;
881 GQueue *ftattr = NULL; 775 gboolean no_more_gt_brackets = FALSE;
882 gboolean no_more_specials = FALSE; 776 gchar *tag, *tag_name;
777 gboolean is_closing_tag;
778 CurrentMsgState current_state;
779
780 memset(&current_state, 0, sizeof(current_state));
883 781
884 src_len = strlen(src); 782 src_len = strlen(src);
885 dest = g_string_sized_new(src_len); 783 dest = g_string_sized_new(src_len);
886 784
887 for (i = 0; i < src_len; i++) { 785 for (i = 0; i < src_len; i++) {
888 786 if (src[i] == '<' && !no_more_gt_brackets) {
889 if (src[i] == '<' && !no_more_specials) { 787 /* The start of an HTML tag */
890 j = i; 788 j = i;
891 789
892 while (1) { 790 while (j++ < src_len) {
893 j++; 791 if (src[j] != '>') {
894 792 if (src[j] == '"') {
895 if (j >= src_len) { /* no '>' */ 793 /* We're inside a quoted attribute value. Skip to the end */
794 j++;
795 while (j != src_len && src[j] != '"')
796 j++;
797 } else if (src[j] == '\'') {
798 /* We're inside a quoted attribute value. Skip to the end */
799 j++;
800 while (j != src_len && src[j] != '\'')
801 j++;
802 }
803 if (j != src_len)
804 /* Keep looking for the end of this tag */
805 continue;
806
807 /* This < has no corresponding > */
896 g_string_append_c(dest, src[i]); 808 g_string_append_c(dest, src[i]);
897 no_more_specials = TRUE; 809 no_more_gt_brackets = TRUE;
898 break; 810 break;
899 } 811 }
900 812
901 if (src[j] == '<') { 813 tag = g_strndup(src + i, j - i + 1);
902 /* FIXME: This doesn't convert outgoing entities. 814 tag_name = yahoo_markup_get_tag_name(tag, &is_closing_tag);
903 * However, I suspect this case may never 815
904 * happen anymore because of the entities. 816 if (g_str_equal(tag_name, "a")) {
817 const char *start;
818 const char *end;
819 GData *attributes;
820 const char *attribute;
821
822 /*
823 * TODO: Ideally we would replace this:
824 * <a href="http://pidgin.im/">Pidgin</a>
825 * with this:
826 * Pidgin (http://pidgin.im/)
827 *
828 * Currently we drop the text within the <a> tag and
829 * just show the URL. Doing it the fancy way is
830 * complicated when dealing with HTML tags within the
831 * <a> tag.
905 */ 832 */
906 g_string_append_len(dest, &src[i], j - i); 833
907 i = j - 1; 834 /* Append the URL */
908 if (ftattr) { 835 purple_markup_find_tag(tag_name, tag, &start, &end, &attributes);
909 fontattr *f; 836 attribute = g_datalist_get_data(&attributes, "href");
910 837 if (attribute != NULL) {
911 while ((f = g_queue_pop_head(ftattr))) 838 if (purple_str_has_prefix(attribute, "mailto:"))
912 fontattr_free(f); 839 attribute += 7;
913 g_queue_free(ftattr); 840 g_string_append(dest, attribute);
914 ftattr = NULL;
915 } 841 }
916 break; 842 g_datalist_clear(&attributes);
843
844 /* Skip past the closing </a> tag */
845 end = purple_strcasestr(src + j, "</a>");
846 if (end != NULL)
847 j = end - src + 3;
848
849 } else if (g_str_equal(tag_name, "font")) {
850 parse_font_tag(dest, tag_name, tag, &colors, &tags);
851 } else if (g_str_equal(tag_name, "b")) {
852 g_string_append(dest, "\033[1m");
853 current_state.bold = TRUE;
854 } else if (g_str_equal(tag_name, "/b")) {
855 if (current_state.bold) {
856 g_string_append(dest, "\033[x1m");
857 current_state.bold = FALSE;
858 }
859 } else if (g_str_equal(tag_name, "i")) {
860 current_state.italic = TRUE;
861 g_string_append(dest, "\033[2m");
862 } else if (g_str_equal(tag_name, "/i")) {
863 if (current_state.italic) {
864 g_string_append(dest, "\033[x2m");
865 current_state.italic = FALSE;
866 }
867 } else if (g_str_equal(tag_name, "u")) {
868 current_state.underline = TRUE;
869 g_string_append(dest, "\033[4m");
870 } else if (g_str_equal(tag_name, "/u")) {
871 if (current_state.underline) {
872 g_string_append(dest, "\033[x4m");
873 current_state.underline = FALSE;
874 }
875 } else if (g_str_equal(tag_name, "/a")) {
876 /* Do nothing */
877 } else if (g_str_equal(tag_name, "br")) {
878 g_string_append_c(dest, '\n');
879 } else if (g_str_equal(tag_name, "/font")) {
880 if (tags != NULL) {
881 char *etag = tags->data;
882 tags = g_slist_delete_link(tags, tags);
883 g_string_append(dest, etag);
884 if (colors != NULL) {
885 g_free(colors->data);
886 colors = g_slist_delete_link(colors, colors);
887 }
888 g_free(etag);
889 }
917 } 890 }
918 891
919 if (src[j] == ' ') { 892 i = j;
920 if (!g_ascii_strncasecmp(&src[i+1], "BODY", j - i - 1)) { 893 g_free(tag);
921 char *t = strchr(&src[j], '>'); 894 g_free(tag_name);
922 if (!t) { 895 break;
923 g_string_append(dest, &src[i]);
924 i = src_len;
925 break;
926 } else {
927 i = t - src;
928 break;
929 }
930 } else if (!g_ascii_strncasecmp(&src[i+1], "A HREF=\"", j - i - 1)) {
931 j += 7;
932 g_string_append(dest, "\033[lm");
933 if (purple_str_has_prefix(src + j, "mailto:"))
934 j += sizeof("mailto:") - 1;
935 while (1) {
936 g_string_append_c(dest, src[j]);
937 if (++j >= src_len) {
938 i = src_len;
939 break;
940 }
941 if (src[j] == '"') {
942 g_string_append(dest, "\033[xlm");
943 while (1) {
944 if (++j >= src_len) {
945 i = src_len;
946 break;
947 }
948 if (!g_ascii_strncasecmp(&src[j], "</A>", 4)) {
949 j += 3;
950 break;
951 }
952 }
953 i = j;
954 break;
955 }
956 }
957 } else if (!g_ascii_strncasecmp(&src[i+1], "SPAN", j - i - 1)) { /* drop span tags */
958 while (1) {
959 if (++j >= src_len) {
960 g_string_append(dest, &src[i]);
961 i = src_len;
962 break;
963 }
964 if (src[j] == '>') {
965 i = j;
966 break;
967 }
968 }
969 } else if (g_ascii_strncasecmp(&src[i+1], "FONT", j - i - 1)) { /* not interested! */
970 while (1) {
971 if (++j >= src_len) {
972 g_string_append(dest, &src[i]);
973 i = src_len;
974 break;
975 }
976 if (src[j] == '>') {
977 g_string_append_len(dest, &src[i], j - i + 1);
978 i = j;
979 break;
980 }
981 }
982 } else { /* yay we have a font tag */
983 _parse_font_tag(src, dest, &i, &j, src_len, &colors, &tags, ftattr);
984 }
985
986 break;
987 }
988
989 if (src[j] == '>') {
990 /* This has some problems like the FIXME for the
991 * '<' case. and like that case, I suspect the case
992 * that this has problems is won't happen anymore anyway.
993 */
994 int sublen = j - i - 1;
995
996 if (sublen) {
997 if (!g_ascii_strncasecmp(&src[i+1], "B", sublen)) {
998 g_string_append(dest, "\033[1m");
999 } else if (!g_ascii_strncasecmp(&src[i+1], "/B", sublen)) {
1000 g_string_append(dest, "\033[x1m");
1001 } else if (!g_ascii_strncasecmp(&src[i+1], "I", sublen)) {
1002 g_string_append(dest, "\033[2m");
1003 } else if (!g_ascii_strncasecmp(&src[i+1], "/I", sublen)) {
1004 g_string_append(dest, "\033[x2m");
1005 } else if (!g_ascii_strncasecmp(&src[i+1], "U", sublen)) {
1006 g_string_append(dest, "\033[4m");
1007 } else if (!g_ascii_strncasecmp(&src[i+1], "/U", sublen)) {
1008 g_string_append(dest, "\033[x4m");
1009 } else if (!g_ascii_strncasecmp(&src[i+1], "/A", sublen)) {
1010 g_string_append(dest, "\033[xlm");
1011 } else if (!g_ascii_strncasecmp(&src[i+1], "BR", sublen)) {
1012 g_string_append_c(dest, '\n');
1013 } else if (!g_ascii_strncasecmp(&src[i+1], "/BODY", sublen)) {
1014 /* mmm, </body> tags. *BURP* */
1015 } else if (!g_ascii_strncasecmp(&src[i+1], "/SPAN", sublen)) {
1016 /* </span> tags. dangerously close to </spam> */
1017 } else if (!g_ascii_strncasecmp(&src[i+1], "/FONT", sublen) && tags != NULL) {
1018 char *etag;
1019
1020 etag = tags->data;
1021 tags = g_slist_delete_link(tags, tags);
1022 if (etag) {
1023 g_string_append(dest, etag);
1024 if (!strcmp(etag, "</font>")) {
1025 if (colors != NULL) {
1026 g_free(colors->data);
1027 colors = g_slist_delete_link(colors, colors);
1028 }
1029 }
1030 g_free(etag);
1031 }
1032 } else {
1033 g_string_append_len(dest, &src[i], j - i + 1);
1034 }
1035 } else {
1036 g_string_append_len(dest, &src[i], j - i + 1);
1037 }
1038
1039 i = j;
1040 break;
1041 }
1042
1043 } 896 }
1044 897
1045 } else { 898 } else {
1046 const char *entity; 899 const char *entity;
1047 int length; 900 int length;
1056 g_string_append_c(dest, src[i]); 909 g_string_append_c(dest, src[i]);
1057 } 910 }
1058 } 911 }
1059 912
1060 esc = g_strescape(dest->str, NULL); 913 esc = g_strescape(dest->str, NULL);
1061 purple_debug_misc("yahoo", "yahoo_html_to_codes: Returning string: '%s'.\n", esc); 914 purple_debug_misc("yahoo", "yahoo_html_to_codes(%s)=%s\n", src, esc);
1062 g_free(esc); 915 g_free(esc);
1063 916
1064 yahoo_htc_list_cleanup(colors); 917 yahoo_htc_list_cleanup(colors);
1065 yahoo_htc_list_cleanup(tags); 918 yahoo_htc_list_cleanup(tags);
1066 919