changeset 227:539a0fa7f030 trunk

[svn] Unicode support fixes, based on an XMMS patch written by Ilya Konstantinov. <ikonst@users.sourceforge.net>: - Fixes UTF16 to UTF8 down-conversion. - Fixes length management of UTF8 entities. - Doesn't break ASCII support. - Corrects several issues with the original implementation.
author nenolod
date Fri, 25 Nov 2005 20:23:40 -0800
parents f6ad670bb500
children 08973a746a3e
files Plugins/Input/mpg123/id3.c Plugins/Input/mpg123/id3_frame_content.c Plugins/Input/mpg123/id3_frame_text.c Plugins/Input/mpg123/id3_frame_url.c Plugins/Input/mpg123/xmms-id3.h
diffstat 5 files changed, 276 insertions(+), 183 deletions(-) [+]
line wrap: on
line diff
--- a/Plugins/Input/mpg123/id3.c	Fri Nov 25 19:54:46 2005 -0800
+++ b/Plugins/Input/mpg123/id3.c	Fri Nov 25 20:23:40 2005 -0800
@@ -39,6 +39,19 @@
 #include "xmms-id3.h"
 #include "id3_header.h"
 
+char *
+id3_utf16_to_ascii(void *utf16)
+{
+    char ascii[256];
+    char *uc = (char *) utf16 + 2;
+    int i;
+
+    for (i = 0; *uc != 0 && i < sizeof(ascii); i++, uc += 2)
+        ascii[i] = *uc;
+
+    ascii[i] = 0;
+    return g_strdup(ascii);
+}
 
 /*
 **
--- a/Plugins/Input/mpg123/id3_frame_content.c	Fri Nov 25 19:54:46 2005 -0800
+++ b/Plugins/Input/mpg123/id3_frame_content.c	Fri Nov 25 20:23:40 2005 -0800
@@ -28,6 +28,7 @@
 
 #include <glib.h>
 #include <glib/gi18n.h>
+#include <string.h>
 
 #include "xmms-id3.h"
 
@@ -44,95 +45,95 @@
 char *
 id3_get_content(struct id3_frame *frame)
 {
-    char *text, *text_beg, *ptr;
-    char buffer[256];
-    int spc = sizeof(buffer) - 1;
+     gchar *text, *text_it;
+
+     /* Type check */
+     if (frame->fr_desc->fd_id != ID3_TCON)
+         return NULL;
 
-    /* Type check */
-    if (frame->fr_desc->fd_id != ID3_TCON)
-        return NULL;
+     /* Check if frame is compressed */
+     if (id3_decompress_frame(frame) == -1)
+         return NULL;
+
+     ID3_FRAME_DEFINE_CURSOR(frame);
 
-    /* Check if frame is compressed */
-    if (id3_decompress_frame(frame) == -1)
+     guint8 encoding;
+     ID3_FRAME_READ_OR_RETVAL(encoding, NULL);
+
+     text = text_it = id3_string_decode(encoding, cursor, length);
+
+     if (text == NULL)
         return NULL;
 
-    if (*(guint8 *) frame->fr_data == ID3_ENCODING_ISO_8859_1)
-        text_beg = text = g_strdup((char *) frame->fr_data + 1);
-    else
-        text_beg = text = id3_utf16_to_ascii((char *) frame->fr_data + 1);
+     /*
+      * Expand ID3v1 genre numbers.
+      */
+     while ((text_it = strstr(text_it, "(")) != NULL)
+     {
+         gchar* replace = NULL;
+         gchar* ref_start = text_it + 1;
 
-    /*
-     * If content is just plain text, return it.
-     */
-    if (text[0] != '(') {
-        return text;
-    }
+         if (*ref_start == ')')
+         {
+            /* False alarm */
+            ++text_it;
+            continue;
+         }
+
+         gsize ref_size = strstr(ref_start, ")") - ref_start;
 
-    /*
-     * Expand ID3v1 genre numbers.
-     */
-    ptr = buffer;
-    while (text[0] == '(' && text[1] != '(' && spc > 0) {
-        const char *genre;
-        int num = 0;
-
-        if (text[1] == 'R' && text[2] == 'X') {
-            text += 4;
-            genre = _(" (Remix)");
-            if (ptr == buffer)
-                genre++;
-
-        }
-        else if (text[1] == 'C' && text[2] == 'R') {
-            text += 4;
-            genre = _(" (Cover)");
-            if (ptr == buffer)
-                genre++;
+         if (strncmp(ref_start, "RX", ref_size) == 0)
+         {
+            replace = _("Remix");
+         }
+         else if (strncmp(ref_start, "CR", ref_size) == 0)
+         {
+            replace = _("Cover");
+         }
+         else
+         {
+             /* Maybe an ID3v1 genre? */
+             int genre_number;
+             gchar* genre_number_str = g_strndup(ref_start, ref_size);
+             if (sscanf(genre_number_str, "%d", &genre_number) > 0)
+             {
+                 /* Boundary check */
+                 if (genre_number >= sizeof(mpg123_id3_genres) / sizeof(char *))
+                     continue;
 
-        }
-        else {
-            /* Get ID3v1 genre number */
-            text++;
-            while (*text != ')') {
-                num *= 10;
-                num += *text++ - '0';
-            }
-            text++;
+                 replace = gettext(mpg123_id3_genres[genre_number]);
+             }
+         }
 
-            /* Boundary check */
-            if (num >= sizeof(mpg123_id3_genres) / sizeof(char *))
-                continue;
-
-            genre = gettext(mpg123_id3_genres[num]);
-
-            if (ptr != buffer && spc-- > 0)
-                *ptr++ = '/';
-        }
+         if (replace != NULL)
+         {
+             /* Amazingly hairy code to replace a part of the original genre string
+                with 'replace'. */
+             gchar* copy = g_malloc(strlen(text) - ref_size + strlen(replace) + 1);
+             gsize pos = 0;
+             gsize copy_size;
 
-        /* Expand string into buffer */
-        while (*genre != '\0' && spc > 0) {
-            *ptr++ = *genre++;
-            spc--;
-        }
-    }
+             /* Copy the part before the replaced part */
+             copy_size = ref_start - text;
+             memcpy(copy + pos, text, copy_size);
+             pos += copy_size;
+             /* Copy the replacement instead of the original reference */
+             copy_size = strlen(replace);
+             memcpy(copy + pos, replace, copy_size);
+             pos += copy_size;
+             /* Copy the rest, including the null */
+             memcpy(copy + pos, ref_start + ref_size, strlen(ref_start + ref_size)+1);
 
-    /*
-     * Add plaintext refinement.
-     */
-    if (*text == '(')
-        text++;
-    if (*text != '\0' && ptr != buffer && spc-- > 0)
-        *ptr++ = ' ';
-    while (*text != '\0' && spc > 0) {
-        *ptr++ = *text++;
-        spc--;
-    }
-    *ptr = '\0';
+             /* Put into original variables */
+             gsize offset = text_it - text;
+             g_free(text);
+             text = copy;
+             text_it = text + offset;
+         }
 
-    g_free(text_beg);
+         ++text_it;
+     } 
 
-    /*
-     * Return the expanded content string.
-     */
-    return g_strdup(buffer);
+     return text;
+
 }
--- a/Plugins/Input/mpg123/id3_frame_text.c	Fri Nov 25 19:54:46 2005 -0800
+++ b/Plugins/Input/mpg123/id3_frame_text.c	Fri Nov 25 20:23:40 2005 -0800
@@ -34,22 +34,6 @@
 #include "xmms-id3.h"
 #include "id3_header.h"
 
-
-char *
-id3_utf16_to_ascii(void *utf16)
-{
-    char ascii[256];
-    char *uc = (char *) utf16 + 2;
-    int i;
-
-    for (i = 0; *uc != 0 && i < sizeof(ascii); i++, uc += 2)
-        ascii[i] = *uc;
-
-    ascii[i] = 0;
-    return g_strdup(ascii);
-}
-
-
 /*
  * Function id3_get_encoding (frame)
  *
@@ -78,7 +62,12 @@
     if (id3_decompress_frame(frame) == -1)
         return -1;
 
-    return *(gint8 *) frame->fr_data;
+    ID3_FRAME_DEFINE_CURSOR(frame);
+    
+    guint8 encoding;
+    ID3_FRAME_READ_OR_RETVAL(encoding, -1);
+
+    return encoding;
 }
 
 
@@ -119,6 +108,96 @@
     return 0;
 }
 
+/* Get size of string in bytes including null. */
+gsize id3_string_size(guint8 encoding, const void* text, gsize max_size)
+{
+    switch ( encoding ) {
+    case ID3_ENCODING_ISO_8859_1:
+    case ID3_ENCODING_UTF8:
+    {
+        const guint8* text8 = text;
+        while ( (max_size >= sizeof(*text8)) && (*text8 != 0) )
+        {
+    	   ++text8;
+    	   max_size -= sizeof(*text8);
+    	}
+        
+        if (max_size >= sizeof(*text8))
+        {
+           ++text8;
+           max_size -= sizeof(*text8);
+        }
+        
+        return text8 - (guint8*)text;
+    }
+    case ID3_ENCODING_UTF16:
+    case ID3_ENCODING_UTF16BE:
+    {
+        const guint16* text16 = (guint16*)text;
+        while ( (max_size > 0) && (*text16 != 0) )
+        {
+    	   ++text16;
+    	   max_size -= sizeof(*text16);
+    	}
+    	
+    	if (max_size > 0)
+    	{
+           ++text16;
+           max_size -= sizeof(*text16);
+        }
+
+        return (guint8*)text16 - (guint8*)text;
+    }
+    default:
+        return 0;
+    }
+}
+
+/* 
+Returns a newly-allocated UTF-8 string in the locale's encoding.
+max_size specifies the maximum size of 'text', including terminating nulls.
+*/
+gchar* id3_string_decode(guint8 encoding, const void* text, gsize max_size)
+{
+    /* Otherwise, we'll end up passing -1 to functions, eliminating safety benefits. */
+    if (max_size <= 0)
+       return NULL;
+
+    switch( encoding )
+    {
+        case ID3_ENCODING_ISO_8859_1:
+        {
+            return g_locale_to_utf8((const gchar*)text, max_size, NULL, NULL, NULL);
+        }
+        case ID3_ENCODING_UTF8:
+        {
+            if (g_utf8_validate((const gchar*)text, max_size, NULL))
+               return g_strndup((const gchar*)text, max_size);
+            else
+               return NULL;
+        }
+        case ID3_ENCODING_UTF16:
+        {
+            gsize size_bytes = id3_string_size(encoding, text, max_size);
+            gchar* utf8 = g_convert((const gchar*)text, size_bytes, "UTF-8", "UTF-16", NULL, NULL, NULL);
+            /* If conversion passes on the BOM as-is, we strip it. */
+            if (g_utf8_get_char(utf8) == 0xfeff)
+            {
+               gchar* new_utf8 = g_strdup(utf8+3);
+               g_free(utf8);
+               utf8 = new_utf8;
+            }
+            return utf8;
+        }
+        case ID3_ENCODING_UTF16BE:
+        {
+            gsize size_bytes = id3_string_size(encoding, text, max_size);
+            return g_convert((const gchar*)text, size_bytes, "UTF-8", "UTF-16BE", NULL, NULL, NULL);
+        }
+        default:
+            return NULL;
+    }
+}
 
 /*
  * Function id3_get_text (frame)
@@ -193,10 +272,12 @@
     if (id3_decompress_frame(frame) == -1)
         return NULL;
 
-    if (*(guint8 *) frame->fr_data == ID3_ENCODING_ISO_8859_1)
-        return g_strdup((char *) frame->fr_data + 1);
-    else
-        return id3_utf16_to_ascii((char *) frame->fr_data + 1);
+    ID3_FRAME_DEFINE_CURSOR(frame);
+    
+    guint8 encoding;
+    ID3_FRAME_READ_OR_RETVAL(encoding, NULL);
+
+    return id3_string_decode(encoding, cursor, length);
 }
 
 
@@ -214,46 +295,23 @@
 
     /* Check if frame is compressed */
     if (id3_decompress_frame(frame) == -1)
-        return -1;
+	    return -1;
 
-    /*
-     * Generate integer according to encoding.
-     */
-    switch (*(guint8 *) frame->fr_data) {
-    case ID3_ENCODING_ISO_8859_1:
-        {
-            char *text = ((char *) frame->fr_data) + 1;
-
-            while (*text >= '0' && *text <= '9') {
-                number *= 10;
-                number += *text - '0';
-                text++;
-            }
+    ID3_FRAME_DEFINE_CURSOR(frame);
+    
+    guint8 encoding;
+    ID3_FRAME_READ_OR_RETVAL(encoding, number);
 
-            return number;
-        }
-    case ID3_ENCODING_UTF16:
-        {
-            char *text = ((char *) frame->fr_data) + 3;
-
-/*  	if (*(gint16 *) frame->fr_data == 0xfeff) */
-/*  	    text++; */
-
-            while (*text >= '0' && *text <= '9') {
-                number *= 10;
-                number += *text - '0';
-                text++;
-            }
-
-            return number;
-        }
-
-    default:
-        return -1;
+    gchar* number_str = id3_string_decode(encoding, cursor, length);
+    if (number_str != NULL)
+    {
+       sscanf(number_str, "%d", &number);
+       g_free(number_str);
     }
+    
+    return number;
 }
 
-
 /*
  * Function id3_set_text (frame, text)
  *
@@ -342,9 +400,7 @@
      */
     *(gint8 *) frame->fr_raw_data = ID3_ENCODING_ISO_8859_1;
     text = (char *) frame->fr_raw_data + 1;
-    while (--pos >= 0)
-        *text++ = buf[pos];
-    *text = '\0';
+    strcpy(text, buf);
 
     frame->fr_altered = 1;
     frame->fr_owner->id3_altered = 1;
--- a/Plugins/Input/mpg123/id3_frame_url.c	Fri Nov 25 19:54:46 2005 -0800
+++ b/Plugins/Input/mpg123/id3_frame_url.c	Fri Nov 25 20:23:40 2005 -0800
@@ -30,7 +30,7 @@
 #include "xmms-id3.h"
 #include "id3_header.h"
 
-
+#include <string.h>
 
 /*
  * Function id3_get_url (frame)
@@ -42,45 +42,34 @@
 id3_get_url(struct id3_frame *frame)
 {
     /* Type check */
-    if (frame->fr_desc->fd_idstr[0] != 'W')
-        return NULL;
+    if ( frame->fr_desc->fd_idstr[0] != 'W' )
+	return NULL;
 
     /* Check if frame is compressed */
     if (id3_decompress_frame(frame) == -1)
-        return NULL;
-
-    if (frame->fr_desc->fd_id == ID3_WXXX) {
-        /*
-         * This is a user defined link frame.  Skip the description.
-         */
-        switch (*(guint8 *) frame->fr_data) {
-        case ID3_ENCODING_ISO_8859_1:
-            {
-                char *text = (char *) frame->fr_data + 1;
-
-                while (*text != 0)
-                    text++;
-
-                return g_strdup(++text);
-            }
-        case ID3_ENCODING_UTF16:
-            {
-                gint16 *text16 = (gint16 *) ((glong) frame->fr_data + 1);
-
-                while (*text16 != 0)
-                    text16++;
-
-                return g_strdup((char *) (++text16));
-            }
-        default:
-            return NULL;
-        }
+	    return NULL;
+    
+    ID3_FRAME_DEFINE_CURSOR(frame);
+    
+    if ( frame->fr_desc->fd_id == ID3_WXXX ) {
+	/*
+	 * This is a user defined link frame.  Skip the description.
+	 */
+	guint8 encoding;
+	gsize description_size;
+	
+	ID3_FRAME_READ_OR_RETVAL(encoding, NULL);
+	
+	description_size = id3_string_size(encoding, cursor, length);
+	if (description_size == 0)
+	   return NULL;
+	cursor += description_size;
+	length -= description_size;
     }
-
-    return g_strdup((char *) frame->fr_data);
+    
+    return id3_string_decode(ID3_ENCODING_ISO_8859_1, cursor, length);
 }
 
-
 /*
  * Function id3_get_url_desc (frame)
  *
@@ -91,19 +80,21 @@
 id3_get_url_desc(struct id3_frame *frame)
 {
     /* Type check */
-    if (frame->fr_desc->fd_idstr[0] != 'W')
-        return NULL;
+    if ( frame->fr_desc->fd_idstr[0] != 'W' )
+	return NULL;
 
     /* If predefined link frame, return description. */
-    if (frame->fr_desc->fd_id != ID3_WXXX)
-        return frame->fr_desc->fd_description;
+    if ( frame->fr_desc->fd_id != ID3_WXXX )
+	return frame->fr_desc->fd_description;
 
     /* Check if frame is compressed */
     if (id3_decompress_frame(frame) == -1)
-        return NULL;
-
-    if (*(guint8 *) frame->fr_data == ID3_ENCODING_ISO_8859_1)
-        return g_strdup((char *) frame->fr_data + 1);
-    else
-        return id3_utf16_to_ascii((gint16 *) ((glong) frame->fr_data + 1));
+	    return NULL;
+	    
+    ID3_FRAME_DEFINE_CURSOR(frame);
+    
+    guint8 encoding;
+    ID3_FRAME_READ_OR_RETVAL(encoding, NULL);
+    
+    return id3_string_decode(encoding, cursor, length);
 }
--- a/Plugins/Input/mpg123/xmms-id3.h	Fri Nov 25 19:54:46 2005 -0800
+++ b/Plugins/Input/mpg123/xmms-id3.h	Fri Nov 25 20:23:40 2005 -0800
@@ -149,6 +149,21 @@
 #define ID3_ENCODING_UTF8	0x03
 
 
+/*
+ * Handy macros which help us writing more secure length-aware code
+ * which involves reading the frame's data buffer.
+ */
+
+#define ID3_FRAME_DEFINE_CURSOR(frame) \
+	gsize length = frame->fr_size; \
+	guint8* cursor = frame->fr_data;
+	
+#define ID3_FRAME_READ_OR_RETVAL(variable, retval) \
+	if (length < sizeof(variable)) \
+		return retval; \
+	memcpy((void*)&variable, (void*)cursor, sizeof(variable)); \
+	cursor += sizeof(variable); \
+	length -= sizeof(variable);
 
 /*
  * ID3 frame id numbers.
@@ -318,6 +333,21 @@
 #define ID3_WPB ID3_FRAME_ID_22('W', 'P', 'B')
 #define ID3_WXX ID3_FRAME_ID_22('W', 'X', 'X')
 
+/*
+ * Handy macros which help us writing more secure length-aware code
+ * which involves reading the frame's data buffer.
+ */
+
+#define ID3_FRAME_DEFINE_CURSOR(frame) \
+	gsize length = frame->fr_size; \
+	guint8* cursor = frame->fr_data;
+	
+#define ID3_FRAME_READ_OR_RETVAL(variable, retval) \
+	if (length < sizeof(variable)) \
+		return retval; \
+	memcpy((void*)&variable, (void*)cursor, sizeof(variable)); \
+	cursor += sizeof(variable); \
+	length -= sizeof(variable);
 
 /*
  * Prototypes.
@@ -332,6 +362,7 @@
 int id3_tell(struct id3_tag *);
 int id3_alter_file(struct id3_tag *);
 int id3_write_tag(struct id3_tag *, int);
+char *id3_utf16_to_ascii(void *);
 
 /* From id3_frame.c */
 int id3_read_frame(struct id3_tag *id3);
@@ -343,7 +374,8 @@
 void id3_frame_clear_data(struct id3_frame *frame);
 
 /* From id3_frame_text.c */
-char *id3_utf16_to_ascii(void *);
+gsize id3_string_size(guint8 encoding, const void* text, gsize max_size);
+gchar* id3_string_decode(guint8 encoding, const void* text, gsize max_size);
 gint8 id3_get_encoding(struct id3_frame *);
 int id3_set_encoding(struct id3_frame *, gint8);
 char *id3_get_text(struct id3_frame *);