# HG changeset patch # User nenolod # Date 1132979020 28800 # Node ID 539a0fa7f030143b407a34571a571ddc0f5e7127 # Parent f6ad670bb500a9584a3c1b020d48ac7c535340c7 [svn] Unicode support fixes, based on an XMMS patch written by Ilya Konstantinov. : - Fixes UTF16 to UTF8 down-conversion. - Fixes length management of UTF8 entities. - Doesn't break ASCII support. - Corrects several issues with the original implementation. diff -r f6ad670bb500 -r 539a0fa7f030 Plugins/Input/mpg123/id3.c --- a/Plugins/Input/mpg123/id3.c Fri Nov 25 19:54:46 2005 -0800 +++ b/Plugins/Input/mpg123/id3.c Fri Nov 25 20:23:40 2005 -0800 @@ -39,6 +39,19 @@ #include "xmms-id3.h" #include "id3_header.h" +char * +id3_utf16_to_ascii(void *utf16) +{ + char ascii[256]; + char *uc = (char *) utf16 + 2; + int i; + + for (i = 0; *uc != 0 && i < sizeof(ascii); i++, uc += 2) + ascii[i] = *uc; + + ascii[i] = 0; + return g_strdup(ascii); +} /* ** diff -r f6ad670bb500 -r 539a0fa7f030 Plugins/Input/mpg123/id3_frame_content.c --- a/Plugins/Input/mpg123/id3_frame_content.c Fri Nov 25 19:54:46 2005 -0800 +++ b/Plugins/Input/mpg123/id3_frame_content.c Fri Nov 25 20:23:40 2005 -0800 @@ -28,6 +28,7 @@ #include #include +#include #include "xmms-id3.h" @@ -44,95 +45,95 @@ char * id3_get_content(struct id3_frame *frame) { - char *text, *text_beg, *ptr; - char buffer[256]; - int spc = sizeof(buffer) - 1; + gchar *text, *text_it; + + /* Type check */ + if (frame->fr_desc->fd_id != ID3_TCON) + return NULL; - /* Type check */ - if (frame->fr_desc->fd_id != ID3_TCON) - return NULL; + /* Check if frame is compressed */ + if (id3_decompress_frame(frame) == -1) + return NULL; + + ID3_FRAME_DEFINE_CURSOR(frame); - /* Check if frame is compressed */ - if (id3_decompress_frame(frame) == -1) + guint8 encoding; + ID3_FRAME_READ_OR_RETVAL(encoding, NULL); + + text = text_it = id3_string_decode(encoding, cursor, length); + + if (text == NULL) return NULL; - if (*(guint8 *) frame->fr_data == ID3_ENCODING_ISO_8859_1) - text_beg = text = g_strdup((char *) frame->fr_data + 1); - else - text_beg = text = id3_utf16_to_ascii((char *) frame->fr_data + 1); + /* + * Expand ID3v1 genre numbers. + */ + while ((text_it = strstr(text_it, "(")) != NULL) + { + gchar* replace = NULL; + gchar* ref_start = text_it + 1; - /* - * If content is just plain text, return it. - */ - if (text[0] != '(') { - return text; - } + if (*ref_start == ')') + { + /* False alarm */ + ++text_it; + continue; + } + + gsize ref_size = strstr(ref_start, ")") - ref_start; - /* - * Expand ID3v1 genre numbers. - */ - ptr = buffer; - while (text[0] == '(' && text[1] != '(' && spc > 0) { - const char *genre; - int num = 0; - - if (text[1] == 'R' && text[2] == 'X') { - text += 4; - genre = _(" (Remix)"); - if (ptr == buffer) - genre++; - - } - else if (text[1] == 'C' && text[2] == 'R') { - text += 4; - genre = _(" (Cover)"); - if (ptr == buffer) - genre++; + if (strncmp(ref_start, "RX", ref_size) == 0) + { + replace = _("Remix"); + } + else if (strncmp(ref_start, "CR", ref_size) == 0) + { + replace = _("Cover"); + } + else + { + /* Maybe an ID3v1 genre? */ + int genre_number; + gchar* genre_number_str = g_strndup(ref_start, ref_size); + if (sscanf(genre_number_str, "%d", &genre_number) > 0) + { + /* Boundary check */ + if (genre_number >= sizeof(mpg123_id3_genres) / sizeof(char *)) + continue; - } - else { - /* Get ID3v1 genre number */ - text++; - while (*text != ')') { - num *= 10; - num += *text++ - '0'; - } - text++; + replace = gettext(mpg123_id3_genres[genre_number]); + } + } - /* Boundary check */ - if (num >= sizeof(mpg123_id3_genres) / sizeof(char *)) - continue; - - genre = gettext(mpg123_id3_genres[num]); - - if (ptr != buffer && spc-- > 0) - *ptr++ = '/'; - } + if (replace != NULL) + { + /* Amazingly hairy code to replace a part of the original genre string + with 'replace'. */ + gchar* copy = g_malloc(strlen(text) - ref_size + strlen(replace) + 1); + gsize pos = 0; + gsize copy_size; - /* Expand string into buffer */ - while (*genre != '\0' && spc > 0) { - *ptr++ = *genre++; - spc--; - } - } + /* Copy the part before the replaced part */ + copy_size = ref_start - text; + memcpy(copy + pos, text, copy_size); + pos += copy_size; + /* Copy the replacement instead of the original reference */ + copy_size = strlen(replace); + memcpy(copy + pos, replace, copy_size); + pos += copy_size; + /* Copy the rest, including the null */ + memcpy(copy + pos, ref_start + ref_size, strlen(ref_start + ref_size)+1); - /* - * Add plaintext refinement. - */ - if (*text == '(') - text++; - if (*text != '\0' && ptr != buffer && spc-- > 0) - *ptr++ = ' '; - while (*text != '\0' && spc > 0) { - *ptr++ = *text++; - spc--; - } - *ptr = '\0'; + /* Put into original variables */ + gsize offset = text_it - text; + g_free(text); + text = copy; + text_it = text + offset; + } - g_free(text_beg); + ++text_it; + } - /* - * Return the expanded content string. - */ - return g_strdup(buffer); + return text; + } diff -r f6ad670bb500 -r 539a0fa7f030 Plugins/Input/mpg123/id3_frame_text.c --- a/Plugins/Input/mpg123/id3_frame_text.c Fri Nov 25 19:54:46 2005 -0800 +++ b/Plugins/Input/mpg123/id3_frame_text.c Fri Nov 25 20:23:40 2005 -0800 @@ -34,22 +34,6 @@ #include "xmms-id3.h" #include "id3_header.h" - -char * -id3_utf16_to_ascii(void *utf16) -{ - char ascii[256]; - char *uc = (char *) utf16 + 2; - int i; - - for (i = 0; *uc != 0 && i < sizeof(ascii); i++, uc += 2) - ascii[i] = *uc; - - ascii[i] = 0; - return g_strdup(ascii); -} - - /* * Function id3_get_encoding (frame) * @@ -78,7 +62,12 @@ if (id3_decompress_frame(frame) == -1) return -1; - return *(gint8 *) frame->fr_data; + ID3_FRAME_DEFINE_CURSOR(frame); + + guint8 encoding; + ID3_FRAME_READ_OR_RETVAL(encoding, -1); + + return encoding; } @@ -119,6 +108,96 @@ return 0; } +/* Get size of string in bytes including null. */ +gsize id3_string_size(guint8 encoding, const void* text, gsize max_size) +{ + switch ( encoding ) { + case ID3_ENCODING_ISO_8859_1: + case ID3_ENCODING_UTF8: + { + const guint8* text8 = text; + while ( (max_size >= sizeof(*text8)) && (*text8 != 0) ) + { + ++text8; + max_size -= sizeof(*text8); + } + + if (max_size >= sizeof(*text8)) + { + ++text8; + max_size -= sizeof(*text8); + } + + return text8 - (guint8*)text; + } + case ID3_ENCODING_UTF16: + case ID3_ENCODING_UTF16BE: + { + const guint16* text16 = (guint16*)text; + while ( (max_size > 0) && (*text16 != 0) ) + { + ++text16; + max_size -= sizeof(*text16); + } + + if (max_size > 0) + { + ++text16; + max_size -= sizeof(*text16); + } + + return (guint8*)text16 - (guint8*)text; + } + default: + return 0; + } +} + +/* +Returns a newly-allocated UTF-8 string in the locale's encoding. +max_size specifies the maximum size of 'text', including terminating nulls. +*/ +gchar* id3_string_decode(guint8 encoding, const void* text, gsize max_size) +{ + /* Otherwise, we'll end up passing -1 to functions, eliminating safety benefits. */ + if (max_size <= 0) + return NULL; + + switch( encoding ) + { + case ID3_ENCODING_ISO_8859_1: + { + return g_locale_to_utf8((const gchar*)text, max_size, NULL, NULL, NULL); + } + case ID3_ENCODING_UTF8: + { + if (g_utf8_validate((const gchar*)text, max_size, NULL)) + return g_strndup((const gchar*)text, max_size); + else + return NULL; + } + case ID3_ENCODING_UTF16: + { + gsize size_bytes = id3_string_size(encoding, text, max_size); + gchar* utf8 = g_convert((const gchar*)text, size_bytes, "UTF-8", "UTF-16", NULL, NULL, NULL); + /* If conversion passes on the BOM as-is, we strip it. */ + if (g_utf8_get_char(utf8) == 0xfeff) + { + gchar* new_utf8 = g_strdup(utf8+3); + g_free(utf8); + utf8 = new_utf8; + } + return utf8; + } + case ID3_ENCODING_UTF16BE: + { + gsize size_bytes = id3_string_size(encoding, text, max_size); + return g_convert((const gchar*)text, size_bytes, "UTF-8", "UTF-16BE", NULL, NULL, NULL); + } + default: + return NULL; + } +} /* * Function id3_get_text (frame) @@ -193,10 +272,12 @@ if (id3_decompress_frame(frame) == -1) return NULL; - if (*(guint8 *) frame->fr_data == ID3_ENCODING_ISO_8859_1) - return g_strdup((char *) frame->fr_data + 1); - else - return id3_utf16_to_ascii((char *) frame->fr_data + 1); + ID3_FRAME_DEFINE_CURSOR(frame); + + guint8 encoding; + ID3_FRAME_READ_OR_RETVAL(encoding, NULL); + + return id3_string_decode(encoding, cursor, length); } @@ -214,46 +295,23 @@ /* Check if frame is compressed */ if (id3_decompress_frame(frame) == -1) - return -1; + return -1; - /* - * Generate integer according to encoding. - */ - switch (*(guint8 *) frame->fr_data) { - case ID3_ENCODING_ISO_8859_1: - { - char *text = ((char *) frame->fr_data) + 1; - - while (*text >= '0' && *text <= '9') { - number *= 10; - number += *text - '0'; - text++; - } + ID3_FRAME_DEFINE_CURSOR(frame); + + guint8 encoding; + ID3_FRAME_READ_OR_RETVAL(encoding, number); - return number; - } - case ID3_ENCODING_UTF16: - { - char *text = ((char *) frame->fr_data) + 3; - -/* if (*(gint16 *) frame->fr_data == 0xfeff) */ -/* text++; */ - - while (*text >= '0' && *text <= '9') { - number *= 10; - number += *text - '0'; - text++; - } - - return number; - } - - default: - return -1; + gchar* number_str = id3_string_decode(encoding, cursor, length); + if (number_str != NULL) + { + sscanf(number_str, "%d", &number); + g_free(number_str); } + + return number; } - /* * Function id3_set_text (frame, text) * @@ -342,9 +400,7 @@ */ *(gint8 *) frame->fr_raw_data = ID3_ENCODING_ISO_8859_1; text = (char *) frame->fr_raw_data + 1; - while (--pos >= 0) - *text++ = buf[pos]; - *text = '\0'; + strcpy(text, buf); frame->fr_altered = 1; frame->fr_owner->id3_altered = 1; diff -r f6ad670bb500 -r 539a0fa7f030 Plugins/Input/mpg123/id3_frame_url.c --- a/Plugins/Input/mpg123/id3_frame_url.c Fri Nov 25 19:54:46 2005 -0800 +++ b/Plugins/Input/mpg123/id3_frame_url.c Fri Nov 25 20:23:40 2005 -0800 @@ -30,7 +30,7 @@ #include "xmms-id3.h" #include "id3_header.h" - +#include /* * Function id3_get_url (frame) @@ -42,45 +42,34 @@ id3_get_url(struct id3_frame *frame) { /* Type check */ - if (frame->fr_desc->fd_idstr[0] != 'W') - return NULL; + if ( frame->fr_desc->fd_idstr[0] != 'W' ) + return NULL; /* Check if frame is compressed */ if (id3_decompress_frame(frame) == -1) - return NULL; - - if (frame->fr_desc->fd_id == ID3_WXXX) { - /* - * This is a user defined link frame. Skip the description. - */ - switch (*(guint8 *) frame->fr_data) { - case ID3_ENCODING_ISO_8859_1: - { - char *text = (char *) frame->fr_data + 1; - - while (*text != 0) - text++; - - return g_strdup(++text); - } - case ID3_ENCODING_UTF16: - { - gint16 *text16 = (gint16 *) ((glong) frame->fr_data + 1); - - while (*text16 != 0) - text16++; - - return g_strdup((char *) (++text16)); - } - default: - return NULL; - } + return NULL; + + ID3_FRAME_DEFINE_CURSOR(frame); + + if ( frame->fr_desc->fd_id == ID3_WXXX ) { + /* + * This is a user defined link frame. Skip the description. + */ + guint8 encoding; + gsize description_size; + + ID3_FRAME_READ_OR_RETVAL(encoding, NULL); + + description_size = id3_string_size(encoding, cursor, length); + if (description_size == 0) + return NULL; + cursor += description_size; + length -= description_size; } - - return g_strdup((char *) frame->fr_data); + + return id3_string_decode(ID3_ENCODING_ISO_8859_1, cursor, length); } - /* * Function id3_get_url_desc (frame) * @@ -91,19 +80,21 @@ id3_get_url_desc(struct id3_frame *frame) { /* Type check */ - if (frame->fr_desc->fd_idstr[0] != 'W') - return NULL; + if ( frame->fr_desc->fd_idstr[0] != 'W' ) + return NULL; /* If predefined link frame, return description. */ - if (frame->fr_desc->fd_id != ID3_WXXX) - return frame->fr_desc->fd_description; + if ( frame->fr_desc->fd_id != ID3_WXXX ) + return frame->fr_desc->fd_description; /* Check if frame is compressed */ if (id3_decompress_frame(frame) == -1) - return NULL; - - if (*(guint8 *) frame->fr_data == ID3_ENCODING_ISO_8859_1) - return g_strdup((char *) frame->fr_data + 1); - else - return id3_utf16_to_ascii((gint16 *) ((glong) frame->fr_data + 1)); + return NULL; + + ID3_FRAME_DEFINE_CURSOR(frame); + + guint8 encoding; + ID3_FRAME_READ_OR_RETVAL(encoding, NULL); + + return id3_string_decode(encoding, cursor, length); } diff -r f6ad670bb500 -r 539a0fa7f030 Plugins/Input/mpg123/xmms-id3.h --- a/Plugins/Input/mpg123/xmms-id3.h Fri Nov 25 19:54:46 2005 -0800 +++ b/Plugins/Input/mpg123/xmms-id3.h Fri Nov 25 20:23:40 2005 -0800 @@ -149,6 +149,21 @@ #define ID3_ENCODING_UTF8 0x03 +/* + * Handy macros which help us writing more secure length-aware code + * which involves reading the frame's data buffer. + */ + +#define ID3_FRAME_DEFINE_CURSOR(frame) \ + gsize length = frame->fr_size; \ + guint8* cursor = frame->fr_data; + +#define ID3_FRAME_READ_OR_RETVAL(variable, retval) \ + if (length < sizeof(variable)) \ + return retval; \ + memcpy((void*)&variable, (void*)cursor, sizeof(variable)); \ + cursor += sizeof(variable); \ + length -= sizeof(variable); /* * ID3 frame id numbers. @@ -318,6 +333,21 @@ #define ID3_WPB ID3_FRAME_ID_22('W', 'P', 'B') #define ID3_WXX ID3_FRAME_ID_22('W', 'X', 'X') +/* + * Handy macros which help us writing more secure length-aware code + * which involves reading the frame's data buffer. + */ + +#define ID3_FRAME_DEFINE_CURSOR(frame) \ + gsize length = frame->fr_size; \ + guint8* cursor = frame->fr_data; + +#define ID3_FRAME_READ_OR_RETVAL(variable, retval) \ + if (length < sizeof(variable)) \ + return retval; \ + memcpy((void*)&variable, (void*)cursor, sizeof(variable)); \ + cursor += sizeof(variable); \ + length -= sizeof(variable); /* * Prototypes. @@ -332,6 +362,7 @@ int id3_tell(struct id3_tag *); int id3_alter_file(struct id3_tag *); int id3_write_tag(struct id3_tag *, int); +char *id3_utf16_to_ascii(void *); /* From id3_frame.c */ int id3_read_frame(struct id3_tag *id3); @@ -343,7 +374,8 @@ void id3_frame_clear_data(struct id3_frame *frame); /* From id3_frame_text.c */ -char *id3_utf16_to_ascii(void *); +gsize id3_string_size(guint8 encoding, const void* text, gsize max_size); +gchar* id3_string_decode(guint8 encoding, const void* text, gsize max_size); gint8 id3_get_encoding(struct id3_frame *); int id3_set_encoding(struct id3_frame *, gint8); char *id3_get_text(struct id3_frame *);