Mercurial > audlegacy
view audacious/strings.c @ 2297:a9bc621d6b1b trunk
[svn] libguess update:
- follow the update of upstream.
- the precedence order of encodings can now be specified explicitly at compile time.
- make UTF-8 the highest ordered encoding. (it may cope with the problems described in #738.)
author | yaz |
---|---|
date | Sun, 07 Jan 2007 21:17:40 -0800 |
parents | 7d40f0a290b9 |
children |
line wrap: on
line source
/* Audacious * Copyright (C) 2005-2007 Audacious development team. * * BMP - Cross-platform multimedia player * Copyright (C) 2003-2004 BMP development team. * * Based on XMMS: * Copyright (C) 1998-2003 XMMS development team. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; under version 2 of the License. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA * 02110-1301, USA. */ #define WEIRD_UTF_16_PLAYLIST_ENCODING #ifdef HAVE_CONFIG_H # include "config.h" #endif #define NEED_GLADE #include "util.h" #include <glib.h> #include <glib/gi18n.h> #include <glade/glade.h> #include <gtk/gtk.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include <ctype.h> #include "platform/smartinclude.h" #include <gdk/gdkkeysyms.h> #include <X11/Xlib.h> //#include <sys/ipc.h> #include <unistd.h> #include <errno.h> #ifdef HAVE_FTS_H # include <fts.h> #endif #include "glade.h" #include "input.h" #include "main.h" #include "playback.h" #include "playlist.h" #include "ui_playlist.h" #ifdef USE_CHARDET #include "../libguess/libguess.h" #include "../librcd/librcd.h" #ifdef HAVE_UDET #include <libudet_c.h> #endif #endif static GQuark quark_popup_data; /* * escape_shell_chars() * * Escapes characters that are special to the shell inside double quotes. 
*/ gchar * escape_shell_chars(const gchar * string) { const gchar *special = "$`\"\\"; /* Characters to escape */ const gchar *in = string; gchar *out, *escaped; gint num = 0; while (*in != '\0') if (strchr(special, *in++)) num++; escaped = g_malloc(strlen(string) + num + 1); in = string; out = escaped; while (*in != '\0') { if (strchr(special, *in)) *out++ = '\\'; *out++ = *in++; } *out = '\0'; return escaped; } static gchar * str_twenty_to_space(gchar * str) { gchar *match, *match_end; g_return_val_if_fail(str != NULL, NULL); while ((match = strstr(str, "%20"))) { match_end = match + 3; *match++ = ' '; while (*match_end) *match++ = *match_end++; *match = 0; } return str; } static gchar * str_replace_char(gchar * str, gchar old, gchar new) { gchar *match; g_return_val_if_fail(str != NULL, NULL); match = str; while ((match = strchr(match, old))) *match = new; return str; } gchar * str_append(gchar * str, const gchar * add_str) { return str_replace(str, g_strconcat(str, add_str, NULL)); } gchar * str_replace(gchar * str, gchar * new_str) { g_free(str); return new_str; } void str_replace_in(gchar ** str, gchar * new_str) { *str = str_replace(*str, new_str); } gboolean str_has_prefix_nocase(const gchar * str, const gchar * prefix) { return (strncasecmp(str, prefix, strlen(prefix)) == 0); } gboolean str_has_suffix_nocase(const gchar * str, const gchar * suffix) { return (strcasecmp(str + strlen(str) - strlen(suffix), suffix) == 0); } gboolean str_has_suffixes_nocase(const gchar * str, gchar * const *suffixes) { gchar *const *suffix; g_return_val_if_fail(str != NULL, FALSE); g_return_val_if_fail(suffixes != NULL, FALSE); for (suffix = suffixes; *suffix; suffix++) if (str_has_suffix_nocase(str, *suffix)) return TRUE; return FALSE; } gchar * str_to_utf8_fallback(const gchar * str) { gchar *out_str, *convert_str, *chr; /* NULL in NULL out */ if (!str) return NULL; convert_str = g_strdup(str); for (chr = convert_str; *chr; chr++) { if (*chr & 0x80) *chr = '?'; } out_str = 
g_strconcat(convert_str, _(" (invalid UTF-8)"), NULL); g_free(convert_str); return out_str; } gchar * filename_to_utf8(const gchar * filename) { gchar *out_str; /* NULL in NULL out */ if (!filename) return NULL; if ((out_str = g_filename_to_utf8(filename, -1, NULL, NULL, NULL))) return out_str; return str_to_utf8_fallback(filename); } gchar * str_to_utf8(const gchar * str) { gchar *out_str; /* NULL in NULL out */ if (!str) return NULL; /* Note: Currently, playlist calls this function repeatedly, even * if the string is already converted into utf-8. * chardet_to_utf8() would convert a valid utf-8 string into a * different utf-8 string, if fallback encodings were supplied and * the given string could be treated as a string in one of fallback * encodings. To avoid this, the order of evaluation has been * changed. (It might cause a drawback?) */ /* chardet encoding detector */ if ((out_str = chardet_to_utf8(str, strlen(str), NULL, NULL, NULL))) return out_str; /* already UTF-8? */ if (g_utf8_validate(str, -1, NULL)) return g_strdup(str); /* assume encoding associated with locale */ if ((out_str = g_locale_to_utf8(str, -1, NULL, NULL, NULL))) return out_str; /* all else fails, we mask off character codes >= 128, replace with '?' */ return str_to_utf8_fallback(str); } const gchar * str_skip_chars(const gchar * str, const gchar * chars) { while (strchr(chars, *str)) str++; return str; } gchar * convert_title_text(gchar * title) { g_return_val_if_fail(title != NULL, NULL); if (cfg.convert_slash) str_replace_char(title, '\\', '/'); if (cfg.convert_underscore) str_replace_char(title, '_', ' '); if (cfg.convert_twenty) str_twenty_to_space(title); return title; } gchar *chardet_to_utf8(const gchar *str, gssize len, gsize *arg_bytes_read, gsize *arg_bytes_write, GError **arg_error) { #ifdef USE_CHARDET char *det = NULL, *encoding = NULL; #endif gchar *ret = NULL; gsize *bytes_read, *bytes_write; GError **error; gsize my_bytes_read, my_bytes_write; bytes_read = arg_bytes_read ? 
arg_bytes_read : &my_bytes_read; bytes_write = arg_bytes_write ? arg_bytes_write : &my_bytes_write; error = arg_error ? arg_error : NULL; #ifdef USE_CHARDET if(cfg.chardet_detector) det = cfg.chardet_detector; if(det){ if(!strncasecmp("japanese", det, sizeof("japanese"))) { encoding = (char *)guess_jp(str, strlen(str)); if (!encoding) goto fallback; } else if(!strncasecmp("taiwanese", det, sizeof("taiwanese"))) { encoding = (char *)guess_tw(str, strlen(str)); if (!encoding) goto fallback; } else if(!strncasecmp("chinese", det, sizeof("chinese"))) { encoding = (char *)guess_cn(str, strlen(str)); if (!encoding) goto fallback; } else if(!strncasecmp("korean", det, sizeof("korean"))) { encoding = (char *)guess_kr(str, strlen(str)); if (!encoding) goto fallback; } else if(!strncasecmp("russian", det, sizeof("russian"))) { rcd_russian_charset res = rcdGetRussianCharset(str, strlen(str)); switch(res) { case RUSSIAN_CHARSET_WIN: encoding = "CP1251"; break; case RUSSIAN_CHARSET_ALT: encoding = "CP866"; break; case RUSSIAN_CHARSET_KOI: encoding = "KOI8-R"; break; case RUSSIAN_CHARSET_UTF8: encoding = "UTF-8"; break; } if (!encoding) goto fallback; #ifdef HAVE_UDET } else if (!strncasecmp("universal", det, sizeof("universal"))) { encoding = (char *)detectCharset((char *)str, strlen(str)); if (!encoding) goto fallback; #endif } else /* none, invalid */ goto fallback; ret = g_convert(str, len, "UTF-8", encoding, bytes_read, bytes_write, error); } fallback: #endif if(!ret && cfg.chardet_fallback){ gchar **encs=NULL, **enc=NULL; encs = g_strsplit_set(cfg.chardet_fallback, " ,:;|/", 0); if(encs){ enc = encs; for(enc=encs; *enc ; enc++){ ret = g_convert(str, len, "UTF-8", *enc, bytes_read, bytes_write, error); if(len == *bytes_read){ break; } } g_strfreev(encs); } } #ifdef USE_CHARDET /* many tag libraries return 2byte latin1 utf8 character as converted 8bit iso-8859-1 character, if they are asked to return latin1 string. 
*/ if(!ret){ ret = g_convert(str, len, "UTF-8", "ISO-8859-1", bytes_read, bytes_write, error); } #endif if(ret){ if(g_utf8_validate(ret, -1, NULL)) return ret; else { g_free(ret); ret = NULL; } } return NULL; /* if I have no idea, return NULL. */ }