# HG changeset patch # User Jussi Judin # Date 1203389080 21600 # Node ID ca077e01ed3aaa158136ab86371c4b11b37861f1 # Parent 5b277773870ef1e449a1ae18a87268c75ce5026e Add caching to Jump to Track feature to speed up searches. (Bugzilla #180) diff -r 5b277773870e -r ca077e01ed3a src/audacious/Makefile --- a/src/audacious/Makefile Mon Feb 18 23:20:12 2008 +0300 +++ b/src/audacious/Makefile Mon Feb 18 20:44:40 2008 -0600 @@ -50,6 +50,7 @@ ui_fileinfopopup.c \ ui_fileopener.c \ ui_jumptotrack.c \ + ui_jumptotrack_cache.c \ ui_main.c \ ui_main_evlisteners.c \ ui_manager.c \ diff -r 5b277773870e -r ca077e01ed3a src/audacious/ui_jumptotrack.c --- a/src/audacious/ui_jumptotrack.c Mon Feb 18 23:20:12 2008 +0300 +++ b/src/audacious/ui_jumptotrack.c Mon Feb 18 20:44:40 2008 -0600 @@ -79,9 +79,13 @@ #include "ui_skinned_window.h" +#include "ui_jumptotrack_cache.h" + static GtkWidget *jump_to_track_win = NULL; static gulong serial = 0; +static JumpToTrackCache* cache = NULL; + static void change_song(guint pos) { @@ -236,22 +240,6 @@ return FALSE; } -static gboolean -ui_jump_to_track_match(const gchar * song, GSList *regex_list) -{ - if ( song == NULL ) - return FALSE; - - for ( ; regex_list ; regex_list = g_slist_next(regex_list) ) - { - regex_t *regex = regex_list->data; - if ( regexec( regex , song , 0 , NULL , 0 ) != 0 ) - return FALSE; - } - - return TRUE; -} - void ui_jump_to_track_update(GtkWidget * widget, gpointer user_data) { @@ -311,21 +299,6 @@ serial = playlist->serial; // important. --yaz } - -/** - * Normalizes an UTF-8 string to be used in case-insensitive searches. - * - * Returned string should be freed. - */ -static gchar * -normalize_search_string(const gchar * string) -{ - gchar* normalized_string = g_utf8_normalize(string, -1, G_NORMALIZE_NFKD); - gchar* folded_string = g_utf8_casefold(normalized_string, -1); - g_free(normalized_string); - return folded_string; -} - static void ui_jump_to_track_edit_cb(GtkEntry * entry, gpointer user_data) { @@ -335,33 +308,12 @@ GtkListStore *store; - guint song_index = 0; - gchar **words; - GList *playlist_glist; + const GArray *search_matches; Playlist *playlist; - - gboolean match = FALSE; - - GSList *regex_list = NULL, *regex_list_tmp = NULL; - gint i = -1; + int i; - /* Chop the key string into ' '-separated key regex-pattern strings */ - gchar *search_text = normalize_search_string(gtk_entry_get_text(entry)); - words = g_strsplit(search_text, " ", 0); - g_free(search_text); - - /* create a list of regex using the regex-pattern strings */ - while ( words[++i] != NULL ) - { - regex_t *regex = g_malloc(sizeof(regex_t)); - #if defined(USE_REGEX_PCRE) - if ( regcomp( regex , words[i] , REG_NOSUB | REG_UTF8 ) == 0 ) - #else - if ( regcomp( regex , words[i] , REG_NOSUB ) == 0 ) - #endif - regex_list = g_slist_append( regex_list , regex ); - else - g_free( regex ); + if (cache == NULL) { + cache = ui_jump_to_track_cache_new(); } /* FIXME: Remove the connected signals before clearing @@ -377,62 +329,30 @@ PLAYLIST_LOCK(playlist); - for (playlist_glist = playlist->entries; playlist_glist; - playlist_glist = g_list_next(playlist_glist)) + search_matches = ui_jump_to_track_cache_search(cache, + playlist, + gtk_entry_get_text(entry)); + + for (i = 0; i < search_matches->len; i++) { - PlaylistEntry *entry = PLAYLIST_ENTRY(playlist_glist->data); + JumpToTrackEntry *jttentry = g_array_index(search_matches, JumpToTrackEntry*, i); + PlaylistEntry* entry = jttentry->entry; gchar *title = NULL; - if (entry->title) { - title = normalize_search_string(entry->title); - } else { + if (entry->title) + title = g_strdup(entry->title); + else { gchar *realfn = NULL; realfn = g_filename_from_uri(entry->filename, NULL, NULL); - gchar *tmp_title = str_to_utf8(realfn ? realfn : entry->filename); - title = normalize_search_string(tmp_title); - g_free(tmp_title); + if (strchr(realfn ? realfn : entry->filename, '/')) + title = str_to_utf8(strrchr(realfn ? realfn : entry->filename, '/') + 1); + else + title = str_to_utf8(realfn ? realfn : entry->filename); g_free(realfn); realfn = NULL; } - - /*we are matching all the path not just the filename or title*/ - - /* Compare the reg.expressions to the string - if all the - regexp in regex_list match, add to the ListStore */ - - /* - * FIXME: The search string should be adapted to the - * current display setting, e.g. if the user has set it to - * "%p - %t" then build the match string like that too, or - * even better, search for each of the tags seperatly. - * - * In any case the string to match should _never_ contain - * something the user can't actually see in the playlist. - */ - //g_print ("it passed\n"); - if (regex_list != NULL) - match = ui_jump_to_track_match(title, regex_list); - else - match = TRUE; - - g_free(title); title = NULL; - - if (match) { - if (entry->title) - title = g_strdup(entry->title); - else { - gchar *realfn = NULL; - realfn = g_filename_from_uri(entry->filename, NULL, NULL); - if (strchr(realfn ? realfn : entry->filename, '/')) - title = str_to_utf8(strrchr(realfn ? realfn : entry->filename, '/') + 1); - else - title = str_to_utf8(realfn ? realfn : entry->filename); - g_free(realfn); realfn = NULL; - } - gtk_list_store_append(store, &iter); - gtk_list_store_set(store, &iter, 0, song_index + 1 , 1, title, -1); - } - song_index++; - g_free(title); title = NULL; + gtk_list_store_append(store, &iter); + gtk_list_store_set(store, &iter, 0, jttentry->playlist_position + 1 , 1, title, -1); + g_free(title); } PLAYLIST_UNLOCK(playlist); @@ -441,20 +361,6 @@ gtk_tree_view_set_model( GTK_TREE_VIEW(treeview) , GTK_TREE_MODEL(store) ); g_object_unref( store ); - if ( regex_list != NULL ) - { - regex_list_tmp = regex_list; - while ( regex_list != NULL ) - { - regex_t *regex = regex_list->data; - regfree( regex ); - g_free( regex ); - regex_list = g_slist_next(regex_list); - } - g_slist_free( regex_list_tmp ); - } - g_strfreev(words); - if (gtk_tree_model_get_iter_first(GTK_TREE_MODEL(store), &iter)) { selection = gtk_tree_view_get_selection(treeview); gtk_tree_selection_select_iter(selection, &iter); diff -r 5b277773870e -r ca077e01ed3a src/audacious/ui_jumptotrack_cache.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/audacious/ui_jumptotrack_cache.c Mon Feb 18 20:44:40 2008 -0600 @@ -0,0 +1,431 @@ +/* Audacious - Cross-platform multimedia player + * Copyright (C) 2008 Audacious development team. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; under version 3 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * The Audacious team does not consider modular code linking to + * Audacious or using our public API to be a derived work. + */ + +#include +#include +#include + +#if defined(USE_REGEX_ONIGURUMA) + #include +#elif defined(USE_REGEX_PCRE) + #include +#else + #include +#endif + +#include "playlist.h" +#include "strings.h" + +#include "ui_jumptotrack_cache.h" + +// Struct to keep information about matches from searches. +typedef struct +{ + GArray* track_entries; // JumpToTrackEntry* + GArray* normalized_titles; // gchar* +} KeywordMatches; + +/** + * Creates an regular expression list usable in searches from search keyword. + * + * In searches, every regular expression on this list is matched against + * the search title and if they all match, the title is declared as + * matching one. + * + * Regular expressions in list are formed by splitting the 'keyword' to words + * by splitting the keyword string with space character. + */ +static GSList* +ui_jump_to_track_cache_regex_list_create(const GString* keyword) +{ + GSList *regex_list = NULL; + gchar **words = NULL; + int i = -1; + /* Chop the key string into ' '-separated key regex-pattern strings */ + words = g_strsplit(keyword->str, " ", 0); + + /* create a list of regex using the regex-pattern strings */ + while ( words[++i] != NULL ) + { + // Ignore empty words. + if (words[i][0] == 0) { + continue; + } + regex_t *regex = g_malloc(sizeof(regex_t)); + #if defined(USE_REGEX_PCRE) + if ( regcomp( regex , words[i] , REG_NOSUB | REG_UTF8 ) == 0 ) + #else + if ( regcomp( regex , words[i] , REG_NOSUB ) == 0 ) + #endif + regex_list = g_slist_append( regex_list , regex ); + else + g_free( regex ); + } + + g_strfreev(words); + + return regex_list; +} + +/** + * Frees the regular expression list used in searches. + */ +static void +ui_jump_to_track_cache_regex_list_free(GSList* regex_list) +{ + if ( regex_list != NULL ) + { + GSList* regex_list_tmp = regex_list; + while ( regex_list != NULL ) + { + regex_t *regex = regex_list->data; + regfree( regex ); + g_free( regex ); + regex_list = g_slist_next(regex_list); + } + g_slist_free( regex_list_tmp ); + } +} + +/** + * Checks if 'song' matches all regular expressions in 'regex_list'. + */ +static gboolean +ui_jump_to_track_match(const gchar * song, GSList *regex_list) +{ + if ( song == NULL ) + return FALSE; + + for ( ; regex_list ; regex_list = g_slist_next(regex_list) ) + { + regex_t *regex = regex_list->data; + if ( regexec( regex , song , 0 , NULL , 0 ) != 0 ) + return FALSE; + } + + return TRUE; +} + +/** + * Returns all songs that match 'keyword'. + * + * Searches are conducted against entries in 'search_space' variable + * and after the search, search result is added to 'cache'. + * + * @param cache The result of this search is added to cache. + * @param search_space Entries inside which the search is conducted. + * @param keyword Normalized string for searches. + */ +static GArray* +ui_jump_to_track_cache_match_keyword(JumpToTrackCache* cache, + const KeywordMatches* search_space, + const GString* keyword) +{ + GSList* regex_list = ui_jump_to_track_cache_regex_list_create(keyword); + GArray* track_entries = g_array_new(FALSE, FALSE, sizeof(JumpToTrackEntry*)); + GArray* normalized_titles = g_array_new(FALSE, FALSE, sizeof(gchar*)); + gboolean match = FALSE; + int i = 0; + + for (i = 0; i < search_space->normalized_titles->len; i++) + { + gchar* title = g_array_index(search_space->normalized_titles, gchar*, i); + + if (regex_list != NULL) + match = ui_jump_to_track_match(title, regex_list); + else + match = TRUE; + + if (match) { + JumpToTrackEntry* entry = g_array_index(search_space->track_entries, + JumpToTrackEntry*, i); + g_array_append_val(track_entries, entry); + g_array_append_val(normalized_titles, title); + } + } + + KeywordMatches* keyword_matches = g_new(KeywordMatches, 1); + keyword_matches->track_entries = track_entries; + keyword_matches->normalized_titles = normalized_titles; + + g_hash_table_insert(cache->keywords, + GINT_TO_POINTER(g_string_hash(keyword)), + keyword_matches); + + ui_jump_to_track_cache_regex_list_free(regex_list); + return track_entries; +} + +/** + * Normalizes the search string to be more suitable for searches. + * + * Basically this does Unicode NFKD normalization to for example match + * half-width and full-width characters and case folding mainly to match + * upper- and lowercase letters. + * + * String returned by this function should be freed manually. + */ +static gchar * +normalize_search_string(const gchar* string) +{ + gchar* normalized_string = g_utf8_normalize(string, -1, G_NORMALIZE_NFKD); + gchar* folded_string = g_utf8_casefold(normalized_string, -1); + g_free(normalized_string); + return folded_string; +} + +/** + * Frees the possibly allocated data in KeywordMatches. + */ +static void +ui_jump_to_track_cache_free_keywordmatch_data(KeywordMatches* match_entry) +{ + int i = 0; + assert(match_entry->normalized_titles->len == match_entry->track_entries->len); + for (i = 0; i < match_entry->normalized_titles->len; i++) + { + g_free(g_array_index(match_entry->normalized_titles, gchar*, i)); + g_free(g_array_index(match_entry->track_entries, PlaylistEntry*, i)); + } +} + +/** + * Frees the memory reserved for an search result. + */ +static void +ui_jump_to_track_cache_free_cache_entry(gpointer entry) +{ + KeywordMatches* match_entry = (KeywordMatches*)entry; + g_array_free(match_entry->track_entries, TRUE); + g_array_free(match_entry->normalized_titles, TRUE); +} + +/** + * Creates a new song search cache. + * + * Returned value should be freed with ui_jump_to_track_cache_free() function. + */ +JumpToTrackCache* +ui_jump_to_track_cache_new() +{ + JumpToTrackCache* cache = g_new(JumpToTrackCache, 1); + cache->playlist_serial = -1; + cache->keywords = g_hash_table_new_full(NULL, NULL, NULL, + ui_jump_to_track_cache_free_cache_entry); + return cache; +} + +/** + * Clears the search cache. + */ +static void +ui_jump_to_track_cache_clear(JumpToTrackCache* cache) +{ + GString* empty_keyword = g_string_new(""); + gpointer found_keyword = NULL; + + cache->playlist_serial = -1; + + // All normalized titles reside in an empty key "" so we'll free them + // first. + found_keyword = g_hash_table_lookup(cache->keywords, + GINT_TO_POINTER(g_string_hash(empty_keyword))); + g_string_free(empty_keyword, + TRUE); + if (found_keyword != NULL) + { + KeywordMatches* all_titles = (KeywordMatches*)found_keyword; + ui_jump_to_track_cache_free_keywordmatch_data(all_titles); + } + // Now when all normalized strings are freed, no need to worry about + // double frees or memory leaks. + g_hash_table_remove_all(cache->keywords); +} + +/** + * Initializes the search cache if cache is empty or has wrong playlist. + */ +static void +ui_jump_to_track_cache_init(JumpToTrackCache* cache, + const Playlist* playlist) +{ + if (cache->playlist_serial != playlist->serial) + { + GList* playlist_entries = NULL; + GArray* track_entries = g_array_new(FALSE, FALSE, sizeof(JumpToTrackEntry*)); + GArray* normalized_titles = g_array_new(FALSE, FALSE, sizeof(gchar*)); + GString* empty_keyword = g_string_new(""); + gulong song_index = 0; + + // Reset cache state + ui_jump_to_track_cache_clear(cache); + + cache->playlist_serial = playlist->serial; + + // Initialize cache with playlist data + for (playlist_entries = playlist->entries; + playlist_entries; + playlist_entries = g_list_next(playlist_entries)) + { + PlaylistEntry* playlist_entry = PLAYLIST_ENTRY(playlist_entries->data); + + gchar *title = NULL; + /*we are matching all the path not just the filename or title*/ + + /* + * FIXME: The search string should be adapted to the + * current display setting, e.g. if the user has set it to + * "%p - %t" then build the match string like that too, or + * even better, search for each of the tags seperatly. + * + * In any case the string to match should _never_ contain + * something the user can't actually see in the playlist. + */ + if (playlist_entry->title) { + title = normalize_search_string(playlist_entry->title); + } else { + gchar *realfn = NULL; + realfn = g_filename_from_uri(playlist_entry->filename, NULL, NULL); + gchar *tmp_title = str_to_utf8(realfn ? realfn : playlist_entry->filename); + title = normalize_search_string(tmp_title); + g_free(tmp_title); + g_free(realfn); realfn = NULL; + } + + JumpToTrackEntry* search_entry = g_new(JumpToTrackEntry, 1); + search_entry->entry = playlist_entry; + search_entry->playlist_position = song_index; + g_array_append_val(track_entries, search_entry); + g_array_append_val(normalized_titles, title); + // We need to manually keep track of the current playlist index. + song_index++; + } + // Finally insert all titles into cache into an empty key "" so that + // the matchable data has specified place to be. + KeywordMatches* keyword_data = g_new(KeywordMatches, 1); + keyword_data->track_entries = track_entries; + keyword_data->normalized_titles = normalized_titles; + g_hash_table_insert(cache->keywords, + GINT_TO_POINTER(g_string_hash(empty_keyword)), + keyword_data); + g_string_free(empty_keyword, + TRUE); + } +} + +/** + * Searches 'keyword' inside 'playlist' by using 'cache' to speed up searching. + * + * Searches are basically conducted as follows: + * + * Cache is checked if it has the information about right playlist and + * initialized with playlist data if needed. + * + * Keyword is normalized for searching (Unicode NFKD, case folding) + * + * Cache is checked if it has keyword and if it has, we can immediately get + * the search results and return. If not, searching goes as follows: + * + * Search for the longest word that is in cache that matches the beginning + * of keyword and use the cached matches as base for the current search. + * The shortest word that can be matched against is the empty string "", so + * there should always be matches in cache. + * + * After that conduct the search by splitting keyword into words separated + * by space and using regular expressions. + * + * When the keyword is searched, search result is added to cache to + * corresponding keyword that can be used as base for new searches. + * + * The motivation for caching is that to search word 'some cool song' one + * has to type following strings that are all searched individually: + * + * s + * so + * som + * some + * some + * some c + * some co + * some coo + * some cool + * some cool + * some cool s + * some cool so + * some cool son + * some cool song + * + * If the search results are cached in every phase and the result of + * the maximum length matching string is used as base for concurrent + * searches, we can probably get the matches reduced to some hundreds + * after a few letters typed on playlists with thousands of songs and + * reduce useless iteration quite a lot. + * + * Return: GArray of JumpToTrackEntry* + */ +const GArray* +ui_jump_to_track_cache_search(JumpToTrackCache* cache, + const Playlist* playlist, + const gchar* keyword) +{ + gchar* normalized_keyword = normalize_search_string(keyword); + GString* keyword_string = g_string_new(normalized_keyword); + GString* match_string = g_string_new(normalized_keyword); + gsize match_string_length = keyword_string->len; + + ui_jump_to_track_cache_init(cache, playlist); + + while (match_string_length >= 0) + { + gpointer string_ptr = GINT_TO_POINTER(g_string_hash(match_string)); + gpointer result_entries = g_hash_table_lookup(cache->keywords, + string_ptr); + if (result_entries != NULL) + { + KeywordMatches* matched_entries = (KeywordMatches*)result_entries; + // if keyword matches something we have, we'll just return the list + // of matches that the keyword has. + if (match_string_length == keyword_string->len) { + g_string_free(keyword_string, TRUE); + g_string_free(match_string, TRUE); + g_free(normalized_keyword); + return matched_entries->track_entries; + } + + // Do normal search by using the result of previous search + // as search space. + GArray* result = ui_jump_to_track_cache_match_keyword(cache, + matched_entries, + keyword_string); + g_string_free(keyword_string, TRUE); + g_string_free(match_string, TRUE); + g_free(normalized_keyword); + return result; + } + match_string_length--; + g_string_set_size(match_string, match_string_length); + } + // This should never, ever get to this point because there is _always_ + // the empty string to match against. + AUDDBG("One should never get to this point. Something is really wrong with \ +cache->keywords hash table."); + assert(FALSE); + g_return_val_if_fail(FALSE, (GArray*)-1); +} + diff -r 5b277773870e -r ca077e01ed3a src/audacious/ui_jumptotrack_cache.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/audacious/ui_jumptotrack_cache.h Mon Feb 18 20:44:40 2008 -0600 @@ -0,0 +1,47 @@ +/* Audacious - Cross-platform multimedia player + * Copyright (C) 2008 Audacious development team. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; under version 3 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * The Audacious team does not consider modular code linking to + * Audacious or using our public API to be a derived work. + */ + +#ifndef UI_JUMPTOTRACK_CACHE_H +#define UI_JUMPTOTRACK_CACHE_H + +#include + +#include "playlist.h" + +typedef struct _JumpToTrackCache JumpToTrackCache; +typedef struct _JumpToTrackEntry JumpToTrackEntry; + +struct _JumpToTrackCache +{ + gulong playlist_serial; + GHashTable* keywords; +}; + +struct _JumpToTrackEntry +{ + PlaylistEntry* entry; + // We need to manually keep information about current playlist position. + gulong playlist_position; +}; + +extern JumpToTrackCache* ui_jump_to_track_cache_new(void); +extern const GArray* ui_jump_to_track_cache_search(JumpToTrackCache* cache, const Playlist* playlist, const gchar* keyword); +extern void ui_jump_to_track_cache_free(JumpToTrackCache* cache); + +#endif