Mercurial > audlegacy
diff src/audacious/ui_jumptotrack_cache.c @ 4291:ca077e01ed3a
Add caching to Jump to Track feature to speed up searches. (Bugzilla #180)
author | Jussi Judin <jjudin+audacious@iki.fi> |
---|---|
date | Mon, 18 Feb 2008 20:44:40 -0600 |
parents | |
children | 23a9ded30c70 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/audacious/ui_jumptotrack_cache.c Mon Feb 18 20:44:40 2008 -0600 @@ -0,0 +1,431 @@ +/* Audacious - Cross-platform multimedia player + * Copyright (C) 2008 Audacious development team. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; under version 3 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses>. + * + * The Audacious team does not consider modular code linking to + * Audacious or using our public API to be a derived work. + */ + +#include <glib.h> +#include <string.h> +#include <assert.h> + +#if defined(USE_REGEX_ONIGURUMA) + #include <onigposix.h> +#elif defined(USE_REGEX_PCRE) + #include <pcreposix.h> +#else + #include <regex.h> +#endif + +#include "playlist.h" +#include "strings.h" + +#include "ui_jumptotrack_cache.h" + +// Struct to keep information about matches from searches. +typedef struct +{ + GArray* track_entries; // JumpToTrackEntry* + GArray* normalized_titles; // gchar* +} KeywordMatches; + +/** + * Creates an regular expression list usable in searches from search keyword. + * + * In searches, every regular expression on this list is matched against + * the search title and if they all match, the title is declared as + * matching one. + * + * Regular expressions in list are formed by splitting the 'keyword' to words + * by splitting the keyword string with space character. + */ +static GSList* +ui_jump_to_track_cache_regex_list_create(const GString* keyword) +{ + GSList *regex_list = NULL; + gchar **words = NULL; + int i = -1; + /* Chop the key string into ' '-separated key regex-pattern strings */ + words = g_strsplit(keyword->str, " ", 0); + + /* create a list of regex using the regex-pattern strings */ + while ( words[++i] != NULL ) + { + // Ignore empty words. + if (words[i][0] == 0) { + continue; + } + regex_t *regex = g_malloc(sizeof(regex_t)); + #if defined(USE_REGEX_PCRE) + if ( regcomp( regex , words[i] , REG_NOSUB | REG_UTF8 ) == 0 ) + #else + if ( regcomp( regex , words[i] , REG_NOSUB ) == 0 ) + #endif + regex_list = g_slist_append( regex_list , regex ); + else + g_free( regex ); + } + + g_strfreev(words); + + return regex_list; +} + +/** + * Frees the regular expression list used in searches. + */ +static void +ui_jump_to_track_cache_regex_list_free(GSList* regex_list) +{ + if ( regex_list != NULL ) + { + GSList* regex_list_tmp = regex_list; + while ( regex_list != NULL ) + { + regex_t *regex = regex_list->data; + regfree( regex ); + g_free( regex ); + regex_list = g_slist_next(regex_list); + } + g_slist_free( regex_list_tmp ); + } +} + +/** + * Checks if 'song' matches all regular expressions in 'regex_list'. + */ +static gboolean +ui_jump_to_track_match(const gchar * song, GSList *regex_list) +{ + if ( song == NULL ) + return FALSE; + + for ( ; regex_list ; regex_list = g_slist_next(regex_list) ) + { + regex_t *regex = regex_list->data; + if ( regexec( regex , song , 0 , NULL , 0 ) != 0 ) + return FALSE; + } + + return TRUE; +} + +/** + * Returns all songs that match 'keyword'. + * + * Searches are conducted against entries in 'search_space' variable + * and after the search, search result is added to 'cache'. + * + * @param cache The result of this search is added to cache. + * @param search_space Entries inside which the search is conducted. + * @param keyword Normalized string for searches. + */ +static GArray* +ui_jump_to_track_cache_match_keyword(JumpToTrackCache* cache, + const KeywordMatches* search_space, + const GString* keyword) +{ + GSList* regex_list = ui_jump_to_track_cache_regex_list_create(keyword); + GArray* track_entries = g_array_new(FALSE, FALSE, sizeof(JumpToTrackEntry*)); + GArray* normalized_titles = g_array_new(FALSE, FALSE, sizeof(gchar*)); + gboolean match = FALSE; + int i = 0; + + for (i = 0; i < search_space->normalized_titles->len; i++) + { + gchar* title = g_array_index(search_space->normalized_titles, gchar*, i); + + if (regex_list != NULL) + match = ui_jump_to_track_match(title, regex_list); + else + match = TRUE; + + if (match) { + JumpToTrackEntry* entry = g_array_index(search_space->track_entries, + JumpToTrackEntry*, i); + g_array_append_val(track_entries, entry); + g_array_append_val(normalized_titles, title); + } + } + + KeywordMatches* keyword_matches = g_new(KeywordMatches, 1); + keyword_matches->track_entries = track_entries; + keyword_matches->normalized_titles = normalized_titles; + + g_hash_table_insert(cache->keywords, + GINT_TO_POINTER(g_string_hash(keyword)), + keyword_matches); + + ui_jump_to_track_cache_regex_list_free(regex_list); + return track_entries; +} + +/** + * Normalizes the search string to be more suitable for searches. + * + * Basically this does Unicode NFKD normalization to for example match + * half-width and full-width characters and case folding mainly to match + * upper- and lowercase letters. + * + * String returned by this function should be freed manually. + */ +static gchar * +normalize_search_string(const gchar* string) +{ + gchar* normalized_string = g_utf8_normalize(string, -1, G_NORMALIZE_NFKD); + gchar* folded_string = g_utf8_casefold(normalized_string, -1); + g_free(normalized_string); + return folded_string; +} + +/** + * Frees the possibly allocated data in KeywordMatches. + */ +static void +ui_jump_to_track_cache_free_keywordmatch_data(KeywordMatches* match_entry) +{ + int i = 0; + assert(match_entry->normalized_titles->len == match_entry->track_entries->len); + for (i = 0; i < match_entry->normalized_titles->len; i++) + { + g_free(g_array_index(match_entry->normalized_titles, gchar*, i)); + g_free(g_array_index(match_entry->track_entries, PlaylistEntry*, i)); + } +} + +/** + * Frees the memory reserved for an search result. + */ +static void +ui_jump_to_track_cache_free_cache_entry(gpointer entry) +{ + KeywordMatches* match_entry = (KeywordMatches*)entry; + g_array_free(match_entry->track_entries, TRUE); + g_array_free(match_entry->normalized_titles, TRUE); +} + +/** + * Creates a new song search cache. + * + * Returned value should be freed with ui_jump_to_track_cache_free() function. + */ +JumpToTrackCache* +ui_jump_to_track_cache_new() +{ + JumpToTrackCache* cache = g_new(JumpToTrackCache, 1); + cache->playlist_serial = -1; + cache->keywords = g_hash_table_new_full(NULL, NULL, NULL, + ui_jump_to_track_cache_free_cache_entry); + return cache; +} + +/** + * Clears the search cache. + */ +static void +ui_jump_to_track_cache_clear(JumpToTrackCache* cache) +{ + GString* empty_keyword = g_string_new(""); + gpointer found_keyword = NULL; + + cache->playlist_serial = -1; + + // All normalized titles reside in an empty key "" so we'll free them + // first. + found_keyword = g_hash_table_lookup(cache->keywords, + GINT_TO_POINTER(g_string_hash(empty_keyword))); + g_string_free(empty_keyword, + TRUE); + if (found_keyword != NULL) + { + KeywordMatches* all_titles = (KeywordMatches*)found_keyword; + ui_jump_to_track_cache_free_keywordmatch_data(all_titles); + } + // Now when all normalized strings are freed, no need to worry about + // double frees or memory leaks. + g_hash_table_remove_all(cache->keywords); +} + +/** + * Initializes the search cache if cache is empty or has wrong playlist. + */ +static void +ui_jump_to_track_cache_init(JumpToTrackCache* cache, + const Playlist* playlist) +{ + if (cache->playlist_serial != playlist->serial) + { + GList* playlist_entries = NULL; + GArray* track_entries = g_array_new(FALSE, FALSE, sizeof(JumpToTrackEntry*)); + GArray* normalized_titles = g_array_new(FALSE, FALSE, sizeof(gchar*)); + GString* empty_keyword = g_string_new(""); + gulong song_index = 0; + + // Reset cache state + ui_jump_to_track_cache_clear(cache); + + cache->playlist_serial = playlist->serial; + + // Initialize cache with playlist data + for (playlist_entries = playlist->entries; + playlist_entries; + playlist_entries = g_list_next(playlist_entries)) + { + PlaylistEntry* playlist_entry = PLAYLIST_ENTRY(playlist_entries->data); + + gchar *title = NULL; + /*we are matching all the path not just the filename or title*/ + + /* + * FIXME: The search string should be adapted to the + * current display setting, e.g. if the user has set it to + * "%p - %t" then build the match string like that too, or + * even better, search for each of the tags seperatly. + * + * In any case the string to match should _never_ contain + * something the user can't actually see in the playlist. + */ + if (playlist_entry->title) { + title = normalize_search_string(playlist_entry->title); + } else { + gchar *realfn = NULL; + realfn = g_filename_from_uri(playlist_entry->filename, NULL, NULL); + gchar *tmp_title = str_to_utf8(realfn ? realfn : playlist_entry->filename); + title = normalize_search_string(tmp_title); + g_free(tmp_title); + g_free(realfn); realfn = NULL; + } + + JumpToTrackEntry* search_entry = g_new(JumpToTrackEntry, 1); + search_entry->entry = playlist_entry; + search_entry->playlist_position = song_index; + g_array_append_val(track_entries, search_entry); + g_array_append_val(normalized_titles, title); + // We need to manually keep track of the current playlist index. + song_index++; + } + // Finally insert all titles into cache into an empty key "" so that + // the matchable data has specified place to be. + KeywordMatches* keyword_data = g_new(KeywordMatches, 1); + keyword_data->track_entries = track_entries; + keyword_data->normalized_titles = normalized_titles; + g_hash_table_insert(cache->keywords, + GINT_TO_POINTER(g_string_hash(empty_keyword)), + keyword_data); + g_string_free(empty_keyword, + TRUE); + } +} + +/** + * Searches 'keyword' inside 'playlist' by using 'cache' to speed up searching. + * + * Searches are basically conducted as follows: + * + * Cache is checked if it has the information about right playlist and + * initialized with playlist data if needed. + * + * Keyword is normalized for searching (Unicode NFKD, case folding) + * + * Cache is checked if it has keyword and if it has, we can immediately get + * the search results and return. If not, searching goes as follows: + * + * Search for the longest word that is in cache that matches the beginning + * of keyword and use the cached matches as base for the current search. + * The shortest word that can be matched against is the empty string "", so + * there should always be matches in cache. + * + * After that conduct the search by splitting keyword into words separated + * by space and using regular expressions. + * + * When the keyword is searched, search result is added to cache to + * corresponding keyword that can be used as base for new searches. + * + * The motivation for caching is that to search word 'some cool song' one + * has to type following strings that are all searched individually: + * + * s + * so + * som + * some + * some + * some c + * some co + * some coo + * some cool + * some cool + * some cool s + * some cool so + * some cool son + * some cool song + * + * If the search results are cached in every phase and the result of + * the maximum length matching string is used as base for concurrent + * searches, we can probably get the matches reduced to some hundreds + * after a few letters typed on playlists with thousands of songs and + * reduce useless iteration quite a lot. + * + * Return: GArray of JumpToTrackEntry* + */ +const GArray* +ui_jump_to_track_cache_search(JumpToTrackCache* cache, + const Playlist* playlist, + const gchar* keyword) +{ + gchar* normalized_keyword = normalize_search_string(keyword); + GString* keyword_string = g_string_new(normalized_keyword); + GString* match_string = g_string_new(normalized_keyword); + gsize match_string_length = keyword_string->len; + + ui_jump_to_track_cache_init(cache, playlist); + + while (match_string_length >= 0) + { + gpointer string_ptr = GINT_TO_POINTER(g_string_hash(match_string)); + gpointer result_entries = g_hash_table_lookup(cache->keywords, + string_ptr); + if (result_entries != NULL) + { + KeywordMatches* matched_entries = (KeywordMatches*)result_entries; + // if keyword matches something we have, we'll just return the list + // of matches that the keyword has. + if (match_string_length == keyword_string->len) { + g_string_free(keyword_string, TRUE); + g_string_free(match_string, TRUE); + g_free(normalized_keyword); + return matched_entries->track_entries; + } + + // Do normal search by using the result of previous search + // as search space. + GArray* result = ui_jump_to_track_cache_match_keyword(cache, + matched_entries, + keyword_string); + g_string_free(keyword_string, TRUE); + g_string_free(match_string, TRUE); + g_free(normalized_keyword); + return result; + } + match_string_length--; + g_string_set_size(match_string, match_string_length); + } + // This should never, ever get to this point because there is _always_ + // the empty string to match against. + AUDDBG("One should never get to this point. Something is really wrong with \ +cache->keywords hash table."); + assert(FALSE); + g_return_val_if_fail(FALSE, (GArray*)-1); +} +