comparison src/audlegacy/ui_jumptotrack_cache.c @ 4811:7bf7f83a217e

rename src/audacious src/audlegacy so that both audlegacy and audacious can coexist.
author Yoshiki Yazawa <yaz@honeyplanet.jp>
date Wed, 26 Nov 2008 00:44:56 +0900
parents src/audacious/ui_jumptotrack_cache.c@b58b2617fe13
children
comparison
equal deleted inserted replaced
4810:c10e53092037 4811:7bf7f83a217e
1 /* Audacious - Cross-platform multimedia player
2 * Copyright (C) 2008 Audacious development team.
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; under version 3 of the License.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses>.
15 *
16 * The Audacious team does not consider modular code linking to
17 * Audacious or using our public API to be a derived work.
18 */
19
20 #ifdef HAVE_CONFIG_H
21 # include "config.h"
22 #endif
23
24 #include <glib.h>
25 #include <string.h>
26 #include <assert.h>
27
28 #if defined(USE_REGEX_ONIGURUMA)
29 #include <onigposix.h>
30 #elif defined(USE_REGEX_PCRE)
31 #include <pcreposix.h>
32 #else
33 #include <regex.h>
34 #endif
35
36 #include "playlist.h"
37 #include "strings.h"
38
39 #include "ui_jumptotrack_cache.h"
40
41 // Struct to keep information about matches from searches.
42 typedef struct
43 {
44 GArray* track_entries; // JumpToTrackEntry*
45 GArray* normalized_titles; // gchar*
46 } KeywordMatches;
47
48 /**
49 * Creates an regular expression list usable in searches from search keyword.
50 *
51 * In searches, every regular expression on this list is matched against
52 * the search title and if they all match, the title is declared as
53 * matching one.
54 *
55 * Regular expressions in list are formed by splitting the 'keyword' to words
56 * by splitting the keyword string with space character.
57 */
58 static GSList*
59 ui_jump_to_track_cache_regex_list_create(const GString* keyword)
60 {
61 GSList *regex_list = NULL;
62 gchar **words = NULL;
63 int i = -1;
64 /* Chop the key string into ' '-separated key regex-pattern strings */
65 words = g_strsplit(keyword->str, " ", 0);
66
67 /* create a list of regex using the regex-pattern strings */
68 while ( words[++i] != NULL )
69 {
70 // Ignore empty words.
71 if (words[i][0] == 0) {
72 continue;
73 }
74 regex_t *regex = g_malloc(sizeof(regex_t));
75 #if defined(USE_REGEX_PCRE)
76 if ( regcomp( regex , words[i] , REG_NOSUB | REG_UTF8 ) == 0 )
77 #else
78 if ( regcomp( regex , words[i] , REG_NOSUB ) == 0 )
79 #endif
80 regex_list = g_slist_append( regex_list , regex );
81 else
82 g_free( regex );
83 }
84
85 g_strfreev(words);
86
87 return regex_list;
88 }
89
90 /**
91 * Frees the regular expression list used in searches.
92 */
93 static void
94 ui_jump_to_track_cache_regex_list_free(GSList* regex_list)
95 {
96 if ( regex_list != NULL )
97 {
98 GSList* regex_list_tmp = regex_list;
99 while ( regex_list != NULL )
100 {
101 regex_t *regex = regex_list->data;
102 regfree( regex );
103 g_free( regex );
104 regex_list = g_slist_next(regex_list);
105 }
106 g_slist_free( regex_list_tmp );
107 }
108 }
109
110 /**
111 * Checks if 'song' matches all regular expressions in 'regex_list'.
112 */
113 static gboolean
114 ui_jump_to_track_match(const gchar * song, GSList *regex_list)
115 {
116 if ( song == NULL )
117 return FALSE;
118
119 for ( ; regex_list ; regex_list = g_slist_next(regex_list) )
120 {
121 regex_t *regex = regex_list->data;
122 if ( regexec( regex , song , 0 , NULL , 0 ) != 0 )
123 return FALSE;
124 }
125
126 return TRUE;
127 }
128
129 /**
130 * Returns all songs that match 'keyword'.
131 *
132 * Searches are conducted against entries in 'search_space' variable
133 * and after the search, search result is added to 'cache'.
134 *
135 * @param cache The result of this search is added to cache.
136 * @param search_space Entries inside which the search is conducted.
137 * @param keyword Normalized string for searches.
138 */
139 static GArray*
140 ui_jump_to_track_cache_match_keyword(JumpToTrackCache* cache,
141 const KeywordMatches* search_space,
142 const GString* keyword)
143 {
144 GSList* regex_list = ui_jump_to_track_cache_regex_list_create(keyword);
145 GArray* track_entries = g_array_new(FALSE, FALSE, sizeof(JumpToTrackEntry*));
146 GArray* normalized_titles = g_array_new(FALSE, FALSE, sizeof(gchar*));
147 gboolean match = FALSE;
148 int i = 0;
149
150 for (i = 0; i < search_space->normalized_titles->len; i++)
151 {
152 gchar* title = g_array_index(search_space->normalized_titles, gchar*, i);
153
154 if (regex_list != NULL)
155 match = ui_jump_to_track_match(title, regex_list);
156 else
157 match = TRUE;
158
159 if (match) {
160 JumpToTrackEntry* entry = g_array_index(search_space->track_entries,
161 JumpToTrackEntry*, i);
162 g_array_append_val(track_entries, entry);
163 g_array_append_val(normalized_titles, title);
164 }
165 }
166
167 KeywordMatches* keyword_matches = g_new(KeywordMatches, 1);
168 keyword_matches->track_entries = track_entries;
169 keyword_matches->normalized_titles = normalized_titles;
170
171 g_hash_table_insert(cache->keywords,
172 GINT_TO_POINTER(g_string_hash(keyword)),
173 keyword_matches);
174
175 ui_jump_to_track_cache_regex_list_free(regex_list);
176 return track_entries;
177 }
178
179 /**
180 * Normalizes the search string to be more suitable for searches.
181 *
182 * Basically this does Unicode NFKD normalization to for example match
183 * half-width and full-width characters and case folding mainly to match
184 * upper- and lowercase letters.
185 *
186 * String returned by this function should be freed manually.
187 */
188 static gchar *
189 normalize_search_string(const gchar* string)
190 {
191 gchar* normalized_string = g_utf8_normalize(string, -1, G_NORMALIZE_NFKD);
192 gchar* folded_string = g_utf8_casefold(normalized_string, -1);
193 g_free(normalized_string);
194 return folded_string;
195 }
196
197 /**
198 * Frees the possibly allocated data in KeywordMatches.
199 */
200 static void
201 ui_jump_to_track_cache_free_keywordmatch_data(KeywordMatches* match_entry)
202 {
203 int i = 0;
204 assert(match_entry->normalized_titles->len == match_entry->track_entries->len);
205 for (i = 0; i < match_entry->normalized_titles->len; i++)
206 {
207 g_free(g_array_index(match_entry->normalized_titles, gchar*, i));
208 g_free(g_array_index(match_entry->track_entries, PlaylistEntry*, i));
209 }
210 }
211
212 /**
213 * Frees the memory reserved for an search result.
214 */
215 static void
216 ui_jump_to_track_cache_free_cache_entry(gpointer entry)
217 {
218 KeywordMatches* match_entry = (KeywordMatches*)entry;
219 g_array_free(match_entry->track_entries, TRUE);
220 g_array_free(match_entry->normalized_titles, TRUE);
221 }
222
223 /**
224 * Creates a new song search cache.
225 *
226 * Returned value should be freed with ui_jump_to_track_cache_free() function.
227 */
228 JumpToTrackCache*
229 ui_jump_to_track_cache_new()
230 {
231 JumpToTrackCache* cache = g_new(JumpToTrackCache, 1);
232 cache->playlist_serial = -1;
233 cache->keywords = g_hash_table_new_full(NULL, NULL, NULL,
234 ui_jump_to_track_cache_free_cache_entry);
235 return cache;
236 }
237
238 /**
239 * Clears the search cache.
240 */
241 static void
242 ui_jump_to_track_cache_clear(JumpToTrackCache* cache)
243 {
244 GString* empty_keyword = g_string_new("");
245 gpointer found_keyword = NULL;
246
247 cache->playlist_serial = -1;
248
249 // All normalized titles reside in an empty key "" so we'll free them
250 // first.
251 found_keyword = g_hash_table_lookup(cache->keywords,
252 GINT_TO_POINTER(g_string_hash(empty_keyword)));
253 g_string_free(empty_keyword,
254 TRUE);
255 if (found_keyword != NULL)
256 {
257 KeywordMatches* all_titles = (KeywordMatches*)found_keyword;
258 ui_jump_to_track_cache_free_keywordmatch_data(all_titles);
259 }
260 // Now when all normalized strings are freed, no need to worry about
261 // double frees or memory leaks.
262 g_hash_table_remove_all(cache->keywords);
263 }
264
265 /**
266 * Initializes the search cache if cache is empty or has wrong playlist.
267 */
268 static void
269 ui_jump_to_track_cache_init(JumpToTrackCache* cache,
270 const Playlist* playlist)
271 {
272 if (cache->playlist_serial != playlist->serial)
273 {
274 GList* playlist_entries = NULL;
275 GArray* track_entries = g_array_new(FALSE, FALSE, sizeof(JumpToTrackEntry*));
276 GArray* normalized_titles = g_array_new(FALSE, FALSE, sizeof(gchar*));
277 GString* empty_keyword = g_string_new("");
278 gulong song_index = 0;
279
280 // Reset cache state
281 ui_jump_to_track_cache_clear(cache);
282
283 cache->playlist_serial = playlist->serial;
284
285 // Initialize cache with playlist data
286 for (playlist_entries = playlist->entries;
287 playlist_entries;
288 playlist_entries = g_list_next(playlist_entries))
289 {
290 PlaylistEntry* playlist_entry = PLAYLIST_ENTRY(playlist_entries->data);
291
292 gchar *title = NULL;
293 /*we are matching all the path not just the filename or title*/
294
295 /*
296 * FIXME: The search string should be adapted to the
297 * current display setting, e.g. if the user has set it to
298 * "%p - %t" then build the match string like that too, or
299 * even better, search for each of the tags seperatly.
300 *
301 * In any case the string to match should _never_ contain
302 * something the user can't actually see in the playlist.
303 */
304 if (playlist_entry->title) {
305 title = normalize_search_string(playlist_entry->title);
306 } else {
307 gchar *realfn = NULL;
308 realfn = g_filename_from_uri(playlist_entry->filename, NULL, NULL);
309 gchar *tmp_title = str_assert_utf8(realfn ? realfn : playlist_entry->filename);
310 title = normalize_search_string(tmp_title);
311 g_free(tmp_title);
312 g_free(realfn); realfn = NULL;
313 }
314
315 JumpToTrackEntry* search_entry = g_new(JumpToTrackEntry, 1);
316 search_entry->entry = playlist_entry;
317 search_entry->playlist_position = song_index;
318 g_array_append_val(track_entries, search_entry);
319 g_array_append_val(normalized_titles, title);
320 // We need to manually keep track of the current playlist index.
321 song_index++;
322 }
323 // Finally insert all titles into cache into an empty key "" so that
324 // the matchable data has specified place to be.
325 KeywordMatches* keyword_data = g_new(KeywordMatches, 1);
326 keyword_data->track_entries = track_entries;
327 keyword_data->normalized_titles = normalized_titles;
328 g_hash_table_insert(cache->keywords,
329 GINT_TO_POINTER(g_string_hash(empty_keyword)),
330 keyword_data);
331 g_string_free(empty_keyword,
332 TRUE);
333 }
334 }
335
336 /**
337 * Searches 'keyword' inside 'playlist' by using 'cache' to speed up searching.
338 *
339 * Searches are basically conducted as follows:
340 *
341 * Cache is checked if it has the information about right playlist and
342 * initialized with playlist data if needed.
343 *
344 * Keyword is normalized for searching (Unicode NFKD, case folding)
345 *
346 * Cache is checked if it has keyword and if it has, we can immediately get
347 * the search results and return. If not, searching goes as follows:
348 *
349 * Search for the longest word that is in cache that matches the beginning
350 * of keyword and use the cached matches as base for the current search.
351 * The shortest word that can be matched against is the empty string "", so
352 * there should always be matches in cache.
353 *
354 * After that conduct the search by splitting keyword into words separated
355 * by space and using regular expressions.
356 *
357 * When the keyword is searched, search result is added to cache to
358 * corresponding keyword that can be used as base for new searches.
359 *
360 * The motivation for caching is that to search word 'some cool song' one
361 * has to type following strings that are all searched individually:
362 *
363 * s
364 * so
365 * som
366 * some
367 * some
368 * some c
369 * some co
370 * some coo
371 * some cool
372 * some cool
373 * some cool s
374 * some cool so
375 * some cool son
376 * some cool song
377 *
378 * If the search results are cached in every phase and the result of
379 * the maximum length matching string is used as base for concurrent
380 * searches, we can probably get the matches reduced to some hundreds
381 * after a few letters typed on playlists with thousands of songs and
382 * reduce useless iteration quite a lot.
383 *
384 * Return: GArray of JumpToTrackEntry*
385 */
386 const GArray*
387 ui_jump_to_track_cache_search(JumpToTrackCache* cache,
388 const Playlist* playlist,
389 const gchar* keyword)
390 {
391 gchar* normalized_keyword = normalize_search_string(keyword);
392 GString* keyword_string = g_string_new(normalized_keyword);
393 GString* match_string = g_string_new(normalized_keyword);
394 gsize match_string_length = keyword_string->len;
395
396 ui_jump_to_track_cache_init(cache, playlist);
397
398 while (match_string_length >= 0)
399 {
400 gpointer string_ptr = GINT_TO_POINTER(g_string_hash(match_string));
401 gpointer result_entries = g_hash_table_lookup(cache->keywords,
402 string_ptr);
403 if (result_entries != NULL)
404 {
405 KeywordMatches* matched_entries = (KeywordMatches*)result_entries;
406 // if keyword matches something we have, we'll just return the list
407 // of matches that the keyword has.
408 if (match_string_length == keyword_string->len) {
409 g_string_free(keyword_string, TRUE);
410 g_string_free(match_string, TRUE);
411 g_free(normalized_keyword);
412 return matched_entries->track_entries;
413 }
414
415 // Do normal search by using the result of previous search
416 // as search space.
417 GArray* result = ui_jump_to_track_cache_match_keyword(cache,
418 matched_entries,
419 keyword_string);
420 g_string_free(keyword_string, TRUE);
421 g_string_free(match_string, TRUE);
422 g_free(normalized_keyword);
423 return result;
424 }
425 match_string_length--;
426 g_string_set_size(match_string, match_string_length);
427 }
428 // This should never, ever get to this point because there is _always_
429 // the empty string to match against.
430 AUDDBG("One should never get to this point. Something is really wrong with \
431 cache->keywords hash table.");
432 assert(FALSE);
433 g_return_val_if_fail(FALSE, (GArray*)-1);
434 }
435