Mercurial > pidgin
diff src/gtksourceiter.c @ 7358:78c1fc730dc4
[gaim-migrate @ 7951]
Case-insensitive searching in gtkimhtml.
committer: Tailor Script <tailor@pidgin.im>
author | Sean Egan <seanegan@gmail.com> |
---|---|
date | Tue, 28 Oct 2003 00:29:32 +0000 |
parents | |
children | fa6395637e2c |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/gtksourceiter.c Tue Oct 28 00:29:32 2003 +0000 @@ -0,0 +1,711 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- + * gtksourceiter.h + * + * Copyright (C) 2000, 2002 Paolo Maggi + * Copyright (C) 2002, 2003 Jeroen Zwartepoorte + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Library General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +/* + * Parts of this file are copied from the gedit and glimmer project. + */ + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#include <string.h> +#include "gtksourceiter.h" + +#define GTK_TEXT_UNKNOWN_CHAR 0xFFFC + +static gchar * +g_utf8_strcasestr (const gchar *haystack, const gchar *needle) +{ + gsize needle_len; + gsize haystack_len; + gchar *ret = NULL; + gchar *p; + gchar *casefold; + gchar *caseless_haystack; + gint i; + + g_return_val_if_fail (haystack != NULL, NULL); + g_return_val_if_fail (needle != NULL, NULL); + + casefold = g_utf8_casefold (haystack, -1); + caseless_haystack = g_utf8_normalize (casefold, -1, G_NORMALIZE_ALL); + g_free (casefold); + + needle_len = g_utf8_strlen (needle, -1); + haystack_len = g_utf8_strlen (caseless_haystack, -1); + + if (needle_len == 0) + { + ret = (gchar *)haystack; + goto finally_1; + } + + if (haystack_len < needle_len) + { + ret = NULL; + goto finally_1; + } + + p = (gchar*)caseless_haystack; + needle_len = strlen (needle); + i = 0; + + while (*p) + { + if ((strncmp (p, needle, needle_len) == 0)) + { + ret = g_utf8_offset_to_pointer (haystack, i); + goto finally_1; + } + + p = g_utf8_next_char (p); + i++; + } + +finally_1: + g_free (caseless_haystack); + + return ret; +} + +static gchar * +g_utf8_strrcasestr (const gchar *haystack, const gchar *needle) +{ + gsize needle_len; + gsize haystack_len; + gchar *ret = NULL; + gchar *p; + gchar *casefold; + gchar *caseless_haystack; + gint i; + + g_return_val_if_fail (haystack != NULL, NULL); + g_return_val_if_fail (needle != NULL, NULL); + + casefold = g_utf8_casefold (haystack, -1); + caseless_haystack = g_utf8_normalize (casefold, -1, G_NORMALIZE_ALL); + g_free (casefold); + + needle_len = g_utf8_strlen (needle, -1); + haystack_len = g_utf8_strlen (caseless_haystack, -1); + + if (needle_len == 0) + { + ret = (gchar *)haystack; + goto finally_1; + } + + if (haystack_len < needle_len) + { + ret = NULL; + goto finally_1; + } + + haystack_len = strlen (caseless_haystack); + needle_len = strlen (needle); + p = (gchar *)caseless_haystack + haystack_len - needle_len; + i = haystack_len - needle_len; + + while (p >= caseless_haystack) + { + if (strncasecmp (p, needle, needle_len) == 0) + { + ret = g_utf8_offset_to_pointer (haystack, i); + goto finally_1; + } + + p = g_utf8_prev_char (p); + i--; + } + +finally_1: + g_free (caseless_haystack); + + return ret; +} + +static gboolean +g_utf8_caselessnmatch (const char *s1, const char *s2, + gssize n1, gssize n2) +{ + gchar *casefold; + gchar *normalized_s1; + gchar *normalized_s2; + gint len_s1; + gint len_s2; + gboolean ret = FALSE; + + g_return_val_if_fail (s1 != NULL, FALSE); + g_return_val_if_fail (s2 != NULL, FALSE); + g_return_val_if_fail (n1 > 0, FALSE); + g_return_val_if_fail (n2 > 0, FALSE); + + casefold = g_utf8_casefold (s1, n1); + normalized_s1 = g_utf8_normalize (casefold, -1, G_NORMALIZE_ALL); + g_free (casefold); + + casefold = g_utf8_casefold (s2, n2); + normalized_s2 = g_utf8_normalize (casefold, -1, G_NORMALIZE_ALL); + g_free (casefold); + + len_s1 = strlen (normalized_s1); + len_s2 = strlen (normalized_s2); + + if (len_s1 < len_s2) + goto finally_2; + + ret = (strncmp (normalized_s1, normalized_s2, len_s2) == 0); + +finally_2: + g_free (normalized_s1); + g_free (normalized_s2); + + return ret; +} + +static void +forward_chars_with_skipping (GtkTextIter *iter, + gint count, + gboolean skip_invisible, + gboolean skip_nontext) +{ + gint i; + + g_return_if_fail (count >= 0); + + i = count; + + while (i > 0) + { + gboolean ignored = FALSE; + + if (skip_nontext && gtk_text_iter_get_char (iter) == GTK_TEXT_UNKNOWN_CHAR) + ignored = TRUE; + + if (!ignored && skip_invisible && + /* _gtk_text_btree_char_is_invisible (iter)*/ FALSE) + ignored = TRUE; + + gtk_text_iter_forward_char (iter); + + if (!ignored) + --i; + } +} + +static gboolean +lines_match (const GtkTextIter *start, + const gchar **lines, + gboolean visible_only, + gboolean slice, + GtkTextIter *match_start, + GtkTextIter *match_end) +{ + GtkTextIter next; + gchar *line_text; + const gchar *found; + gint offset; + + if (*lines == NULL || **lines == '\0') + { + if (match_start) + *match_start = *start; + if (match_end) + *match_end = *start; + return TRUE; + } + + next = *start; + gtk_text_iter_forward_line (&next); + + /* No more text in buffer, but *lines is nonempty */ + if (gtk_text_iter_equal (start, &next)) + return FALSE; + + if (slice) + { + if (visible_only) + line_text = gtk_text_iter_get_visible_slice (start, &next); + else + line_text = gtk_text_iter_get_slice (start, &next); + } + else + { + if (visible_only) + line_text = gtk_text_iter_get_visible_text (start, &next); + else + line_text = gtk_text_iter_get_text (start, &next); + } + + if (match_start) /* if this is the first line we're matching */ + { + found = g_utf8_strcasestr (line_text, *lines); + } + else + { + /* If it's not the first line, we have to match from the + * start of the line. + */ + if (g_utf8_caselessnmatch (line_text, *lines, strlen (line_text), + strlen (*lines))) + found = line_text; + else + found = NULL; + } + + if (found == NULL) + { + g_free (line_text); + return FALSE; + } + + /* Get offset to start of search string */ + offset = g_utf8_strlen (line_text, found - line_text); + + next = *start; + + /* If match start needs to be returned, set it to the + * start of the search string. + */ + if (match_start) + { + *match_start = next; + + forward_chars_with_skipping (match_start, offset, + visible_only, !slice); + } + + /* Go to end of search string */ + offset += g_utf8_strlen (*lines, -1); + + forward_chars_with_skipping (&next, offset, visible_only, !slice); + + g_free (line_text); + + ++lines; + + if (match_end) + *match_end = next; + + /* pass NULL for match_start, since we don't need to find the + * start again. + */ + return lines_match (&next, lines, visible_only, slice, NULL, match_end); +} + +static gboolean +backward_lines_match (const GtkTextIter *start, + const gchar **lines, + gboolean visible_only, + gboolean slice, + GtkTextIter *match_start, + GtkTextIter *match_end) +{ + GtkTextIter line, next; + gchar *line_text; + const gchar *found; + gint offset; + + if (*lines == NULL || **lines == '\0') + { + if (match_start) + *match_start = *start; + if (match_end) + *match_end = *start; + return TRUE; + } + + line = next = *start; + if (gtk_text_iter_get_line_offset (&next) == 0) + { + if (!gtk_text_iter_backward_line (&next)) + return FALSE; + } + else + gtk_text_iter_set_line_offset (&next, 0); + + if (slice) + { + if (visible_only) + line_text = gtk_text_iter_get_visible_slice (&next, &line); + else + line_text = gtk_text_iter_get_slice (&next, &line); + } + else + { + if (visible_only) + line_text = gtk_text_iter_get_visible_text (&next, &line); + else + line_text = gtk_text_iter_get_text (&next, &line); + } + + if (match_start) /* if this is the first line we're matching */ + { + found = g_utf8_strrcasestr (line_text, *lines); + } + else + { + /* If it's not the first line, we have to match from the + * start of the line. + */ + if (g_utf8_caselessnmatch (line_text, *lines, strlen (line_text), + strlen (*lines))) + found = line_text; + else + found = NULL; + } + + if (found == NULL) + { + g_free (line_text); + return FALSE; + } + + /* Get offset to start of search string */ + offset = g_utf8_strlen (line_text, found - line_text); + + /* If match start needs to be returned, set it to the + * start of the search string. + */ + if (match_start) + { + *match_start = next; + gtk_text_iter_set_visible_line_offset (match_start, offset); + } + + /* Go to end of search string */ + offset += g_utf8_strlen (*lines, -1); + + forward_chars_with_skipping (&next, offset, visible_only, !slice); + + g_free (line_text); + + ++lines; + + if (match_end) + *match_end = next; + + /* try to match the rest of the lines forward, passing NULL + * for match_start so lines_match will try to match the entire + * line */ + return lines_match (&next, lines, visible_only, + slice, NULL, match_end); +} + +/* strsplit () that retains the delimiter as part of the string. */ +static gchar ** +strbreakup (const char *string, + const char *delimiter, + gint max_tokens) +{ + GSList *string_list = NULL, *slist; + gchar **str_array, *s, *casefold, *new_string; + guint i, n = 1; + + g_return_val_if_fail (string != NULL, NULL); + g_return_val_if_fail (delimiter != NULL, NULL); + + if (max_tokens < 1) + max_tokens = G_MAXINT; + + s = strstr (string, delimiter); + if (s) + { + guint delimiter_len = strlen (delimiter); + + do + { + guint len; + + len = s - string + delimiter_len; + new_string = g_new (gchar, len + 1); + strncpy (new_string, string, len); + new_string[len] = 0; + casefold = g_utf8_casefold (new_string, -1); + g_free (new_string); + new_string = g_utf8_normalize (casefold, -1, G_NORMALIZE_ALL); + g_free (casefold); + string_list = g_slist_prepend (string_list, new_string); + n++; + string = s + delimiter_len; + s = strstr (string, delimiter); + } while (--max_tokens && s); + } + + if (*string) + { + n++; + casefold = g_utf8_casefold (string, -1); + new_string = g_utf8_normalize (casefold, -1, G_NORMALIZE_ALL); + g_free (casefold); + string_list = g_slist_prepend (string_list, new_string); + } + + str_array = g_new (gchar*, n); + + i = n - 1; + + str_array[i--] = NULL; + for (slist = string_list; slist; slist = slist->next) + str_array[i--] = slist->data; + + g_slist_free (string_list); + + return str_array; +} + +/** + * gtk_source_iter_forward_search: + * @iter: start of search + * @str: a search string + * @flags: flags affecting how the search is done + * @match_start: return location for start of match, or %NULL + * @match_end: return location for end of match, or %NULL + * @limit: bound for the search, or %NULL for the end of the buffer + * + * Searches forward for @str. Any match is returned by setting + * @match_start to the first character of the match and @match_end to the + * first character after the match. The search will not continue past + * @limit. Note that a search is a linear or O(n) operation, so you + * may wish to use @limit to avoid locking up your UI on large + * buffers. + * + * If the #GTK_SOURCE_SEARCH_VISIBLE_ONLY flag is present, the match may + * have invisible text interspersed in @str. i.e. @str will be a + * possibly-noncontiguous subsequence of the matched range. similarly, + * if you specify #GTK_SOURCE_SEARCH_TEXT_ONLY, the match may have + * pixbufs or child widgets mixed inside the matched range. If these + * flags are not given, the match must be exact; the special 0xFFFC + * character in @str will match embedded pixbufs or child widgets. + * If you specify the #GTK_SOURCE_SEARCH_CASE_INSENSITIVE flag, the text will + * be matched regardless of what case it is in. + * + * Same as gtk_text_iter_forward_search(), but supports case insensitive + * searching. + * + * Return value: whether a match was found + **/ +gboolean +gtk_source_iter_forward_search (const GtkTextIter *iter, + const gchar *str, + GtkSourceSearchFlags flags, + GtkTextIter *match_start, + GtkTextIter *match_end, + const GtkTextIter *limit) +{ + gchar **lines = NULL; + GtkTextIter match; + gboolean retval = FALSE; + GtkTextIter search; + gboolean visible_only; + gboolean slice; + + g_return_val_if_fail (iter != NULL, FALSE); + g_return_val_if_fail (str != NULL, FALSE); + + if ((flags & GTK_SOURCE_SEARCH_CASE_INSENSITIVE) == 0) + return gtk_text_iter_forward_search (iter, str, flags, + match_start, match_end, + limit); + + if (limit && gtk_text_iter_compare (iter, limit) >= 0) + return FALSE; + + if (*str == '\0') + { + /* If we can move one char, return the empty string there */ + match = *iter; + + if (gtk_text_iter_forward_char (&match)) + { + if (limit && gtk_text_iter_equal (&match, limit)) + return FALSE; + + if (match_start) + *match_start = match; + if (match_end) + *match_end = match; + return TRUE; + } + else + { + return FALSE; + } + } + + visible_only = (flags & GTK_SOURCE_SEARCH_VISIBLE_ONLY) != 0; + slice = (flags & GTK_SOURCE_SEARCH_TEXT_ONLY) == 0; + + /* locate all lines */ + lines = strbreakup (str, "\n", -1); + + search = *iter; + + do + { + /* This loop has an inefficient worst-case, where + * gtk_text_iter_get_text () is called repeatedly on + * a single line. + */ + GtkTextIter end; + + if (limit && gtk_text_iter_compare (&search, limit) >= 0) + break; + + if (lines_match (&search, (const gchar**)lines, + visible_only, slice, &match, &end)) + { + if (limit == NULL || (limit && + gtk_text_iter_compare (&end, limit) < 0)) + { + retval = TRUE; + + if (match_start) + *match_start = match; + if (match_end) + *match_end = end; + } + break; + } + } while (gtk_text_iter_forward_line (&search)); + + g_strfreev ((gchar**)lines); + + return retval; +} + +/** + * gtk_source_iter_backward_search: + * @iter: a #GtkTextIter where the search begins + * @str: search string + * @flags: bitmask of flags affecting the search + * @match_start: return location for start of match, or %NULL + * @match_end: return location for end of match, or %NULL + * @limit: location of last possible @match_start, or %NULL for start of buffer + * + * Same as gtk_text_iter_backward_search(), but supports case insensitive + * searching. + * + * Return value: whether a match was found + **/ +gboolean +gtk_source_iter_backward_search (const GtkTextIter *iter, + const gchar *str, + GtkSourceSearchFlags flags, + GtkTextIter *match_start, + GtkTextIter *match_end, + const GtkTextIter *limit) +{ + gchar **lines = NULL; + GtkTextIter match; + gboolean retval = FALSE; + GtkTextIter search; + gboolean visible_only; + gboolean slice; + + g_return_val_if_fail (iter != NULL, FALSE); + g_return_val_if_fail (str != NULL, FALSE); + + if ((flags & GTK_SOURCE_SEARCH_CASE_INSENSITIVE) == 0) + return gtk_text_iter_backward_search (iter, str, flags, + match_start, match_end, + limit); + + if (limit && gtk_text_iter_compare (iter, limit) <= 0) + return FALSE; + + if (*str == '\0') + { + /* If we can move one char, return the empty string there */ + match = *iter; + + if (gtk_text_iter_backward_char (&match)) + { + if (limit && gtk_text_iter_equal (&match, limit)) + return FALSE; + + if (match_start) + *match_start = match; + if (match_end) + *match_end = match; + return TRUE; + } + else + { + return FALSE; + } + } + + visible_only = (flags & GTK_SOURCE_SEARCH_VISIBLE_ONLY) != 0; + slice = (flags & GTK_SOURCE_SEARCH_TEXT_ONLY) == 0; + + /* locate all lines */ + lines = strbreakup (str, "\n", -1); + + search = *iter; + + while (TRUE) + { + /* This loop has an inefficient worst-case, where + * gtk_text_iter_get_text () is called repeatedly on + * a single line. + */ + GtkTextIter end; + + if (limit && gtk_text_iter_compare (&search, limit) <= 0) + break; + + if (backward_lines_match (&search, (const gchar**)lines, + visible_only, slice, &match, &end)) + { + if (limit == NULL || (limit && + gtk_text_iter_compare (&end, limit) > 0)) + { + retval = TRUE; + + if (match_start) + *match_start = match; + if (match_end) + *match_end = end; + } + break; + } + + if (gtk_text_iter_get_line_offset (&search) == 0) + { + if (!gtk_text_iter_backward_line (&search)) + break; + } + else + { + gtk_text_iter_set_line_offset (&search, 0); + } + } + + g_strfreev ((gchar**)lines); + + return retval; +} + +/* + * gtk_source_iter_find_matching_bracket is implemented in gtksourcebuffer.c + */