diff src/gtksourceiter.c @ 7358:78c1fc730dc4

[gaim-migrate @ 7951] Case-insensitive searching in gtkimhtml. committer: Tailor Script <tailor@pidgin.im>
author Sean Egan <seanegan@gmail.com>
date Tue, 28 Oct 2003 00:29:32 +0000
parents
children fa6395637e2c
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/gtksourceiter.c	Tue Oct 28 00:29:32 2003 +0000
@@ -0,0 +1,711 @@
+/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- 
+ *  gtksourceiter.c
+ *
+ *  Copyright (C) 2000, 2002 Paolo Maggi 
+ *  Copyright (C) 2002, 2003 Jeroen Zwartepoorte
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU Library General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU Library General Public License for more details.
+ *
+ *  You should have received a copy of the GNU Library General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+/*
+ * Parts of this file are copied from the gedit and glimmer project.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <string.h>
+#include "gtksourceiter.h"
+
+#define GTK_TEXT_UNKNOWN_CHAR 0xFFFC /* character that stands for embedded pixbufs / child widgets; skipped when counting text-only characters */
+
+/*
+ * Case-insensitive forward substring search on UTF-8 text.
+ *
+ * @haystack is case-folded and normalized (G_NORMALIZE_ALL) before the
+ * comparison.  @needle is compared byte-for-byte with strncmp (), so it
+ * must already be folded and normalized by the caller.
+ *
+ * Returns: @haystack itself when @needle is empty; otherwise a pointer into
+ * the original @haystack that lies as many characters from its start as the
+ * first match lies from the start of the folded copy; NULL if nothing
+ * matches.
+ */
+static gchar *
+g_utf8_strcasestr (const gchar *haystack, const gchar *needle)
+{
+	gchar *folded;
+	gchar *folded_haystack;
+	const gchar *cursor;
+	gchar *match = NULL;
+	gsize needle_chars;
+	gsize haystack_chars;
+	gsize needle_bytes;
+	gint char_index;
+
+	g_return_val_if_fail (haystack != NULL, NULL);
+	g_return_val_if_fail (needle != NULL, NULL);
+
+	folded = g_utf8_casefold (haystack, -1);
+	folded_haystack = g_utf8_normalize (folded, -1, G_NORMALIZE_ALL);
+	g_free (folded);
+
+	needle_chars = g_utf8_strlen (needle, -1);
+	haystack_chars = g_utf8_strlen (folded_haystack, -1);
+
+	if (needle_chars == 0)
+	{
+		/* An empty needle matches at the very beginning. */
+		match = (gchar *) haystack;
+	}
+	else if (haystack_chars >= needle_chars)
+	{
+		/* strncmp () works on bytes, so switch to the byte length. */
+		needle_bytes = strlen (needle);
+		char_index = 0;
+
+		for (cursor = folded_haystack;
+		     *cursor != '\0';
+		     cursor = g_utf8_next_char (cursor))
+		{
+			if (strncmp (cursor, needle, needle_bytes) == 0)
+			{
+				/* Translate the character index back into
+				 * the caller's (unfolded) string.
+				 */
+				match = g_utf8_offset_to_pointer (haystack,
+								  char_index);
+				break;
+			}
+
+			char_index++;
+		}
+	}
+
+	g_free (folded_haystack);
+
+	return match;
+}
+
+/*
+ * Case-insensitive reverse substring search on UTF-8 text.
+ *
+ * @haystack is case-folded and normalized (G_NORMALIZE_ALL) before the
+ * comparison.  @needle is compared byte-for-byte with strncmp (), so it
+ * must already be folded and normalized by the caller.
+ *
+ * The scan starts at the last position where @needle could still fit
+ * (measured in characters) and walks backwards one character at a time.
+ *
+ * Returns: @haystack itself when @needle is empty; otherwise a pointer into
+ * the original @haystack that lies as many characters from its start as the
+ * last match lies from the start of the folded copy; NULL if nothing
+ * matches.
+ */
+static gchar *
+g_utf8_strrcasestr (const gchar *haystack, const gchar *needle)
+{
+	gsize needle_chars;
+	gsize haystack_chars;
+	gsize needle_bytes;
+	gchar *ret = NULL;
+	gchar *p;
+	gchar *casefold;
+	gchar *caseless_haystack;
+	glong i;
+
+	g_return_val_if_fail (haystack != NULL, NULL);
+	g_return_val_if_fail (needle != NULL, NULL);
+
+	casefold = g_utf8_casefold (haystack, -1);
+	caseless_haystack = g_utf8_normalize (casefold, -1, G_NORMALIZE_ALL);
+	g_free (casefold);
+
+	needle_chars = g_utf8_strlen (needle, -1);
+	haystack_chars = g_utf8_strlen (caseless_haystack, -1);
+
+	if (needle_chars == 0)
+	{
+		ret = (gchar *)haystack;
+		goto finally_1;
+	}
+
+	if (haystack_chars < needle_chars)
+	{
+		ret = NULL;
+		goto finally_1;
+	}
+
+	/* strncmp () needs the needle length in bytes ... */
+	needle_bytes = strlen (needle);
+
+	/* ... but the position is tracked in characters, because it is later
+	 * handed to g_utf8_offset_to_pointer ().  Starting from a character
+	 * offset also guarantees that p sits on a character boundary.
+	 */
+	i = (glong) (haystack_chars - needle_chars);
+	p = g_utf8_offset_to_pointer (caseless_haystack, i);
+
+	for (;;)
+	{
+		/* Both strings are already case-folded, so a plain byte
+		 * comparison is the correct (and locale-independent) test.
+		 */
+		if (strncmp (p, needle, needle_bytes) == 0)
+		{
+			ret = g_utf8_offset_to_pointer (haystack, i);
+			break;
+		}
+
+		/* Stop before stepping in front of the buffer. */
+		if (p == caseless_haystack)
+			break;
+
+		p = g_utf8_prev_char (p);
+		i--;
+	}
+
+finally_1:
+	g_free (caseless_haystack);
+
+	return ret;
+}
+
+/*
+ * Case-insensitive prefix test on UTF-8 text.
+ *
+ * The first @n1 bytes of @s1 and the first @n2 bytes of @s2 are each
+ * case-folded and normalized (G_NORMALIZE_ALL).  Both lengths must be
+ * greater than zero.
+ *
+ * Returns: TRUE when the processed @s2 is a byte-wise prefix of the
+ * processed @s1, FALSE otherwise (including when @s1 ends up shorter).
+ */
+static gboolean
+g_utf8_caselessnmatch (const char *s1, const char *s2,
+		       gssize n1, gssize n2)
+{
+	gchar *folded;
+	gchar *text;
+	gchar *prefix;
+	gint text_bytes;
+	gint prefix_bytes;
+	gboolean is_prefix = FALSE;
+
+	g_return_val_if_fail (s1 != NULL, FALSE);
+	g_return_val_if_fail (s2 != NULL, FALSE);
+	g_return_val_if_fail (n1 > 0, FALSE);
+	g_return_val_if_fail (n2 > 0, FALSE);
+
+	folded = g_utf8_casefold (s1, n1);
+	text = g_utf8_normalize (folded, -1, G_NORMALIZE_ALL);
+	g_free (folded);
+
+	folded = g_utf8_casefold (s2, n2);
+	prefix = g_utf8_normalize (folded, -1, G_NORMALIZE_ALL);
+	g_free (folded);
+
+	text_bytes = strlen (text);
+	prefix_bytes = strlen (prefix);
+
+	/* A prefix can never be longer than the text it starts. */
+	if (text_bytes >= prefix_bytes)
+		is_prefix = (strncmp (text, prefix, prefix_bytes) == 0);
+
+	g_free (text);
+	g_free (prefix);
+
+	return is_prefix;
+}
+
+/*
+ * Advance @iter until @count characters that "count" have been passed.
+ *
+ * When @skip_nontext is set, positions holding GTK_TEXT_UNKNOWN_CHAR are
+ * stepped over without being counted.  @skip_invisible currently has no
+ * effect: the invisibility test is commented out and replaced by FALSE.
+ * @count must not be negative.
+ */
+static void
+forward_chars_with_skipping (GtkTextIter *iter,
+			     gint         count,
+			     gboolean     skip_invisible,
+			     gboolean     skip_nontext)
+{
+	gint remaining;
+
+	g_return_if_fail (count >= 0);
+
+	for (remaining = count; remaining > 0; )
+	{
+		gboolean skipped;
+
+		/* Look at the current character before moving off it. */
+		skipped = (skip_nontext &&
+			   gtk_text_iter_get_char (iter) == GTK_TEXT_UNKNOWN_CHAR);
+
+		if (!skipped && skip_invisible &&
+		    /* _gtk_text_btree_char_is_invisible (iter)*/ FALSE)
+			skipped = TRUE;
+
+		gtk_text_iter_forward_char (iter);
+
+		if (!skipped)
+			remaining--;
+	}
+}
+
+/*
+ * Try to match the NULL-terminated array @lines against the buffer,
+ * beginning at @start and consuming one buffer line per array entry.
+ *
+ * When @match_start is non-NULL the first entry may match anywhere in the
+ * first line (substring search) and @match_start receives that position.
+ * Every later entry, and the first entry when @match_start is NULL, must
+ * match at the beginning of its line.
+ *
+ * @visible_only and @slice select which GtkTextIter text getter is used.
+ * @match_end, if non-NULL, is updated as matching progresses and holds the
+ * end of the match on success.
+ *
+ * Returns: TRUE if every entry matched (an empty list or an empty first
+ * entry matches immediately at @start), FALSE otherwise.
+ */
+static gboolean
+lines_match (const GtkTextIter *start,
+	     const gchar      **lines,
+	     gboolean           visible_only,
+	     gboolean           slice,
+	     GtkTextIter       *match_start,
+	     GtkTextIter       *match_end)
+{
+	GtkTextIter line_start = *start;
+	/* Only the first line may report (and search for) the match start. */
+	GtkTextIter *start_out = match_start;
+
+	for (;;)
+	{
+		GtkTextIter cursor;
+		gchar *text;
+		const gchar *hit;
+		gint chars;
+
+		/* Nothing left to match: success at the current position. */
+		if (*lines == NULL || **lines == '\0')
+		{
+			if (start_out)
+				*start_out = line_start;
+			if (match_end)
+				*match_end = line_start;
+			return TRUE;
+		}
+
+		cursor = line_start;
+		gtk_text_iter_forward_line (&cursor);
+
+		/* No more text in buffer, but *lines is nonempty */
+		if (gtk_text_iter_equal (&line_start, &cursor))
+			return FALSE;
+
+		if (slice)
+			text = visible_only
+				? gtk_text_iter_get_visible_slice (&line_start, &cursor)
+				: gtk_text_iter_get_slice (&line_start, &cursor);
+		else
+			text = visible_only
+				? gtk_text_iter_get_visible_text (&line_start, &cursor)
+				: gtk_text_iter_get_text (&line_start, &cursor);
+
+		if (start_out)
+		{
+			/* First line: the match may begin anywhere. */
+			hit = g_utf8_strcasestr (text, *lines);
+		}
+		else if (g_utf8_caselessnmatch (text, *lines, strlen (text),
+						strlen (*lines)))
+		{
+			/* Continuation line: must match from its start. */
+			hit = text;
+		}
+		else
+		{
+			hit = NULL;
+		}
+
+		if (hit == NULL)
+		{
+			g_free (text);
+			return FALSE;
+		}
+
+		/* Character offset of the match inside this line's text. */
+		chars = g_utf8_strlen (text, hit - text);
+
+		cursor = line_start;
+
+		if (start_out)
+		{
+			*start_out = cursor;
+			forward_chars_with_skipping (start_out, chars,
+						     visible_only, !slice);
+		}
+
+		/* Move the cursor to the end of the matched entry. */
+		chars += g_utf8_strlen (*lines, -1);
+		forward_chars_with_skipping (&cursor, chars, visible_only, !slice);
+
+		g_free (text);
+
+		++lines;
+
+		if (match_end)
+			*match_end = cursor;
+
+		/* Continue with the next entry right after this match; the
+		 * match start has been found and must not be touched again.
+		 */
+		line_start = cursor;
+		start_out = NULL;
+	}
+}
+
+/*
+ * Backward counterpart of lines_match ().
+ *
+ * The text examined for the first entry of @lines runs from the start of
+ * the line containing @start (or the start of the previous line when @start
+ * is already at a line start) up to @start.  When @match_start is non-NULL
+ * the last occurrence of the first entry in that text is searched for;
+ * otherwise the entry must match at the beginning of the text.  The
+ * remaining entries are matched forward with lines_match ().
+ *
+ * @visible_only and @slice select which GtkTextIter text getter is used.
+ * On success @match_start (if non-NULL) is set to the start of the match and
+ * @match_end (if non-NULL) to its end.
+ *
+ * Returns: TRUE if every entry matched (an empty list or an empty first
+ * entry matches immediately at @start), FALSE otherwise.
+ */
+static gboolean
+backward_lines_match (const GtkTextIter *start,
+		      const gchar      **lines,
+		      gboolean           visible_only,
+		      gboolean           slice,
+		      GtkTextIter       *match_start,
+		      GtkTextIter       *match_end)
+{
+	GtkTextIter line, next;
+	gchar *line_text;
+	const gchar *found;
+	gint offset;
+
+	if (*lines == NULL || **lines == '\0')
+	{
+		if (match_start)
+			*match_start = *start;
+		if (match_end)
+			*match_end = *start;
+		return TRUE;
+	}
+
+	/* "next" becomes the start of the text to examine, "line" its end. */
+	line = next = *start;
+	if (gtk_text_iter_get_line_offset (&next) == 0)
+	{
+		if (!gtk_text_iter_backward_line (&next))
+			return FALSE;
+	}
+	else
+		gtk_text_iter_set_line_offset (&next, 0);
+
+	if (slice)
+	{
+		if (visible_only)
+			line_text = gtk_text_iter_get_visible_slice (&next, &line);
+		else
+			line_text = gtk_text_iter_get_slice (&next, &line);
+	}
+	else
+	{
+		if (visible_only)
+			line_text = gtk_text_iter_get_visible_text (&next, &line);
+		else
+			line_text = gtk_text_iter_get_text (&next, &line);
+	}
+
+	if (match_start) /* if this is the first line we're matching */
+	{
+		found = g_utf8_strrcasestr (line_text, *lines);
+	}
+	else
+	{
+		/* If it's not the first line, we have to match from the
+		 * start of the line.
+		 */
+		if (g_utf8_caselessnmatch (line_text, *lines, strlen (line_text),
+					   strlen (*lines)))
+			found = line_text;
+		else
+			found = NULL;
+	}
+
+	if (found == NULL)
+	{
+		g_free (line_text);
+		return FALSE;
+	}
+
+	/* Get offset to start of search string */
+	offset = g_utf8_strlen (line_text, found - line_text);
+
+	/* If match start needs to be returned, set it to the
+	 * start of the search string.
+	 */
+	if (match_start)
+	{
+		*match_start = next;
+
+		if (visible_only)
+		{
+			/* offset was measured in visible text, so position
+			 * by visible characters.
+			 * NOTE(review): this does not skip non-text
+			 * characters when !slice - confirm whether that
+			 * combination matters to callers.
+			 */
+			gtk_text_iter_set_visible_line_offset (match_start, offset);
+		}
+		else
+		{
+			/* offset was measured in text that includes
+			 * invisible characters; a visible-character offset
+			 * would overshoot, so count every character (and
+			 * skip non-text ones) exactly like lines_match ().
+			 */
+			forward_chars_with_skipping (match_start, offset,
+						     FALSE, !slice);
+		}
+	}
+
+	/* Go to end of search string */
+	offset += g_utf8_strlen (*lines, -1);
+
+	forward_chars_with_skipping (&next, offset, visible_only, !slice);
+
+	g_free (line_text);
+
+	++lines;
+
+	if (match_end)
+		*match_end = next;
+
+	/* try to match the rest of the lines forward, passing NULL
+	 * for match_start so lines_match will try to match the entire
+	 * line */
+	return lines_match (&next, lines, visible_only,
+			    slice, NULL, match_end);
+}
+
+/*
+ * strsplit () that retains the delimiter as part of the string.
+ *
+ * @string is cut after each occurrence of @delimiter; every piece (with its
+ * trailing delimiter) is case-folded and normalized (G_NORMALIZE_ALL).  Any
+ * non-empty text after the last cut becomes a final piece.
+ *
+ * @max_tokens bounds the number of cuts; values below 1 mean "no limit".
+ * Once that many cuts have been made the remainder of @string is appended
+ * as one more piece.
+ *
+ * @delimiter must not be empty: an empty delimiter is found at every
+ * position without consuming input, so the splitting loop would never
+ * advance.
+ *
+ * Returns: a newly allocated, NULL-terminated array of newly allocated
+ * strings, or NULL when an argument is invalid.
+ */
+static gchar **
+strbreakup (const char *string,
+	    const char *delimiter,
+	    gint        max_tokens)
+{
+	GSList *string_list = NULL, *slist;
+	gchar **str_array, *s, *casefold, *new_string;
+	guint i, n = 1;	/* n counts the terminating NULL slot as well */
+
+	g_return_val_if_fail (string != NULL, NULL);
+	g_return_val_if_fail (delimiter != NULL, NULL);
+	g_return_val_if_fail (delimiter[0] != '\0', NULL);
+
+	if (max_tokens < 1)
+		max_tokens = G_MAXINT;
+
+	s = strstr (string, delimiter);
+	if (s)
+	{
+		gsize delimiter_len = strlen (delimiter);
+
+		do
+		{
+			/* Length of this piece including its delimiter. */
+			gsize len = (s - string) + delimiter_len;
+
+			/* g_utf8_casefold () takes a byte length, so no
+			 * temporary NUL-terminated copy is needed.
+			 */
+			casefold = g_utf8_casefold (string, len);
+			new_string = g_utf8_normalize (casefold, -1, G_NORMALIZE_ALL);
+			g_free (casefold);
+			string_list = g_slist_prepend (string_list, new_string);
+			n++;
+			string = s + delimiter_len;
+			s = strstr (string, delimiter);
+		} while (--max_tokens && s);
+	}
+
+	if (*string)
+	{
+		n++;
+		casefold = g_utf8_casefold (string, -1);
+		new_string = g_utf8_normalize (casefold, -1, G_NORMALIZE_ALL);
+		g_free (casefold);
+		string_list = g_slist_prepend (string_list, new_string);
+	}
+
+	str_array = g_new (gchar*, n);
+
+	/* The list was built by prepending, so fill the array back to front
+	 * to restore the original order.
+	 */
+	i = n - 1;
+
+	str_array[i--] = NULL;
+	for (slist = string_list; slist; slist = slist->next)
+		str_array[i--] = slist->data;
+
+	g_slist_free (string_list);
+
+	return str_array;
+}
+
+/**
+ * gtk_source_iter_forward_search:
+ * @iter: start of search
+ * @str: a search string
+ * @flags: flags affecting how the search is done
+ * @match_start: return location for start of match, or %NULL
+ * @match_end: return location for end of match, or %NULL
+ * @limit: bound for the search, or %NULL for the end of the buffer
+ * 
+ * Searches forward for @str. Any match is returned by setting 
+ * @match_start to the first character of the match and @match_end to the 
+ * first character after the match. The search will not continue past
+ * @limit; a match that ends exactly at @limit is still reported. Note
+ * that a search is a linear or O(n) operation, so you may wish to use
+ * @limit to avoid locking up your UI on large buffers.
+ * 
+ * If the #GTK_SOURCE_SEARCH_VISIBLE_ONLY flag is present, the match may
+ * have invisible text interspersed in @str. i.e. @str will be a
+ * possibly-noncontiguous subsequence of the matched range. Similarly,
+ * if you specify #GTK_SOURCE_SEARCH_TEXT_ONLY, the match may have
+ * pixbufs or child widgets mixed inside the matched range. If these
+ * flags are not given, the match must be exact; the special 0xFFFC
+ * character in @str will match embedded pixbufs or child widgets.
+ * If you specify the #GTK_SOURCE_SEARCH_CASE_INSENSITIVE flag, the text will
+ * be matched regardless of what case it is in.
+ *
+ * Same as gtk_text_iter_forward_search(), but supports case insensitive
+ * searching. Without #GTK_SOURCE_SEARCH_CASE_INSENSITIVE the call is
+ * forwarded unchanged to gtk_text_iter_forward_search().
+ * 
+ * Return value: whether a match was found
+ **/
+gboolean
+gtk_source_iter_forward_search (const GtkTextIter   *iter,
+				const gchar         *str,
+				GtkSourceSearchFlags flags,
+				GtkTextIter         *match_start,
+				GtkTextIter         *match_end,
+				const GtkTextIter   *limit)
+{
+	gchar **lines = NULL;
+	GtkTextIter match;
+	gboolean retval = FALSE;
+	GtkTextIter search;
+	gboolean visible_only;
+	gboolean slice;
+
+	g_return_val_if_fail (iter != NULL, FALSE);
+	g_return_val_if_fail (str != NULL, FALSE);
+
+	/* Case-sensitive searches are handled by GTK itself. */
+	if ((flags & GTK_SOURCE_SEARCH_CASE_INSENSITIVE) == 0)
+		return gtk_text_iter_forward_search (iter, str, flags,
+						     match_start, match_end,
+						     limit);
+
+	if (limit && gtk_text_iter_compare (iter, limit) >= 0)
+		return FALSE;
+
+	if (*str == '\0')
+	{
+		/* If we can move one char, return the empty string there */
+		match = *iter;
+
+		if (gtk_text_iter_forward_char (&match))
+		{
+			if (limit && gtk_text_iter_equal (&match, limit))
+				return FALSE;
+
+			if (match_start)
+				*match_start = match;
+			if (match_end)
+				*match_end = match;
+			return TRUE;
+		}
+		else
+		{
+			return FALSE;
+		}
+	}
+
+	visible_only = (flags & GTK_SOURCE_SEARCH_VISIBLE_ONLY) != 0;
+	slice = (flags & GTK_SOURCE_SEARCH_TEXT_ONLY) == 0;
+
+	/* locate all lines (each piece comes back case-folded and
+	 * normalized, ready for comparison)
+	 */
+	lines = strbreakup (str, "\n", -1);
+
+	search = *iter;
+
+	do
+	{
+		/* This loop has an inefficient worst-case, where
+		 * gtk_text_iter_get_text () is called repeatedly on
+		 * a single line.
+		 */
+		GtkTextIter end;
+
+		if (limit && gtk_text_iter_compare (&search, limit) >= 0)
+			break;
+
+		if (lines_match (&search, (const gchar**)lines,
+				 visible_only, slice, &match, &end))
+		{
+			/* A match whose end coincides with the limit does
+			 * not extend past it, so it is accepted too.
+			 */
+			if (limit == NULL ||
+			    gtk_text_iter_compare (&end, limit) <= 0)
+			{
+				retval = TRUE;
+
+				if (match_start)
+					*match_start = match;
+				if (match_end)
+					*match_end = end;
+			}
+			break;
+		}
+	} while (gtk_text_iter_forward_line (&search));
+
+	g_strfreev ((gchar**)lines);
+
+	return retval;
+}
+
+/**
+ * gtk_source_iter_backward_search:
+ * @iter: a #GtkTextIter where the search begins
+ * @str: search string
+ * @flags: bitmask of flags affecting the search
+ * @match_start: return location for start of match, or %NULL
+ * @match_end: return location for end of match, or %NULL
+ * @limit: location of last possible @match_start, or %NULL for start of buffer
+ * 
+ * Same as gtk_text_iter_backward_search(), but supports case insensitive
+ * searching. Without #GTK_SOURCE_SEARCH_CASE_INSENSITIVE the call is
+ * forwarded unchanged to gtk_text_iter_backward_search().
+ *
+ * Nothing is found when @iter is at or before @limit. An empty @str
+ * matches one character before @iter, provided such a position exists
+ * and is not @limit. A candidate match is reported only when its end
+ * lies after @limit.
+ * 
+ * Return value: whether a match was found
+ **/
+gboolean
+gtk_source_iter_backward_search (const GtkTextIter   *iter,
+				 const gchar         *str,
+				 GtkSourceSearchFlags flags,
+				 GtkTextIter         *match_start,
+				 GtkTextIter         *match_end,
+				 const GtkTextIter   *limit)
+{
+	gchar **pieces = NULL;
+	GtkTextIter found_start;
+	GtkTextIter cursor;
+	gboolean found = FALSE;
+	gboolean visible_only;
+	gboolean slice;
+
+	g_return_val_if_fail (iter != NULL, FALSE);
+	g_return_val_if_fail (str != NULL, FALSE);
+
+	/* Case-sensitive searches are handled by GTK itself. */
+	if ((flags & GTK_SOURCE_SEARCH_CASE_INSENSITIVE) == 0)
+		return gtk_text_iter_backward_search (iter, str, flags,
+						      match_start, match_end,
+						      limit);
+
+	if (limit && gtk_text_iter_compare (iter, limit) <= 0)
+		return FALSE;
+
+	if (*str == '\0')
+	{
+		/* If we can move one char, return the empty string there */
+		found_start = *iter;
+
+		if (!gtk_text_iter_backward_char (&found_start))
+			return FALSE;
+
+		if (limit && gtk_text_iter_equal (&found_start, limit))
+			return FALSE;
+
+		if (match_start)
+			*match_start = found_start;
+		if (match_end)
+			*match_end = found_start;
+		return TRUE;
+	}
+
+	visible_only = (flags & GTK_SOURCE_SEARCH_VISIBLE_ONLY) != 0;
+	slice = (flags & GTK_SOURCE_SEARCH_TEXT_ONLY) == 0;
+
+	/* One entry per line of the search string. */
+	pieces = strbreakup (str, "\n", -1);
+
+	cursor = *iter;
+
+	/* This loop has an inefficient worst-case, where
+	 * gtk_text_iter_get_text () is called repeatedly on
+	 * a single line.
+	 */
+	while (limit == NULL || gtk_text_iter_compare (&cursor, limit) > 0)
+	{
+		GtkTextIter found_end;
+
+		if (backward_lines_match (&cursor, (const gchar**)pieces,
+					  visible_only, slice,
+					  &found_start, &found_end))
+		{
+			/* The first candidate decides the outcome: it is
+			 * either within bounds or the search fails.
+			 */
+			if (limit == NULL ||
+			    gtk_text_iter_compare (&found_end, limit) > 0)
+			{
+				found = TRUE;
+
+				if (match_start)
+					*match_start = found_start;
+				if (match_end)
+					*match_end = found_end;
+			}
+			break;
+		}
+
+		/* Step back: first to the start of the current line, and
+		 * from a line start to the start of the previous line.
+		 */
+		if (gtk_text_iter_get_line_offset (&cursor) != 0)
+		{
+			gtk_text_iter_set_line_offset (&cursor, 0);
+			continue;
+		}
+
+		if (!gtk_text_iter_backward_line (&cursor))
+			break;
+	}
+
+	g_strfreev ((gchar**)pieces);
+
+	return found;
+}
+
+/*
+ * gtk_source_iter_find_matching_bracket is implemented in gtksourcebuffer.c
+ */