comparison src/gtksourceiter.c @ 7358:78c1fc730dc4

[gaim-migrate @ 7951] Case-insensitive searching in gtkimhtml. committer: Tailor Script <tailor@pidgin.im>
author Sean Egan <seanegan@gmail.com>
date Tue, 28 Oct 2003 00:29:32 +0000
parents
children fa6395637e2c
comparison
equal deleted inserted replaced
7357:eb0acf8c8f1e 7358:78c1fc730dc4
1 /* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*-
2 * gtksourceiter.c
3 *
4 * Copyright (C) 2000, 2002 Paolo Maggi
5 * Copyright (C) 2002, 2003 Jeroen Zwartepoorte
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU Library General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU Library General Public License for more details.
16 *
17 * You should have received a copy of the GNU Library General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
20 */
21
22 /*
23 * Parts of this file are copied from the gedit and glimmer project.
24 */
25
26 #ifdef HAVE_CONFIG_H
27 #include <config.h>
28 #endif
29
30 #include <string.h>
31 #include "gtksourceiter.h"
32
33 #define GTK_TEXT_UNKNOWN_CHAR 0xFFFC /* character value that stands in for embedded pixbufs / child widgets in buffer text */
34
35 static gchar *
36 g_utf8_strcasestr (const gchar *haystack, const gchar *needle)
37 {
38 gsize needle_len;
39 gsize haystack_len;
40 gchar *ret = NULL;
41 gchar *p;
42 gchar *casefold;
43 gchar *caseless_haystack;
44 gint i;
45
46 g_return_val_if_fail (haystack != NULL, NULL);
47 g_return_val_if_fail (needle != NULL, NULL);
48
49 casefold = g_utf8_casefold (haystack, -1);
50 caseless_haystack = g_utf8_normalize (casefold, -1, G_NORMALIZE_ALL);
51 g_free (casefold);
52
53 needle_len = g_utf8_strlen (needle, -1);
54 haystack_len = g_utf8_strlen (caseless_haystack, -1);
55
56 if (needle_len == 0)
57 {
58 ret = (gchar *)haystack;
59 goto finally_1;
60 }
61
62 if (haystack_len < needle_len)
63 {
64 ret = NULL;
65 goto finally_1;
66 }
67
68 p = (gchar*)caseless_haystack;
69 needle_len = strlen (needle);
70 i = 0;
71
72 while (*p)
73 {
74 if ((strncmp (p, needle, needle_len) == 0))
75 {
76 ret = g_utf8_offset_to_pointer (haystack, i);
77 goto finally_1;
78 }
79
80 p = g_utf8_next_char (p);
81 i++;
82 }
83
84 finally_1:
85 g_free (caseless_haystack);
86
87 return ret;
88 }
89
90 static gchar *
91 g_utf8_strrcasestr (const gchar *haystack, const gchar *needle)
92 {
93 gsize needle_len;
94 gsize haystack_len;
95 gchar *ret = NULL;
96 gchar *p;
97 gchar *casefold;
98 gchar *caseless_haystack;
99 gint i;
100
101 g_return_val_if_fail (haystack != NULL, NULL);
102 g_return_val_if_fail (needle != NULL, NULL);
103
104 casefold = g_utf8_casefold (haystack, -1);
105 caseless_haystack = g_utf8_normalize (casefold, -1, G_NORMALIZE_ALL);
106 g_free (casefold);
107
108 needle_len = g_utf8_strlen (needle, -1);
109 haystack_len = g_utf8_strlen (caseless_haystack, -1);
110
111 if (needle_len == 0)
112 {
113 ret = (gchar *)haystack;
114 goto finally_1;
115 }
116
117 if (haystack_len < needle_len)
118 {
119 ret = NULL;
120 goto finally_1;
121 }
122
123 haystack_len = strlen (caseless_haystack);
124 needle_len = strlen (needle);
125 p = (gchar *)caseless_haystack + haystack_len - needle_len;
126 i = haystack_len - needle_len;
127
128 while (p >= caseless_haystack)
129 {
130 if (strncasecmp (p, needle, needle_len) == 0)
131 {
132 ret = g_utf8_offset_to_pointer (haystack, i);
133 goto finally_1;
134 }
135
136 p = g_utf8_prev_char (p);
137 i--;
138 }
139
140 finally_1:
141 g_free (caseless_haystack);
142
143 return ret;
144 }
145
146 static gboolean
147 g_utf8_caselessnmatch (const char *s1, const char *s2,
148 gssize n1, gssize n2)
149 {
150 gchar *casefold;
151 gchar *normalized_s1;
152 gchar *normalized_s2;
153 gint len_s1;
154 gint len_s2;
155 gboolean ret = FALSE;
156
157 g_return_val_if_fail (s1 != NULL, FALSE);
158 g_return_val_if_fail (s2 != NULL, FALSE);
159 g_return_val_if_fail (n1 > 0, FALSE);
160 g_return_val_if_fail (n2 > 0, FALSE);
161
162 casefold = g_utf8_casefold (s1, n1);
163 normalized_s1 = g_utf8_normalize (casefold, -1, G_NORMALIZE_ALL);
164 g_free (casefold);
165
166 casefold = g_utf8_casefold (s2, n2);
167 normalized_s2 = g_utf8_normalize (casefold, -1, G_NORMALIZE_ALL);
168 g_free (casefold);
169
170 len_s1 = strlen (normalized_s1);
171 len_s2 = strlen (normalized_s2);
172
173 if (len_s1 < len_s2)
174 goto finally_2;
175
176 ret = (strncmp (normalized_s1, normalized_s2, len_s2) == 0);
177
178 finally_2:
179 g_free (normalized_s1);
180 g_free (normalized_s2);
181
182 return ret;
183 }
184
185 static void
186 forward_chars_with_skipping (GtkTextIter *iter,
187 gint count,
188 gboolean skip_invisible,
189 gboolean skip_nontext)
190 {
191 gint i;
192
193 g_return_if_fail (count >= 0);
194
195 i = count;
196
197 while (i > 0)
198 {
199 gboolean ignored = FALSE;
200
201 if (skip_nontext && gtk_text_iter_get_char (iter) == GTK_TEXT_UNKNOWN_CHAR)
202 ignored = TRUE;
203
204 if (!ignored && skip_invisible &&
205 /* _gtk_text_btree_char_is_invisible (iter)*/ FALSE)
206 ignored = TRUE;
207
208 gtk_text_iter_forward_char (iter);
209
210 if (!ignored)
211 --i;
212 }
213 }
214
215 static gboolean
216 lines_match (const GtkTextIter *start,
217 const gchar **lines,
218 gboolean visible_only,
219 gboolean slice,
220 GtkTextIter *match_start,
221 GtkTextIter *match_end)
222 {
223 GtkTextIter next;
224 gchar *line_text;
225 const gchar *found;
226 gint offset;
227
228 if (*lines == NULL || **lines == '\0')
229 {
230 if (match_start)
231 *match_start = *start;
232 if (match_end)
233 *match_end = *start;
234 return TRUE;
235 }
236
237 next = *start;
238 gtk_text_iter_forward_line (&next);
239
240 /* No more text in buffer, but *lines is nonempty */
241 if (gtk_text_iter_equal (start, &next))
242 return FALSE;
243
244 if (slice)
245 {
246 if (visible_only)
247 line_text = gtk_text_iter_get_visible_slice (start, &next);
248 else
249 line_text = gtk_text_iter_get_slice (start, &next);
250 }
251 else
252 {
253 if (visible_only)
254 line_text = gtk_text_iter_get_visible_text (start, &next);
255 else
256 line_text = gtk_text_iter_get_text (start, &next);
257 }
258
259 if (match_start) /* if this is the first line we're matching */
260 {
261 found = g_utf8_strcasestr (line_text, *lines);
262 }
263 else
264 {
265 /* If it's not the first line, we have to match from the
266 * start of the line.
267 */
268 if (g_utf8_caselessnmatch (line_text, *lines, strlen (line_text),
269 strlen (*lines)))
270 found = line_text;
271 else
272 found = NULL;
273 }
274
275 if (found == NULL)
276 {
277 g_free (line_text);
278 return FALSE;
279 }
280
281 /* Get offset to start of search string */
282 offset = g_utf8_strlen (line_text, found - line_text);
283
284 next = *start;
285
286 /* If match start needs to be returned, set it to the
287 * start of the search string.
288 */
289 if (match_start)
290 {
291 *match_start = next;
292
293 forward_chars_with_skipping (match_start, offset,
294 visible_only, !slice);
295 }
296
297 /* Go to end of search string */
298 offset += g_utf8_strlen (*lines, -1);
299
300 forward_chars_with_skipping (&next, offset, visible_only, !slice);
301
302 g_free (line_text);
303
304 ++lines;
305
306 if (match_end)
307 *match_end = next;
308
309 /* pass NULL for match_start, since we don't need to find the
310 * start again.
311 */
312 return lines_match (&next, lines, visible_only, slice, NULL, match_end);
313 }
314
315 static gboolean
316 backward_lines_match (const GtkTextIter *start,
317 const gchar **lines,
318 gboolean visible_only,
319 gboolean slice,
320 GtkTextIter *match_start,
321 GtkTextIter *match_end)
322 {
323 GtkTextIter line, next;
324 gchar *line_text;
325 const gchar *found;
326 gint offset;
327
328 if (*lines == NULL || **lines == '\0')
329 {
330 if (match_start)
331 *match_start = *start;
332 if (match_end)
333 *match_end = *start;
334 return TRUE;
335 }
336
337 line = next = *start;
338 if (gtk_text_iter_get_line_offset (&next) == 0)
339 {
340 if (!gtk_text_iter_backward_line (&next))
341 return FALSE;
342 }
343 else
344 gtk_text_iter_set_line_offset (&next, 0);
345
346 if (slice)
347 {
348 if (visible_only)
349 line_text = gtk_text_iter_get_visible_slice (&next, &line);
350 else
351 line_text = gtk_text_iter_get_slice (&next, &line);
352 }
353 else
354 {
355 if (visible_only)
356 line_text = gtk_text_iter_get_visible_text (&next, &line);
357 else
358 line_text = gtk_text_iter_get_text (&next, &line);
359 }
360
361 if (match_start) /* if this is the first line we're matching */
362 {
363 found = g_utf8_strrcasestr (line_text, *lines);
364 }
365 else
366 {
367 /* If it's not the first line, we have to match from the
368 * start of the line.
369 */
370 if (g_utf8_caselessnmatch (line_text, *lines, strlen (line_text),
371 strlen (*lines)))
372 found = line_text;
373 else
374 found = NULL;
375 }
376
377 if (found == NULL)
378 {
379 g_free (line_text);
380 return FALSE;
381 }
382
383 /* Get offset to start of search string */
384 offset = g_utf8_strlen (line_text, found - line_text);
385
386 /* If match start needs to be returned, set it to the
387 * start of the search string.
388 */
389 if (match_start)
390 {
391 *match_start = next;
392 gtk_text_iter_set_visible_line_offset (match_start, offset);
393 }
394
395 /* Go to end of search string */
396 offset += g_utf8_strlen (*lines, -1);
397
398 forward_chars_with_skipping (&next, offset, visible_only, !slice);
399
400 g_free (line_text);
401
402 ++lines;
403
404 if (match_end)
405 *match_end = next;
406
407 /* try to match the rest of the lines forward, passing NULL
408 * for match_start so lines_match will try to match the entire
409 * line */
410 return lines_match (&next, lines, visible_only,
411 slice, NULL, match_end);
412 }
413
414 /* strsplit () that retains the delimiter as part of the string. */
415 static gchar **
416 strbreakup (const char *string,
417 const char *delimiter,
418 gint max_tokens)
419 {
420 GSList *string_list = NULL, *slist;
421 gchar **str_array, *s, *casefold, *new_string;
422 guint i, n = 1;
423
424 g_return_val_if_fail (string != NULL, NULL);
425 g_return_val_if_fail (delimiter != NULL, NULL);
426
427 if (max_tokens < 1)
428 max_tokens = G_MAXINT;
429
430 s = strstr (string, delimiter);
431 if (s)
432 {
433 guint delimiter_len = strlen (delimiter);
434
435 do
436 {
437 guint len;
438
439 len = s - string + delimiter_len;
440 new_string = g_new (gchar, len + 1);
441 strncpy (new_string, string, len);
442 new_string[len] = 0;
443 casefold = g_utf8_casefold (new_string, -1);
444 g_free (new_string);
445 new_string = g_utf8_normalize (casefold, -1, G_NORMALIZE_ALL);
446 g_free (casefold);
447 string_list = g_slist_prepend (string_list, new_string);
448 n++;
449 string = s + delimiter_len;
450 s = strstr (string, delimiter);
451 } while (--max_tokens && s);
452 }
453
454 if (*string)
455 {
456 n++;
457 casefold = g_utf8_casefold (string, -1);
458 new_string = g_utf8_normalize (casefold, -1, G_NORMALIZE_ALL);
459 g_free (casefold);
460 string_list = g_slist_prepend (string_list, new_string);
461 }
462
463 str_array = g_new (gchar*, n);
464
465 i = n - 1;
466
467 str_array[i--] = NULL;
468 for (slist = string_list; slist; slist = slist->next)
469 str_array[i--] = slist->data;
470
471 g_slist_free (string_list);
472
473 return str_array;
474 }
475
476 /**
477 * gtk_source_iter_forward_search:
478 * @iter: start of search
479 * @str: a search string
480 * @flags: flags affecting how the search is done
481 * @match_start: return location for start of match, or %NULL
482 * @match_end: return location for end of match, or %NULL
483 * @limit: bound for the search, or %NULL for the end of the buffer
484 *
485 * Searches forward for @str. Any match is returned by setting
486 * @match_start to the first character of the match and @match_end to the
487 * first character after the match. The search will not continue past
488 * @limit. Note that a search is a linear or O(n) operation, so you
489 * may wish to use @limit to avoid locking up your UI on large
490 * buffers.
491 *
492 * If the #GTK_SOURCE_SEARCH_VISIBLE_ONLY flag is present, the match may
493 * have invisible text interspersed in @str. i.e. @str will be a
494 * possibly-noncontiguous subsequence of the matched range. similarly,
495 * if you specify #GTK_SOURCE_SEARCH_TEXT_ONLY, the match may have
496 * pixbufs or child widgets mixed inside the matched range. If these
497 * flags are not given, the match must be exact; the special 0xFFFC
498 * character in @str will match embedded pixbufs or child widgets.
499 * If you specify the #GTK_SOURCE_SEARCH_CASE_INSENSITIVE flag, the text will
500 * be matched regardless of what case it is in.
501 *
502 * Same as gtk_text_iter_forward_search(), but supports case insensitive
503 * searching.
504 *
505 * Return value: whether a match was found
506 **/
507 gboolean
508 gtk_source_iter_forward_search (const GtkTextIter *iter,
509 const gchar *str,
510 GtkSourceSearchFlags flags,
511 GtkTextIter *match_start,
512 GtkTextIter *match_end,
513 const GtkTextIter *limit)
514 {
515 gchar **lines = NULL;
516 GtkTextIter match;
517 gboolean retval = FALSE;
518 GtkTextIter search;
519 gboolean visible_only;
520 gboolean slice;
521
522 g_return_val_if_fail (iter != NULL, FALSE);
523 g_return_val_if_fail (str != NULL, FALSE);
524
525 if ((flags & GTK_SOURCE_SEARCH_CASE_INSENSITIVE) == 0)
526 return gtk_text_iter_forward_search (iter, str, flags,
527 match_start, match_end,
528 limit);
529
530 if (limit && gtk_text_iter_compare (iter, limit) >= 0)
531 return FALSE;
532
533 if (*str == '\0')
534 {
535 /* If we can move one char, return the empty string there */
536 match = *iter;
537
538 if (gtk_text_iter_forward_char (&match))
539 {
540 if (limit && gtk_text_iter_equal (&match, limit))
541 return FALSE;
542
543 if (match_start)
544 *match_start = match;
545 if (match_end)
546 *match_end = match;
547 return TRUE;
548 }
549 else
550 {
551 return FALSE;
552 }
553 }
554
555 visible_only = (flags & GTK_SOURCE_SEARCH_VISIBLE_ONLY) != 0;
556 slice = (flags & GTK_SOURCE_SEARCH_TEXT_ONLY) == 0;
557
558 /* locate all lines */
559 lines = strbreakup (str, "\n", -1);
560
561 search = *iter;
562
563 do
564 {
565 /* This loop has an inefficient worst-case, where
566 * gtk_text_iter_get_text () is called repeatedly on
567 * a single line.
568 */
569 GtkTextIter end;
570
571 if (limit && gtk_text_iter_compare (&search, limit) >= 0)
572 break;
573
574 if (lines_match (&search, (const gchar**)lines,
575 visible_only, slice, &match, &end))
576 {
577 if (limit == NULL || (limit &&
578 gtk_text_iter_compare (&end, limit) < 0))
579 {
580 retval = TRUE;
581
582 if (match_start)
583 *match_start = match;
584 if (match_end)
585 *match_end = end;
586 }
587 break;
588 }
589 } while (gtk_text_iter_forward_line (&search));
590
591 g_strfreev ((gchar**)lines);
592
593 return retval;
594 }
595
596 /**
597 * gtk_source_iter_backward_search:
598 * @iter: a #GtkTextIter where the search begins
599 * @str: search string
600 * @flags: bitmask of flags affecting the search
601 * @match_start: return location for start of match, or %NULL
602 * @match_end: return location for end of match, or %NULL
603 * @limit: location of last possible @match_start, or %NULL for start of buffer
604 *
605 * Same as gtk_text_iter_backward_search(), but supports case insensitive
606 * searching.
607 *
608 * Return value: whether a match was found
609 **/
610 gboolean
611 gtk_source_iter_backward_search (const GtkTextIter *iter,
612 const gchar *str,
613 GtkSourceSearchFlags flags,
614 GtkTextIter *match_start,
615 GtkTextIter *match_end,
616 const GtkTextIter *limit)
617 {
618 gchar **lines = NULL;
619 GtkTextIter match;
620 gboolean retval = FALSE;
621 GtkTextIter search;
622 gboolean visible_only;
623 gboolean slice;
624
625 g_return_val_if_fail (iter != NULL, FALSE);
626 g_return_val_if_fail (str != NULL, FALSE);
627
628 if ((flags & GTK_SOURCE_SEARCH_CASE_INSENSITIVE) == 0)
629 return gtk_text_iter_backward_search (iter, str, flags,
630 match_start, match_end,
631 limit);
632
633 if (limit && gtk_text_iter_compare (iter, limit) <= 0)
634 return FALSE;
635
636 if (*str == '\0')
637 {
638 /* If we can move one char, return the empty string there */
639 match = *iter;
640
641 if (gtk_text_iter_backward_char (&match))
642 {
643 if (limit && gtk_text_iter_equal (&match, limit))
644 return FALSE;
645
646 if (match_start)
647 *match_start = match;
648 if (match_end)
649 *match_end = match;
650 return TRUE;
651 }
652 else
653 {
654 return FALSE;
655 }
656 }
657
658 visible_only = (flags & GTK_SOURCE_SEARCH_VISIBLE_ONLY) != 0;
659 slice = (flags & GTK_SOURCE_SEARCH_TEXT_ONLY) == 0;
660
661 /* locate all lines */
662 lines = strbreakup (str, "\n", -1);
663
664 search = *iter;
665
666 while (TRUE)
667 {
668 /* This loop has an inefficient worst-case, where
669 * gtk_text_iter_get_text () is called repeatedly on
670 * a single line.
671 */
672 GtkTextIter end;
673
674 if (limit && gtk_text_iter_compare (&search, limit) <= 0)
675 break;
676
677 if (backward_lines_match (&search, (const gchar**)lines,
678 visible_only, slice, &match, &end))
679 {
680 if (limit == NULL || (limit &&
681 gtk_text_iter_compare (&end, limit) > 0))
682 {
683 retval = TRUE;
684
685 if (match_start)
686 *match_start = match;
687 if (match_end)
688 *match_end = end;
689 }
690 break;
691 }
692
693 if (gtk_text_iter_get_line_offset (&search) == 0)
694 {
695 if (!gtk_text_iter_backward_line (&search))
696 break;
697 }
698 else
699 {
700 gtk_text_iter_set_line_offset (&search, 0);
701 }
702 }
703
704 g_strfreev ((gchar**)lines);
705
706 return retval;
707 }
708
709 /*
710 * gtk_source_iter_find_matching_bracket is implemented in gtksourcebuffer.c
711 */