comparison gtk/gtksourceiter.c @ 14191:009db0b357b5

This is a hand-crafted commit to migrate across subversion revisions 16854:16861, due to some vagaries of the way the original renames were done. Witness that monotone can do in one revision what svn had to spread across several.
author Ethan Blanton <elb@pidgin.im>
date Sat, 16 Dec 2006 04:59:55 +0000
parents
children
comparison
equal deleted inserted replaced
14190:366be2ce35a7 14191:009db0b357b5
1 /* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*-
2 * gtksourceiter.c
3 *
4 * Gaim is the legal property of its developers, whose names are too numerous
5 * to list here. Please refer to the COPYRIGHT file distributed with this
6 * source distribution.
7 *
8 * The following copyright notice applies to this file:
9 *
10 * Copyright (C) 2000 - 2005 Paolo Maggi
11 * Copyright (C) 2002, 2003 Jeroen Zwartepoorte
12 *
13 * This program is free software; you can redistribute it and/or modify
14 * it under the terms of the GNU Library General Public License as published by
15 * the Free Software Foundation; either version 2 of the License, or
16 * (at your option) any later version.
17 *
18 * This program is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU Library General Public License for more details.
22 *
23 * You should have received a copy of the GNU Library General Public License
24 * along with this program; if not, write to the Free Software
25 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
26 */
27
28 /*
29 * Parts of this file are copied from the gedit and glimmer project.
30 */
31
32 #ifdef HAVE_CONFIG_H
33 #include <config.h>
34 #endif
35
36 #include <string.h>
37 #include "gtksourceiter.h"
38
39 #define GTK_TEXT_UNKNOWN_CHAR 0xFFFC
40
41 /* this function acts like g_utf8_offset_to_pointer() except that if it finds a
42 * decomposable character it consumes the decomposition length from the given
43 * offset. So it's useful when the offset was calculated for the normalized
44 * version of str, but we need a pointer to str itself. */
45 static const gchar *
46 pointer_from_offset_skipping_decomp (const gchar *str, gint offset)
47 {
48 gchar *casefold, *normal;
49 const gchar *p, *q;
50
51 p = str;
52 while (offset > 0)
53 {
54 q = g_utf8_next_char (p);
55 casefold = g_utf8_casefold (p, q - p);
56 normal = g_utf8_normalize (casefold, -1, G_NORMALIZE_NFD);
57 offset -= g_utf8_strlen (normal, -1);
58 g_free (casefold);
59 g_free (normal);
60 p = q;
61 }
62 return p;
63 }
64
65 static const gchar *
66 g_utf8_strcasestr (const gchar *haystack, const gchar *needle)
67 {
68 gsize needle_len;
69 gsize haystack_len;
70 const gchar *ret = NULL;
71 gchar *p;
72 gchar *casefold;
73 gchar *caseless_haystack;
74 gint i;
75
76 g_return_val_if_fail (haystack != NULL, NULL);
77 g_return_val_if_fail (needle != NULL, NULL);
78
79 casefold = g_utf8_casefold (haystack, -1);
80 caseless_haystack = g_utf8_normalize (casefold, -1, G_NORMALIZE_NFD);
81 g_free (casefold);
82
83 needle_len = g_utf8_strlen (needle, -1);
84 haystack_len = g_utf8_strlen (caseless_haystack, -1);
85
86 if (needle_len == 0)
87 {
88 ret = (gchar *)haystack;
89 goto finally_1;
90 }
91
92 if (haystack_len < needle_len)
93 {
94 ret = NULL;
95 goto finally_1;
96 }
97
98 p = (gchar*)caseless_haystack;
99 needle_len = strlen (needle);
100 i = 0;
101
102 while (*p)
103 {
104 if ((strncmp (p, needle, needle_len) == 0))
105 {
106 ret = pointer_from_offset_skipping_decomp (haystack, i);
107 goto finally_1;
108 }
109
110 p = g_utf8_next_char (p);
111 i++;
112 }
113
114 finally_1:
115 g_free (caseless_haystack);
116
117 return ret;
118 }
119
120 static const gchar *
121 g_utf8_strrcasestr (const gchar *haystack, const gchar *needle)
122 {
123 gsize needle_len;
124 gsize haystack_len;
125 const gchar *ret = NULL;
126 gchar *p;
127 gchar *casefold;
128 gchar *caseless_haystack;
129 gint i;
130
131 g_return_val_if_fail (haystack != NULL, NULL);
132 g_return_val_if_fail (needle != NULL, NULL);
133
134 casefold = g_utf8_casefold (haystack, -1);
135 caseless_haystack = g_utf8_normalize (casefold, -1, G_NORMALIZE_NFD);
136 g_free (casefold);
137
138 needle_len = g_utf8_strlen (needle, -1);
139 haystack_len = g_utf8_strlen (caseless_haystack, -1);
140
141 if (needle_len == 0)
142 {
143 ret = (gchar *)haystack;
144 goto finally_1;
145 }
146
147 if (haystack_len < needle_len)
148 {
149 ret = NULL;
150 goto finally_1;
151 }
152
153 i = haystack_len - needle_len;
154 p = g_utf8_offset_to_pointer (caseless_haystack, i);
155 needle_len = strlen (needle);
156
157 while (p >= caseless_haystack)
158 {
159 if (strncmp (p, needle, needle_len) == 0)
160 {
161 ret = pointer_from_offset_skipping_decomp (haystack, i);
162 goto finally_1;
163 }
164
165 p = g_utf8_prev_char (p);
166 i--;
167 }
168
169 finally_1:
170 g_free (caseless_haystack);
171
172 return ret;
173 }
174
175 static gboolean
176 g_utf8_caselessnmatch (const char *s1, const char *s2,
177 gssize n1, gssize n2)
178 {
179 gchar *casefold;
180 gchar *normalized_s1;
181 gchar *normalized_s2;
182 gint len_s1;
183 gint len_s2;
184 gboolean ret = FALSE;
185
186 g_return_val_if_fail (s1 != NULL, FALSE);
187 g_return_val_if_fail (s2 != NULL, FALSE);
188 g_return_val_if_fail (n1 > 0, FALSE);
189 g_return_val_if_fail (n2 > 0, FALSE);
190
191 casefold = g_utf8_casefold (s1, n1);
192 normalized_s1 = g_utf8_normalize (casefold, -1, G_NORMALIZE_NFD);
193 g_free (casefold);
194
195 casefold = g_utf8_casefold (s2, n2);
196 normalized_s2 = g_utf8_normalize (casefold, -1, G_NORMALIZE_NFD);
197 g_free (casefold);
198
199 len_s1 = strlen (normalized_s1);
200 len_s2 = strlen (normalized_s2);
201
202 if (len_s1 < len_s2)
203 goto finally_2;
204
205 ret = (strncmp (normalized_s1, normalized_s2, len_s2) == 0);
206
207 finally_2:
208 g_free (normalized_s1);
209 g_free (normalized_s2);
210
211 return ret;
212 }
213
214 static void
215 forward_chars_with_skipping (GtkTextIter *iter,
216 gint count,
217 gboolean skip_invisible,
218 gboolean skip_nontext,
219 gboolean skip_decomp)
220 {
221 gint i;
222
223 g_return_if_fail (count >= 0);
224
225 i = count;
226
227 while (i > 0)
228 {
229 gboolean ignored = FALSE;
230
231 /* minimal workaround to avoid the infinite loop of bug #168247.
232 * It doesn't fix the problemjust the symptom...
233 */
234 if (gtk_text_iter_is_end (iter))
235 return;
236
237 if (skip_nontext && gtk_text_iter_get_char (iter) == GTK_TEXT_UNKNOWN_CHAR)
238 ignored = TRUE;
239
240 #if 0
241 if (!ignored && skip_invisible &&
242 /* _gtk_text_btree_char_is_invisible (iter)*/ FALSE)
243 ignored = TRUE;
244 #endif
245
246 if (!ignored && skip_decomp)
247 {
248 /* being UTF8 correct sucks; this accounts for extra
249 offsets coming from canonical decompositions of
250 UTF8 characters (e.g. accented characters) which
251 g_utf8_normalize() performs */
252 gchar *normal;
253 gchar buffer[6];
254 gint buffer_len;
255
256 buffer_len = g_unichar_to_utf8 (gtk_text_iter_get_char (iter), buffer);
257 normal = g_utf8_normalize (buffer, buffer_len, G_NORMALIZE_NFD);
258 i -= (g_utf8_strlen (normal, -1) - 1);
259 g_free (normal);
260 }
261
262 gtk_text_iter_forward_char (iter);
263
264 if (!ignored)
265 --i;
266 }
267 }
268
269 static gboolean
270 lines_match (const GtkTextIter *start,
271 const gchar **lines,
272 gboolean visible_only,
273 gboolean slice,
274 GtkTextIter *match_start,
275 GtkTextIter *match_end)
276 {
277 GtkTextIter next;
278 gchar *line_text;
279 const gchar *found;
280 gint offset;
281
282 if (*lines == NULL || **lines == '\0')
283 {
284 if (match_start)
285 *match_start = *start;
286 if (match_end)
287 *match_end = *start;
288 return TRUE;
289 }
290
291 next = *start;
292 gtk_text_iter_forward_line (&next);
293
294 /* No more text in buffer, but *lines is nonempty */
295 if (gtk_text_iter_equal (start, &next))
296 return FALSE;
297
298 if (slice)
299 {
300 if (visible_only)
301 line_text = gtk_text_iter_get_visible_slice (start, &next);
302 else
303 line_text = gtk_text_iter_get_slice (start, &next);
304 }
305 else
306 {
307 if (visible_only)
308 line_text = gtk_text_iter_get_visible_text (start, &next);
309 else
310 line_text = gtk_text_iter_get_text (start, &next);
311 }
312
313 if (match_start) /* if this is the first line we're matching */
314 {
315 found = g_utf8_strcasestr (line_text, *lines);
316 }
317 else
318 {
319 /* If it's not the first line, we have to match from the
320 * start of the line.
321 */
322 if (g_utf8_caselessnmatch (line_text, *lines, strlen (line_text),
323 strlen (*lines)))
324 found = line_text;
325 else
326 found = NULL;
327 }
328
329 if (found == NULL)
330 {
331 g_free (line_text);
332 return FALSE;
333 }
334
335 /* Get offset to start of search string */
336 offset = g_utf8_strlen (line_text, found - line_text);
337
338 next = *start;
339
340 /* If match start needs to be returned, set it to the
341 * start of the search string.
342 */
343 forward_chars_with_skipping (&next, offset, visible_only, !slice, FALSE);
344 if (match_start)
345 {
346 *match_start = next;
347 }
348
349 /* Go to end of search string */
350 forward_chars_with_skipping (&next, g_utf8_strlen (*lines, -1), visible_only, !slice, TRUE);
351
352 g_free (line_text);
353
354 ++lines;
355
356 if (match_end)
357 *match_end = next;
358
359 /* pass NULL for match_start, since we don't need to find the
360 * start again.
361 */
362 return lines_match (&next, lines, visible_only, slice, NULL, match_end);
363 }
364
365 static gboolean
366 backward_lines_match (const GtkTextIter *start,
367 const gchar **lines,
368 gboolean visible_only,
369 gboolean slice,
370 GtkTextIter *match_start,
371 GtkTextIter *match_end)
372 {
373 GtkTextIter line, next;
374 gchar *line_text;
375 const gchar *found;
376 gint offset;
377
378 if (*lines == NULL || **lines == '\0')
379 {
380 if (match_start)
381 *match_start = *start;
382 if (match_end)
383 *match_end = *start;
384 return TRUE;
385 }
386
387 line = next = *start;
388 if (gtk_text_iter_get_line_offset (&next) == 0)
389 {
390 if (!gtk_text_iter_backward_line (&next))
391 return FALSE;
392 }
393 else
394 gtk_text_iter_set_line_offset (&next, 0);
395
396 if (slice)
397 {
398 if (visible_only)
399 line_text = gtk_text_iter_get_visible_slice (&next, &line);
400 else
401 line_text = gtk_text_iter_get_slice (&next, &line);
402 }
403 else
404 {
405 if (visible_only)
406 line_text = gtk_text_iter_get_visible_text (&next, &line);
407 else
408 line_text = gtk_text_iter_get_text (&next, &line);
409 }
410
411 if (match_start) /* if this is the first line we're matching */
412 {
413 found = g_utf8_strrcasestr (line_text, *lines);
414 }
415 else
416 {
417 /* If it's not the first line, we have to match from the
418 * start of the line.
419 */
420 if (g_utf8_caselessnmatch (line_text, *lines, strlen (line_text),
421 strlen (*lines)))
422 found = line_text;
423 else
424 found = NULL;
425 }
426
427 if (found == NULL)
428 {
429 g_free (line_text);
430 return FALSE;
431 }
432
433 /* Get offset to start of search string */
434 offset = g_utf8_strlen (line_text, found - line_text);
435
436 forward_chars_with_skipping (&next, offset, visible_only, !slice, FALSE);
437
438 /* If match start needs to be returned, set it to the
439 * start of the search string.
440 */
441 if (match_start)
442 {
443 *match_start = next;
444 }
445
446 /* Go to end of search string */
447 forward_chars_with_skipping (&next, g_utf8_strlen (*lines, -1), visible_only, !slice, TRUE);
448
449 g_free (line_text);
450
451 ++lines;
452
453 if (match_end)
454 *match_end = next;
455
456 /* try to match the rest of the lines forward, passing NULL
457 * for match_start so lines_match will try to match the entire
458 * line */
459 return lines_match (&next, lines, visible_only,
460 slice, NULL, match_end);
461 }
462
463 /* strsplit () that retains the delimiter as part of the string. */
464 static gchar **
465 strbreakup (const char *string,
466 const char *delimiter,
467 gint max_tokens)
468 {
469 GSList *string_list = NULL, *slist;
470 gchar **str_array, *s, *casefold, *new_string;
471 guint i, n = 1;
472
473 g_return_val_if_fail (string != NULL, NULL);
474 g_return_val_if_fail (delimiter != NULL, NULL);
475
476 if (max_tokens < 1)
477 max_tokens = G_MAXINT;
478
479 s = strstr (string, delimiter);
480 if (s)
481 {
482 guint delimiter_len = strlen (delimiter);
483
484 do
485 {
486 guint len;
487
488 len = s - string + delimiter_len;
489 new_string = g_new (gchar, len + 1);
490 strncpy (new_string, string, len);
491 new_string[len] = 0;
492 casefold = g_utf8_casefold (new_string, -1);
493 g_free (new_string);
494 new_string = g_utf8_normalize (casefold, -1, G_NORMALIZE_NFD);
495 g_free (casefold);
496 string_list = g_slist_prepend (string_list, new_string);
497 n++;
498 string = s + delimiter_len;
499 s = strstr (string, delimiter);
500 } while (--max_tokens && s);
501 }
502
503 if (*string)
504 {
505 n++;
506 casefold = g_utf8_casefold (string, -1);
507 new_string = g_utf8_normalize (casefold, -1, G_NORMALIZE_NFD);
508 g_free (casefold);
509 string_list = g_slist_prepend (string_list, new_string);
510 }
511
512 str_array = g_new (gchar*, n);
513
514 i = n - 1;
515
516 str_array[i--] = NULL;
517 for (slist = string_list; slist; slist = slist->next)
518 str_array[i--] = slist->data;
519
520 g_slist_free (string_list);
521
522 return str_array;
523 }
524
525 /**
526 * gtk_source_iter_forward_search:
527 * @iter: start of search.
528 * @str: a search string.
529 * @flags: flags affecting how the search is done.
530 * @match_start: return location for start of match, or %%NULL.
531 * @match_end: return location for end of match, or %%NULL.
532 * @limit: bound for the search, or %%NULL for the end of the buffer.
533 *
534 * Searches forward for @str. Any match is returned by setting
535 * @match_start to the first character of the match and @match_end to the
536 * first character after the match. The search will not continue past
537 * @limit. Note that a search is a linear or O(n) operation, so you
538 * may wish to use @limit to avoid locking up your UI on large
539 * buffers.
540 *
541 * If the #GTK_SOURCE_SEARCH_VISIBLE_ONLY flag is present, the match may
542 * have invisible text interspersed in @str. i.e. @str will be a
543 * possibly-noncontiguous subsequence of the matched range. similarly,
544 * if you specify #GTK_SOURCE_SEARCH_TEXT_ONLY, the match may have
545 * pixbufs or child widgets mixed inside the matched range. If these
546 * flags are not given, the match must be exact; the special 0xFFFC
547 * character in @str will match embedded pixbufs or child widgets.
548 * If you specify the #GTK_SOURCE_SEARCH_CASE_INSENSITIVE flag, the text will
549 * be matched regardless of what case it is in.
550 *
551 * Same as gtk_text_iter_forward_search(), but supports case insensitive
552 * searching.
553 *
554 * Return value: whether a match was found.
555 **/
556 gboolean
557 gtk_source_iter_forward_search (const GtkTextIter *iter,
558 const gchar *str,
559 GtkSourceSearchFlags flags,
560 GtkTextIter *match_start,
561 GtkTextIter *match_end,
562 const GtkTextIter *limit)
563 {
564 gchar **lines = NULL;
565 GtkTextIter match;
566 gboolean retval = FALSE;
567 GtkTextIter search;
568 gboolean visible_only;
569 gboolean slice;
570
571 g_return_val_if_fail (iter != NULL, FALSE);
572 g_return_val_if_fail (str != NULL, FALSE);
573
574 if ((flags & GTK_SOURCE_SEARCH_CASE_INSENSITIVE) == 0)
575 return gtk_text_iter_forward_search (iter, str, flags,
576 match_start, match_end,
577 limit);
578
579 if (limit && gtk_text_iter_compare (iter, limit) >= 0)
580 return FALSE;
581
582 if (*str == '\0')
583 {
584 /* If we can move one char, return the empty string there */
585 match = *iter;
586
587 if (gtk_text_iter_forward_char (&match))
588 {
589 if (limit && gtk_text_iter_equal (&match, limit))
590 return FALSE;
591
592 if (match_start)
593 *match_start = match;
594 if (match_end)
595 *match_end = match;
596 return TRUE;
597 }
598 else
599 {
600 return FALSE;
601 }
602 }
603
604 visible_only = (flags & GTK_SOURCE_SEARCH_VISIBLE_ONLY) != 0;
605 slice = (flags & GTK_SOURCE_SEARCH_TEXT_ONLY) == 0;
606
607 /* locate all lines */
608 lines = strbreakup (str, "\n", -1);
609
610 search = *iter;
611
612 do
613 {
614 /* This loop has an inefficient worst-case, where
615 * gtk_text_iter_get_text () is called repeatedly on
616 * a single line.
617 */
618 GtkTextIter end;
619
620 if (limit && gtk_text_iter_compare (&search, limit) >= 0)
621 break;
622
623 if (lines_match (&search, (const gchar**)lines,
624 visible_only, slice, &match, &end))
625 {
626 if (limit == NULL ||
627 (limit && gtk_text_iter_compare (&end, limit) <= 0))
628 {
629 retval = TRUE;
630
631 if (match_start)
632 *match_start = match;
633 if (match_end)
634 *match_end = end;
635 }
636 break;
637 }
638 } while (gtk_text_iter_forward_line (&search));
639
640 g_strfreev ((gchar**)lines);
641
642 return retval;
643 }
644
645 /**
646 * gtk_source_iter_backward_search:
647 * @iter: a #GtkTextIter where the search begins.
648 * @str: search string.
649 * @flags: bitmask of flags affecting the search.
650 * @match_start: return location for start of match, or %%NULL.
651 * @match_end: return location for end of match, or %%NULL.
652 * @limit: location of last possible @match_start, or %%NULL for start of buffer.
653 *
654 * Same as gtk_text_iter_backward_search(), but supports case insensitive
655 * searching.
656 *
657 * Return value: whether a match was found.
658 **/
659 gboolean
660 gtk_source_iter_backward_search (const GtkTextIter *iter,
661 const gchar *str,
662 GtkSourceSearchFlags flags,
663 GtkTextIter *match_start,
664 GtkTextIter *match_end,
665 const GtkTextIter *limit)
666 {
667 gchar **lines = NULL;
668 GtkTextIter match;
669 gboolean retval = FALSE;
670 GtkTextIter search;
671 gboolean visible_only;
672 gboolean slice;
673
674 g_return_val_if_fail (iter != NULL, FALSE);
675 g_return_val_if_fail (str != NULL, FALSE);
676
677 if ((flags & GTK_SOURCE_SEARCH_CASE_INSENSITIVE) == 0)
678 return gtk_text_iter_backward_search (iter, str, flags,
679 match_start, match_end,
680 limit);
681
682 if (limit && gtk_text_iter_compare (iter, limit) <= 0)
683 return FALSE;
684
685 if (*str == '\0')
686 {
687 /* If we can move one char, return the empty string there */
688 match = *iter;
689
690 if (gtk_text_iter_backward_char (&match))
691 {
692 if (limit && gtk_text_iter_equal (&match, limit))
693 return FALSE;
694
695 if (match_start)
696 *match_start = match;
697 if (match_end)
698 *match_end = match;
699 return TRUE;
700 }
701 else
702 {
703 return FALSE;
704 }
705 }
706
707 visible_only = (flags & GTK_SOURCE_SEARCH_VISIBLE_ONLY) != 0;
708 slice = (flags & GTK_SOURCE_SEARCH_TEXT_ONLY) == 0;
709
710 /* locate all lines */
711 lines = strbreakup (str, "\n", -1);
712
713 search = *iter;
714
715 while (TRUE)
716 {
717 /* This loop has an inefficient worst-case, where
718 * gtk_text_iter_get_text () is called repeatedly on
719 * a single line.
720 */
721 GtkTextIter end;
722
723 if (limit && gtk_text_iter_compare (&search, limit) <= 0)
724 break;
725
726 if (backward_lines_match (&search, (const gchar**)lines,
727 visible_only, slice, &match, &end))
728 {
729 if (limit == NULL || (limit &&
730 gtk_text_iter_compare (&end, limit) > 0))
731 {
732 retval = TRUE;
733
734 if (match_start)
735 *match_start = match;
736 if (match_end)
737 *match_end = end;
738 }
739 break;
740 }
741
742 if (gtk_text_iter_get_line_offset (&search) == 0)
743 {
744 if (!gtk_text_iter_backward_line (&search))
745 break;
746 }
747 else
748 {
749 gtk_text_iter_set_line_offset (&search, 0);
750 }
751 }
752
753 g_strfreev ((gchar**)lines);
754
755 return retval;
756 }
757
758 /*
759 * gtk_source_iter_find_matching_bracket is implemented in gtksourcebuffer.c
760 */