Mercurial > geeqie.yaz
annotate src/similar.c @ 646:91dfc98f3ca1
Reading of Iptc.Application2.Keywords should now be fixed.
Use uft8_validate_or_convert() on exif's text data to be safe.
author | zas_ |
---|---|
date | Mon, 12 May 2008 23:26:00 +0000 |
parents | f9bf33be53ff |
children | d27b4184ceb8 |
rev | line source |
---|---|
9 | 1 /* |
196 | 2 * Geeqie |
9 | 3 * (C) 2004 John Ellis |
475 | 4 * Copyright (C) 2008 The Geeqie Team |
9 | 5 * |
6 * Author: John Ellis | |
7 * | |
8 * This software is released under the GNU General Public License (GNU GPL). | |
9 * Please read the included file COPYING for more information. | |
10 * This software comes with no warranty of any kind, use at your own risk! | |
11 */ | |
12 | |
13 | |
281 | 14 #include "main.h" |
9 | 15 #include "similar.h" |
16 | |
17 /* | |
18 * These functions are intended to find images with similar color content. For | |
19 * example when an image was saved at different compression levels or dimensions | |
20 * (scaled down/up) the contents are similar, but these files do not match by file | |
21 * size, dimensions, or checksum. | |
22 * | |
23 * These functions create a 32 x 32 array for each color channel (red, green, blue). | |
24 * The array represents the average color of each corresponding part of the | |
25 * image. (imagine the image cut into 1024 rectangles, or a 32 x 32 grid. | |
26 * Each grid is then processed for the average color value, this is what | |
27 * is stored in the array) | |
28 * | |
29 * To compare two images, generate a ImageSimilarityData for each image, then | |
30 * pass them to the compare function. The return value is the percent match | |
31 * of the two images. (for this, simple comparisons are used, basically the return | |
32 * is an average of the corresponding array differences) | |
33 * | |
34 * for image_sim_compare(), the return is 0.0 to 1.0: | |
35 * 1.0 for exact matches (an image is compared to itself) | |
36 * 0.0 for exact opposite images (compare an all black to an all white image) | |
37 * generally only a match of > 0.85 are significant at all, and >.95 is useful to | |
38 * find images that have been re-saved to other formats, dimensions, or compression. | |
39 */ | |
40 | |
41 | |
42 /* | |
43 * The experimental (alternate) algorithm is only for testing of new techniques to | |
44 * improve the result, and hopes to reduce false positives. | |
45 */ | |
46 | |
47 static gint alternate_enabled = FALSE; | |
48 | |
49 void image_sim_alternate_set(gint enable) | |
50 { | |
51 alternate_enabled = enable; | |
52 } | |
53 | |
54 gint image_sim_alternate_enabled(void) | |
55 { | |
56 return alternate_enabled; | |
57 } | |
58 | |
59 ImageSimilarityData *image_sim_new(void) | |
60 { | |
61 ImageSimilarityData *sd = g_new0(ImageSimilarityData, 1); | |
62 | |
63 return sd; | |
64 } | |
65 | |
66 void image_sim_free(ImageSimilarityData *sd) | |
67 { | |
68 g_free(sd); | |
69 } | |
70 | |
71 #if 0 | |
72 static void image_sim_channel_expand(guint8 *pix, gint len) | |
73 { | |
74 guint8 l, h; | |
75 gint i; | |
76 gdouble scale; | |
77 | |
78 /* set the start values */ | |
79 l = h = pix[0]; | |
80 | |
81 /* find min/max */ | |
82 for (i = 0; i < len; i++) | |
83 { | |
84 if (pix[i] < l) l = pix[i]; | |
85 if (pix[i] > h) h = pix[i]; | |
86 } | |
87 | |
88 /* calc scale from range */ | |
89 if (l != h) | |
90 { | |
91 scale = 255.0 / (gdouble)(h - l); | |
92 } | |
93 else | |
94 { | |
95 scale = 1.0; | |
96 } | |
97 | |
98 for (i = 0; i < len; i++) | |
99 { | |
100 pix[i] = (guint8)((gdouble)pix[i] - l * scale); | |
101 } | |
102 } | |
103 #endif | |
104 | |
105 static int image_sim_channel_eq_sort_cb(const void *a, const void *b) | |
106 { | |
107 gint *pa = (void *)a; | |
108 gint *pb = (void *)b; | |
109 if (pa[1] < pb[1]) return -1; | |
110 if (pa[1] > pb[1]) return 1; | |
111 return 0; | |
112 } | |
113 | |
114 static void image_sim_channel_equal(guint8 *pix, gint len) | |
115 { | |
116 gint *buf; | |
117 gint i; | |
118 gint p; | |
119 | |
120 buf = g_new0(gint, len * 2); | |
121 | |
122 p = 0; | |
123 for (i = 0; i < len; i++) | |
124 { | |
125 buf[p] = i; | |
126 p++; | |
127 buf[p] = (gint)pix[i]; | |
128 p++; | |
129 } | |
130 | |
512
f9bf33be53ff
Remove whitespace between function name and first parenthesis for the sake of consistency.
zas_
parents:
475
diff
changeset
|
131 qsort(buf, len, sizeof(gint) * 2, image_sim_channel_eq_sort_cb); |
9 | 132 |
133 p = 0; | |
134 for (i = 0; i < len; i++) | |
135 { | |
136 gint n; | |
137 | |
138 n = buf[p]; | |
139 p+= 2; | |
140 pix[n] = (guint8)(255 * i / len); | |
141 } | |
142 | |
143 g_free(buf); | |
144 } | |
145 | |
146 static void image_sim_channel_norm(guint8 *pix, gint len) | |
147 { | |
148 guint8 l, h; | |
149 gint i; | |
150 gdouble scale; | |
151 | |
152 l = h = pix[0]; | |
153 | |
154 for (i = 0; i < len; i++) | |
155 { | |
156 if (pix[i] < l) l = pix[i]; | |
157 if (pix[i] > h) h = pix[i]; | |
158 } | |
159 | |
160 scale = (h-l !=0) ? 255.0 / (gdouble)(h - l) : 1.0; | |
161 | |
162 for (i = 0; i < len; i++) | |
163 { | |
164 pix[i] = (guint8)((gdouble)(pix[i] - l) * scale); | |
165 } | |
166 } | |
167 | |
168 /* | |
169 * define these to enable various components of the experimental compare functions | |
170 * | |
171 * Convert the thumbprint to greyscale (ignore all color information when comparing) | |
172 * #define ALTERNATE_USES_GREYSCALE 1 | |
173 * | |
174 * Take into account the difference in change from one pixel to the next | |
175 * #define ALTERNATE_INCLUDE_COMPARE_CHANGE 1 | |
176 */ | |
177 | |
178 void image_sim_alternate_processing(ImageSimilarityData *sd) | |
179 { | |
180 #ifdef ALTERNATE_USES_GREYSCALE | |
181 gint i; | |
182 #endif | |
183 | |
184 if (!alternate_enabled) return; | |
442 | 185 |
9 | 186 image_sim_channel_norm(sd->avg_r, sizeof(sd->avg_r)); |
187 image_sim_channel_norm(sd->avg_g, sizeof(sd->avg_g)); | |
188 image_sim_channel_norm(sd->avg_b, sizeof(sd->avg_b)); | |
189 | |
190 image_sim_channel_equal(sd->avg_r, sizeof(sd->avg_r)); | |
191 image_sim_channel_equal(sd->avg_g, sizeof(sd->avg_g)); | |
192 image_sim_channel_equal(sd->avg_b, sizeof(sd->avg_b)); | |
193 | |
194 #ifdef ALTERNATE_USES_GREYSCALE | |
195 for (i = 0; i < sizeof(sd->avg_r); i++) | |
196 { | |
197 guint8 n; | |
442 | 198 |
9 | 199 n = (guint8)((gint)(sd->avg_r[i] + sd->avg_g[i] + sd->avg_b[i]) / 3); |
200 sd->avg_r[i] = sd->avg_g[i] = sd->avg_b[i] = n; | |
201 } | |
202 #endif | |
203 } | |
204 | |
205 void image_sim_fill_data(ImageSimilarityData *sd, GdkPixbuf *pixbuf) | |
206 { | |
207 gint w, h; | |
208 gint rs; | |
209 guchar *pix; | |
210 gint has_alpha; | |
211 gint p_step; | |
212 | |
213 guchar *p; | |
214 gint i; | |
215 gint j; | |
216 gint x_inc, y_inc; | |
217 gint xs, ys; | |
218 | |
219 gint x_small = FALSE; /* if less than 32 w or h, set TRUE */ | |
220 gint y_small = FALSE; | |
221 | |
222 if (!sd || !pixbuf) return; | |
223 | |
224 w = gdk_pixbuf_get_width(pixbuf); | |
225 h = gdk_pixbuf_get_height(pixbuf); | |
226 rs = gdk_pixbuf_get_rowstride(pixbuf); | |
227 pix = gdk_pixbuf_get_pixels(pixbuf); | |
228 has_alpha = gdk_pixbuf_get_has_alpha(pixbuf); | |
229 | |
230 p_step = has_alpha ? 4 : 3; | |
231 x_inc = w / 32; | |
232 y_inc = h / 32; | |
233 | |
234 if (x_inc < 1) | |
235 { | |
236 x_inc = 1; | |
237 x_small = TRUE; | |
238 } | |
239 if (y_inc < 1) | |
240 { | |
241 y_inc = 1; | |
242 y_small = TRUE; | |
243 } | |
244 | |
245 j = 0; | |
246 | |
247 for (ys = 0; ys < 32; ys++) | |
248 { | |
249 if (y_small) j = (gdouble)h / 32 * ys; | |
250 | |
251 i = 0; | |
252 | |
253 for (xs = 0; xs < 32; xs++) | |
254 { | |
255 gint x, y; | |
256 gint r, g, b; | |
257 gint t; | |
258 | |
259 if (x_small) i = (gdouble)w / 32 * xs; | |
260 | |
261 r = g = b = 0; | |
262 | |
263 for (y = j; y < j + y_inc; y++) | |
264 { | |
265 p = pix + (y * rs) + (i * p_step); | |
266 for (x = i; x < i + x_inc; x++) | |
267 { | |
268 r += *p; p++; | |
269 g += *p; p++; | |
270 b += *p; p++; | |
271 if (has_alpha) p++; | |
272 } | |
273 } | |
274 | |
275 t = x_inc * y_inc; | |
276 r /= t; | |
277 g /= t; | |
278 b /= t; | |
279 | |
280 t = ys * 32 + xs; | |
281 sd->avg_r[t] = r; | |
282 sd->avg_g[t] = g; | |
283 sd->avg_b[t] = b; | |
284 | |
285 i += x_inc; | |
286 } | |
287 | |
288 j += y_inc; | |
289 } | |
290 | |
291 sd->filled = TRUE; | |
292 } | |
293 | |
294 ImageSimilarityData *image_sim_new_from_pixbuf(GdkPixbuf *pixbuf) | |
295 { | |
296 ImageSimilarityData *sd; | |
297 | |
298 sd = image_sim_new(); | |
299 image_sim_fill_data(sd, pixbuf); | |
300 | |
301 return sd; | |
302 } | |
303 | |
304 #ifdef ALTERNATE_INCLUDE_COMPARE_CHANGE | |
305 static gdouble alternate_image_sim_compare_fast(ImageSimilarityData *a, ImageSimilarityData *b, gdouble min) | |
306 { | |
307 gint sim; | |
308 gint i; | |
309 gint j; | |
310 gint ld; | |
311 | |
312 if (!a || !b || !a->filled || !b->filled) return 0.0; | |
313 | |
314 | |
315 min = 1.0 - min; | |
316 sim = 0.0; | |
317 ld = 0; | |
318 | |
319 for (j = 0; j < 1024; j+= 32) | |
320 { | |
321 for (i = j; i < j + 32; i++) | |
322 { | |
323 gint cr, cg, cb; | |
324 gint cd; | |
325 | |
326 cr = abs(a->avg_r[i] - b->avg_r[i]); | |
327 cg = abs(a->avg_g[i] - b->avg_g[i]); | |
328 cb = abs(a->avg_b[i] - b->avg_b[i]); | |
329 | |
330 cd = cr + cg + cb; | |
331 sim += cd + abs(cd - ld); | |
332 ld = cd / 3; | |
333 } | |
334 /* check for abort, if so return 0.0 */ | |
335 if ((gdouble)sim / (255.0 * 1024.0 * 4.0) > min) return 0.0; | |
336 } | |
337 | |
338 return (1.0 - ((gdouble)sim / (255.0 * 1024.0 * 4.0)) ); | |
339 } | |
340 #endif | |
341 | |
342 gdouble image_sim_compare(ImageSimilarityData *a, ImageSimilarityData *b) | |
343 { | |
344 gint sim; | |
345 gint i; | |
346 | |
347 if (!a || !b || !a->filled || !b->filled) return 0.0; | |
348 | |
349 sim = 0.0; | |
350 | |
351 for (i = 0; i < 1024; i++) | |
352 { | |
353 sim += abs(a->avg_r[i] - b->avg_r[i]); | |
354 sim += abs(a->avg_g[i] - b->avg_g[i]); | |
355 sim += abs(a->avg_b[i] - b->avg_b[i]); | |
356 } | |
357 | |
358 return 1.0 - ((gdouble)sim / (255.0 * 1024.0 * 3.0)); | |
359 } | |
360 | |
361 /* this uses a cutoff point so that it can abort early when it gets to | |
362 * a point that can simply no longer make the cut-off point. | |
363 */ | |
364 gdouble image_sim_compare_fast(ImageSimilarityData *a, ImageSimilarityData *b, gdouble min) | |
365 { | |
366 gint sim; | |
367 gint i; | |
368 gint j; | |
369 | |
370 #ifdef ALTERNATE_INCLUDE_COMPARE_CHANGE | |
371 if (alternate_enabled) return alternate_image_sim_compare_fast(a, b, min); | |
372 #endif | |
373 | |
374 if (!a || !b || !a->filled || !b->filled) return 0.0; | |
375 | |
376 min = 1.0 - min; | |
377 sim = 0.0; | |
378 | |
379 for (j = 0; j < 1024; j+= 32) | |
380 { | |
381 for (i = j; i < j + 32; i++) | |
382 { | |
383 sim += abs(a->avg_r[i] - b->avg_r[i]); | |
384 sim += abs(a->avg_g[i] - b->avg_g[i]); | |
385 sim += abs(a->avg_b[i] - b->avg_b[i]); | |
386 } | |
387 /* check for abort, if so return 0.0 */ | |
388 if ((gdouble)sim / (255.0 * 1024.0 * 3.0) > min) return 0.0; | |
389 } | |
390 | |
391 return (1.0 - ((gdouble)sim / (255.0 * 1024.0 * 3.0)) ); | |
392 } |