9
|
1 /*
|
196
|
2 * Geeqie
|
9
|
3 * (C) 2004 John Ellis
|
475
|
4 * Copyright (C) 2008 The Geeqie Team
|
9
|
5 *
|
|
6 * Author: John Ellis
|
|
7 *
|
|
8 * This software is released under the GNU General Public License (GNU GPL).
|
|
9 * Please read the included file COPYING for more information.
|
|
10 * This software comes with no warranty of any kind, use at your own risk!
|
|
11 */
|
|
12
|
|
13
|
281
|
14 #include "main.h"
|
9
|
15 #include "similar.h"
|
|
16
|
|
17 /*
|
|
18 * These functions are intended to find images with similar color content. For
|
|
19 * example when an image was saved at different compression levels or dimensions
|
|
20 * (scaled down/up) the contents are similar, but these files do not match by file
|
|
21 * size, dimensions, or checksum.
|
|
22 *
|
|
23 * These functions create a 32 x 32 array for each color channel (red, green, blue).
|
|
24 * The array represents the average color of each corresponding part of the
|
|
25 * image. (imagine the image cut into 1024 rectangles, or a 32 x 32 grid.
|
|
26 * Each grid is then processed for the average color value, this is what
|
|
27 * is stored in the array)
|
|
28 *
|
|
29 * To compare two images, generate a ImageSimilarityData for each image, then
|
|
30 * pass them to the compare function. The return value is the percent match
|
|
31 * of the two images. (for this, simple comparisons are used, basically the return
|
|
32 * is an average of the corresponding array differences)
|
|
33 *
|
|
34 * for image_sim_compare(), the return is 0.0 to 1.0:
|
|
35 * 1.0 for exact matches (an image is compared to itself)
|
|
36 * 0.0 for exact opposite images (compare an all black to an all white image)
|
|
37 * generally only a match of > 0.85 are significant at all, and >.95 is useful to
|
|
38 * find images that have been re-saved to other formats, dimensions, or compression.
|
|
39 */
|
|
40
|
|
41
|
|
42 /*
|
|
43 * The experimental (alternate) algorithm is only for testing of new techniques to
|
|
44 * improve the result, and hopes to reduce false positives.
|
|
45 */
|
|
46
|
|
47 static gint alternate_enabled = FALSE;
|
|
48
|
|
49 void image_sim_alternate_set(gint enable)
|
|
50 {
|
|
51 alternate_enabled = enable;
|
|
52 }
|
|
53
|
|
54 gint image_sim_alternate_enabled(void)
|
|
55 {
|
|
56 return alternate_enabled;
|
|
57 }
|
|
58
|
|
59 ImageSimilarityData *image_sim_new(void)
|
|
60 {
|
|
61 ImageSimilarityData *sd = g_new0(ImageSimilarityData, 1);
|
|
62
|
|
63 return sd;
|
|
64 }
|
|
65
|
|
66 void image_sim_free(ImageSimilarityData *sd)
|
|
67 {
|
|
68 g_free(sd);
|
|
69 }
|
|
70
|
|
71 #if 0
|
|
72 static void image_sim_channel_expand(guint8 *pix, gint len)
|
|
73 {
|
|
74 guint8 l, h;
|
|
75 gint i;
|
|
76 gdouble scale;
|
|
77
|
|
78 /* set the start values */
|
|
79 l = h = pix[0];
|
|
80
|
|
81 /* find min/max */
|
|
82 for (i = 0; i < len; i++)
|
|
83 {
|
|
84 if (pix[i] < l) l = pix[i];
|
|
85 if (pix[i] > h) h = pix[i];
|
|
86 }
|
|
87
|
|
88 /* calc scale from range */
|
|
89 if (l != h)
|
|
90 {
|
|
91 scale = 255.0 / (gdouble)(h - l);
|
|
92 }
|
|
93 else
|
|
94 {
|
|
95 scale = 1.0;
|
|
96 }
|
|
97
|
|
98 for (i = 0; i < len; i++)
|
|
99 {
|
|
100 pix[i] = (guint8)((gdouble)pix[i] - l * scale);
|
|
101 }
|
|
102 }
|
|
103 #endif
|
|
104
|
|
105 static int image_sim_channel_eq_sort_cb(const void *a, const void *b)
|
|
106 {
|
|
107 gint *pa = (void *)a;
|
|
108 gint *pb = (void *)b;
|
|
109 if (pa[1] < pb[1]) return -1;
|
|
110 if (pa[1] > pb[1]) return 1;
|
|
111 return 0;
|
|
112 }
|
|
113
|
|
114 static void image_sim_channel_equal(guint8 *pix, gint len)
|
|
115 {
|
|
116 gint *buf;
|
|
117 gint i;
|
|
118 gint p;
|
|
119
|
|
120 buf = g_new0(gint, len * 2);
|
|
121
|
|
122 p = 0;
|
|
123 for (i = 0; i < len; i++)
|
|
124 {
|
|
125 buf[p] = i;
|
|
126 p++;
|
|
127 buf[p] = (gint)pix[i];
|
|
128 p++;
|
|
129 }
|
|
130
|
|
131 qsort (buf, len, sizeof(gint) * 2, image_sim_channel_eq_sort_cb);
|
|
132
|
|
133 p = 0;
|
|
134 for (i = 0; i < len; i++)
|
|
135 {
|
|
136 gint n;
|
|
137
|
|
138 n = buf[p];
|
|
139 p+= 2;
|
|
140 pix[n] = (guint8)(255 * i / len);
|
|
141 }
|
|
142
|
|
143 g_free(buf);
|
|
144 }
|
|
145
|
|
146 static void image_sim_channel_norm(guint8 *pix, gint len)
|
|
147 {
|
|
148 guint8 l, h;
|
|
149 gint i;
|
|
150 gdouble scale;
|
|
151
|
|
152 l = h = pix[0];
|
|
153
|
|
154 for (i = 0; i < len; i++)
|
|
155 {
|
|
156 if (pix[i] < l) l = pix[i];
|
|
157 if (pix[i] > h) h = pix[i];
|
|
158 }
|
|
159
|
|
160 scale = (h-l !=0) ? 255.0 / (gdouble)(h - l) : 1.0;
|
|
161
|
|
162 for (i = 0; i < len; i++)
|
|
163 {
|
|
164 pix[i] = (guint8)((gdouble)(pix[i] - l) * scale);
|
|
165 }
|
|
166 }
|
|
167
|
|
168 /*
|
|
169 * define these to enable various components of the experimental compare functions
|
|
170 *
|
|
171 * Convert the thumbprint to greyscale (ignore all color information when comparing)
|
|
172 * #define ALTERNATE_USES_GREYSCALE 1
|
|
173 *
|
|
174 * Take into account the difference in change from one pixel to the next
|
|
175 * #define ALTERNATE_INCLUDE_COMPARE_CHANGE 1
|
|
176 */
|
|
177
|
|
178 void image_sim_alternate_processing(ImageSimilarityData *sd)
|
|
179 {
|
|
180 #ifdef ALTERNATE_USES_GREYSCALE
|
|
181 gint i;
|
|
182 #endif
|
|
183
|
|
184 if (!alternate_enabled) return;
|
442
|
185
|
9
|
186 image_sim_channel_norm(sd->avg_r, sizeof(sd->avg_r));
|
|
187 image_sim_channel_norm(sd->avg_g, sizeof(sd->avg_g));
|
|
188 image_sim_channel_norm(sd->avg_b, sizeof(sd->avg_b));
|
|
189
|
|
190 image_sim_channel_equal(sd->avg_r, sizeof(sd->avg_r));
|
|
191 image_sim_channel_equal(sd->avg_g, sizeof(sd->avg_g));
|
|
192 image_sim_channel_equal(sd->avg_b, sizeof(sd->avg_b));
|
|
193
|
|
194 #ifdef ALTERNATE_USES_GREYSCALE
|
|
195 for (i = 0; i < sizeof(sd->avg_r); i++)
|
|
196 {
|
|
197 guint8 n;
|
442
|
198
|
9
|
199 n = (guint8)((gint)(sd->avg_r[i] + sd->avg_g[i] + sd->avg_b[i]) / 3);
|
|
200 sd->avg_r[i] = sd->avg_g[i] = sd->avg_b[i] = n;
|
|
201 }
|
|
202 #endif
|
|
203 }
|
|
204
|
|
205 void image_sim_fill_data(ImageSimilarityData *sd, GdkPixbuf *pixbuf)
|
|
206 {
|
|
207 gint w, h;
|
|
208 gint rs;
|
|
209 guchar *pix;
|
|
210 gint has_alpha;
|
|
211 gint p_step;
|
|
212
|
|
213 guchar *p;
|
|
214 gint i;
|
|
215 gint j;
|
|
216 gint x_inc, y_inc;
|
|
217 gint xs, ys;
|
|
218
|
|
219 gint x_small = FALSE; /* if less than 32 w or h, set TRUE */
|
|
220 gint y_small = FALSE;
|
|
221
|
|
222 if (!sd || !pixbuf) return;
|
|
223
|
|
224 w = gdk_pixbuf_get_width(pixbuf);
|
|
225 h = gdk_pixbuf_get_height(pixbuf);
|
|
226 rs = gdk_pixbuf_get_rowstride(pixbuf);
|
|
227 pix = gdk_pixbuf_get_pixels(pixbuf);
|
|
228 has_alpha = gdk_pixbuf_get_has_alpha(pixbuf);
|
|
229
|
|
230 p_step = has_alpha ? 4 : 3;
|
|
231 x_inc = w / 32;
|
|
232 y_inc = h / 32;
|
|
233
|
|
234 if (x_inc < 1)
|
|
235 {
|
|
236 x_inc = 1;
|
|
237 x_small = TRUE;
|
|
238 }
|
|
239 if (y_inc < 1)
|
|
240 {
|
|
241 y_inc = 1;
|
|
242 y_small = TRUE;
|
|
243 }
|
|
244
|
|
245 j = 0;
|
|
246
|
|
247 for (ys = 0; ys < 32; ys++)
|
|
248 {
|
|
249 if (y_small) j = (gdouble)h / 32 * ys;
|
|
250
|
|
251 i = 0;
|
|
252
|
|
253 for (xs = 0; xs < 32; xs++)
|
|
254 {
|
|
255 gint x, y;
|
|
256 gint r, g, b;
|
|
257 gint t;
|
|
258
|
|
259 if (x_small) i = (gdouble)w / 32 * xs;
|
|
260
|
|
261 r = g = b = 0;
|
|
262
|
|
263 for (y = j; y < j + y_inc; y++)
|
|
264 {
|
|
265 p = pix + (y * rs) + (i * p_step);
|
|
266 for (x = i; x < i + x_inc; x++)
|
|
267 {
|
|
268 r += *p; p++;
|
|
269 g += *p; p++;
|
|
270 b += *p; p++;
|
|
271 if (has_alpha) p++;
|
|
272 }
|
|
273 }
|
|
274
|
|
275 t = x_inc * y_inc;
|
|
276 r /= t;
|
|
277 g /= t;
|
|
278 b /= t;
|
|
279
|
|
280 t = ys * 32 + xs;
|
|
281 sd->avg_r[t] = r;
|
|
282 sd->avg_g[t] = g;
|
|
283 sd->avg_b[t] = b;
|
|
284
|
|
285 i += x_inc;
|
|
286 }
|
|
287
|
|
288 j += y_inc;
|
|
289 }
|
|
290
|
|
291 sd->filled = TRUE;
|
|
292 }
|
|
293
|
|
294 ImageSimilarityData *image_sim_new_from_pixbuf(GdkPixbuf *pixbuf)
|
|
295 {
|
|
296 ImageSimilarityData *sd;
|
|
297
|
|
298 sd = image_sim_new();
|
|
299 image_sim_fill_data(sd, pixbuf);
|
|
300
|
|
301 return sd;
|
|
302 }
|
|
303
|
|
304 #ifdef ALTERNATE_INCLUDE_COMPARE_CHANGE
|
|
305 static gdouble alternate_image_sim_compare_fast(ImageSimilarityData *a, ImageSimilarityData *b, gdouble min)
|
|
306 {
|
|
307 gint sim;
|
|
308 gint i;
|
|
309 gint j;
|
|
310 gint ld;
|
|
311
|
|
312 if (!a || !b || !a->filled || !b->filled) return 0.0;
|
|
313
|
|
314
|
|
315 min = 1.0 - min;
|
|
316 sim = 0.0;
|
|
317 ld = 0;
|
|
318
|
|
319 for (j = 0; j < 1024; j+= 32)
|
|
320 {
|
|
321 for (i = j; i < j + 32; i++)
|
|
322 {
|
|
323 gint cr, cg, cb;
|
|
324 gint cd;
|
|
325
|
|
326 cr = abs(a->avg_r[i] - b->avg_r[i]);
|
|
327 cg = abs(a->avg_g[i] - b->avg_g[i]);
|
|
328 cb = abs(a->avg_b[i] - b->avg_b[i]);
|
|
329
|
|
330 cd = cr + cg + cb;
|
|
331 sim += cd + abs(cd - ld);
|
|
332 ld = cd / 3;
|
|
333 }
|
|
334 /* check for abort, if so return 0.0 */
|
|
335 if ((gdouble)sim / (255.0 * 1024.0 * 4.0) > min) return 0.0;
|
|
336 }
|
|
337
|
|
338 return (1.0 - ((gdouble)sim / (255.0 * 1024.0 * 4.0)) );
|
|
339 }
|
|
340 #endif
|
|
341
|
|
342 gdouble image_sim_compare(ImageSimilarityData *a, ImageSimilarityData *b)
|
|
343 {
|
|
344 gint sim;
|
|
345 gint i;
|
|
346
|
|
347 if (!a || !b || !a->filled || !b->filled) return 0.0;
|
|
348
|
|
349 sim = 0.0;
|
|
350
|
|
351 for (i = 0; i < 1024; i++)
|
|
352 {
|
|
353 sim += abs(a->avg_r[i] - b->avg_r[i]);
|
|
354 sim += abs(a->avg_g[i] - b->avg_g[i]);
|
|
355 sim += abs(a->avg_b[i] - b->avg_b[i]);
|
|
356 }
|
|
357
|
|
358 return 1.0 - ((gdouble)sim / (255.0 * 1024.0 * 3.0));
|
|
359 }
|
|
360
|
|
361 /* this uses a cutoff point so that it can abort early when it gets to
|
|
362 * a point that can simply no longer make the cut-off point.
|
|
363 */
|
|
364 gdouble image_sim_compare_fast(ImageSimilarityData *a, ImageSimilarityData *b, gdouble min)
|
|
365 {
|
|
366 gint sim;
|
|
367 gint i;
|
|
368 gint j;
|
|
369
|
|
370 #ifdef ALTERNATE_INCLUDE_COMPARE_CHANGE
|
|
371 if (alternate_enabled) return alternate_image_sim_compare_fast(a, b, min);
|
|
372 #endif
|
|
373
|
|
374 if (!a || !b || !a->filled || !b->filled) return 0.0;
|
|
375
|
|
376 min = 1.0 - min;
|
|
377 sim = 0.0;
|
|
378
|
|
379 for (j = 0; j < 1024; j+= 32)
|
|
380 {
|
|
381 for (i = j; i < j + 32; i++)
|
|
382 {
|
|
383 sim += abs(a->avg_r[i] - b->avg_r[i]);
|
|
384 sim += abs(a->avg_g[i] - b->avg_g[i]);
|
|
385 sim += abs(a->avg_b[i] - b->avg_b[i]);
|
|
386 }
|
|
387 /* check for abort, if so return 0.0 */
|
|
388 if ((gdouble)sim / (255.0 * 1024.0 * 3.0) > min) return 0.0;
|
|
389 }
|
|
390
|
|
391 return (1.0 - ((gdouble)sim / (255.0 * 1024.0 * 3.0)) );
|
|
392 }
|