9
|
1 /*
|
|
2 * GQview
|
|
3 * (C) 2004 John Ellis
|
|
4 *
|
|
5 * Author: John Ellis
|
|
6 *
|
|
7 * This software is released under the GNU General Public License (GNU GPL).
|
|
8 * Please read the included file COPYING for more information.
|
|
9 * This software comes with no warranty of any kind, use at your own risk!
|
|
10 */
|
|
11
|
|
12
|
|
13 #include "gqview.h"
|
|
14 #include "similar.h"
|
|
15
|
|
16 /*
|
|
17 * These functions are intended to find images with similar color content. For
|
|
18 * example when an image was saved at different compression levels or dimensions
|
|
19 * (scaled down/up) the contents are similar, but these files do not match by file
|
|
20 * size, dimensions, or checksum.
|
|
21 *
|
|
22 * These functions create a 32 x 32 array for each color channel (red, green, blue).
|
|
23 * The array represents the average color of each corresponding part of the
|
|
24 * image. (imagine the image cut into 1024 rectangles, or a 32 x 32 grid.
|
|
25 * Each grid is then processed for the average color value, this is what
|
|
26 * is stored in the array)
|
|
27 *
|
|
28 * To compare two images, generate a ImageSimilarityData for each image, then
|
|
29 * pass them to the compare function. The return value is the percent match
|
|
30 * of the two images. (for this, simple comparisons are used, basically the return
|
|
31 * is an average of the corresponding array differences)
|
|
32 *
|
|
33 * for image_sim_compare(), the return is 0.0 to 1.0:
|
|
34 * 1.0 for exact matches (an image is compared to itself)
|
|
35 * 0.0 for exact opposite images (compare an all black to an all white image)
|
|
36 * generally only a match of > 0.85 are significant at all, and >.95 is useful to
|
|
37 * find images that have been re-saved to other formats, dimensions, or compression.
|
|
38 */
|
|
39
|
|
40
|
|
41 /*
|
|
42 * The experimental (alternate) algorithm is only for testing of new techniques to
|
|
43 * improve the result, and hopes to reduce false positives.
|
|
44 */
|
|
45
|
|
46 static gint alternate_enabled = FALSE;
|
|
47
|
|
48 void image_sim_alternate_set(gint enable)
|
|
49 {
|
|
50 alternate_enabled = enable;
|
|
51 }
|
|
52
|
|
53 gint image_sim_alternate_enabled(void)
|
|
54 {
|
|
55 return alternate_enabled;
|
|
56 }
|
|
57
|
|
58 ImageSimilarityData *image_sim_new(void)
|
|
59 {
|
|
60 ImageSimilarityData *sd = g_new0(ImageSimilarityData, 1);
|
|
61
|
|
62 return sd;
|
|
63 }
|
|
64
|
|
65 void image_sim_free(ImageSimilarityData *sd)
|
|
66 {
|
|
67 g_free(sd);
|
|
68 }
|
|
69
|
|
70 #if 0
|
|
71 static void image_sim_channel_expand(guint8 *pix, gint len)
|
|
72 {
|
|
73 guint8 l, h;
|
|
74 gint i;
|
|
75 gdouble scale;
|
|
76
|
|
77 /* set the start values */
|
|
78 l = h = pix[0];
|
|
79
|
|
80 /* find min/max */
|
|
81 for (i = 0; i < len; i++)
|
|
82 {
|
|
83 if (pix[i] < l) l = pix[i];
|
|
84 if (pix[i] > h) h = pix[i];
|
|
85 }
|
|
86
|
|
87 /* calc scale from range */
|
|
88 if (l != h)
|
|
89 {
|
|
90 scale = 255.0 / (gdouble)(h - l);
|
|
91 }
|
|
92 else
|
|
93 {
|
|
94 scale = 1.0;
|
|
95 }
|
|
96
|
|
97 for (i = 0; i < len; i++)
|
|
98 {
|
|
99 pix[i] = (guint8)((gdouble)pix[i] - l * scale);
|
|
100 }
|
|
101 }
|
|
102 #endif
|
|
103
|
|
104 static int image_sim_channel_eq_sort_cb(const void *a, const void *b)
|
|
105 {
|
|
106 gint *pa = (void *)a;
|
|
107 gint *pb = (void *)b;
|
|
108 if (pa[1] < pb[1]) return -1;
|
|
109 if (pa[1] > pb[1]) return 1;
|
|
110 return 0;
|
|
111 }
|
|
112
|
|
113 static void image_sim_channel_equal(guint8 *pix, gint len)
|
|
114 {
|
|
115 gint *buf;
|
|
116 gint i;
|
|
117 gint p;
|
|
118
|
|
119 buf = g_new0(gint, len * 2);
|
|
120
|
|
121 p = 0;
|
|
122 for (i = 0; i < len; i++)
|
|
123 {
|
|
124 buf[p] = i;
|
|
125 p++;
|
|
126 buf[p] = (gint)pix[i];
|
|
127 p++;
|
|
128 }
|
|
129
|
|
130 qsort (buf, len, sizeof(gint) * 2, image_sim_channel_eq_sort_cb);
|
|
131
|
|
132 p = 0;
|
|
133 for (i = 0; i < len; i++)
|
|
134 {
|
|
135 gint n;
|
|
136
|
|
137 n = buf[p];
|
|
138 p+= 2;
|
|
139 pix[n] = (guint8)(255 * i / len);
|
|
140 }
|
|
141
|
|
142 g_free(buf);
|
|
143 }
|
|
144
|
|
145 static void image_sim_channel_norm(guint8 *pix, gint len)
|
|
146 {
|
|
147 guint8 l, h;
|
|
148 gint i;
|
|
149 gdouble scale;
|
|
150
|
|
151 l = h = pix[0];
|
|
152
|
|
153 for (i = 0; i < len; i++)
|
|
154 {
|
|
155 if (pix[i] < l) l = pix[i];
|
|
156 if (pix[i] > h) h = pix[i];
|
|
157 }
|
|
158
|
|
159 scale = (h-l !=0) ? 255.0 / (gdouble)(h - l) : 1.0;
|
|
160
|
|
161 for (i = 0; i < len; i++)
|
|
162 {
|
|
163 pix[i] = (guint8)((gdouble)(pix[i] - l) * scale);
|
|
164 }
|
|
165 }
|
|
166
|
|
167 /*
|
|
168 * define these to enable various components of the experimental compare functions
|
|
169 *
|
|
170 * Convert the thumbprint to greyscale (ignore all color information when comparing)
|
|
171 * #define ALTERNATE_USES_GREYSCALE 1
|
|
172 *
|
|
173 * Take into account the difference in change from one pixel to the next
|
|
174 * #define ALTERNATE_INCLUDE_COMPARE_CHANGE 1
|
|
175 */
|
|
176
|
|
177 void image_sim_alternate_processing(ImageSimilarityData *sd)
|
|
178 {
|
|
179 #ifdef ALTERNATE_USES_GREYSCALE
|
|
180 gint i;
|
|
181 #endif
|
|
182
|
|
183 if (!alternate_enabled) return;
|
|
184
|
|
185 image_sim_channel_norm(sd->avg_r, sizeof(sd->avg_r));
|
|
186 image_sim_channel_norm(sd->avg_g, sizeof(sd->avg_g));
|
|
187 image_sim_channel_norm(sd->avg_b, sizeof(sd->avg_b));
|
|
188
|
|
189 image_sim_channel_equal(sd->avg_r, sizeof(sd->avg_r));
|
|
190 image_sim_channel_equal(sd->avg_g, sizeof(sd->avg_g));
|
|
191 image_sim_channel_equal(sd->avg_b, sizeof(sd->avg_b));
|
|
192
|
|
193 #ifdef ALTERNATE_USES_GREYSCALE
|
|
194 for (i = 0; i < sizeof(sd->avg_r); i++)
|
|
195 {
|
|
196 guint8 n;
|
|
197
|
|
198 n = (guint8)((gint)(sd->avg_r[i] + sd->avg_g[i] + sd->avg_b[i]) / 3);
|
|
199 sd->avg_r[i] = sd->avg_g[i] = sd->avg_b[i] = n;
|
|
200 }
|
|
201 #endif
|
|
202 }
|
|
203
|
|
204 void image_sim_fill_data(ImageSimilarityData *sd, GdkPixbuf *pixbuf)
|
|
205 {
|
|
206 gint w, h;
|
|
207 gint rs;
|
|
208 guchar *pix;
|
|
209 gint has_alpha;
|
|
210 gint p_step;
|
|
211
|
|
212 guchar *p;
|
|
213 gint i;
|
|
214 gint j;
|
|
215 gint x_inc, y_inc;
|
|
216 gint xs, ys;
|
|
217
|
|
218 gint x_small = FALSE; /* if less than 32 w or h, set TRUE */
|
|
219 gint y_small = FALSE;
|
|
220
|
|
221 if (!sd || !pixbuf) return;
|
|
222
|
|
223 w = gdk_pixbuf_get_width(pixbuf);
|
|
224 h = gdk_pixbuf_get_height(pixbuf);
|
|
225 rs = gdk_pixbuf_get_rowstride(pixbuf);
|
|
226 pix = gdk_pixbuf_get_pixels(pixbuf);
|
|
227 has_alpha = gdk_pixbuf_get_has_alpha(pixbuf);
|
|
228
|
|
229 p_step = has_alpha ? 4 : 3;
|
|
230 x_inc = w / 32;
|
|
231 y_inc = h / 32;
|
|
232
|
|
233 if (x_inc < 1)
|
|
234 {
|
|
235 x_inc = 1;
|
|
236 x_small = TRUE;
|
|
237 }
|
|
238 if (y_inc < 1)
|
|
239 {
|
|
240 y_inc = 1;
|
|
241 y_small = TRUE;
|
|
242 }
|
|
243
|
|
244 j = 0;
|
|
245
|
|
246 for (ys = 0; ys < 32; ys++)
|
|
247 {
|
|
248 if (y_small) j = (gdouble)h / 32 * ys;
|
|
249
|
|
250 i = 0;
|
|
251
|
|
252 for (xs = 0; xs < 32; xs++)
|
|
253 {
|
|
254 gint x, y;
|
|
255 gint r, g, b;
|
|
256 gint t;
|
|
257
|
|
258 if (x_small) i = (gdouble)w / 32 * xs;
|
|
259
|
|
260 r = g = b = 0;
|
|
261
|
|
262 for (y = j; y < j + y_inc; y++)
|
|
263 {
|
|
264 p = pix + (y * rs) + (i * p_step);
|
|
265 for (x = i; x < i + x_inc; x++)
|
|
266 {
|
|
267 r += *p; p++;
|
|
268 g += *p; p++;
|
|
269 b += *p; p++;
|
|
270 if (has_alpha) p++;
|
|
271 }
|
|
272 }
|
|
273
|
|
274 t = x_inc * y_inc;
|
|
275 r /= t;
|
|
276 g /= t;
|
|
277 b /= t;
|
|
278
|
|
279 t = ys * 32 + xs;
|
|
280 sd->avg_r[t] = r;
|
|
281 sd->avg_g[t] = g;
|
|
282 sd->avg_b[t] = b;
|
|
283
|
|
284 i += x_inc;
|
|
285 }
|
|
286
|
|
287 j += y_inc;
|
|
288 }
|
|
289
|
|
290 sd->filled = TRUE;
|
|
291 }
|
|
292
|
|
293 ImageSimilarityData *image_sim_new_from_pixbuf(GdkPixbuf *pixbuf)
|
|
294 {
|
|
295 ImageSimilarityData *sd;
|
|
296
|
|
297 sd = image_sim_new();
|
|
298 image_sim_fill_data(sd, pixbuf);
|
|
299
|
|
300 return sd;
|
|
301 }
|
|
302
|
|
303 #ifdef ALTERNATE_INCLUDE_COMPARE_CHANGE
|
|
304 static gdouble alternate_image_sim_compare_fast(ImageSimilarityData *a, ImageSimilarityData *b, gdouble min)
|
|
305 {
|
|
306 gint sim;
|
|
307 gint i;
|
|
308 gint j;
|
|
309 gint ld;
|
|
310
|
|
311 if (!a || !b || !a->filled || !b->filled) return 0.0;
|
|
312
|
|
313
|
|
314 min = 1.0 - min;
|
|
315 sim = 0.0;
|
|
316 ld = 0;
|
|
317
|
|
318 for (j = 0; j < 1024; j+= 32)
|
|
319 {
|
|
320 for (i = j; i < j + 32; i++)
|
|
321 {
|
|
322 gint cr, cg, cb;
|
|
323 gint cd;
|
|
324
|
|
325 cr = abs(a->avg_r[i] - b->avg_r[i]);
|
|
326 cg = abs(a->avg_g[i] - b->avg_g[i]);
|
|
327 cb = abs(a->avg_b[i] - b->avg_b[i]);
|
|
328
|
|
329 cd = cr + cg + cb;
|
|
330 sim += cd + abs(cd - ld);
|
|
331 ld = cd / 3;
|
|
332 }
|
|
333 /* check for abort, if so return 0.0 */
|
|
334 if ((gdouble)sim / (255.0 * 1024.0 * 4.0) > min) return 0.0;
|
|
335 }
|
|
336
|
|
337 return (1.0 - ((gdouble)sim / (255.0 * 1024.0 * 4.0)) );
|
|
338 }
|
|
339 #endif
|
|
340
|
|
341 gdouble image_sim_compare(ImageSimilarityData *a, ImageSimilarityData *b)
|
|
342 {
|
|
343 gint sim;
|
|
344 gint i;
|
|
345
|
|
346 if (!a || !b || !a->filled || !b->filled) return 0.0;
|
|
347
|
|
348 sim = 0.0;
|
|
349
|
|
350 for (i = 0; i < 1024; i++)
|
|
351 {
|
|
352 sim += abs(a->avg_r[i] - b->avg_r[i]);
|
|
353 sim += abs(a->avg_g[i] - b->avg_g[i]);
|
|
354 sim += abs(a->avg_b[i] - b->avg_b[i]);
|
|
355 }
|
|
356
|
|
357 return 1.0 - ((gdouble)sim / (255.0 * 1024.0 * 3.0));
|
|
358 }
|
|
359
|
|
360 /* this uses a cutoff point so that it can abort early when it gets to
|
|
361 * a point that can simply no longer make the cut-off point.
|
|
362 */
|
|
363 gdouble image_sim_compare_fast(ImageSimilarityData *a, ImageSimilarityData *b, gdouble min)
|
|
364 {
|
|
365 gint sim;
|
|
366 gint i;
|
|
367 gint j;
|
|
368
|
|
369 #ifdef ALTERNATE_INCLUDE_COMPARE_CHANGE
|
|
370 if (alternate_enabled) return alternate_image_sim_compare_fast(a, b, min);
|
|
371 #endif
|
|
372
|
|
373 if (!a || !b || !a->filled || !b->filled) return 0.0;
|
|
374
|
|
375 min = 1.0 - min;
|
|
376 sim = 0.0;
|
|
377
|
|
378 for (j = 0; j < 1024; j+= 32)
|
|
379 {
|
|
380 for (i = j; i < j + 32; i++)
|
|
381 {
|
|
382 sim += abs(a->avg_r[i] - b->avg_r[i]);
|
|
383 sim += abs(a->avg_g[i] - b->avg_g[i]);
|
|
384 sim += abs(a->avg_b[i] - b->avg_b[i]);
|
|
385 }
|
|
386 /* check for abort, if so return 0.0 */
|
|
387 if ((gdouble)sim / (255.0 * 1024.0 * 3.0) > min) return 0.0;
|
|
388 }
|
|
389
|
|
390 return (1.0 - ((gdouble)sim / (255.0 * 1024.0 * 3.0)) );
|
|
391 }
|