comparison src/similar.c @ 9:d907d608745f

Sync to GQview 1.5.9 release. ######## DO NOT BASE ENHANCEMENTS OR TRANSLATION UPDATES ON CODE IN THIS CVS! This CVS is never up to date with current development and is provided solely for reference purposes, please use the latest official release package when making any changes or translation updates. ########
author gqview
date Sat, 26 Feb 2005 00:13:35 +0000
parents
children f6e307c7bad6
comparison
equal deleted inserted replaced
8:e0d0593d519e 9:d907d608745f
1 /*
2 * GQview
3 * (C) 2004 John Ellis
4 *
5 * Author: John Ellis
6 *
7 * This software is released under the GNU General Public License (GNU GPL).
8 * Please read the included file COPYING for more information.
9 * This software comes with no warranty of any kind, use at your own risk!
10 */
11
12
13 #include "gqview.h"
14 #include "similar.h"
15
16 /*
17 * These functions are intended to find images with similar color content. For
18 * example when an image was saved at different compression levels or dimensions
19 * (scaled down/up) the contents are similar, but these files do not match by file
20 * size, dimensions, or checksum.
21 *
22 * These functions create a 32 x 32 array for each color channel (red, green, blue).
23 * The array represents the average color of each corresponding part of the
24 * image. (imagine the image cut into 1024 rectangles, or a 32 x 32 grid.
25 * Each grid is then processed for the average color value, this is what
26 * is stored in the array)
27 *
28 * To compare two images, generate a ImageSimilarityData for each image, then
29 * pass them to the compare function. The return value is the percent match
30 * of the two images. (for this, simple comparisons are used, basically the return
31 * is an average of the corresponding array differences)
32 *
33 * for image_sim_compare(), the return is 0.0 to 1.0:
34 * 1.0 for exact matches (an image is compared to itself)
35 * 0.0 for exact opposite images (compare an all black to an all white image)
36 * generally only a match of > 0.85 are significant at all, and >.95 is useful to
37 * find images that have been re-saved to other formats, dimensions, or compression.
38 */
39
40
41 /*
42 * The experimental (alternate) algorithm is only for testing of new techniques to
43 * improve the result, and hopes to reduce false positives.
44 */
45
46 static gint alternate_enabled = FALSE;
47
48 void image_sim_alternate_set(gint enable)
49 {
50 alternate_enabled = enable;
51 }
52
53 gint image_sim_alternate_enabled(void)
54 {
55 return alternate_enabled;
56 }
57
58 ImageSimilarityData *image_sim_new(void)
59 {
60 ImageSimilarityData *sd = g_new0(ImageSimilarityData, 1);
61
62 return sd;
63 }
64
65 void image_sim_free(ImageSimilarityData *sd)
66 {
67 g_free(sd);
68 }
69
70 #if 0
71 static void image_sim_channel_expand(guint8 *pix, gint len)
72 {
73 guint8 l, h;
74 gint i;
75 gdouble scale;
76
77 /* set the start values */
78 l = h = pix[0];
79
80 /* find min/max */
81 for (i = 0; i < len; i++)
82 {
83 if (pix[i] < l) l = pix[i];
84 if (pix[i] > h) h = pix[i];
85 }
86
87 /* calc scale from range */
88 if (l != h)
89 {
90 scale = 255.0 / (gdouble)(h - l);
91 }
92 else
93 {
94 scale = 1.0;
95 }
96
97 for (i = 0; i < len; i++)
98 {
99 pix[i] = (guint8)((gdouble)pix[i] - l * scale);
100 }
101 }
102 #endif
103
104 static int image_sim_channel_eq_sort_cb(const void *a, const void *b)
105 {
106 gint *pa = (void *)a;
107 gint *pb = (void *)b;
108 if (pa[1] < pb[1]) return -1;
109 if (pa[1] > pb[1]) return 1;
110 return 0;
111 }
112
113 static void image_sim_channel_equal(guint8 *pix, gint len)
114 {
115 gint *buf;
116 gint i;
117 gint p;
118
119 buf = g_new0(gint, len * 2);
120
121 p = 0;
122 for (i = 0; i < len; i++)
123 {
124 buf[p] = i;
125 p++;
126 buf[p] = (gint)pix[i];
127 p++;
128 }
129
130 qsort (buf, len, sizeof(gint) * 2, image_sim_channel_eq_sort_cb);
131
132 p = 0;
133 for (i = 0; i < len; i++)
134 {
135 gint n;
136
137 n = buf[p];
138 p+= 2;
139 pix[n] = (guint8)(255 * i / len);
140 }
141
142 g_free(buf);
143 }
144
145 static void image_sim_channel_norm(guint8 *pix, gint len)
146 {
147 guint8 l, h;
148 gint i;
149 gdouble scale;
150
151 l = h = pix[0];
152
153 for (i = 0; i < len; i++)
154 {
155 if (pix[i] < l) l = pix[i];
156 if (pix[i] > h) h = pix[i];
157 }
158
159 scale = (h-l !=0) ? 255.0 / (gdouble)(h - l) : 1.0;
160
161 for (i = 0; i < len; i++)
162 {
163 pix[i] = (guint8)((gdouble)(pix[i] - l) * scale);
164 }
165 }
166
167 /*
168 * define these to enable various components of the experimental compare functions
169 *
170 * Convert the thumbprint to greyscale (ignore all color information when comparing)
171 * #define ALTERNATE_USES_GREYSCALE 1
172 *
173 * Take into account the difference in change from one pixel to the next
174 * #define ALTERNATE_INCLUDE_COMPARE_CHANGE 1
175 */
176
177 void image_sim_alternate_processing(ImageSimilarityData *sd)
178 {
179 #ifdef ALTERNATE_USES_GREYSCALE
180 gint i;
181 #endif
182
183 if (!alternate_enabled) return;
184
185 image_sim_channel_norm(sd->avg_r, sizeof(sd->avg_r));
186 image_sim_channel_norm(sd->avg_g, sizeof(sd->avg_g));
187 image_sim_channel_norm(sd->avg_b, sizeof(sd->avg_b));
188
189 image_sim_channel_equal(sd->avg_r, sizeof(sd->avg_r));
190 image_sim_channel_equal(sd->avg_g, sizeof(sd->avg_g));
191 image_sim_channel_equal(sd->avg_b, sizeof(sd->avg_b));
192
193 #ifdef ALTERNATE_USES_GREYSCALE
194 for (i = 0; i < sizeof(sd->avg_r); i++)
195 {
196 guint8 n;
197
198 n = (guint8)((gint)(sd->avg_r[i] + sd->avg_g[i] + sd->avg_b[i]) / 3);
199 sd->avg_r[i] = sd->avg_g[i] = sd->avg_b[i] = n;
200 }
201 #endif
202 }
203
204 void image_sim_fill_data(ImageSimilarityData *sd, GdkPixbuf *pixbuf)
205 {
206 gint w, h;
207 gint rs;
208 guchar *pix;
209 gint has_alpha;
210 gint p_step;
211
212 guchar *p;
213 gint i;
214 gint j;
215 gint x_inc, y_inc;
216 gint xs, ys;
217
218 gint x_small = FALSE; /* if less than 32 w or h, set TRUE */
219 gint y_small = FALSE;
220
221 if (!sd || !pixbuf) return;
222
223 w = gdk_pixbuf_get_width(pixbuf);
224 h = gdk_pixbuf_get_height(pixbuf);
225 rs = gdk_pixbuf_get_rowstride(pixbuf);
226 pix = gdk_pixbuf_get_pixels(pixbuf);
227 has_alpha = gdk_pixbuf_get_has_alpha(pixbuf);
228
229 p_step = has_alpha ? 4 : 3;
230 x_inc = w / 32;
231 y_inc = h / 32;
232
233 if (x_inc < 1)
234 {
235 x_inc = 1;
236 x_small = TRUE;
237 }
238 if (y_inc < 1)
239 {
240 y_inc = 1;
241 y_small = TRUE;
242 }
243
244 j = 0;
245
246 for (ys = 0; ys < 32; ys++)
247 {
248 if (y_small) j = (gdouble)h / 32 * ys;
249
250 i = 0;
251
252 for (xs = 0; xs < 32; xs++)
253 {
254 gint x, y;
255 gint r, g, b;
256 gint t;
257
258 if (x_small) i = (gdouble)w / 32 * xs;
259
260 r = g = b = 0;
261
262 for (y = j; y < j + y_inc; y++)
263 {
264 p = pix + (y * rs) + (i * p_step);
265 for (x = i; x < i + x_inc; x++)
266 {
267 r += *p; p++;
268 g += *p; p++;
269 b += *p; p++;
270 if (has_alpha) p++;
271 }
272 }
273
274 t = x_inc * y_inc;
275 r /= t;
276 g /= t;
277 b /= t;
278
279 t = ys * 32 + xs;
280 sd->avg_r[t] = r;
281 sd->avg_g[t] = g;
282 sd->avg_b[t] = b;
283
284 i += x_inc;
285 }
286
287 j += y_inc;
288 }
289
290 sd->filled = TRUE;
291 }
292
293 ImageSimilarityData *image_sim_new_from_pixbuf(GdkPixbuf *pixbuf)
294 {
295 ImageSimilarityData *sd;
296
297 sd = image_sim_new();
298 image_sim_fill_data(sd, pixbuf);
299
300 return sd;
301 }
302
303 #ifdef ALTERNATE_INCLUDE_COMPARE_CHANGE
304 static gdouble alternate_image_sim_compare_fast(ImageSimilarityData *a, ImageSimilarityData *b, gdouble min)
305 {
306 gint sim;
307 gint i;
308 gint j;
309 gint ld;
310
311 if (!a || !b || !a->filled || !b->filled) return 0.0;
312
313
314 min = 1.0 - min;
315 sim = 0.0;
316 ld = 0;
317
318 for (j = 0; j < 1024; j+= 32)
319 {
320 for (i = j; i < j + 32; i++)
321 {
322 gint cr, cg, cb;
323 gint cd;
324
325 cr = abs(a->avg_r[i] - b->avg_r[i]);
326 cg = abs(a->avg_g[i] - b->avg_g[i]);
327 cb = abs(a->avg_b[i] - b->avg_b[i]);
328
329 cd = cr + cg + cb;
330 sim += cd + abs(cd - ld);
331 ld = cd / 3;
332 }
333 /* check for abort, if so return 0.0 */
334 if ((gdouble)sim / (255.0 * 1024.0 * 4.0) > min) return 0.0;
335 }
336
337 return (1.0 - ((gdouble)sim / (255.0 * 1024.0 * 4.0)) );
338 }
339 #endif
340
341 gdouble image_sim_compare(ImageSimilarityData *a, ImageSimilarityData *b)
342 {
343 gint sim;
344 gint i;
345
346 if (!a || !b || !a->filled || !b->filled) return 0.0;
347
348 sim = 0.0;
349
350 for (i = 0; i < 1024; i++)
351 {
352 sim += abs(a->avg_r[i] - b->avg_r[i]);
353 sim += abs(a->avg_g[i] - b->avg_g[i]);
354 sim += abs(a->avg_b[i] - b->avg_b[i]);
355 }
356
357 return 1.0 - ((gdouble)sim / (255.0 * 1024.0 * 3.0));
358 }
359
360 /* this uses a cutoff point so that it can abort early when it gets to
361 * a point that can simply no longer make the cut-off point.
362 */
363 gdouble image_sim_compare_fast(ImageSimilarityData *a, ImageSimilarityData *b, gdouble min)
364 {
365 gint sim;
366 gint i;
367 gint j;
368
369 #ifdef ALTERNATE_INCLUDE_COMPARE_CHANGE
370 if (alternate_enabled) return alternate_image_sim_compare_fast(a, b, min);
371 #endif
372
373 if (!a || !b || !a->filled || !b->filled) return 0.0;
374
375 min = 1.0 - min;
376 sim = 0.0;
377
378 for (j = 0; j < 1024; j+= 32)
379 {
380 for (i = j; i < j + 32; i++)
381 {
382 sim += abs(a->avg_r[i] - b->avg_r[i]);
383 sim += abs(a->avg_g[i] - b->avg_g[i]);
384 sim += abs(a->avg_b[i] - b->avg_b[i]);
385 }
386 /* check for abort, if so return 0.0 */
387 if ((gdouble)sim / (255.0 * 1024.0 * 3.0) > min) return 0.0;
388 }
389
390 return (1.0 - ((gdouble)sim / (255.0 * 1024.0 * 3.0)) );
391 }