Mercurial > geeqie.yaz
annotate src/similar.c @ 1000:4fe8f9656107
For the sake of consistency, use glib basic types everywhere.
author | zas_ |
---|---|
date | Tue, 26 Aug 2008 22:22:51 +0000 |
parents | d27b4184ceb8 |
children | 3096a47232ec |
rev | line source |
---|---|
9 | 1 /* |
196 | 2 * Geeqie |
9 | 3 * (C) 2004 John Ellis |
475 | 4 * Copyright (C) 2008 The Geeqie Team |
9 | 5 * |
6 * Author: John Ellis | |
7 * | |
8 * This software is released under the GNU General Public License (GNU GPL). | |
9 * Please read the included file COPYING for more information. | |
10 * This software comes with no warranty of any kind, use at your own risk! | |
11 */ | |
12 | |
13 | |
281 | 14 #include "main.h" |
9 | 15 #include "similar.h" |
16 | |
17 /* | |
18 * These functions are intended to find images with similar color content. For | |
19 * example when an image was saved at different compression levels or dimensions | |
20 * (scaled down/up) the contents are similar, but these files do not match by file | |
21 * size, dimensions, or checksum. | |
22 * | |
23 * These functions create a 32 x 32 array for each color channel (red, green, blue). | |
24 * The array represents the average color of each corresponding part of the | |
25 * image. (imagine the image cut into 1024 rectangles, or a 32 x 32 grid. | |
26 * Each grid is then processed for the average color value, this is what | |
27 * is stored in the array) | |
28 * | |
29 * To compare two images, generate a ImageSimilarityData for each image, then | |
30 * pass them to the compare function. The return value is the percent match | |
31 * of the two images. (for this, simple comparisons are used, basically the return | |
32 * is an average of the corresponding array differences) | |
33 * | |
34 * for image_sim_compare(), the return is 0.0 to 1.0: | |
35 * 1.0 for exact matches (an image is compared to itself) | |
36 * 0.0 for exact opposite images (compare an all black to an all white image) | |
37 * generally only a match of > 0.85 are significant at all, and >.95 is useful to | |
38 * find images that have been re-saved to other formats, dimensions, or compression. | |
39 */ | |
40 | |
41 | |
42 /* | |
43 * The experimental (alternate) algorithm is only for testing of new techniques to | |
44 * improve the result, and hopes to reduce false positives. | |
45 */ | |
46 | |
47 static gint alternate_enabled = FALSE; | |
48 | |
49 void image_sim_alternate_set(gint enable) | |
50 { | |
51 alternate_enabled = enable; | |
52 } | |
53 | |
54 gint image_sim_alternate_enabled(void) | |
55 { | |
56 return alternate_enabled; | |
57 } | |
58 | |
59 ImageSimilarityData *image_sim_new(void) | |
60 { | |
61 ImageSimilarityData *sd = g_new0(ImageSimilarityData, 1); | |
62 | |
63 return sd; | |
64 } | |
65 | |
66 void image_sim_free(ImageSimilarityData *sd) | |
67 { | |
68 g_free(sd); | |
69 } | |
70 | |
1000
4fe8f9656107
For the sake of consistency, use glib basic types everywhere.
zas_
parents:
927
diff
changeset
|
71 static gint image_sim_channel_eq_sort_cb(const void *a, const void *b) |
9 | 72 { |
73 gint *pa = (void *)a; | |
74 gint *pb = (void *)b; | |
75 if (pa[1] < pb[1]) return -1; | |
76 if (pa[1] > pb[1]) return 1; | |
77 return 0; | |
78 } | |
79 | |
80 static void image_sim_channel_equal(guint8 *pix, gint len) | |
81 { | |
82 gint *buf; | |
83 gint i; | |
84 gint p; | |
85 | |
86 buf = g_new0(gint, len * 2); | |
87 | |
88 p = 0; | |
89 for (i = 0; i < len; i++) | |
90 { | |
91 buf[p] = i; | |
92 p++; | |
93 buf[p] = (gint)pix[i]; | |
94 p++; | |
95 } | |
96 | |
512
f9bf33be53ff
Remove whitespace between function name and first parenthesis for the sake of consistency.
zas_
parents:
475
diff
changeset
|
97 qsort(buf, len, sizeof(gint) * 2, image_sim_channel_eq_sort_cb); |
9 | 98 |
99 p = 0; | |
100 for (i = 0; i < len; i++) | |
101 { | |
102 gint n; | |
103 | |
104 n = buf[p]; | |
927 | 105 p += 2; |
9 | 106 pix[n] = (guint8)(255 * i / len); |
107 } | |
108 | |
109 g_free(buf); | |
110 } | |
111 | |
112 static void image_sim_channel_norm(guint8 *pix, gint len) | |
113 { | |
927 | 114 guint8 l, h, delta; |
9 | 115 gint i; |
116 gdouble scale; | |
117 | |
118 l = h = pix[0]; | |
119 | |
120 for (i = 0; i < len; i++) | |
121 { | |
122 if (pix[i] < l) l = pix[i]; | |
123 if (pix[i] > h) h = pix[i]; | |
124 } | |
125 | |
927 | 126 delta = h - l; |
127 scale = (delta != 0) ? 255.0 / (gdouble)(delta) : 1.0; | |
9 | 128 |
129 for (i = 0; i < len; i++) | |
130 { | |
131 pix[i] = (guint8)((gdouble)(pix[i] - l) * scale); | |
132 } | |
133 } | |
134 | |
135 /* | |
136 * define these to enable various components of the experimental compare functions | |
137 * | |
138 * Convert the thumbprint to greyscale (ignore all color information when comparing) | |
139 * #define ALTERNATE_USES_GREYSCALE 1 | |
140 * | |
141 * Take into account the difference in change from one pixel to the next | |
142 * #define ALTERNATE_INCLUDE_COMPARE_CHANGE 1 | |
143 */ | |
144 | |
145 void image_sim_alternate_processing(ImageSimilarityData *sd) | |
146 { | |
147 #ifdef ALTERNATE_USES_GREYSCALE | |
148 gint i; | |
149 #endif | |
150 | |
151 if (!alternate_enabled) return; | |
442 | 152 |
9 | 153 image_sim_channel_norm(sd->avg_r, sizeof(sd->avg_r)); |
154 image_sim_channel_norm(sd->avg_g, sizeof(sd->avg_g)); | |
155 image_sim_channel_norm(sd->avg_b, sizeof(sd->avg_b)); | |
156 | |
157 image_sim_channel_equal(sd->avg_r, sizeof(sd->avg_r)); | |
158 image_sim_channel_equal(sd->avg_g, sizeof(sd->avg_g)); | |
159 image_sim_channel_equal(sd->avg_b, sizeof(sd->avg_b)); | |
160 | |
161 #ifdef ALTERNATE_USES_GREYSCALE | |
162 for (i = 0; i < sizeof(sd->avg_r); i++) | |
163 { | |
164 guint8 n; | |
442 | 165 |
9 | 166 n = (guint8)((gint)(sd->avg_r[i] + sd->avg_g[i] + sd->avg_b[i]) / 3); |
167 sd->avg_r[i] = sd->avg_g[i] = sd->avg_b[i] = n; | |
168 } | |
169 #endif | |
170 } | |
171 | |
172 void image_sim_fill_data(ImageSimilarityData *sd, GdkPixbuf *pixbuf) | |
173 { | |
174 gint w, h; | |
175 gint rs; | |
176 guchar *pix; | |
177 gint has_alpha; | |
178 gint p_step; | |
179 | |
180 guchar *p; | |
181 gint i; | |
182 gint j; | |
927 | 183 gint x_inc, y_inc, xy_inc; |
9 | 184 gint xs, ys; |
185 | |
186 gint x_small = FALSE; /* if less than 32 w or h, set TRUE */ | |
187 gint y_small = FALSE; | |
188 | |
189 if (!sd || !pixbuf) return; | |
190 | |
191 w = gdk_pixbuf_get_width(pixbuf); | |
192 h = gdk_pixbuf_get_height(pixbuf); | |
193 rs = gdk_pixbuf_get_rowstride(pixbuf); | |
194 pix = gdk_pixbuf_get_pixels(pixbuf); | |
195 has_alpha = gdk_pixbuf_get_has_alpha(pixbuf); | |
196 | |
197 p_step = has_alpha ? 4 : 3; | |
198 x_inc = w / 32; | |
199 y_inc = h / 32; | |
200 | |
201 if (x_inc < 1) | |
202 { | |
203 x_inc = 1; | |
204 x_small = TRUE; | |
205 } | |
206 if (y_inc < 1) | |
207 { | |
208 y_inc = 1; | |
209 y_small = TRUE; | |
210 } | |
211 | |
927 | 212 xy_inc = x_inc * y_inc; |
213 | |
9 | 214 j = 0; |
215 | |
216 for (ys = 0; ys < 32; ys++) | |
217 { | |
218 if (y_small) j = (gdouble)h / 32 * ys; | |
219 | |
220 i = 0; | |
221 | |
222 for (xs = 0; xs < 32; xs++) | |
223 { | |
224 gint x, y; | |
225 gint r, g, b; | |
226 gint t; | |
927 | 227 guchar *xpos; |
9 | 228 |
229 if (x_small) i = (gdouble)w / 32 * xs; | |
230 | |
231 r = g = b = 0; | |
927 | 232 xpos = pix + (i * p_step); |
9 | 233 |
234 for (y = j; y < j + y_inc; y++) | |
235 { | |
927 | 236 p = xpos + (y * rs); |
9 | 237 for (x = i; x < i + x_inc; x++) |
238 { | |
239 r += *p; p++; | |
240 g += *p; p++; | |
241 b += *p; p++; | |
242 if (has_alpha) p++; | |
243 } | |
244 } | |
245 | |
927 | 246 r /= xy_inc; |
247 g /= xy_inc; | |
248 b /= xy_inc; | |
9 | 249 |
250 t = ys * 32 + xs; | |
251 sd->avg_r[t] = r; | |
252 sd->avg_g[t] = g; | |
253 sd->avg_b[t] = b; | |
254 | |
255 i += x_inc; | |
256 } | |
257 | |
258 j += y_inc; | |
259 } | |
260 | |
261 sd->filled = TRUE; | |
262 } | |
263 | |
264 ImageSimilarityData *image_sim_new_from_pixbuf(GdkPixbuf *pixbuf) | |
265 { | |
266 ImageSimilarityData *sd; | |
267 | |
268 sd = image_sim_new(); | |
269 image_sim_fill_data(sd, pixbuf); | |
270 | |
271 return sd; | |
272 } | |
273 | |
274 #ifdef ALTERNATE_INCLUDE_COMPARE_CHANGE | |
275 static gdouble alternate_image_sim_compare_fast(ImageSimilarityData *a, ImageSimilarityData *b, gdouble min) | |
276 { | |
277 gint sim; | |
278 gint i; | |
279 gint j; | |
280 gint ld; | |
281 | |
282 if (!a || !b || !a->filled || !b->filled) return 0.0; | |
283 | |
284 min = 1.0 - min; | |
285 sim = 0.0; | |
286 ld = 0; | |
287 | |
927 | 288 for (j = 0; j < 1024; j += 32) |
9 | 289 { |
290 for (i = j; i < j + 32; i++) | |
291 { | |
292 gint cr, cg, cb; | |
293 gint cd; | |
294 | |
295 cr = abs(a->avg_r[i] - b->avg_r[i]); | |
296 cg = abs(a->avg_g[i] - b->avg_g[i]); | |
297 cb = abs(a->avg_b[i] - b->avg_b[i]); | |
298 | |
299 cd = cr + cg + cb; | |
300 sim += cd + abs(cd - ld); | |
301 ld = cd / 3; | |
302 } | |
303 /* check for abort, if so return 0.0 */ | |
304 if ((gdouble)sim / (255.0 * 1024.0 * 4.0) > min) return 0.0; | |
305 } | |
306 | |
307 return (1.0 - ((gdouble)sim / (255.0 * 1024.0 * 4.0)) ); | |
308 } | |
309 #endif | |
310 | |
311 gdouble image_sim_compare(ImageSimilarityData *a, ImageSimilarityData *b) | |
312 { | |
313 gint sim; | |
314 gint i; | |
315 | |
316 if (!a || !b || !a->filled || !b->filled) return 0.0; | |
317 | |
318 sim = 0.0; | |
319 | |
320 for (i = 0; i < 1024; i++) | |
321 { | |
322 sim += abs(a->avg_r[i] - b->avg_r[i]); | |
323 sim += abs(a->avg_g[i] - b->avg_g[i]); | |
324 sim += abs(a->avg_b[i] - b->avg_b[i]); | |
325 } | |
326 | |
327 return 1.0 - ((gdouble)sim / (255.0 * 1024.0 * 3.0)); | |
328 } | |
329 | |
330 /* this uses a cutoff point so that it can abort early when it gets to | |
331 * a point that can simply no longer make the cut-off point. | |
332 */ | |
333 gdouble image_sim_compare_fast(ImageSimilarityData *a, ImageSimilarityData *b, gdouble min) | |
334 { | |
335 gint sim; | |
336 gint i; | |
337 gint j; | |
338 | |
339 #ifdef ALTERNATE_INCLUDE_COMPARE_CHANGE | |
340 if (alternate_enabled) return alternate_image_sim_compare_fast(a, b, min); | |
341 #endif | |
342 | |
343 if (!a || !b || !a->filled || !b->filled) return 0.0; | |
344 | |
345 min = 1.0 - min; | |
346 sim = 0.0; | |
347 | |
927 | 348 for (j = 0; j < 1024; j += 32) |
9 | 349 { |
350 for (i = j; i < j + 32; i++) | |
351 { | |
352 sim += abs(a->avg_r[i] - b->avg_r[i]); | |
353 sim += abs(a->avg_g[i] - b->avg_g[i]); | |
354 sim += abs(a->avg_b[i] - b->avg_b[i]); | |
355 } | |
356 /* check for abort, if so return 0.0 */ | |
357 if ((gdouble)sim / (255.0 * 1024.0 * 3.0) > min) return 0.0; | |
358 } | |
359 | |
360 return (1.0 - ((gdouble)sim / (255.0 * 1024.0 * 3.0)) ); | |
361 } |