Mercurial > libavcodec.hg
annotate elbg.c @ 12454:f4355cd85faa libavcodec
Port latest x264 deblock asm (before they moved to using NV12 as internal
format), LGPL'ed with permission from Jason and Loren. This includes mmx2
code, so remove inline asm from h264dsp_mmx.c accordingly.
author | rbultje |
---|---|
date | Fri, 03 Sep 2010 16:52:46 +0000 |
parents | 25e9cb2b9477 |
children |
rev | line source |
---|---|
5095 | 1 /* |
5219 | 2 * Copyright (C) 2007 Vitor Sessak <vitor1001@gmail.com> |
5095 | 3 * |
4 * This file is part of FFmpeg. | |
5 * | |
6 * FFmpeg is free software; you can redistribute it and/or | |
7 * modify it under the terms of the GNU Lesser General Public | |
8 * License as published by the Free Software Foundation; either | |
9 * version 2.1 of the License, or (at your option) any later version. | |
10 * | |
11 * FFmpeg is distributed in the hope that it will be useful, | |
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 * Lesser General Public License for more details. | |
15 * | |
16 * You should have received a copy of the GNU Lesser General Public | |
17 * License along with FFmpeg; if not, write to the Free Software | |
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
19 */ | |
20 | |
21 /** | |
11644
7dd2a45249a9
Remove explicit filename from Doxygen @file commands.
diego
parents:
10762
diff
changeset
|
22 * @file |
5095 | 23 * Codebook Generator using the ELBG algorithm |
24 */ | |
25 | |
26 #include <string.h> | |
27 | |
9154
aa459306ee59
Use FLG pseudo-random number generator in RoQ and ELBG
vitor
parents:
8718
diff
changeset
|
28 #include "libavutil/lfg.h" |
5095 | 29 #include "elbg.h" |
30 #include "avcodec.h" | |
31 | |
32 #define DELTA_ERR_MAX 0.1 ///< Precision of the ELBG algorithm (as percentual error) | |
33 | |
34 /** | |
35 * In the ELBG jargon, a cell is the set of points that are closest to a | |
36 * codebook entry. Not to be confused with a RoQ Video cell. */ | |
37 typedef struct cell_s { | |
38 int index; | |
39 struct cell_s *next; | |
40 } cell; | |
41 | |
42 /** | |
43 * ELBG internal data | |
44 */ | |
45 typedef struct{ | |
46 int error; | |
47 int dim; | |
48 int numCB; | |
49 int *codebook; | |
50 cell **cells; | |
51 int *utility; | |
52 int *utility_inc; | |
53 int *nearest_cb; | |
54 int *points; | |
9154
aa459306ee59
Use FLG pseudo-random number generator in RoQ and ELBG
vitor
parents:
8718
diff
changeset
|
55 AVLFG *rand_state; |
11955 | 56 int *scratchbuf; |
5095 | 57 } elbg_data; |
58 | |
59 static inline int distance_limited(int *a, int *b, int dim, int limit) | |
60 { | |
61 int i, dist=0; | |
62 for (i=0; i<dim; i++) { | |
63 dist += (a[i] - b[i])*(a[i] - b[i]); | |
64 if (dist > limit) | |
65 return INT_MAX; | |
66 } | |
67 | |
68 return dist; | |
69 } | |
70 | |
71 static inline void vect_division(int *res, int *vect, int div, int dim) | |
72 { | |
73 int i; | |
74 if (div > 1) | |
75 for (i=0; i<dim; i++) | |
76 res[i] = ROUNDED_DIV(vect[i],div); | |
77 else if (res != vect) | |
78 memcpy(res, vect, dim*sizeof(int)); | |
79 | |
80 } | |
81 | |
82 static int eval_error_cell(elbg_data *elbg, int *centroid, cell *cells) | |
83 { | |
84 int error=0; | |
85 for (; cells; cells=cells->next) | |
86 error += distance_limited(centroid, elbg->points + cells->index*elbg->dim, elbg->dim, INT_MAX); | |
87 | |
88 return error; | |
89 } | |
90 | |
91 static int get_closest_codebook(elbg_data *elbg, int index) | |
92 { | |
93 int i, pick=0, diff, diff_min = INT_MAX; | |
94 for (i=0; i<elbg->numCB; i++) | |
95 if (i != index) { | |
96 diff = distance_limited(elbg->codebook + i*elbg->dim, elbg->codebook + index*elbg->dim, elbg->dim, diff_min); | |
97 if (diff < diff_min) { | |
98 pick = i; | |
99 diff_min = diff; | |
100 } | |
101 } | |
102 return pick; | |
103 } | |
104 | |
105 static int get_high_utility_cell(elbg_data *elbg) | |
106 { | |
107 int i=0; | |
108 /* Using linear search, do binary if it ever turns to be speed critical */ | |
9160 | 109 int r = av_lfg_get(elbg->rand_state)%elbg->utility_inc[elbg->numCB-1] + 1; |
5095 | 110 while (elbg->utility_inc[i] < r) |
111 i++; | |
7352
c2318e551ff5
When picking a "high utility centroid" do not pick one
vitor
parents:
7351
diff
changeset
|
112 |
c2318e551ff5
When picking a "high utility centroid" do not pick one
vitor
parents:
7351
diff
changeset
|
113 assert(!elbg->cells[i]); |
c2318e551ff5
When picking a "high utility centroid" do not pick one
vitor
parents:
7351
diff
changeset
|
114 |
5095 | 115 return i; |
116 } | |
117 | |
118 /** | |
119 * Implementation of the simple LBG algorithm for just two codebooks | |
120 */ | |
11955 | 121 static int simple_lbg(elbg_data *elbg, |
122 int dim, | |
5122 | 123 int *centroid[3], |
5095 | 124 int newutility[3], |
125 int *points, | |
126 cell *cells) | |
127 { | |
128 int i, idx; | |
129 int numpoints[2] = {0,0}; | |
11955 | 130 int *newcentroid[2] = { |
131 elbg->scratchbuf + 3*dim, | |
132 elbg->scratchbuf + 4*dim | |
133 }; | |
5095 | 134 cell *tempcell; |
135 | |
11955 | 136 memset(newcentroid[0], 0, 2 * dim * sizeof(*newcentroid[0])); |
5095 | 137 |
138 newutility[0] = | |
139 newutility[1] = 0; | |
140 | |
141 for (tempcell = cells; tempcell; tempcell=tempcell->next) { | |
142 idx = distance_limited(centroid[0], points + tempcell->index*dim, dim, INT_MAX)>= | |
143 distance_limited(centroid[1], points + tempcell->index*dim, dim, INT_MAX); | |
144 numpoints[idx]++; | |
145 for (i=0; i<dim; i++) | |
146 newcentroid[idx][i] += points[tempcell->index*dim + i]; | |
147 } | |
148 | |
149 vect_division(centroid[0], newcentroid[0], numpoints[0], dim); | |
150 vect_division(centroid[1], newcentroid[1], numpoints[1], dim); | |
151 | |
152 for (tempcell = cells; tempcell; tempcell=tempcell->next) { | |
153 int dist[2] = {distance_limited(centroid[0], points + tempcell->index*dim, dim, INT_MAX), | |
154 distance_limited(centroid[1], points + tempcell->index*dim, dim, INT_MAX)}; | |
155 int idx = dist[0] > dist[1]; | |
156 newutility[idx] += dist[idx]; | |
157 } | |
158 | |
159 return newutility[0] + newutility[1]; | |
160 } | |
161 | |
162 static void get_new_centroids(elbg_data *elbg, int huc, int *newcentroid_i, | |
163 int *newcentroid_p) | |
164 { | |
165 cell *tempcell; | |
11955 | 166 int *min = newcentroid_i; |
167 int *max = newcentroid_p; | |
5095 | 168 int i; |
169 | |
170 for (i=0; i< elbg->dim; i++) { | |
171 min[i]=INT_MAX; | |
172 max[i]=0; | |
173 } | |
174 | |
175 for (tempcell = elbg->cells[huc]; tempcell; tempcell = tempcell->next) | |
176 for(i=0; i<elbg->dim; i++) { | |
177 min[i]=FFMIN(min[i], elbg->points[tempcell->index*elbg->dim + i]); | |
178 max[i]=FFMAX(max[i], elbg->points[tempcell->index*elbg->dim + i]); | |
179 } | |
180 | |
181 for (i=0; i<elbg->dim; i++) { | |
11955 | 182 int ni = min[i] + (max[i] - min[i])/3; |
183 int np = min[i] + (2*(max[i] - min[i]))/3; | |
184 newcentroid_i[i] = ni; | |
185 newcentroid_p[i] = np; | |
5095 | 186 } |
187 } | |
188 | |
189 /** | |
190 * Add the points in the low utility cell to its closest cell. Split the high | |
191 * utility cell, putting the separed points in the (now empty) low utility | |
192 * cell. | |
193 * | |
194 * @param elbg Internal elbg data | |
195 * @param indexes {luc, huc, cluc} | |
196 * @param newcentroid A vector with the position of the new centroids | |
197 */ | |
198 static void shift_codebook(elbg_data *elbg, int *indexes, | |
5122 | 199 int *newcentroid[3]) |
5095 | 200 { |
201 cell *tempdata; | |
202 cell **pp = &elbg->cells[indexes[2]]; | |
203 | |
204 while(*pp) | |
205 pp= &(*pp)->next; | |
206 | |
207 *pp = elbg->cells[indexes[0]]; | |
208 | |
209 elbg->cells[indexes[0]] = NULL; | |
210 tempdata = elbg->cells[indexes[1]]; | |
211 elbg->cells[indexes[1]] = NULL; | |
212 | |
213 while(tempdata) { | |
214 cell *tempcell2 = tempdata->next; | |
215 int idx = distance_limited(elbg->points + tempdata->index*elbg->dim, | |
216 newcentroid[0], elbg->dim, INT_MAX) > | |
217 distance_limited(elbg->points + tempdata->index*elbg->dim, | |
218 newcentroid[1], elbg->dim, INT_MAX); | |
219 | |
220 tempdata->next = elbg->cells[indexes[idx]]; | |
221 elbg->cells[indexes[idx]] = tempdata; | |
222 tempdata = tempcell2; | |
223 } | |
224 } | |
225 | |
226 static void evaluate_utility_inc(elbg_data *elbg) | |
227 { | |
228 int i, inc=0; | |
229 | |
230 for (i=0; i < elbg->numCB; i++) { | |
231 if (elbg->numCB*elbg->utility[i] > elbg->error) | |
232 inc += elbg->utility[i]; | |
233 elbg->utility_inc[i] = inc; | |
234 } | |
235 } | |
236 | |
237 | |
238 static void update_utility_and_n_cb(elbg_data *elbg, int idx, int newutility) | |
239 { | |
240 cell *tempcell; | |
241 | |
242 elbg->utility[idx] = newutility; | |
243 for (tempcell=elbg->cells[idx]; tempcell; tempcell=tempcell->next) | |
244 elbg->nearest_cb[tempcell->index] = idx; | |
245 } | |
246 | |
247 /** | |
248 * Evaluate if a shift lower the error. If it does, call shift_codebooks | |
249 * and update elbg->error, elbg->utility and elbg->nearest_cb. | |
250 * | |
251 * @param elbg Internal elbg data | |
12056
25e9cb2b9477
Fix misspelled parameter names in Doxygen documentation.
diego
parents:
11955
diff
changeset
|
252 * @param idx {luc (low utility cell, huc (high utility cell), cluc (closest cell to low utility cell)} |
5095 | 253 */ |
254 static void try_shift_candidate(elbg_data *elbg, int idx[3]) | |
255 { | |
256 int j, k, olderror=0, newerror, cont=0; | |
257 int newutility[3]; | |
11955 | 258 int *newcentroid[3] = { |
259 elbg->scratchbuf, | |
260 elbg->scratchbuf + elbg->dim, | |
261 elbg->scratchbuf + 2*elbg->dim | |
262 }; | |
5095 | 263 cell *tempcell; |
264 | |
265 for (j=0; j<3; j++) | |
266 olderror += elbg->utility[idx[j]]; | |
267 | |
268 memset(newcentroid[2], 0, elbg->dim*sizeof(int)); | |
269 | |
270 for (k=0; k<2; k++) | |
271 for (tempcell=elbg->cells[idx[2*k]]; tempcell; tempcell=tempcell->next) { | |
272 cont++; | |
273 for (j=0; j<elbg->dim; j++) | |
274 newcentroid[2][j] += elbg->points[tempcell->index*elbg->dim + j]; | |
275 } | |
276 | |
277 vect_division(newcentroid[2], newcentroid[2], cont, elbg->dim); | |
278 | |
279 get_new_centroids(elbg, idx[1], newcentroid[0], newcentroid[1]); | |
280 | |
281 newutility[2] = eval_error_cell(elbg, newcentroid[2], elbg->cells[idx[0]]); | |
282 newutility[2] += eval_error_cell(elbg, newcentroid[2], elbg->cells[idx[2]]); | |
283 | |
284 newerror = newutility[2]; | |
285 | |
11955 | 286 newerror += simple_lbg(elbg, elbg->dim, newcentroid, newutility, elbg->points, |
5095 | 287 elbg->cells[idx[1]]); |
288 | |
289 if (olderror > newerror) { | |
11955 | 290 shift_codebook(elbg, idx, newcentroid); |
5095 | 291 |
292 elbg->error += newerror - olderror; | |
293 | |
294 for (j=0; j<3; j++) | |
295 update_utility_and_n_cb(elbg, idx[j], newutility[j]); | |
296 | |
297 evaluate_utility_inc(elbg); | |
298 } | |
299 } | |
300 | |
301 /** | |
302 * Implementation of the ELBG block | |
303 */ | |
304 static void do_shiftings(elbg_data *elbg) | |
305 { | |
306 int idx[3]; | |
307 | |
308 evaluate_utility_inc(elbg); | |
309 | |
310 for (idx[0]=0; idx[0] < elbg->numCB; idx[0]++) | |
311 if (elbg->numCB*elbg->utility[idx[0]] < elbg->error) { | |
312 if (elbg->utility_inc[elbg->numCB-1] == 0) | |
313 return; | |
314 | |
7354
456957d86106
My commit at r14340 was not the right solution. For a monochromatic
vitor
parents:
7353
diff
changeset
|
315 idx[1] = get_high_utility_cell(elbg); |
5095 | 316 idx[2] = get_closest_codebook(elbg, idx[0]); |
317 | |
7354
456957d86106
My commit at r14340 was not the right solution. For a monochromatic
vitor
parents:
7353
diff
changeset
|
318 if (idx[1] != idx[0] && idx[1] != idx[2]) |
456957d86106
My commit at r14340 was not the right solution. For a monochromatic
vitor
parents:
7353
diff
changeset
|
319 try_shift_candidate(elbg, idx); |
5095 | 320 } |
321 } | |
322 | |
323 #define BIG_PRIME 433494437LL | |
324 | |
325 void ff_init_elbg(int *points, int dim, int numpoints, int *codebook, | |
326 int numCB, int max_steps, int *closest_cb, | |
9154
aa459306ee59
Use FLG pseudo-random number generator in RoQ and ELBG
vitor
parents:
8718
diff
changeset
|
327 AVLFG *rand_state) |
5095 | 328 { |
329 int i, k; | |
330 | |
331 if (numpoints > 24*numCB) { | |
332 /* ELBG is very costly for a big number of points. So if we have a lot | |
333 of them, get a good initial codebook to save on iterations */ | |
334 int *temp_points = av_malloc(dim*(numpoints/8)*sizeof(int)); | |
335 for (i=0; i<numpoints/8; i++) { | |
336 k = (i*BIG_PRIME) % numpoints; | |
337 memcpy(temp_points + i*dim, points + k*dim, dim*sizeof(int)); | |
338 } | |
339 | |
340 ff_init_elbg(temp_points, dim, numpoints/8, codebook, numCB, 2*max_steps, closest_cb, rand_state); | |
341 ff_do_elbg(temp_points, dim, numpoints/8, codebook, numCB, 2*max_steps, closest_cb, rand_state); | |
342 | |
343 av_free(temp_points); | |
344 | |
345 } else // If not, initialize the codebook with random positions | |
346 for (i=0; i < numCB; i++) | |
347 memcpy(codebook + i*dim, points + ((i*BIG_PRIME)%numpoints)*dim, | |
348 dim*sizeof(int)); | |
349 | |
350 } | |
351 | |
352 void ff_do_elbg(int *points, int dim, int numpoints, int *codebook, | |
353 int numCB, int max_steps, int *closest_cb, | |
9154
aa459306ee59
Use FLG pseudo-random number generator in RoQ and ELBG
vitor
parents:
8718
diff
changeset
|
354 AVLFG *rand_state) |
5095 | 355 { |
356 int dist; | |
357 elbg_data elbg_d; | |
358 elbg_data *elbg = &elbg_d; | |
359 int i, j, k, last_error, steps=0; | |
360 int *dist_cb = av_malloc(numpoints*sizeof(int)); | |
361 int *size_part = av_malloc(numCB*sizeof(int)); | |
362 cell *list_buffer = av_malloc(numpoints*sizeof(cell)); | |
363 cell *free_cells; | |
10762
a4842fc20ac8
Small ELBG optimization: use last pixel as a initial guess for the codebook
vitor
parents:
9160
diff
changeset
|
364 int best_dist, best_idx = 0; |
5095 | 365 |
366 elbg->error = INT_MAX; | |
367 elbg->dim = dim; | |
368 elbg->numCB = numCB; | |
369 elbg->codebook = codebook; | |
370 elbg->cells = av_malloc(numCB*sizeof(cell *)); | |
371 elbg->utility = av_malloc(numCB*sizeof(int)); | |
372 elbg->nearest_cb = closest_cb; | |
373 elbg->points = points; | |
374 elbg->utility_inc = av_malloc(numCB*sizeof(int)); | |
11955 | 375 elbg->scratchbuf = av_malloc(5*dim*sizeof(int)); |
5095 | 376 |
377 elbg->rand_state = rand_state; | |
378 | |
379 do { | |
380 free_cells = list_buffer; | |
381 last_error = elbg->error; | |
382 steps++; | |
383 memset(elbg->utility, 0, numCB*sizeof(int)); | |
384 memset(elbg->cells, 0, numCB*sizeof(cell *)); | |
385 | |
386 elbg->error = 0; | |
387 | |
388 /* This loop evaluate the actual Voronoi partition. It is the most | |
389 costly part of the algorithm. */ | |
390 for (i=0; i < numpoints; i++) { | |
10762
a4842fc20ac8
Small ELBG optimization: use last pixel as a initial guess for the codebook
vitor
parents:
9160
diff
changeset
|
391 best_dist = distance_limited(elbg->points + i*elbg->dim, elbg->codebook + best_idx*elbg->dim, dim, INT_MAX); |
5095 | 392 for (k=0; k < elbg->numCB; k++) { |
10762
a4842fc20ac8
Small ELBG optimization: use last pixel as a initial guess for the codebook
vitor
parents:
9160
diff
changeset
|
393 dist = distance_limited(elbg->points + i*elbg->dim, elbg->codebook + k*elbg->dim, dim, best_dist); |
a4842fc20ac8
Small ELBG optimization: use last pixel as a initial guess for the codebook
vitor
parents:
9160
diff
changeset
|
394 if (dist < best_dist) { |
a4842fc20ac8
Small ELBG optimization: use last pixel as a initial guess for the codebook
vitor
parents:
9160
diff
changeset
|
395 best_dist = dist; |
a4842fc20ac8
Small ELBG optimization: use last pixel as a initial guess for the codebook
vitor
parents:
9160
diff
changeset
|
396 best_idx = k; |
5095 | 397 } |
398 } | |
10762
a4842fc20ac8
Small ELBG optimization: use last pixel as a initial guess for the codebook
vitor
parents:
9160
diff
changeset
|
399 elbg->nearest_cb[i] = best_idx; |
a4842fc20ac8
Small ELBG optimization: use last pixel as a initial guess for the codebook
vitor
parents:
9160
diff
changeset
|
400 dist_cb[i] = best_dist; |
5095 | 401 elbg->error += dist_cb[i]; |
402 elbg->utility[elbg->nearest_cb[i]] += dist_cb[i]; | |
403 free_cells->index = i; | |
404 free_cells->next = elbg->cells[elbg->nearest_cb[i]]; | |
405 elbg->cells[elbg->nearest_cb[i]] = free_cells; | |
406 free_cells++; | |
407 } | |
408 | |
409 do_shiftings(elbg); | |
410 | |
411 memset(size_part, 0, numCB*sizeof(int)); | |
412 | |
413 memset(elbg->codebook, 0, elbg->numCB*dim*sizeof(int)); | |
414 | |
415 for (i=0; i < numpoints; i++) { | |
416 size_part[elbg->nearest_cb[i]]++; | |
417 for (j=0; j < elbg->dim; j++) | |
418 elbg->codebook[elbg->nearest_cb[i]*elbg->dim + j] += | |
419 elbg->points[i*elbg->dim + j]; | |
420 } | |
421 | |
422 for (i=0; i < elbg->numCB; i++) | |
423 vect_division(elbg->codebook + i*elbg->dim, | |
424 elbg->codebook + i*elbg->dim, size_part[i], elbg->dim); | |
425 | |
426 } while(((last_error - elbg->error) > DELTA_ERR_MAX*elbg->error) && | |
427 (steps < max_steps)); | |
428 | |
429 av_free(dist_cb); | |
430 av_free(size_part); | |
431 av_free(elbg->utility); | |
432 av_free(list_buffer); | |
433 av_free(elbg->cells); | |
434 av_free(elbg->utility_inc); | |
11955 | 435 av_free(elbg->scratchbuf); |
5095 | 436 } |