Mercurial > libavcodec.hg
annotate vp8dsp.c @ 12043:f9a0bd0888a4 libavcodec
mpegaudio: call ff_mpegaudiodec_init_mmx() only from float decoder
The mmx code is floating-point only, and this function does not know
from which decoder it is called. Without this change, the integer
decoder only "works" because the size of the context struct is smaller
in this case, and the mmx init function writes the function pointer
outside the allocated context.
author | mru |
---|---|
date | Thu, 01 Jul 2010 23:21:17 +0000 |
parents | f96187e79438 |
children | 812e23197d64 |
rev | line source |
---|---|
11921 | 1 /** |
2 * VP8 compatible video decoder | |
3 * | |
4 * Copyright (C) 2010 David Conrad | |
5 * Copyright (C) 2010 Ronald S. Bultje | |
6 * | |
7 * This file is part of FFmpeg. | |
8 * | |
9 * FFmpeg is free software; you can redistribute it and/or | |
10 * modify it under the terms of the GNU Lesser General Public | |
11 * License as published by the Free Software Foundation; either | |
12 * version 2.1 of the License, or (at your option) any later version. | |
13 * | |
14 * FFmpeg is distributed in the hope that it will be useful, | |
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
17 * Lesser General Public License for more details. | |
18 * | |
19 * You should have received a copy of the GNU Lesser General Public | |
20 * License along with FFmpeg; if not, write to the Free Software | |
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
22 */ | |
23 | |
24 #include "dsputil.h" | |
25 #include "vp8dsp.h" | |
26 | |
27 // TODO: Maybe add dequant | |
28 static void vp8_luma_dc_wht_c(DCTELEM block[4][4][16], DCTELEM dc[16]) | |
29 { | |
30 int i, t0, t1, t2, t3; | |
31 | |
32 for (i = 0; i < 4; i++) { | |
33 t0 = dc[0*4+i] + dc[3*4+i]; | |
34 t1 = dc[1*4+i] + dc[2*4+i]; | |
35 t2 = dc[1*4+i] - dc[2*4+i]; | |
36 t3 = dc[0*4+i] - dc[3*4+i]; | |
37 | |
38 dc[0*4+i] = t0 + t1; | |
39 dc[1*4+i] = t3 + t2; | |
40 dc[2*4+i] = t0 - t1; | |
41 dc[3*4+i] = t3 - t2; | |
42 } | |
43 | |
44 for (i = 0; i < 4; i++) { | |
45 t0 = dc[i*4+0] + dc[i*4+3] + 3; // rounding | |
46 t1 = dc[i*4+1] + dc[i*4+2]; | |
47 t2 = dc[i*4+1] - dc[i*4+2]; | |
48 t3 = dc[i*4+0] - dc[i*4+3] + 3; // rounding | |
49 | |
50 *block[i][0] = (t0 + t1) >> 3; | |
51 *block[i][1] = (t3 + t2) >> 3; | |
52 *block[i][2] = (t0 - t1) >> 3; | |
53 *block[i][3] = (t3 - t2) >> 3; | |
54 } | |
55 } | |
56 | |
57 | |
58 #define MUL_20091(a) ((((a)*20091) >> 16) + (a)) | |
59 #define MUL_35468(a) (((a)*35468) >> 16) | |
60 | |
61 static void vp8_idct_add_c(uint8_t *dst, DCTELEM block[16], int stride) | |
62 { | |
63 int i, t0, t1, t2, t3; | |
12007
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
64 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; |
11921 | 65 DCTELEM tmp[16]; |
66 | |
67 for (i = 0; i < 4; i++) { | |
68 t0 = block[0*4+i] + block[2*4+i]; | |
69 t1 = block[0*4+i] - block[2*4+i]; | |
70 t2 = MUL_35468(block[1*4+i]) - MUL_20091(block[3*4+i]); | |
71 t3 = MUL_20091(block[1*4+i]) + MUL_35468(block[3*4+i]); | |
72 | |
73 tmp[i*4+0] = t0 + t3; | |
74 tmp[i*4+1] = t1 + t2; | |
75 tmp[i*4+2] = t1 - t2; | |
76 tmp[i*4+3] = t0 - t3; | |
77 } | |
78 | |
79 for (i = 0; i < 4; i++) { | |
80 t0 = tmp[0*4+i] + tmp[2*4+i]; | |
81 t1 = tmp[0*4+i] - tmp[2*4+i]; | |
82 t2 = MUL_35468(tmp[1*4+i]) - MUL_20091(tmp[3*4+i]); | |
83 t3 = MUL_20091(tmp[1*4+i]) + MUL_35468(tmp[3*4+i]); | |
84 | |
12007
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
85 dst[0] = cm[dst[0] + ((t0 + t3 + 4) >> 3)]; |
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
86 dst[1] = cm[dst[1] + ((t1 + t2 + 4) >> 3)]; |
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
87 dst[2] = cm[dst[2] + ((t1 - t2 + 4) >> 3)]; |
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
88 dst[3] = cm[dst[3] + ((t0 - t3 + 4) >> 3)]; |
11921 | 89 dst += stride; |
90 } | |
91 } | |
92 | |
93 static void vp8_idct_dc_add_c(uint8_t *dst, DCTELEM block[16], int stride) | |
94 { | |
95 int i, dc = (block[0] + 4) >> 3; | |
12007
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
96 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP + dc; |
11921 | 97 |
98 for (i = 0; i < 4; i++) { | |
12007
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
99 dst[0] = cm[dst[0]]; |
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
100 dst[1] = cm[dst[1]]; |
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
101 dst[2] = cm[dst[2]]; |
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
102 dst[3] = cm[dst[3]]; |
11921 | 103 dst += stride; |
104 } | |
105 } | |
106 | |
107 | |
108 // because I like only having two parameters to pass functions... | |
/*
 * Declares p3..p0 and q0..q3 as the eight samples straddling an edge:
 * p3..p0 are read at offsets -4..-1 strides from p, q0..q3 at offsets
 * 0..3 strides. Expects `p` and `stride` to be in scope at the point of
 * use. Every variable is tagged av_unused because each filter helper
 * only needs a subset of them.
 */
#define LOAD_PIXELS\
    int av_unused p3 = p[-4*stride];\
    int av_unused p2 = p[-3*stride];\
    int av_unused p1 = p[-2*stride];\
    int av_unused p0 = p[-1*stride];\
    int av_unused q0 = p[ 0*stride];\
    int av_unused q1 = p[ 1*stride];\
    int av_unused q2 = p[ 2*stride];\
    int av_unused q3 = p[ 3*stride];

/*
 * Clamp n to the signed 8-bit range [-128, 127] through the crop table:
 * bias by 0x80, clamp to 0..255, remove the bias again. Expects a crop-table
 * pointer named `cm` to be in scope. The argument is parenthesised so that
 * expressions binding more loosely than '+' (e.g. a ?: expression) are
 * clamped as a whole.
 */
#define clip_int8(n) (cm[(n)+0x80]-0x80)
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
120 |
11921 | 121 static av_always_inline void filter_common(uint8_t *p, int stride, int is4tap) |
122 { | |
123 LOAD_PIXELS | |
124 int a, f1, f2; | |
12007
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
125 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; |
11921 | 126 |
127 a = 3*(q0 - p0); | |
128 | |
129 if (is4tap) | |
12007
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
130 a += clip_int8(p1 - q1); |
11921 | 131 |
12007
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
132 a = clip_int8(a); |
11921 | 133 |
134 // We deviate from the spec here with c(a+3) >> 3 | |
135 // since that's what libvpx does. | |
136 f1 = FFMIN(a+4, 127) >> 3; | |
137 f2 = FFMIN(a+3, 127) >> 3; | |
138 | |
139 // Despite what the spec says, we do need to clamp here to | |
140 // be bitexact with libvpx. | |
12007
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
141 p[-1*stride] = cm[p0 + f2]; |
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
142 p[ 0*stride] = cm[q0 - f1]; |
11921 | 143 |
144 // only used for _inner on blocks without high edge variance | |
145 if (!is4tap) { | |
146 a = (f1+1)>>1; | |
12007
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
147 p[-2*stride] = cm[p1 + a]; |
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
148 p[ 1*stride] = cm[q1 - a]; |
11921 | 149 } |
150 } | |
151 | |
152 static av_always_inline int simple_limit(uint8_t *p, int stride, int flim) | |
153 { | |
154 LOAD_PIXELS | |
155 return 2*FFABS(p0-q0) + (FFABS(p1-q1) >> 1) <= flim; | |
156 } | |
157 | |
158 /** | |
159 * E - limit at the macroblock edge | |
160 * I - limit for interior difference | |
161 */ | |
162 static av_always_inline int normal_limit(uint8_t *p, int stride, int E, int I) | |
163 { | |
164 LOAD_PIXELS | |
165 return simple_limit(p, stride, 2*E+I) | |
166 && FFABS(p3-p2) <= I && FFABS(p2-p1) <= I && FFABS(p1-p0) <= I | |
167 && FFABS(q3-q2) <= I && FFABS(q2-q1) <= I && FFABS(q1-q0) <= I; | |
168 } | |
169 | |
170 // high edge variance | |
171 static av_always_inline int hev(uint8_t *p, int stride, int thresh) | |
172 { | |
173 LOAD_PIXELS | |
174 return FFABS(p1-p0) > thresh || FFABS(q1-q0) > thresh; | |
175 } | |
176 | |
177 static av_always_inline void filter_mbedge(uint8_t *p, int stride) | |
178 { | |
179 int a0, a1, a2, w; | |
12007
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
180 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; |
11921 | 181 |
182 LOAD_PIXELS | |
183 | |
12007
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
184 w = clip_int8(p1-q1); |
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
185 w = clip_int8(w + 3*(q0-p0)); |
11921 | 186 |
187 a0 = (27*w + 63) >> 7; | |
188 a1 = (18*w + 63) >> 7; | |
189 a2 = ( 9*w + 63) >> 7; | |
190 | |
12007
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
191 p[-3*stride] = cm[p2 + a2]; |
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
192 p[-2*stride] = cm[p1 + a1]; |
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
193 p[-1*stride] = cm[p0 + a0]; |
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
194 p[ 0*stride] = cm[q0 - a0]; |
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
195 p[ 1*stride] = cm[q1 - a1]; |
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
196 p[ 2*stride] = cm[q2 - a2]; |
11921 | 197 } |
198 | |
/*
 * Generates the normal loop filters for one direction and edge length:
 *
 *   vp8_<dir>_loop_filter<size>_c        for macroblock edges
 *   vp8_<dir>_loop_filter<size>_inner_c  for inner edges
 *
 * stridea is the step from one filtered position to the next along the
 * edge; strideb is the step across the edge that is handed to the helpers.
 * Both variants only touch positions that pass normal_limit(). On the
 * macroblock-edge variant high-variance positions get filter_common(.., 1)
 * and the others filter_mbedge(); on the inner variant the choice is
 * between filter_common(.., 1) and filter_common(.., 0).
 */
#define LOOP_FILTER(dir, size, stridea, strideb) \
static void vp8_ ## dir ## _loop_filter ## size ## _c(uint8_t *dst, int stride,\
                                     int flim_E, int flim_I, int hev_thresh)\
{\
    int n;\
\
    for (n = 0; n < size; n++) {\
        uint8_t *edge = dst + n*stridea;\
\
        if (!normal_limit(edge, strideb, flim_E, flim_I))\
            continue;\
        if (hev(edge, strideb, hev_thresh))\
            filter_common(edge, strideb, 1);\
        else\
            filter_mbedge(edge, strideb);\
    }\
}\
\
static void vp8_ ## dir ## _loop_filter ## size ## _inner_c(uint8_t *dst, int stride,\
                                      int flim_E, int flim_I, int hev_thresh)\
{\
    int n;\
\
    for (n = 0; n < size; n++) {\
        uint8_t *edge = dst + n*stridea;\
\
        if (!normal_limit(edge, strideb, flim_E, flim_I))\
            continue;\
        if (hev(edge, strideb, hev_thresh))\
            filter_common(edge, strideb, 1);\
        else\
            filter_common(edge, strideb, 0);\
    }\
}

LOOP_FILTER(v, 16, 1, stride)
LOOP_FILTER(h, 16, stride, 1)
LOOP_FILTER(v,  8, 1, stride)
LOOP_FILTER(h,  8, stride, 1)
233 | |
/**
 * Simple loop filter, "v" direction: visits 16 consecutive bytes starting
 * at dst and filters across the edge (step = stride) wherever
 * simple_limit() passes.
 *
 * @param dst    first sample on the far side of the edge
 * @param stride distance in bytes between successive rows
 * @param flim   filter limit passed to simple_limit()
 */
static void vp8_v_loop_filter_simple_c(uint8_t *dst, int stride, int flim)
{
    int col;

    for (col = 0; col < 16; col++) {
        uint8_t *edge = dst + col;

        if (simple_limit(edge, stride, flim))
            filter_common(edge, stride, 1);
    }
}
242 | |
/**
 * Simple loop filter, "h" direction: visits 16 positions spaced stride
 * bytes apart starting at dst and filters across the edge (step = 1)
 * wherever simple_limit() passes.
 *
 * @param dst    first sample on the far side of the edge
 * @param stride distance in bytes between successive rows
 * @param flim   filter limit passed to simple_limit()
 */
static void vp8_h_loop_filter_simple_c(uint8_t *dst, int stride, int flim)
{
    int row;

    for (row = 0; row < 16; row++) {
        uint8_t *edge = dst + row * stride;

        if (simple_limit(edge, 1, flim))
            filter_common(edge, 1, 1);
    }
}
251 | |
/*
 * Sub-pixel interpolation taps, one row per fractional position 1..7
 * (users index the table with mx-1 / my-1).
 *
 * Only magnitudes are stored: FILTER_6TAP / FILTER_4TAP subtract taps 1
 * and 4 and add the others. With those signs applied every row sums to
 * 128, matching the ">> 7" normalisation in the filter macros. Rows whose
 * outer taps (0 and 5) are both zero are effectively 4-tap filters.
 */
static const uint8_t subpel_filters[7][6] = {
    /* tap:  0    1    2    3    4    5 */
    {        0,   6, 123,  12,   1,   0 },  /* position 1 */
    {        2,  11, 108,  36,   8,   1 },  /* position 2 */
    {        0,   9,  93,  50,   6,   0 },  /* position 3 */
    {        3,  16,  77,  77,  16,   3 },  /* position 4 */
    {        0,   6,  50,  93,   9,   0 },  /* position 5 */
    {        1,   8,  36, 108,  11,   2 },  /* position 6 */
    {        0,   1,  12, 123,   6,   0 },  /* position 7 */
};
261 | |
/*
 * Generates put_vp8_pixels<WIDTH>_c(): a plain block copy of h rows of
 * WIDTH bytes from src to dst, each buffer advancing by its own stride.
 * The trailing x / y parameters are accepted but not used.
 */
#define PUT_PIXELS(WIDTH) \
static void put_vp8_pixels ## WIDTH ##_c(uint8_t *dst, int dststride, uint8_t *src, int srcstride, int h, int x, int y) { \
    int row = 0; \
    while (row < h) { \
        memcpy(dst, src, WIDTH); \
        dst += dststride; \
        src += srcstride; \
        row++; \
    } \
}

PUT_PIXELS(16)
PUT_PIXELS(8)
PUT_PIXELS(4)
11921 | 273 |
/*
 * Sub-pixel interpolation kernels. Both expand to an expression that
 * evaluates the filter around sample src[x], rounds with +64, shifts by 7
 * and clamps through the crop table.
 *
 *   src    - base pointer of the samples
 *   F      - pointer to six tap magnitudes (a row of subpel_filters);
 *            taps 1 and 4 are subtracted, the others added
 *   stride - distance between successive taps
 *
 * The expansions rely on two names being in scope at the point of use:
 * `x` (current sample index) and `cm` (crop-table pointer).
 * FILTER_4TAP ignores the outer taps F[0] and F[5].
 *
 * Every macro argument is parenthesised so that compound expressions
 * (for instance a stride written as `a + b`) expand correctly.
 */
#define FILTER_6TAP(src, F, stride) \
    cm[((F)[2]*(src)[x+0*(stride)] - (F)[1]*(src)[x-1*(stride)] + (F)[0]*(src)[x-2*(stride)] + \
        (F)[3]*(src)[x+1*(stride)] - (F)[4]*(src)[x+2*(stride)] + (F)[5]*(src)[x+3*(stride)] + 64) >> 7]

#define FILTER_4TAP(src, F, stride) \
    cm[((F)[2]*(src)[x+0*(stride)] - (F)[1]*(src)[x-1*(stride)] + \
        (F)[3]*(src)[x+1*(stride)] - (F)[4]*(src)[x+2*(stride)] + 64) >> 7]
11921 | 281 |
/*
 * Generates put_vp8_epel<SIZE>_<FILTERNAME>_c(): horizontal-only sub-pixel
 * interpolation of a SIZE-wide, h-high block. The taps come from
 * subpel_filters[mx-1]; my is not used. FILTER is FILTER_4TAP or
 * FILTER_6TAP, which read the locals `x` and `cm` by name.
 */
#define VP8_EPEL_H(SIZE, FILTER, FILTERNAME) \
static void put_vp8_epel ## SIZE ## _ ## FILTERNAME ## _c(uint8_t *dst, int dststride, uint8_t *src, int srcstride, int h, int mx, int my) \
{ \
    const uint8_t *taps = subpel_filters[mx-1]; \
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; \
    int x, row; \
\
    for (row = 0; row < h; row++, dst += dststride, src += srcstride) { \
        for (x = 0; x < SIZE; x++) { \
            dst[x] = FILTER(src, taps, 1); \
        } \
    } \
}
/*
 * Generates put_vp8_epel<SIZE>_<FILTERNAME>_c(): vertical-only sub-pixel
 * interpolation of a SIZE-wide, h-high block. The taps come from
 * subpel_filters[my-1] and are applied srcstride bytes apart; mx is not
 * used. FILTER is FILTER_4TAP or FILTER_6TAP, which read the locals `x`
 * and `cm` by name.
 */
#define VP8_EPEL_V(SIZE, FILTER, FILTERNAME) \
static void put_vp8_epel ## SIZE ## _ ## FILTERNAME ## _c(uint8_t *dst, int dststride, uint8_t *src, int srcstride, int h, int mx, int my) \
{ \
    const uint8_t *taps = subpel_filters[my-1]; \
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; \
    int x, row; \
\
    for (row = 0; row < h; row++, dst += dststride, src += srcstride) { \
        for (x = 0; x < SIZE; x++) { \
            dst[x] = FILTER(src, taps, srcstride); \
        } \
    } \
}
/*
 * Generates put_vp8_epel<SIZE>_<FILTERNAME>_c(): two-pass (horizontal then
 * vertical) sub-pixel interpolation of a SIZE-wide, h-high block.
 *
 * Pass 1 filters h+5 source rows horizontally (starting two rows above the
 * block) with subpel_filters[mx-1] into a stack buffer of SIZE-byte rows.
 * Pass 2 filters that buffer vertically with subpel_filters[my-1],
 * starting at its third row, and writes the result to dst.
 *
 * The buffer holds 2*SIZE+5 rows, so h must not exceed 2*SIZE.
 * FILTERX / FILTERY are FILTER_4TAP or FILTER_6TAP, which read the locals
 * `x` and `cm` by name.
 */
#define VP8_EPEL_HV(SIZE, FILTERX, FILTERY, FILTERNAME) \
static void put_vp8_epel ## SIZE ## _ ## FILTERNAME ## _c(uint8_t *dst, int dststride, uint8_t *src, int srcstride, int h, int mx, int my) \
{ \
    const uint8_t *taps = subpel_filters[mx-1]; \
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; \
    uint8_t tmp_array[(2*SIZE+5)*SIZE]; \
    uint8_t *tmp = tmp_array; \
    int x, row; \
\
    src -= 2*srcstride; \
    for (row = 0; row < h+5; row++, tmp += SIZE, src += srcstride) { \
        for (x = 0; x < SIZE; x++) { \
            tmp[x] = FILTERX(src, taps, 1); \
        } \
    } \
\
    taps = subpel_filters[my-1]; \
    tmp  = tmp_array + 2*SIZE; \
    for (row = 0; row < h; row++, dst += dststride, tmp += SIZE) { \
        for (x = 0; x < SIZE; x++) { \
            dst[x] = FILTERY(tmp, taps, SIZE); \
        } \
    } \
}
337 | |
338 VP8_EPEL_H(16, FILTER_4TAP, h4) | |
339 VP8_EPEL_H(8, FILTER_4TAP, h4) | |
340 VP8_EPEL_H(4, FILTER_4TAP, h4) | |
341 VP8_EPEL_H(16, FILTER_6TAP, h6) | |
342 VP8_EPEL_H(8, FILTER_6TAP, h6) | |
343 VP8_EPEL_H(4, FILTER_6TAP, h6) | |
344 VP8_EPEL_V(16, FILTER_4TAP, v4) | |
345 VP8_EPEL_V(8, FILTER_4TAP, v4) | |
346 VP8_EPEL_V(4, FILTER_4TAP, v4) | |
347 VP8_EPEL_V(16, FILTER_6TAP, v6) | |
348 VP8_EPEL_V(8, FILTER_6TAP, v6) | |
349 VP8_EPEL_V(4, FILTER_6TAP, v6) | |
350 VP8_EPEL_HV(16, FILTER_4TAP, FILTER_4TAP, h4v4) | |
351 VP8_EPEL_HV(8, FILTER_4TAP, FILTER_4TAP, h4v4) | |
352 VP8_EPEL_HV(4, FILTER_4TAP, FILTER_4TAP, h4v4) | |
353 VP8_EPEL_HV(16, FILTER_4TAP, FILTER_6TAP, h4v6) | |
354 VP8_EPEL_HV(8, FILTER_4TAP, FILTER_6TAP, h4v6) | |
355 VP8_EPEL_HV(4, FILTER_4TAP, FILTER_6TAP, h4v6) | |
356 VP8_EPEL_HV(16, FILTER_6TAP, FILTER_4TAP, h6v4) | |
357 VP8_EPEL_HV(8, FILTER_6TAP, FILTER_4TAP, h6v4) | |
358 VP8_EPEL_HV(4, FILTER_6TAP, FILTER_4TAP, h6v4) | |
359 VP8_EPEL_HV(16, FILTER_6TAP, FILTER_6TAP, h6v6) | |
360 VP8_EPEL_HV(8, FILTER_6TAP, FILTER_6TAP, h6v6) | |
361 VP8_EPEL_HV(4, FILTER_6TAP, FILTER_6TAP, h6v6) | |
362 | |
/*
 * Generates the bilinear interpolators for one block width:
 *
 *   put_vp8_bilinear<SIZE>_h_c   horizontal only, weights (8-mx, mx)
 *   put_vp8_bilinear<SIZE>_v_c   vertical only,   weights (8-my, my)
 *   put_vp8_bilinear<SIZE>_hv_c  horizontal pass into a stack buffer of
 *                                h+1 rows, then a vertical pass
 *
 * Every result is rounded with (+4) >> 3.
 *
 * Parameters of the generated functions:
 *   dst, stride - destination block and its row stride
 *   src, s2     - source block and its row stride
 *   h           - number of rows to produce (at most 2*SIZE for the hv
 *                 variant, whose buffer holds 2*SIZE+1 rows)
 *   mx, my      - horizontal / vertical fraction in eighths
 *
 * The source is stepped with its own stride (s2), both when advancing to
 * the next row and when fetching the row below for vertical filtering, so
 * the functions are correct when source and destination strides differ.
 */
#define VP8_BILINEAR(SIZE) \
static void put_vp8_bilinear ## SIZE ## _h_c(uint8_t *dst, int stride, uint8_t *src, int s2, int h, int mx, int my) \
{ \
    int a = 8-mx, b = mx; \
    int x, y; \
\
    for (y = 0; y < h; y++) { \
        for (x = 0; x < SIZE; x++) \
            dst[x] = (a*src[x] + b*src[x+1] + 4) >> 3; \
        dst += stride; \
        src += s2; \
    } \
} \
static void put_vp8_bilinear ## SIZE ## _v_c(uint8_t *dst, int stride, uint8_t *src, int s2, int h, int mx, int my) \
{ \
    int c = 8-my, d = my; \
    int x, y; \
\
    for (y = 0; y < h; y++) { \
        for (x = 0; x < SIZE; x++) \
            dst[x] = (c*src[x] + d*src[x+s2] + 4) >> 3; \
        dst += stride; \
        src += s2; \
    } \
} \
\
static void put_vp8_bilinear ## SIZE ## _hv_c(uint8_t *dst, int stride, uint8_t *src, int s2, int h, int mx, int my) \
{ \
    int a = 8-mx, b = mx; \
    int c = 8-my, d = my; \
    int x, y; \
    uint8_t tmp_array[(2*SIZE+1)*SIZE]; \
    uint8_t *tmp = tmp_array; \
\
    for (y = 0; y < h+1; y++) { \
        for (x = 0; x < SIZE; x++) \
            tmp[x] = (a*src[x] + b*src[x+1] + 4) >> 3; \
        tmp += SIZE; \
        src += s2; \
    } \
\
    tmp = tmp_array; \
\
    for (y = 0; y < h; y++) { \
        for (x = 0; x < SIZE; x++) \
            dst[x] = (c*tmp[x] + d*tmp[x+SIZE] + 4) >> 3; \
        dst += stride; \
        tmp += SIZE; \
    } \
}

VP8_BILINEAR(16)
VP8_BILINEAR(8)
VP8_BILINEAR(4)
417 | |
/*
 * Fill one row (IDX) of dsp->put_vp8_epel_pixels_tab with the C
 * interpolators for blocks SIZE pixels wide. Expects a pointer named `dsp`
 * in scope. In the [IDX][v][h] slot, index 0 is the plain copy / no filter
 * in that direction, 1 the 4-tap and 2 the 6-tap filter.
 *
 * Wrapped in do { } while (0) so that the macro behaves as a single
 * statement, e.g. under an unbraced if.
 */
#define VP8_MC_FUNC(IDX, SIZE) \
    do { \
        dsp->put_vp8_epel_pixels_tab[IDX][0][0] = put_vp8_pixels ## SIZE ## _c; \
        dsp->put_vp8_epel_pixels_tab[IDX][0][1] = put_vp8_epel ## SIZE ## _h4_c; \
        dsp->put_vp8_epel_pixels_tab[IDX][0][2] = put_vp8_epel ## SIZE ## _h6_c; \
        dsp->put_vp8_epel_pixels_tab[IDX][1][0] = put_vp8_epel ## SIZE ## _v4_c; \
        dsp->put_vp8_epel_pixels_tab[IDX][1][1] = put_vp8_epel ## SIZE ## _h4v4_c; \
        dsp->put_vp8_epel_pixels_tab[IDX][1][2] = put_vp8_epel ## SIZE ## _h6v4_c; \
        dsp->put_vp8_epel_pixels_tab[IDX][2][0] = put_vp8_epel ## SIZE ## _v6_c; \
        dsp->put_vp8_epel_pixels_tab[IDX][2][1] = put_vp8_epel ## SIZE ## _h4v6_c; \
        dsp->put_vp8_epel_pixels_tab[IDX][2][2] = put_vp8_epel ## SIZE ## _h6v6_c; \
    } while (0)

/*
 * Same layout for dsp->put_vp8_bilinear_pixels_tab. The bilinear code has a
 * single filter per direction, so indices 1 and 2 of each dimension are
 * given the same function.
 */
#define VP8_BILINEAR_MC_FUNC(IDX, SIZE) \
    do { \
        dsp->put_vp8_bilinear_pixels_tab[IDX][0][0] = put_vp8_pixels ## SIZE ## _c; \
        dsp->put_vp8_bilinear_pixels_tab[IDX][0][1] = put_vp8_bilinear ## SIZE ## _h_c; \
        dsp->put_vp8_bilinear_pixels_tab[IDX][0][2] = put_vp8_bilinear ## SIZE ## _h_c; \
        dsp->put_vp8_bilinear_pixels_tab[IDX][1][0] = put_vp8_bilinear ## SIZE ## _v_c; \
        dsp->put_vp8_bilinear_pixels_tab[IDX][1][1] = put_vp8_bilinear ## SIZE ## _hv_c; \
        dsp->put_vp8_bilinear_pixels_tab[IDX][1][2] = put_vp8_bilinear ## SIZE ## _hv_c; \
        dsp->put_vp8_bilinear_pixels_tab[IDX][2][0] = put_vp8_bilinear ## SIZE ## _v_c; \
        dsp->put_vp8_bilinear_pixels_tab[IDX][2][1] = put_vp8_bilinear ## SIZE ## _hv_c; \
        dsp->put_vp8_bilinear_pixels_tab[IDX][2][2] = put_vp8_bilinear ## SIZE ## _hv_c; \
    } while (0)
439 | |
11921 | 440 av_cold void ff_vp8dsp_init(VP8DSPContext *dsp) |
441 { | |
442 dsp->vp8_luma_dc_wht = vp8_luma_dc_wht_c; | |
443 dsp->vp8_idct_add = vp8_idct_add_c; | |
444 dsp->vp8_idct_dc_add = vp8_idct_dc_add_c; | |
445 | |
446 dsp->vp8_v_loop_filter16 = vp8_v_loop_filter16_c; | |
447 dsp->vp8_h_loop_filter16 = vp8_h_loop_filter16_c; | |
448 dsp->vp8_v_loop_filter8 = vp8_v_loop_filter8_c; | |
449 dsp->vp8_h_loop_filter8 = vp8_h_loop_filter8_c; | |
450 | |
451 dsp->vp8_v_loop_filter16_inner = vp8_v_loop_filter16_inner_c; | |
452 dsp->vp8_h_loop_filter16_inner = vp8_h_loop_filter16_inner_c; | |
453 dsp->vp8_v_loop_filter8_inner = vp8_v_loop_filter8_inner_c; | |
454 dsp->vp8_h_loop_filter8_inner = vp8_h_loop_filter8_inner_c; | |
455 | |
456 dsp->vp8_v_loop_filter_simple = vp8_v_loop_filter_simple_c; | |
457 dsp->vp8_h_loop_filter_simple = vp8_h_loop_filter_simple_c; | |
458 | |
459 VP8_MC_FUNC(0, 16); | |
460 VP8_MC_FUNC(1, 8); | |
461 VP8_MC_FUNC(2, 4); | |
11974 | 462 |
463 VP8_BILINEAR_MC_FUNC(0, 16); | |
464 VP8_BILINEAR_MC_FUNC(1, 8); | |
465 VP8_BILINEAR_MC_FUNC(2, 4); | |
11975 | 466 |
11985 | 467 if (HAVE_MMX) |
11975 | 468 ff_vp8dsp_init_x86(dsp); |
12011 | 469 if (HAVE_ALTIVEC) |
470 ff_vp8dsp_init_altivec(dsp); | |
11921 | 471 } |