Mercurial > libavcodec.hg
annotate vp8dsp.c @ 12197:fbf4d5b1b664 libavcodec
Remove FF_MM_SSE2/3 flags for CPUs where SSE2/3 code is generally not faster than
regular MMX code; the Core1 CPU is one example. Instead, set a new flag,
FF_MM_SSE2/3SLOW, which can be checked for particular SSE2/3 functions that
have been checked specifically on such CPUs and are actually faster than
their MMX counterparts.
In addition, use this flag to enable particular VP8 and LPC SSE2 functions
that are faster than their MMX counterparts.
Based on a patch by Loren Merritt <lorenm AT u washington edu>.
author | rbultje |
---|---|
date | Mon, 19 Jul 2010 22:38:23 +0000 |
parents | 80b142c2e9f7 |
children | e08d65897115 |
rev | line source |
---|---|
11921 | 1 /** |
2 * VP8 compatible video decoder | |
3 * | |
4 * Copyright (C) 2010 David Conrad | |
5 * Copyright (C) 2010 Ronald S. Bultje | |
6 * | |
7 * This file is part of FFmpeg. | |
8 * | |
9 * FFmpeg is free software; you can redistribute it and/or | |
10 * modify it under the terms of the GNU Lesser General Public | |
11 * License as published by the Free Software Foundation; either | |
12 * version 2.1 of the License, or (at your option) any later version. | |
13 * | |
14 * FFmpeg is distributed in the hope that it will be useful, | |
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
17 * Lesser General Public License for more details. | |
18 * | |
19 * You should have received a copy of the GNU Lesser General Public | |
20 * License along with FFmpeg; if not, write to the Free Software | |
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
22 */ | |
23 | |
24 #include "dsputil.h" | |
25 #include "vp8dsp.h" | |
26 | |
27 // TODO: Maybe add dequant | |
28 static void vp8_luma_dc_wht_c(DCTELEM block[4][4][16], DCTELEM dc[16]) | |
29 { | |
30 int i, t0, t1, t2, t3; | |
31 | |
32 for (i = 0; i < 4; i++) { | |
33 t0 = dc[0*4+i] + dc[3*4+i]; | |
34 t1 = dc[1*4+i] + dc[2*4+i]; | |
35 t2 = dc[1*4+i] - dc[2*4+i]; | |
36 t3 = dc[0*4+i] - dc[3*4+i]; | |
37 | |
38 dc[0*4+i] = t0 + t1; | |
39 dc[1*4+i] = t3 + t2; | |
40 dc[2*4+i] = t0 - t1; | |
41 dc[3*4+i] = t3 - t2; | |
42 } | |
43 | |
44 for (i = 0; i < 4; i++) { | |
45 t0 = dc[i*4+0] + dc[i*4+3] + 3; // rounding | |
46 t1 = dc[i*4+1] + dc[i*4+2]; | |
47 t2 = dc[i*4+1] - dc[i*4+2]; | |
48 t3 = dc[i*4+0] - dc[i*4+3] + 3; // rounding | |
49 | |
50 *block[i][0] = (t0 + t1) >> 3; | |
51 *block[i][1] = (t3 + t2) >> 3; | |
52 *block[i][2] = (t0 - t1) >> 3; | |
53 *block[i][3] = (t3 - t2) >> 3; | |
54 } | |
55 } | |
56 | |
57 | |
58 #define MUL_20091(a) ((((a)*20091) >> 16) + (a)) | |
59 #define MUL_35468(a) (((a)*35468) >> 16) | |
60 | |
61 static void vp8_idct_add_c(uint8_t *dst, DCTELEM block[16], int stride) | |
62 { | |
63 int i, t0, t1, t2, t3; | |
12007
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
64 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; |
11921 | 65 DCTELEM tmp[16]; |
66 | |
67 for (i = 0; i < 4; i++) { | |
68 t0 = block[0*4+i] + block[2*4+i]; | |
69 t1 = block[0*4+i] - block[2*4+i]; | |
70 t2 = MUL_35468(block[1*4+i]) - MUL_20091(block[3*4+i]); | |
71 t3 = MUL_20091(block[1*4+i]) + MUL_35468(block[3*4+i]); | |
72 | |
73 tmp[i*4+0] = t0 + t3; | |
74 tmp[i*4+1] = t1 + t2; | |
75 tmp[i*4+2] = t1 - t2; | |
76 tmp[i*4+3] = t0 - t3; | |
77 } | |
78 | |
79 for (i = 0; i < 4; i++) { | |
80 t0 = tmp[0*4+i] + tmp[2*4+i]; | |
81 t1 = tmp[0*4+i] - tmp[2*4+i]; | |
82 t2 = MUL_35468(tmp[1*4+i]) - MUL_20091(tmp[3*4+i]); | |
83 t3 = MUL_20091(tmp[1*4+i]) + MUL_35468(tmp[3*4+i]); | |
84 | |
12007
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
85 dst[0] = cm[dst[0] + ((t0 + t3 + 4) >> 3)]; |
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
86 dst[1] = cm[dst[1] + ((t1 + t2 + 4) >> 3)]; |
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
87 dst[2] = cm[dst[2] + ((t1 - t2 + 4) >> 3)]; |
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
88 dst[3] = cm[dst[3] + ((t0 - t3 + 4) >> 3)]; |
11921 | 89 dst += stride; |
90 } | |
91 } | |
92 | |
93 static void vp8_idct_dc_add_c(uint8_t *dst, DCTELEM block[16], int stride) | |
94 { | |
95 int i, dc = (block[0] + 4) >> 3; | |
12007
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
96 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP + dc; |
11921 | 97 |
98 for (i = 0; i < 4; i++) { | |
12007
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
99 dst[0] = cm[dst[0]]; |
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
100 dst[1] = cm[dst[1]]; |
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
101 dst[2] = cm[dst[2]]; |
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
102 dst[3] = cm[dst[3]]; |
11921 | 103 dst += stride; |
104 } | |
105 } | |
106 | |
107 | |
/*
 * Load the eight pixels straddling a filter edge into locals p3..p0 (before
 * the edge) and q0..q3 (from the edge onwards). Expects `p` (pointer to q0)
 * and `stride` (step across the edge) to be in scope; this keeps the filter
 * helpers down to two parameters. Each local is marked av_unused because
 * most users only need a subset.
 */
#define LOAD_PIXELS\
    int av_unused p3 = p[-4*stride];\
    int av_unused p2 = p[-3*stride];\
    int av_unused p1 = p[-2*stride];\
    int av_unused p0 = p[-1*stride];\
    int av_unused q0 = p[ 0*stride];\
    int av_unused q1 = p[ 1*stride];\
    int av_unused q2 = p[ 2*stride];\
    int av_unused q3 = p[ 3*stride];

/*
 * Clamp n to the signed 8-bit range by biasing it by 0x80, looking it up in
 * the crop table and removing the bias again. Expects `cm` to be in scope
 * (the callers set it to ff_cropTbl + MAX_NEG_CROP; presumably that table
 * clamps its index to 0..255).
 * The argument is parenthesized so that expressions whose operators bind
 * looser than '+' (e.g. `a & b`, `c ? x : y`) are biased as a whole.
 */
#define clip_int8(n) (cm[(n)+0x80]-0x80)
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
120 |
11921 | 121 static av_always_inline void filter_common(uint8_t *p, int stride, int is4tap) |
122 { | |
123 LOAD_PIXELS | |
124 int a, f1, f2; | |
12007
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
125 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; |
11921 | 126 |
127 a = 3*(q0 - p0); | |
128 | |
129 if (is4tap) | |
12007
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
130 a += clip_int8(p1 - q1); |
11921 | 131 |
12007
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
132 a = clip_int8(a); |
11921 | 133 |
134 // We deviate from the spec here with c(a+3) >> 3 | |
135 // since that's what libvpx does. | |
136 f1 = FFMIN(a+4, 127) >> 3; | |
137 f2 = FFMIN(a+3, 127) >> 3; | |
138 | |
139 // Despite what the spec says, we do need to clamp here to | |
140 // be bitexact with libvpx. | |
12007
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
141 p[-1*stride] = cm[p0 + f2]; |
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
142 p[ 0*stride] = cm[q0 - f1]; |
11921 | 143 |
144 // only used for _inner on blocks without high edge variance | |
145 if (!is4tap) { | |
146 a = (f1+1)>>1; | |
12007
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
147 p[-2*stride] = cm[p1 + a]; |
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
148 p[ 1*stride] = cm[q1 - a]; |
11921 | 149 } |
150 } | |
151 | |
152 static av_always_inline int simple_limit(uint8_t *p, int stride, int flim) | |
153 { | |
154 LOAD_PIXELS | |
155 return 2*FFABS(p0-q0) + (FFABS(p1-q1) >> 1) <= flim; | |
156 } | |
157 | |
158 /** | |
159 * E - limit at the macroblock edge | |
160 * I - limit for interior difference | |
161 */ | |
162 static av_always_inline int normal_limit(uint8_t *p, int stride, int E, int I) | |
163 { | |
164 LOAD_PIXELS | |
12081
812e23197d64
VP8: Move calculation of outer filter limit out of dsp functions for normal
conrad
parents:
12011
diff
changeset
|
165 return simple_limit(p, stride, E) |
11921 | 166 && FFABS(p3-p2) <= I && FFABS(p2-p1) <= I && FFABS(p1-p0) <= I |
167 && FFABS(q3-q2) <= I && FFABS(q2-q1) <= I && FFABS(q1-q0) <= I; | |
168 } | |
169 | |
170 // high edge variance | |
171 static av_always_inline int hev(uint8_t *p, int stride, int thresh) | |
172 { | |
173 LOAD_PIXELS | |
174 return FFABS(p1-p0) > thresh || FFABS(q1-q0) > thresh; | |
175 } | |
176 | |
177 static av_always_inline void filter_mbedge(uint8_t *p, int stride) | |
178 { | |
179 int a0, a1, a2, w; | |
12007
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
180 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; |
11921 | 181 |
182 LOAD_PIXELS | |
183 | |
12007
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
184 w = clip_int8(p1-q1); |
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
185 w = clip_int8(w + 3*(q0-p0)); |
11921 | 186 |
187 a0 = (27*w + 63) >> 7; | |
188 a1 = (18*w + 63) >> 7; | |
189 a2 = ( 9*w + 63) >> 7; | |
190 | |
12007
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
191 p[-3*stride] = cm[p2 + a2]; |
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
192 p[-2*stride] = cm[p1 + a1]; |
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
193 p[-1*stride] = cm[p0 + a0]; |
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
194 p[ 0*stride] = cm[q0 - a0]; |
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
195 p[ 1*stride] = cm[q1 - a1]; |
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
196 p[ 2*stride] = cm[q2 - a2]; |
11921 | 197 } |
198 | |
12194
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12081
diff
changeset
|
/*
 * Generate the normal loop filter pair for one direction and edge length:
 *   vp8_<dir>_loop_filter<size>_c        - macroblock-edge variant
 *   vp8_<dir>_loop_filter<size>_inner_c  - inner-edge variant
 *
 * dir          - name fragment (v or h)
 * size         - number of positions filtered along the edge
 * stridea      - step between successive positions along the edge
 * strideb      - step between pixels across the edge
 * maybe_inline - optional extra specifier for the generated functions
 *
 * Both variants skip positions that fail normal_limit(). Where hev() reports
 * high edge variance they apply filter_common() in 4-tap mode; otherwise the
 * edge variant applies filter_mbedge() and the inner variant applies
 * filter_common() in non-4-tap mode.
 */
#define LOOP_FILTER(dir, size, stridea, strideb, maybe_inline) \
static maybe_inline void vp8_ ## dir ## _loop_filter ## size ## _c(uint8_t *dst, int stride,\
                                     int flim_E, int flim_I, int hev_thresh)\
{\
    int i;\
\
    for (i = 0; i < size; i++) {\
        uint8_t *px = dst + i*stridea;\
\
        if (!normal_limit(px, strideb, flim_E, flim_I))\
            continue;\
        if (hev(px, strideb, hev_thresh))\
            filter_common(px, strideb, 1);\
        else\
            filter_mbedge(px, strideb);\
    }\
}\
\
static maybe_inline void vp8_ ## dir ## _loop_filter ## size ## _inner_c(uint8_t *dst, int stride,\
                                      int flim_E, int flim_I, int hev_thresh)\
{\
    int i;\
\
    for (i = 0; i < size; i++) {\
        uint8_t *px = dst + i*stridea;\
\
        if (!normal_limit(px, strideb, flim_E, flim_I))\
            continue;\
        /* literal flags keep filter_common() specialisable when inlined */\
        if (hev(px, strideb, hev_thresh))\
            filter_common(px, strideb, 1);\
        else\
            filter_common(px, strideb, 0);\
    }\
}

/* 16-position luma filters: v steps along the row and filters across rows,
 * h steps down the rows and filters across columns. */
LOOP_FILTER(v, 16, 1, stride,)
LOOP_FILTER(h, 16, stride, 1,)
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12081
diff
changeset
|
231 |
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12081
diff
changeset
|
232 #define UV_LOOP_FILTER(dir, stridea, strideb) \ |
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12081
diff
changeset
|
233 LOOP_FILTER(dir, 8, stridea, strideb, av_always_inline) \ |
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12081
diff
changeset
|
234 static void vp8_ ## dir ## _loop_filter8uv_c(uint8_t *dstU, uint8_t *dstV, int stride,\ |
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12081
diff
changeset
|
235 int fE, int fI, int hev_thresh)\ |
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12081
diff
changeset
|
236 {\ |
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12081
diff
changeset
|
237 vp8_ ## dir ## _loop_filter8_c(dstU, stride, fE, fI, hev_thresh);\ |
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12081
diff
changeset
|
238 vp8_ ## dir ## _loop_filter8_c(dstV, stride, fE, fI, hev_thresh);\ |
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12081
diff
changeset
|
239 }\ |
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12081
diff
changeset
|
240 static void vp8_ ## dir ## _loop_filter8uv_inner_c(uint8_t *dstU, uint8_t *dstV, int stride,\ |
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12081
diff
changeset
|
241 int fE, int fI, int hev_thresh)\ |
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12081
diff
changeset
|
242 {\ |
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12081
diff
changeset
|
243 vp8_ ## dir ## _loop_filter8_inner_c(dstU, stride, fE, fI, hev_thresh);\ |
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12081
diff
changeset
|
244 vp8_ ## dir ## _loop_filter8_inner_c(dstV, stride, fE, fI, hev_thresh);\ |
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12081
diff
changeset
|
245 } |
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12081
diff
changeset
|
246 |
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12081
diff
changeset
|
247 UV_LOOP_FILTER(v, 1, stride) |
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12081
diff
changeset
|
248 UV_LOOP_FILTER(h, stride, 1) |
11921 | 249 |
/**
 * Simple loop filter in the v direction: visits 16 consecutive pixels of a
 * row and filters across rows (step = stride) wherever simple_limit() holds.
 *
 * @param dst     first pixel of the row on the far side of the edge
 * @param stride  distance in bytes between rows
 * @param flim    limit handed to simple_limit()
 */
static void vp8_v_loop_filter_simple_c(uint8_t *dst, int stride, int flim)
{
    int col;

    for (col = 0; col < 16; col++) {
        uint8_t *px = dst + col;

        if (!simple_limit(px, stride, flim))
            continue;
        filter_common(px, stride, 1);
    }
}
258 | |
/**
 * Simple loop filter in the h direction: visits 16 consecutive rows and
 * filters across columns (step = 1) wherever simple_limit() holds.
 *
 * @param dst     first pixel of the column on the far side of the edge
 * @param stride  distance in bytes between rows
 * @param flim    limit handed to simple_limit()
 */
static void vp8_h_loop_filter_simple_c(uint8_t *dst, int stride, int flim)
{
    int row;

    for (row = 0; row < 16; row++) {
        uint8_t *px = dst + row * stride;

        if (!simple_limit(px, 1, flim))
            continue;
        filter_common(px, 1, 1);
    }
}
267 | |
/*
 * Sub-pixel interpolation taps, indexed by [fraction - 1][tap].
 * Taps 0..5 weight the samples at offsets -2..+3; the magnitudes stored here
 * are combined as +t0 -t1 +t2 +t3 -t4 +t5, which sums to 128 for every row
 * (the result is scaled back with +64, >>7). Rows whose outer taps are zero
 * can be evaluated with the 4-tap filter.
 */
static const uint8_t subpel_filters[7][6] = {
    /* -2  -1    0   +1  +2  +3 */
    {   0,  6, 123,  12,  1,  0 },   /* fraction 1 */
    {   2, 11, 108,  36,  8,  1 },   /* fraction 2 */
    {   0,  9,  93,  50,  6,  0 },   /* fraction 3 */
    {   3, 16,  77,  77, 16,  3 },   /* fraction 4 */
    {   0,  6,  50,  93,  9,  0 },   /* fraction 5 */
    {   1,  8,  36, 108, 11,  2 },   /* fraction 6 */
    {   0,  1,  12, 123,  6,  0 },   /* fraction 7 */
};
277 | |
/*
 * Generate put_vp8_pixels<WIDTH>_c(): a plain copy of h rows of WIDTH bytes
 * from src to dst, each buffer advancing by its own stride. The x and y
 * arguments are accepted for signature compatibility and ignored.
 */
#define PUT_PIXELS(WIDTH) \
static void put_vp8_pixels ## WIDTH ##_c(uint8_t *dst, int dststride, uint8_t *src, int srcstride, int h, int x, int y) { \
    int rows_left = h; \
    while (rows_left-- > 0) { \
        memcpy(dst, src, WIDTH); \
        dst += dststride; \
        src += srcstride; \
    } \
}

PUT_PIXELS(16)
PUT_PIXELS(8)
PUT_PIXELS(4)
11921 | 289 |
/*
 * Interpolation kernels. Both expect `x` (current sample index) and `cm`
 * (clamping table pointer) in the surrounding scope. F is one row of tap
 * magnitudes, stride the distance between neighbouring samples. Terms are
 * listed from the lowest to the highest sample offset; taps 1 and 4 enter
 * with a negative sign. The sum is rounded (+64) and scaled by >> 7 before
 * the table lookup.
 */
#define FILTER_6TAP(src, F, stride) \
    cm[(F[0]*src[x-2*stride] - F[1]*src[x-1*stride] + F[2]*src[x+0*stride] + \
        F[3]*src[x+1*stride] - F[4]*src[x+2*stride] + F[5]*src[x+3*stride] + 64) >> 7]

/* Same as FILTER_6TAP without the outermost taps F[0] and F[5]. */
#define FILTER_4TAP(src, F, stride) \
    cm[(64 - F[1]*src[x-1*stride] + F[2]*src[x+0*stride] + \
        F[3]*src[x+1*stride] - F[4]*src[x+2*stride]) >> 7]
11921 | 297 |
/*
 * Generate put_vp8_epel<SIZE>_<FILTERNAME>_c(): horizontal-only sub-pixel
 * interpolation of a SIZE-wide, h-row block. The taps are selected by mx
 * (row mx-1 of subpel_filters); my is unused. FILTER is FILTER_4TAP or
 * FILTER_6TAP and relies on the locals `x` and `cm`.
 */
#define VP8_EPEL_H(SIZE, FILTER, FILTERNAME) \
static void put_vp8_epel ## SIZE ## _ ## FILTERNAME ## _c(uint8_t *dst, int dststride, uint8_t *src, int srcstride, int h, int mx, int my) \
{ \
    const uint8_t *taps = subpel_filters[mx-1]; \
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; \
    int x, row; \
\
    for (row = 0; row < h; row++, dst += dststride, src += srcstride) \
        for (x = 0; x < SIZE; x++) \
            dst[x] = FILTER(src, taps, 1); \
}
/*
 * Generate put_vp8_epel<SIZE>_<FILTERNAME>_c(): vertical-only sub-pixel
 * interpolation of a SIZE-wide, h-row block. The taps are selected by my
 * (row my-1 of subpel_filters); mx is unused. Neighbouring samples are
 * srcstride apart, so rows above and below the block are read.
 */
#define VP8_EPEL_V(SIZE, FILTER, FILTERNAME) \
static void put_vp8_epel ## SIZE ## _ ## FILTERNAME ## _c(uint8_t *dst, int dststride, uint8_t *src, int srcstride, int h, int mx, int my) \
{ \
    const uint8_t *taps = subpel_filters[my-1]; \
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; \
    int x, row; \
\
    for (row = 0; row < h; row++, dst += dststride, src += srcstride) \
        for (x = 0; x < SIZE; x++) \
            dst[x] = FILTER(src, taps, srcstride); \
}
/*
 * Generate put_vp8_epel<SIZE>_<FILTERNAME>_c(): two-pass (horizontal, then
 * vertical) sub-pixel interpolation of a SIZE-wide, h-row block.
 *
 * Pass 1 filters h+5 source rows horizontally with FILTERX (taps from mx),
 * starting two rows above the block, into a SIZE-wide scratch buffer.
 * Pass 2 filters that buffer vertically with FILTERY (taps from my),
 * starting at scratch row 2, which corresponds to the block's first row.
 * The scratch buffer holds 2*SIZE+5 rows, so h must not exceed 2*SIZE.
 */
#define VP8_EPEL_HV(SIZE, FILTERX, FILTERY, FILTERNAME) \
static void put_vp8_epel ## SIZE ## _ ## FILTERNAME ## _c(uint8_t *dst, int dststride, uint8_t *src, int srcstride, int h, int mx, int my) \
{ \
    const uint8_t *h_taps = subpel_filters[mx-1]; \
    const uint8_t *v_taps = subpel_filters[my-1]; \
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; \
    uint8_t scratch[(2*SIZE+5)*SIZE]; \
    uint8_t *mid = scratch; \
    int x, row; \
\
    src -= 2*srcstride; \
    for (row = 0; row < h+5; row++, mid += SIZE, src += srcstride) \
        for (x = 0; x < SIZE; x++) \
            mid[x] = FILTERX(src, h_taps, 1); \
\
    mid = scratch + 2*SIZE; \
    for (row = 0; row < h; row++, dst += dststride, mid += SIZE) \
        for (x = 0; x < SIZE; x++) \
            dst[x] = FILTERY(mid, v_taps, SIZE); \
}
353 | |
354 VP8_EPEL_H(16, FILTER_4TAP, h4) | |
355 VP8_EPEL_H(8, FILTER_4TAP, h4) | |
356 VP8_EPEL_H(4, FILTER_4TAP, h4) | |
357 VP8_EPEL_H(16, FILTER_6TAP, h6) | |
358 VP8_EPEL_H(8, FILTER_6TAP, h6) | |
359 VP8_EPEL_H(4, FILTER_6TAP, h6) | |
360 VP8_EPEL_V(16, FILTER_4TAP, v4) | |
361 VP8_EPEL_V(8, FILTER_4TAP, v4) | |
362 VP8_EPEL_V(4, FILTER_4TAP, v4) | |
363 VP8_EPEL_V(16, FILTER_6TAP, v6) | |
364 VP8_EPEL_V(8, FILTER_6TAP, v6) | |
365 VP8_EPEL_V(4, FILTER_6TAP, v6) | |
366 VP8_EPEL_HV(16, FILTER_4TAP, FILTER_4TAP, h4v4) | |
367 VP8_EPEL_HV(8, FILTER_4TAP, FILTER_4TAP, h4v4) | |
368 VP8_EPEL_HV(4, FILTER_4TAP, FILTER_4TAP, h4v4) | |
369 VP8_EPEL_HV(16, FILTER_4TAP, FILTER_6TAP, h4v6) | |
370 VP8_EPEL_HV(8, FILTER_4TAP, FILTER_6TAP, h4v6) | |
371 VP8_EPEL_HV(4, FILTER_4TAP, FILTER_6TAP, h4v6) | |
372 VP8_EPEL_HV(16, FILTER_6TAP, FILTER_4TAP, h6v4) | |
373 VP8_EPEL_HV(8, FILTER_6TAP, FILTER_4TAP, h6v4) | |
374 VP8_EPEL_HV(4, FILTER_6TAP, FILTER_4TAP, h6v4) | |
375 VP8_EPEL_HV(16, FILTER_6TAP, FILTER_6TAP, h6v6) | |
376 VP8_EPEL_HV(8, FILTER_6TAP, FILTER_6TAP, h6v6) | |
377 VP8_EPEL_HV(4, FILTER_6TAP, FILTER_6TAP, h6v6) | |
378 | |
/*
 * Generate the bilinear interpolators for SIZE-wide blocks:
 *   put_vp8_bilinear<SIZE>_h_c   - horizontal only (weights 8-mx and mx)
 *   put_vp8_bilinear<SIZE>_v_c   - vertical only   (weights 8-my and my)
 *   put_vp8_bilinear<SIZE>_hv_c  - horizontal pass into a scratch buffer,
 *                                  then vertical pass
 *
 * Parameters (shared by all three):
 *   dst, stride - destination block and its row stride
 *   src, s2     - source block and its row stride
 *   h           - number of rows; the hv scratch buffer holds 2*SIZE+1 rows,
 *                 so h must not exceed 2*SIZE there
 *   mx, my      - horizontal / vertical weights in eighths (0..8)
 *
 * Every result is rounded with +4 and scaled by >> 3. The source is always
 * walked with its own stride s2, matching the put_vp8_pixels and
 * put_vp8_epel functions that share this signature; using the destination
 * stride for it would read the wrong rows whenever the two strides differ.
 */
#define VP8_BILINEAR(SIZE) \
static void put_vp8_bilinear ## SIZE ## _h_c(uint8_t *dst, int stride, uint8_t *src, int s2, int h, int mx, int my) \
{ \
    int a = 8-mx, b = mx; \
    int x, y; \
\
    for (y = 0; y < h; y++) { \
        for (x = 0; x < SIZE; x++) \
            dst[x] = (a*src[x] + b*src[x+1] + 4) >> 3; \
        dst += stride; \
        src += s2; \
    } \
} \
static void put_vp8_bilinear ## SIZE ## _v_c(uint8_t *dst, int stride, uint8_t *src, int s2, int h, int mx, int my) \
{ \
    int c = 8-my, d = my; \
    int x, y; \
\
    for (y = 0; y < h; y++) { \
        for (x = 0; x < SIZE; x++) \
            dst[x] = (c*src[x] + d*src[x+s2] + 4) >> 3; /* next source row */ \
        dst += stride; \
        src += s2; \
    } \
} \
\
static void put_vp8_bilinear ## SIZE ## _hv_c(uint8_t *dst, int stride, uint8_t *src, int s2, int h, int mx, int my) \
{ \
    int a = 8-mx, b = mx; \
    int c = 8-my, d = my; \
    int x, y; \
    uint8_t tmp_array[(2*SIZE+1)*SIZE]; \
    uint8_t *tmp = tmp_array; \
\
    /* horizontal pass: one row more than h, needed by the vertical pass */ \
    for (y = 0; y < h+1; y++) { \
        for (x = 0; x < SIZE; x++) \
            tmp[x] = (a*src[x] + b*src[x+1] + 4) >> 3; \
        tmp += SIZE; \
        src += s2; \
    } \
\
    tmp = tmp_array; \
\
    /* vertical pass over the SIZE-wide scratch rows */ \
    for (y = 0; y < h; y++) { \
        for (x = 0; x < SIZE; x++) \
            dst[x] = (c*tmp[x] + d*tmp[x+SIZE] + 4) >> 3; \
        dst += stride; \
        tmp += SIZE; \
    } \
}

VP8_BILINEAR(16)
VP8_BILINEAR(8)
VP8_BILINEAR(4)
433 | |
/*
 * Fill dsp->put_vp8_epel_pixels_tab[IDX] with the C implementations for
 * SIZE-wide blocks. The table is indexed [IDX][vertical][horizontal], where
 * 0 selects no filtering in that direction, 1 the 4-tap and 2 the 6-tap
 * filter. Expects a variable named `dsp` in scope.
 * Wrapped in do { } while (0) so that the macro expands to exactly one
 * statement and stays correct under an unbraced if/else.
 */
#define VP8_MC_FUNC(IDX, SIZE) \
    do { \
        dsp->put_vp8_epel_pixels_tab[IDX][0][0] = put_vp8_pixels ## SIZE ## _c; \
        dsp->put_vp8_epel_pixels_tab[IDX][0][1] = put_vp8_epel ## SIZE ## _h4_c; \
        dsp->put_vp8_epel_pixels_tab[IDX][0][2] = put_vp8_epel ## SIZE ## _h6_c; \
        dsp->put_vp8_epel_pixels_tab[IDX][1][0] = put_vp8_epel ## SIZE ## _v4_c; \
        dsp->put_vp8_epel_pixels_tab[IDX][1][1] = put_vp8_epel ## SIZE ## _h4v4_c; \
        dsp->put_vp8_epel_pixels_tab[IDX][1][2] = put_vp8_epel ## SIZE ## _h6v4_c; \
        dsp->put_vp8_epel_pixels_tab[IDX][2][0] = put_vp8_epel ## SIZE ## _v6_c; \
        dsp->put_vp8_epel_pixels_tab[IDX][2][1] = put_vp8_epel ## SIZE ## _h4v6_c; \
        dsp->put_vp8_epel_pixels_tab[IDX][2][2] = put_vp8_epel ## SIZE ## _h6v6_c; \
    } while (0)

/*
 * Fill dsp->put_vp8_bilinear_pixels_tab[IDX] for SIZE-wide blocks. The table
 * has the same [IDX][vertical][horizontal] layout as the epel table; since
 * there is only one bilinear kernel per direction, indices 1 and 2 map to
 * the same function. Expects a variable named `dsp` in scope.
 */
#define VP8_BILINEAR_MC_FUNC(IDX, SIZE) \
    do { \
        dsp->put_vp8_bilinear_pixels_tab[IDX][0][0] = put_vp8_pixels   ## SIZE ## _c; \
        dsp->put_vp8_bilinear_pixels_tab[IDX][0][1] = put_vp8_bilinear ## SIZE ## _h_c; \
        dsp->put_vp8_bilinear_pixels_tab[IDX][0][2] = put_vp8_bilinear ## SIZE ## _h_c; \
        dsp->put_vp8_bilinear_pixels_tab[IDX][1][0] = put_vp8_bilinear ## SIZE ## _v_c; \
        dsp->put_vp8_bilinear_pixels_tab[IDX][1][1] = put_vp8_bilinear ## SIZE ## _hv_c; \
        dsp->put_vp8_bilinear_pixels_tab[IDX][1][2] = put_vp8_bilinear ## SIZE ## _hv_c; \
        dsp->put_vp8_bilinear_pixels_tab[IDX][2][0] = put_vp8_bilinear ## SIZE ## _v_c; \
        dsp->put_vp8_bilinear_pixels_tab[IDX][2][1] = put_vp8_bilinear ## SIZE ## _hv_c; \
        dsp->put_vp8_bilinear_pixels_tab[IDX][2][2] = put_vp8_bilinear ## SIZE ## _hv_c; \
    } while (0)
455 | |
11921 | 456 av_cold void ff_vp8dsp_init(VP8DSPContext *dsp) |
457 { | |
458 dsp->vp8_luma_dc_wht = vp8_luma_dc_wht_c; | |
459 dsp->vp8_idct_add = vp8_idct_add_c; | |
460 dsp->vp8_idct_dc_add = vp8_idct_dc_add_c; | |
461 | |
12194
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12081
diff
changeset
|
462 dsp->vp8_v_loop_filter16y = vp8_v_loop_filter16_c; |
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12081
diff
changeset
|
463 dsp->vp8_h_loop_filter16y = vp8_h_loop_filter16_c; |
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12081
diff
changeset
|
464 dsp->vp8_v_loop_filter8uv = vp8_v_loop_filter8uv_c; |
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12081
diff
changeset
|
465 dsp->vp8_h_loop_filter8uv = vp8_h_loop_filter8uv_c; |
11921 | 466 |
12194
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12081
diff
changeset
|
467 dsp->vp8_v_loop_filter16y_inner = vp8_v_loop_filter16_inner_c; |
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12081
diff
changeset
|
468 dsp->vp8_h_loop_filter16y_inner = vp8_h_loop_filter16_inner_c; |
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12081
diff
changeset
|
469 dsp->vp8_v_loop_filter8uv_inner = vp8_v_loop_filter8uv_inner_c; |
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12081
diff
changeset
|
470 dsp->vp8_h_loop_filter8uv_inner = vp8_h_loop_filter8uv_inner_c; |
11921 | 471 |
472 dsp->vp8_v_loop_filter_simple = vp8_v_loop_filter_simple_c; | |
473 dsp->vp8_h_loop_filter_simple = vp8_h_loop_filter_simple_c; | |
474 | |
475 VP8_MC_FUNC(0, 16); | |
476 VP8_MC_FUNC(1, 8); | |
477 VP8_MC_FUNC(2, 4); | |
11974 | 478 |
479 VP8_BILINEAR_MC_FUNC(0, 16); | |
480 VP8_BILINEAR_MC_FUNC(1, 8); | |
481 VP8_BILINEAR_MC_FUNC(2, 4); | |
11975 | 482 |
11985 | 483 if (HAVE_MMX) |
11975 | 484 ff_vp8dsp_init_x86(dsp); |
12011 | 485 if (HAVE_ALTIVEC) |
486 ff_vp8dsp_init_altivec(dsp); | |
11921 | 487 } |