Mercurial > libavcodec.hg
annotate vp8dsp.c @ 12483:0159a19bfff7 libavcodec
aacdec: Rework channel mapping compatibility hacks.
For a PCE based configuration map the channels solely based on tags.
For an indexed configuration map the channels solely based on position.
This works with all known exotic samples including al17, elem_id0, bad_concat,
and lfe_is_sce.
author | alexc |
---|---|
date | Fri, 10 Sep 2010 18:01:48 +0000 |
parents | b4c63ffd959b |
children |
rev | line source |
---|---|
11921 | 1 /** |
2 * VP8 compatible video decoder | |
3 * | |
4 * Copyright (C) 2010 David Conrad | |
5 * Copyright (C) 2010 Ronald S. Bultje | |
6 * | |
7 * This file is part of FFmpeg. | |
8 * | |
9 * FFmpeg is free software; you can redistribute it and/or | |
10 * modify it under the terms of the GNU Lesser General Public | |
11 * License as published by the Free Software Foundation; either | |
12 * version 2.1 of the License, or (at your option) any later version. | |
13 * | |
14 * FFmpeg is distributed in the hope that it will be useful, | |
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
17 * Lesser General Public License for more details. | |
18 * | |
19 * You should have received a copy of the GNU Lesser General Public | |
20 * License along with FFmpeg; if not, write to the Free Software | |
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
22 */ | |
23 | |
24 #include "dsputil.h" | |
25 #include "vp8dsp.h" | |
26 | |
27 // TODO: Maybe add dequant | |
28 static void vp8_luma_dc_wht_c(DCTELEM block[4][4][16], DCTELEM dc[16]) | |
29 { | |
30 int i, t0, t1, t2, t3; | |
31 | |
32 for (i = 0; i < 4; i++) { | |
33 t0 = dc[0*4+i] + dc[3*4+i]; | |
34 t1 = dc[1*4+i] + dc[2*4+i]; | |
35 t2 = dc[1*4+i] - dc[2*4+i]; | |
36 t3 = dc[0*4+i] - dc[3*4+i]; | |
37 | |
38 dc[0*4+i] = t0 + t1; | |
39 dc[1*4+i] = t3 + t2; | |
40 dc[2*4+i] = t0 - t1; | |
41 dc[3*4+i] = t3 - t2; | |
42 } | |
43 | |
44 for (i = 0; i < 4; i++) { | |
45 t0 = dc[i*4+0] + dc[i*4+3] + 3; // rounding | |
46 t1 = dc[i*4+1] + dc[i*4+2]; | |
47 t2 = dc[i*4+1] - dc[i*4+2]; | |
48 t3 = dc[i*4+0] - dc[i*4+3] + 3; // rounding | |
12340
2d15f62f4f8a
VP8: move zeroing of luma DC block into the WHT
darkshikari
parents:
12241
diff
changeset
|
49 dc[i*4+0] = 0; |
2d15f62f4f8a
VP8: move zeroing of luma DC block into the WHT
darkshikari
parents:
12241
diff
changeset
|
50 dc[i*4+1] = 0; |
2d15f62f4f8a
VP8: move zeroing of luma DC block into the WHT
darkshikari
parents:
12241
diff
changeset
|
51 dc[i*4+2] = 0; |
2d15f62f4f8a
VP8: move zeroing of luma DC block into the WHT
darkshikari
parents:
12241
diff
changeset
|
52 dc[i*4+3] = 0; |
11921 | 53 |
12342 | 54 block[i][0][0] = (t0 + t1) >> 3; |
55 block[i][1][0] = (t3 + t2) >> 3; | |
56 block[i][2][0] = (t0 - t1) >> 3; | |
57 block[i][3][0] = (t3 - t2) >> 3; | |
11921 | 58 } |
59 } | |
60 | |
12342 | 61 static void vp8_luma_dc_wht_dc_c(DCTELEM block[4][4][16], DCTELEM dc[16]) |
62 { | |
63 int i, val = (dc[0] + 3) >> 3; | |
64 dc[0] = 0; | |
65 | |
66 for (i = 0; i < 4; i++) { | |
67 block[i][0][0] = val; | |
68 block[i][1][0] = val; | |
69 block[i][2][0] = val; | |
70 block[i][3][0] = val; | |
71 } | |
72 } | |
11921 | 73 |
74 #define MUL_20091(a) ((((a)*20091) >> 16) + (a)) | |
75 #define MUL_35468(a) (((a)*35468) >> 16) | |
76 | |
77 static void vp8_idct_add_c(uint8_t *dst, DCTELEM block[16], int stride) | |
78 { | |
79 int i, t0, t1, t2, t3; | |
12007
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
80 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; |
11921 | 81 DCTELEM tmp[16]; |
82 | |
83 for (i = 0; i < 4; i++) { | |
84 t0 = block[0*4+i] + block[2*4+i]; | |
85 t1 = block[0*4+i] - block[2*4+i]; | |
86 t2 = MUL_35468(block[1*4+i]) - MUL_20091(block[3*4+i]); | |
87 t3 = MUL_20091(block[1*4+i]) + MUL_35468(block[3*4+i]); | |
12235
e08d65897115
VP8: clear DCT blocks in iDCT instead of using clear_blocks.
darkshikari
parents:
12194
diff
changeset
|
88 block[0*4+i] = 0; |
e08d65897115
VP8: clear DCT blocks in iDCT instead of using clear_blocks.
darkshikari
parents:
12194
diff
changeset
|
89 block[1*4+i] = 0; |
e08d65897115
VP8: clear DCT blocks in iDCT instead of using clear_blocks.
darkshikari
parents:
12194
diff
changeset
|
90 block[2*4+i] = 0; |
e08d65897115
VP8: clear DCT blocks in iDCT instead of using clear_blocks.
darkshikari
parents:
12194
diff
changeset
|
91 block[3*4+i] = 0; |
11921 | 92 |
93 tmp[i*4+0] = t0 + t3; | |
94 tmp[i*4+1] = t1 + t2; | |
95 tmp[i*4+2] = t1 - t2; | |
96 tmp[i*4+3] = t0 - t3; | |
97 } | |
98 | |
99 for (i = 0; i < 4; i++) { | |
100 t0 = tmp[0*4+i] + tmp[2*4+i]; | |
101 t1 = tmp[0*4+i] - tmp[2*4+i]; | |
102 t2 = MUL_35468(tmp[1*4+i]) - MUL_20091(tmp[3*4+i]); | |
103 t3 = MUL_20091(tmp[1*4+i]) + MUL_35468(tmp[3*4+i]); | |
104 | |
12007
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
105 dst[0] = cm[dst[0] + ((t0 + t3 + 4) >> 3)]; |
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
106 dst[1] = cm[dst[1] + ((t1 + t2 + 4) >> 3)]; |
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
107 dst[2] = cm[dst[2] + ((t1 - t2 + 4) >> 3)]; |
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
108 dst[3] = cm[dst[3] + ((t0 - t3 + 4) >> 3)]; |
11921 | 109 dst += stride; |
110 } | |
111 } | |
112 | |
113 static void vp8_idct_dc_add_c(uint8_t *dst, DCTELEM block[16], int stride) | |
114 { | |
115 int i, dc = (block[0] + 4) >> 3; | |
12007
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
116 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP + dc; |
12235
e08d65897115
VP8: clear DCT blocks in iDCT instead of using clear_blocks.
darkshikari
parents:
12194
diff
changeset
|
117 block[0] = 0; |
11921 | 118 |
119 for (i = 0; i < 4; i++) { | |
12007
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
120 dst[0] = cm[dst[0]]; |
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
121 dst[1] = cm[dst[1]]; |
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
122 dst[2] = cm[dst[2]]; |
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
123 dst[3] = cm[dst[3]]; |
11921 | 124 dst += stride; |
125 } | |
126 } | |
127 | |
12241
c7f6ddcc5c01
VP8: optimize DC-only chroma case in the same way as luma.
darkshikari
parents:
12238
diff
changeset
|
128 static void vp8_idct_dc_add4uv_c(uint8_t *dst, DCTELEM block[4][16], int stride) |
12238 | 129 { |
12241
c7f6ddcc5c01
VP8: optimize DC-only chroma case in the same way as luma.
darkshikari
parents:
12238
diff
changeset
|
130 vp8_idct_dc_add_c(dst+stride*0+0, block[0], stride); |
c7f6ddcc5c01
VP8: optimize DC-only chroma case in the same way as luma.
darkshikari
parents:
12238
diff
changeset
|
131 vp8_idct_dc_add_c(dst+stride*0+4, block[1], stride); |
c7f6ddcc5c01
VP8: optimize DC-only chroma case in the same way as luma.
darkshikari
parents:
12238
diff
changeset
|
132 vp8_idct_dc_add_c(dst+stride*4+0, block[2], stride); |
c7f6ddcc5c01
VP8: optimize DC-only chroma case in the same way as luma.
darkshikari
parents:
12238
diff
changeset
|
133 vp8_idct_dc_add_c(dst+stride*4+4, block[3], stride); |
c7f6ddcc5c01
VP8: optimize DC-only chroma case in the same way as luma.
darkshikari
parents:
12238
diff
changeset
|
134 } |
c7f6ddcc5c01
VP8: optimize DC-only chroma case in the same way as luma.
darkshikari
parents:
12238
diff
changeset
|
135 |
c7f6ddcc5c01
VP8: optimize DC-only chroma case in the same way as luma.
darkshikari
parents:
12238
diff
changeset
|
136 static void vp8_idct_dc_add4y_c(uint8_t *dst, DCTELEM block[4][16], int stride) |
c7f6ddcc5c01
VP8: optimize DC-only chroma case in the same way as luma.
darkshikari
parents:
12238
diff
changeset
|
137 { |
c7f6ddcc5c01
VP8: optimize DC-only chroma case in the same way as luma.
darkshikari
parents:
12238
diff
changeset
|
138 vp8_idct_dc_add_c(dst+ 0, block[0], stride); |
c7f6ddcc5c01
VP8: optimize DC-only chroma case in the same way as luma.
darkshikari
parents:
12238
diff
changeset
|
139 vp8_idct_dc_add_c(dst+ 4, block[1], stride); |
c7f6ddcc5c01
VP8: optimize DC-only chroma case in the same way as luma.
darkshikari
parents:
12238
diff
changeset
|
140 vp8_idct_dc_add_c(dst+ 8, block[2], stride); |
c7f6ddcc5c01
VP8: optimize DC-only chroma case in the same way as luma.
darkshikari
parents:
12238
diff
changeset
|
141 vp8_idct_dc_add_c(dst+12, block[3], stride); |
12238 | 142 } |
11921 | 143 |
// Declares the eight pixels straddling an edge as locals, so the filter
// helpers only need to receive (p, stride). Expects `p` (pointing at q0)
// and `stride` to be in scope at the point of use. p3..p0 are the four
// pixels before the edge, q0..q3 the four after it. av_unused marks the
// ones a given helper does not read.
#define LOAD_PIXELS                         \
    int av_unused p3 = p[-4*stride];        \
    int av_unused p2 = p[-3*stride];        \
    int av_unused p1 = p[-2*stride];        \
    int av_unused p0 = p[-1*stride];        \
    int av_unused q0 = p[ 0*stride];        \
    int av_unused q1 = p[ 1*stride];        \
    int av_unused q2 = p[ 2*stride];        \
    int av_unused q3 = p[ 3*stride];
154 | |
12007
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
155 #define clip_int8(n) (cm[n+0x80]-0x80) |
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
156 |
11921 | 157 static av_always_inline void filter_common(uint8_t *p, int stride, int is4tap) |
158 { | |
159 LOAD_PIXELS | |
160 int a, f1, f2; | |
12007
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
161 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; |
11921 | 162 |
163 a = 3*(q0 - p0); | |
164 | |
165 if (is4tap) | |
12007
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
166 a += clip_int8(p1 - q1); |
11921 | 167 |
12007
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
168 a = clip_int8(a); |
11921 | 169 |
170 // We deviate from the spec here with c(a+3) >> 3 | |
171 // since that's what libvpx does. | |
172 f1 = FFMIN(a+4, 127) >> 3; | |
173 f2 = FFMIN(a+3, 127) >> 3; | |
174 | |
175 // Despite what the spec says, we do need to clamp here to | |
176 // be bitexact with libvpx. | |
12007
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
177 p[-1*stride] = cm[p0 + f2]; |
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
178 p[ 0*stride] = cm[q0 - f1]; |
11921 | 179 |
180 // only used for _inner on blocks without high edge variance | |
181 if (!is4tap) { | |
182 a = (f1+1)>>1; | |
12007
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
183 p[-2*stride] = cm[p1 + a]; |
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
184 p[ 1*stride] = cm[q1 - a]; |
11921 | 185 } |
186 } | |
187 | |
188 static av_always_inline int simple_limit(uint8_t *p, int stride, int flim) | |
189 { | |
190 LOAD_PIXELS | |
191 return 2*FFABS(p0-q0) + (FFABS(p1-q1) >> 1) <= flim; | |
192 } | |
193 | |
194 /** | |
195 * E - limit at the macroblock edge | |
196 * I - limit for interior difference | |
197 */ | |
198 static av_always_inline int normal_limit(uint8_t *p, int stride, int E, int I) | |
199 { | |
200 LOAD_PIXELS | |
12081
812e23197d64
VP8: Move calculation of outer filter limit out of dsp functions for normal
conrad
parents:
12011
diff
changeset
|
201 return simple_limit(p, stride, E) |
11921 | 202 && FFABS(p3-p2) <= I && FFABS(p2-p1) <= I && FFABS(p1-p0) <= I |
203 && FFABS(q3-q2) <= I && FFABS(q2-q1) <= I && FFABS(q1-q0) <= I; | |
204 } | |
205 | |
206 // high edge variance | |
207 static av_always_inline int hev(uint8_t *p, int stride, int thresh) | |
208 { | |
209 LOAD_PIXELS | |
210 return FFABS(p1-p0) > thresh || FFABS(q1-q0) > thresh; | |
211 } | |
212 | |
213 static av_always_inline void filter_mbedge(uint8_t *p, int stride) | |
214 { | |
215 int a0, a1, a2, w; | |
12007
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
216 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; |
11921 | 217 |
218 LOAD_PIXELS | |
219 | |
12007
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
220 w = clip_int8(p1-q1); |
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
221 w = clip_int8(w + 3*(q0-p0)); |
11921 | 222 |
223 a0 = (27*w + 63) >> 7; | |
224 a1 = (18*w + 63) >> 7; | |
225 a2 = ( 9*w + 63) >> 7; | |
226 | |
12007
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
227 p[-3*stride] = cm[p2 + a2]; |
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
228 p[-2*stride] = cm[p1 + a1]; |
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
229 p[-1*stride] = cm[p0 + a0]; |
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
230 p[ 0*stride] = cm[q0 - a0]; |
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
231 p[ 1*stride] = cm[q1 - a1]; |
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
232 p[ 2*stride] = cm[q2 - a2]; |
11921 | 233 } |
234 | |
12194
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12081
diff
changeset
|
/*
 * Generates the normal loop filter pair for one direction and edge length:
 *   vp8_<dir>_loop_filter<size>_c        - macroblock-edge variant
 *   vp8_<dir>_loop_filter<size>_inner_c  - inner-edge variant
 *
 * stridea is the step between successive positions along the edge and
 * strideb the step across it; both may refer to the generated function's
 * `stride` parameter. maybe_inline is an optional inlining qualifier.
 *
 * Positions failing normal_limit() are left untouched. The inner variant
 * calls filter_common() with a literal 1 or 0 in separate branches so each
 * call sees a constant is4tap argument.
 */
#define LOOP_FILTER(dir, size, stridea, strideb, maybe_inline) \
static maybe_inline void vp8_ ## dir ## _loop_filter ## size ## _c(uint8_t *dst, int stride,\
                                     int flim_E, int flim_I, int hev_thresh)\
{\
    int n;\
\
    for (n = 0; n < size; n++) {\
        uint8_t *edge = dst+n*stridea;\
\
        if (!normal_limit(edge, strideb, flim_E, flim_I))\
            continue;\
        if (hev(edge, strideb, hev_thresh))\
            filter_common(edge, strideb, 1);\
        else\
            filter_mbedge(edge, strideb);\
    }\
}\
\
static maybe_inline void vp8_ ## dir ## _loop_filter ## size ## _inner_c(uint8_t *dst, int stride,\
                                      int flim_E, int flim_I, int hev_thresh)\
{\
    int n;\
\
    for (n = 0; n < size; n++) {\
        uint8_t *edge = dst+n*stridea;\
\
        if (!normal_limit(edge, strideb, flim_E, flim_I))\
            continue;\
        if (hev(edge, strideb, hev_thresh))\
            filter_common(edge, strideb, 1);\
        else\
            filter_common(edge, strideb, 0);\
    }\
}

LOOP_FILTER(v, 16, 1, stride,)
LOOP_FILTER(h, 16, stride, 1,)
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12081
diff
changeset
|
267 |
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12081
diff
changeset
|
268 #define UV_LOOP_FILTER(dir, stridea, strideb) \ |
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12081
diff
changeset
|
269 LOOP_FILTER(dir, 8, stridea, strideb, av_always_inline) \ |
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12081
diff
changeset
|
270 static void vp8_ ## dir ## _loop_filter8uv_c(uint8_t *dstU, uint8_t *dstV, int stride,\ |
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12081
diff
changeset
|
271 int fE, int fI, int hev_thresh)\ |
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12081
diff
changeset
|
272 {\ |
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12081
diff
changeset
|
273 vp8_ ## dir ## _loop_filter8_c(dstU, stride, fE, fI, hev_thresh);\ |
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12081
diff
changeset
|
274 vp8_ ## dir ## _loop_filter8_c(dstV, stride, fE, fI, hev_thresh);\ |
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12081
diff
changeset
|
275 }\ |
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12081
diff
changeset
|
276 static void vp8_ ## dir ## _loop_filter8uv_inner_c(uint8_t *dstU, uint8_t *dstV, int stride,\ |
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12081
diff
changeset
|
277 int fE, int fI, int hev_thresh)\ |
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12081
diff
changeset
|
278 {\ |
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12081
diff
changeset
|
279 vp8_ ## dir ## _loop_filter8_inner_c(dstU, stride, fE, fI, hev_thresh);\ |
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12081
diff
changeset
|
280 vp8_ ## dir ## _loop_filter8_inner_c(dstV, stride, fE, fI, hev_thresh);\ |
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12081
diff
changeset
|
281 } |
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12081
diff
changeset
|
282 |
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12081
diff
changeset
|
283 UV_LOOP_FILTER(v, 1, stride) |
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12081
diff
changeset
|
284 UV_LOOP_FILTER(h, stride, 1) |
11921 | 285 |
/**
 * Simple loop filter, vertical direction: visits 16 consecutive bytes
 * starting at dst and filters across each with a pixel step of stride.
 *
 * @param dst    first of the 16 positions along the edge
 * @param stride distance in bytes between picture rows
 * @param flim   filter limit passed to simple_limit()
 */
static void vp8_v_loop_filter_simple_c(uint8_t *dst, int stride, int flim)
{
    int col;

    for (col = 0; col < 16; col++) {
        uint8_t *edge = dst + col;

        if (!simple_limit(edge, stride, flim))
            continue;
        filter_common(edge, stride, 1);
    }
}
294 | |
/**
 * Simple loop filter, horizontal direction: visits 16 positions spaced
 * stride bytes apart starting at dst and filters across each with a
 * pixel step of 1.
 *
 * @param dst    first of the 16 positions along the edge
 * @param stride distance in bytes between picture rows
 * @param flim   filter limit passed to simple_limit()
 */
static void vp8_h_loop_filter_simple_c(uint8_t *dst, int stride, int flim)
{
    int row;

    for (row = 0; row < 16; row++) {
        uint8_t *edge = dst + row*stride;

        if (!simple_limit(edge, 1, flim))
            continue;
        filter_common(edge, 1, 1);
    }
}
303 | |
/*
 * Subpel interpolation taps, one row per fractional position 1..7
 * (row index = position - 1).
 *
 * All entries are stored as magnitudes. FILTER_6TAP / FILTER_4TAP
 * subtract the terms for columns 1 and 4 and add the others, and
 * FILTER_4TAP ignores columns 0 and 5.
 */
static const uint8_t subpel_filters[7][6] = {
    /* [0] [1]  [2]  [3] [4] [5] */
    {   0,  6, 123,  12,  1,  0 },
    {   2, 11, 108,  36,  8,  1 },
    {   0,  9,  93,  50,  6,  0 },
    {   3, 16,  77,  77, 16,  3 },
    {   0,  6,  50,  93,  9,  0 },
    {   1,  8,  36, 108, 11,  2 },
    {   0,  1,  12, 123,  6,  0 },
};
313 | |
/*
 * Generates put_vp8_pixels<WIDTH>_c(): copies h rows of WIDTH bytes from
 * src to dst, advancing each pointer by its own stride after every row.
 * The trailing x and y parameters are not used.
 */
#define PUT_PIXELS(WIDTH) \
static void put_vp8_pixels ## WIDTH ##_c(uint8_t *dst, int dststride, uint8_t *src, int srcstride, int h, int x, int y) { \
    int rows_left; \
    for (rows_left = h; rows_left > 0; rows_left--) { \
        memcpy(dst, src, WIDTH); \
        dst += dststride; \
        src += srcstride; \
    } \
}

PUT_PIXELS(16)
PUT_PIXELS(8)
PUT_PIXELS(4)
11921 | 325 |
326 #define FILTER_6TAP(src, F, stride) \ | |
12007
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
327 cm[(F[2]*src[x+0*stride] - F[1]*src[x-1*stride] + F[0]*src[x-2*stride] + \ |
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
328 F[3]*src[x+1*stride] - F[4]*src[x+2*stride] + F[5]*src[x+3*stride] + 64) >> 7] |
11921 | 329 |
330 #define FILTER_4TAP(src, F, stride) \ | |
12007
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
331 cm[(F[2]*src[x+0*stride] - F[1]*src[x-1*stride] + \ |
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
332 F[3]*src[x+1*stride] - F[4]*src[x+2*stride] + 64) >> 7] |
11921 | 333 |
334 #define VP8_EPEL_H(SIZE, FILTER, FILTERNAME) \ | |
11950 | 335 static void put_vp8_epel ## SIZE ## _ ## FILTERNAME ## _c(uint8_t *dst, int dststride, uint8_t *src, int srcstride, int h, int mx, int my) \ |
11921 | 336 { \ |
337 const uint8_t *filter = subpel_filters[mx-1]; \ | |
12007
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
338 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; \ |
11921 | 339 int x, y; \ |
340 \ | |
341 for (y = 0; y < h; y++) { \ | |
342 for (x = 0; x < SIZE; x++) \ | |
343 dst[x] = FILTER(src, filter, 1); \ | |
11950 | 344 dst += dststride; \ |
345 src += srcstride; \ | |
11921 | 346 } \ |
347 } | |
348 #define VP8_EPEL_V(SIZE, FILTER, FILTERNAME) \ | |
11950 | 349 static void put_vp8_epel ## SIZE ## _ ## FILTERNAME ## _c(uint8_t *dst, int dststride, uint8_t *src, int srcstride, int h, int mx, int my) \ |
11921 | 350 { \ |
351 const uint8_t *filter = subpel_filters[my-1]; \ | |
12007
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
352 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; \ |
11921 | 353 int x, y; \ |
354 \ | |
355 for (y = 0; y < h; y++) { \ | |
356 for (x = 0; x < SIZE; x++) \ | |
11950 | 357 dst[x] = FILTER(src, filter, srcstride); \ |
358 dst += dststride; \ | |
359 src += srcstride; \ | |
11921 | 360 } \ |
361 } | |
362 #define VP8_EPEL_HV(SIZE, FILTERX, FILTERY, FILTERNAME) \ | |
11950 | 363 static void put_vp8_epel ## SIZE ## _ ## FILTERNAME ## _c(uint8_t *dst, int dststride, uint8_t *src, int srcstride, int h, int mx, int my) \ |
11921 | 364 { \ |
365 const uint8_t *filter = subpel_filters[mx-1]; \ | |
12007
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
366 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; \ |
11921 | 367 int x, y; \ |
368 uint8_t tmp_array[(2*SIZE+5)*SIZE]; \ | |
369 uint8_t *tmp = tmp_array; \ | |
11950 | 370 src -= 2*srcstride; \ |
11921 | 371 \ |
372 for (y = 0; y < h+5; y++) { \ | |
373 for (x = 0; x < SIZE; x++) \ | |
374 tmp[x] = FILTERX(src, filter, 1); \ | |
375 tmp += SIZE; \ | |
11950 | 376 src += srcstride; \ |
11921 | 377 } \ |
378 \ | |
379 tmp = tmp_array + 2*SIZE; \ | |
380 filter = subpel_filters[my-1]; \ | |
381 \ | |
382 for (y = 0; y < h; y++) { \ | |
383 for (x = 0; x < SIZE; x++) \ | |
384 dst[x] = FILTERY(tmp, filter, SIZE); \ | |
11950 | 385 dst += dststride; \ |
11921 | 386 tmp += SIZE; \ |
387 } \ | |
388 } | |
389 | |
390 VP8_EPEL_H(16, FILTER_4TAP, h4) | |
391 VP8_EPEL_H(8, FILTER_4TAP, h4) | |
392 VP8_EPEL_H(4, FILTER_4TAP, h4) | |
393 VP8_EPEL_H(16, FILTER_6TAP, h6) | |
394 VP8_EPEL_H(8, FILTER_6TAP, h6) | |
395 VP8_EPEL_H(4, FILTER_6TAP, h6) | |
396 VP8_EPEL_V(16, FILTER_4TAP, v4) | |
397 VP8_EPEL_V(8, FILTER_4TAP, v4) | |
398 VP8_EPEL_V(4, FILTER_4TAP, v4) | |
399 VP8_EPEL_V(16, FILTER_6TAP, v6) | |
400 VP8_EPEL_V(8, FILTER_6TAP, v6) | |
401 VP8_EPEL_V(4, FILTER_6TAP, v6) | |
402 VP8_EPEL_HV(16, FILTER_4TAP, FILTER_4TAP, h4v4) | |
403 VP8_EPEL_HV(8, FILTER_4TAP, FILTER_4TAP, h4v4) | |
404 VP8_EPEL_HV(4, FILTER_4TAP, FILTER_4TAP, h4v4) | |
405 VP8_EPEL_HV(16, FILTER_4TAP, FILTER_6TAP, h4v6) | |
406 VP8_EPEL_HV(8, FILTER_4TAP, FILTER_6TAP, h4v6) | |
407 VP8_EPEL_HV(4, FILTER_4TAP, FILTER_6TAP, h4v6) | |
408 VP8_EPEL_HV(16, FILTER_6TAP, FILTER_4TAP, h6v4) | |
409 VP8_EPEL_HV(8, FILTER_6TAP, FILTER_4TAP, h6v4) | |
410 VP8_EPEL_HV(4, FILTER_6TAP, FILTER_4TAP, h6v4) | |
411 VP8_EPEL_HV(16, FILTER_6TAP, FILTER_6TAP, h6v6) | |
412 VP8_EPEL_HV(8, FILTER_6TAP, FILTER_6TAP, h6v6) | |
413 VP8_EPEL_HV(4, FILTER_6TAP, FILTER_6TAP, h6v6) | |
414 | |
/*
 * Generates the bilinear interpolation functions for one block width:
 *   put_vp8_bilinear<SIZE>_h_c   - horizontal, weights (8-mx, mx)
 *   put_vp8_bilinear<SIZE>_v_c   - vertical,   weights (8-my, my)
 *   put_vp8_bilinear<SIZE>_hv_c  - horizontal pass into a temporary,
 *                                  then vertical pass into dst
 * Every result is rounded with (+4) >> 3.
 *
 * Parameters follow the layout of the other put_vp8_* functions:
 * `stride` is the destination stride and `s2` the source stride. The
 * source pointer and the vertical source tap advance by s2, so the
 * functions are also correct when the two strides differ.
 */
#define VP8_BILINEAR(SIZE) \
static void put_vp8_bilinear ## SIZE ## _h_c(uint8_t *dst, int stride, uint8_t *src, int s2, int h, int mx, int my) \
{ \
    int a = 8-mx, b = mx; \
    int x, y; \
\
    for (y = 0; y < h; y++) { \
        for (x = 0; x < SIZE; x++) \
            dst[x] = (a*src[x] + b*src[x+1] + 4) >> 3; \
        dst += stride; \
        src += s2; \
    } \
} \
static void put_vp8_bilinear ## SIZE ## _v_c(uint8_t *dst, int stride, uint8_t *src, int s2, int h, int mx, int my) \
{ \
    int c = 8-my, d = my; \
    int x, y; \
\
    for (y = 0; y < h; y++) { \
        for (x = 0; x < SIZE; x++) \
            dst[x] = (c*src[x] + d*src[x+s2] + 4) >> 3; \
        dst += stride; \
        src += s2; \
    } \
} \
\
static void put_vp8_bilinear ## SIZE ## _hv_c(uint8_t *dst, int stride, uint8_t *src, int s2, int h, int mx, int my) \
{ \
    int a = 8-mx, b = mx; \
    int c = 8-my, d = my; \
    int x, y; \
    uint8_t tmp_array[(2*SIZE+1)*SIZE]; \
    uint8_t *tmp = tmp_array; \
\
    /* horizontal pass: h+1 rows, the vertical pass reads one row below */ \
    for (y = 0; y < h+1; y++) { \
        for (x = 0; x < SIZE; x++) \
            tmp[x] = (a*src[x] + b*src[x+1] + 4) >> 3; \
        tmp += SIZE; \
        src += s2; \
    } \
\
    tmp = tmp_array; \
\
    for (y = 0; y < h; y++) { \
        for (x = 0; x < SIZE; x++) \
            dst[x] = (c*tmp[x] + d*tmp[x+SIZE] + 4) >> 3; \
        dst += stride; \
        tmp += SIZE; \
    } \
}

VP8_BILINEAR(16)
VP8_BILINEAR(8)
VP8_BILINEAR(4)
469 | |
11921 | 470 #define VP8_MC_FUNC(IDX, SIZE) \ |
11950 | 471 dsp->put_vp8_epel_pixels_tab[IDX][0][0] = put_vp8_pixels ## SIZE ## _c; \ |
11921 | 472 dsp->put_vp8_epel_pixels_tab[IDX][0][1] = put_vp8_epel ## SIZE ## _h4_c; \ |
473 dsp->put_vp8_epel_pixels_tab[IDX][0][2] = put_vp8_epel ## SIZE ## _h6_c; \ | |
474 dsp->put_vp8_epel_pixels_tab[IDX][1][0] = put_vp8_epel ## SIZE ## _v4_c; \ | |
475 dsp->put_vp8_epel_pixels_tab[IDX][1][1] = put_vp8_epel ## SIZE ## _h4v4_c; \ | |
476 dsp->put_vp8_epel_pixels_tab[IDX][1][2] = put_vp8_epel ## SIZE ## _h6v4_c; \ | |
477 dsp->put_vp8_epel_pixels_tab[IDX][2][0] = put_vp8_epel ## SIZE ## _v6_c; \ | |
478 dsp->put_vp8_epel_pixels_tab[IDX][2][1] = put_vp8_epel ## SIZE ## _h4v6_c; \ | |
479 dsp->put_vp8_epel_pixels_tab[IDX][2][2] = put_vp8_epel ## SIZE ## _h6v6_c | |
480 | |
11974 | 481 #define VP8_BILINEAR_MC_FUNC(IDX, SIZE) \ |
482 dsp->put_vp8_bilinear_pixels_tab[IDX][0][0] = put_vp8_pixels ## SIZE ## _c; \ | |
483 dsp->put_vp8_bilinear_pixels_tab[IDX][0][1] = put_vp8_bilinear ## SIZE ## _h_c; \ | |
484 dsp->put_vp8_bilinear_pixels_tab[IDX][0][2] = put_vp8_bilinear ## SIZE ## _h_c; \ | |
485 dsp->put_vp8_bilinear_pixels_tab[IDX][1][0] = put_vp8_bilinear ## SIZE ## _v_c; \ | |
486 dsp->put_vp8_bilinear_pixels_tab[IDX][1][1] = put_vp8_bilinear ## SIZE ## _hv_c; \ | |
487 dsp->put_vp8_bilinear_pixels_tab[IDX][1][2] = put_vp8_bilinear ## SIZE ## _hv_c; \ | |
488 dsp->put_vp8_bilinear_pixels_tab[IDX][2][0] = put_vp8_bilinear ## SIZE ## _v_c; \ | |
489 dsp->put_vp8_bilinear_pixels_tab[IDX][2][1] = put_vp8_bilinear ## SIZE ## _hv_c; \ | |
490 dsp->put_vp8_bilinear_pixels_tab[IDX][2][2] = put_vp8_bilinear ## SIZE ## _hv_c | |
491 | |
11921 | 492 av_cold void ff_vp8dsp_init(VP8DSPContext *dsp) |
493 { | |
12241
c7f6ddcc5c01
VP8: optimize DC-only chroma case in the same way as luma.
darkshikari
parents:
12238
diff
changeset
|
494 dsp->vp8_luma_dc_wht = vp8_luma_dc_wht_c; |
12342 | 495 dsp->vp8_luma_dc_wht_dc = vp8_luma_dc_wht_dc_c; |
12241
c7f6ddcc5c01
VP8: optimize DC-only chroma case in the same way as luma.
darkshikari
parents:
12238
diff
changeset
|
496 dsp->vp8_idct_add = vp8_idct_add_c; |
c7f6ddcc5c01
VP8: optimize DC-only chroma case in the same way as luma.
darkshikari
parents:
12238
diff
changeset
|
497 dsp->vp8_idct_dc_add = vp8_idct_dc_add_c; |
c7f6ddcc5c01
VP8: optimize DC-only chroma case in the same way as luma.
darkshikari
parents:
12238
diff
changeset
|
498 dsp->vp8_idct_dc_add4y = vp8_idct_dc_add4y_c; |
c7f6ddcc5c01
VP8: optimize DC-only chroma case in the same way as luma.
darkshikari
parents:
12238
diff
changeset
|
499 dsp->vp8_idct_dc_add4uv = vp8_idct_dc_add4uv_c; |
11921 | 500 |
12194
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12081
diff
changeset
|
501 dsp->vp8_v_loop_filter16y = vp8_v_loop_filter16_c; |
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12081
diff
changeset
|
502 dsp->vp8_h_loop_filter16y = vp8_h_loop_filter16_c; |
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12081
diff
changeset
|
503 dsp->vp8_v_loop_filter8uv = vp8_v_loop_filter8uv_c; |
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12081
diff
changeset
|
504 dsp->vp8_h_loop_filter8uv = vp8_h_loop_filter8uv_c; |
11921 | 505 |
12194
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12081
diff
changeset
|
506 dsp->vp8_v_loop_filter16y_inner = vp8_v_loop_filter16_inner_c; |
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12081
diff
changeset
|
507 dsp->vp8_h_loop_filter16y_inner = vp8_h_loop_filter16_inner_c; |
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12081
diff
changeset
|
508 dsp->vp8_v_loop_filter8uv_inner = vp8_v_loop_filter8uv_inner_c; |
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12081
diff
changeset
|
509 dsp->vp8_h_loop_filter8uv_inner = vp8_h_loop_filter8uv_inner_c; |
11921 | 510 |
511 dsp->vp8_v_loop_filter_simple = vp8_v_loop_filter_simple_c; | |
512 dsp->vp8_h_loop_filter_simple = vp8_h_loop_filter_simple_c; | |
513 | |
514 VP8_MC_FUNC(0, 16); | |
515 VP8_MC_FUNC(1, 8); | |
516 VP8_MC_FUNC(2, 4); | |
11974 | 517 |
518 VP8_BILINEAR_MC_FUNC(0, 16); | |
519 VP8_BILINEAR_MC_FUNC(1, 8); | |
520 VP8_BILINEAR_MC_FUNC(2, 4); | |
11975 | 521 |
11985 | 522 if (HAVE_MMX) |
11975 | 523 ff_vp8dsp_init_x86(dsp); |
12011 | 524 if (HAVE_ALTIVEC) |
525 ff_vp8dsp_init_altivec(dsp); | |
11921 | 526 } |