Mercurial > libavcodec.hg
annotate vp8dsp.c @ 12340:2d15f62f4f8a libavcodec
VP8: move zeroing of luma DC block into the WHT
Lets us do the zeroing in asm instead of C.
Also makes it consistent with the way the regular iDCT code does it.
author | darkshikari |
---|---|
date | Mon, 02 Aug 2010 20:18:09 +0000 |
parents | c7f6ddcc5c01 |
children | b4c63ffd959b |
rev | line source |
---|---|
11921 | 1 /** |
2 * VP8 compatible video decoder | |
3 * | |
4 * Copyright (C) 2010 David Conrad | |
5 * Copyright (C) 2010 Ronald S. Bultje | |
6 * | |
7 * This file is part of FFmpeg. | |
8 * | |
9 * FFmpeg is free software; you can redistribute it and/or | |
10 * modify it under the terms of the GNU Lesser General Public | |
11 * License as published by the Free Software Foundation; either | |
12 * version 2.1 of the License, or (at your option) any later version. | |
13 * | |
14 * FFmpeg is distributed in the hope that it will be useful, | |
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
17 * Lesser General Public License for more details. | |
18 * | |
19 * You should have received a copy of the GNU Lesser General Public | |
20 * License along with FFmpeg; if not, write to the Free Software | |
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
22 */ | |
23 | |
24 #include "dsputil.h" | |
25 #include "vp8dsp.h" | |
26 | |
27 // TODO: Maybe add dequant | |
28 static void vp8_luma_dc_wht_c(DCTELEM block[4][4][16], DCTELEM dc[16]) | |
29 { | |
30 int i, t0, t1, t2, t3; | |
31 | |
32 for (i = 0; i < 4; i++) { | |
33 t0 = dc[0*4+i] + dc[3*4+i]; | |
34 t1 = dc[1*4+i] + dc[2*4+i]; | |
35 t2 = dc[1*4+i] - dc[2*4+i]; | |
36 t3 = dc[0*4+i] - dc[3*4+i]; | |
37 | |
38 dc[0*4+i] = t0 + t1; | |
39 dc[1*4+i] = t3 + t2; | |
40 dc[2*4+i] = t0 - t1; | |
41 dc[3*4+i] = t3 - t2; | |
42 } | |
43 | |
44 for (i = 0; i < 4; i++) { | |
45 t0 = dc[i*4+0] + dc[i*4+3] + 3; // rounding | |
46 t1 = dc[i*4+1] + dc[i*4+2]; | |
47 t2 = dc[i*4+1] - dc[i*4+2]; | |
48 t3 = dc[i*4+0] - dc[i*4+3] + 3; // rounding | |
12340
2d15f62f4f8a
VP8: move zeroing of luma DC block into the WHT
darkshikari
parents:
12241
diff
changeset
|
49 dc[i*4+0] = 0; |
2d15f62f4f8a
VP8: move zeroing of luma DC block into the WHT
darkshikari
parents:
12241
diff
changeset
|
50 dc[i*4+1] = 0; |
2d15f62f4f8a
VP8: move zeroing of luma DC block into the WHT
darkshikari
parents:
12241
diff
changeset
|
51 dc[i*4+2] = 0; |
2d15f62f4f8a
VP8: move zeroing of luma DC block into the WHT
darkshikari
parents:
12241
diff
changeset
|
52 dc[i*4+3] = 0; |
11921 | 53 |
54 *block[i][0] = (t0 + t1) >> 3; | |
55 *block[i][1] = (t3 + t2) >> 3; | |
56 *block[i][2] = (t0 - t1) >> 3; | |
57 *block[i][3] = (t3 - t2) >> 3; | |
58 } | |
59 } | |
60 | |
61 | |
62 #define MUL_20091(a) ((((a)*20091) >> 16) + (a)) | |
63 #define MUL_35468(a) (((a)*35468) >> 16) | |
64 | |
65 static void vp8_idct_add_c(uint8_t *dst, DCTELEM block[16], int stride) | |
66 { | |
67 int i, t0, t1, t2, t3; | |
12007
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
68 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; |
11921 | 69 DCTELEM tmp[16]; |
70 | |
71 for (i = 0; i < 4; i++) { | |
72 t0 = block[0*4+i] + block[2*4+i]; | |
73 t1 = block[0*4+i] - block[2*4+i]; | |
74 t2 = MUL_35468(block[1*4+i]) - MUL_20091(block[3*4+i]); | |
75 t3 = MUL_20091(block[1*4+i]) + MUL_35468(block[3*4+i]); | |
12235
e08d65897115
VP8: clear DCT blocks in iDCT instead of using clear_blocks.
darkshikari
parents:
12194
diff
changeset
|
76 block[0*4+i] = 0; |
e08d65897115
VP8: clear DCT blocks in iDCT instead of using clear_blocks.
darkshikari
parents:
12194
diff
changeset
|
77 block[1*4+i] = 0; |
e08d65897115
VP8: clear DCT blocks in iDCT instead of using clear_blocks.
darkshikari
parents:
12194
diff
changeset
|
78 block[2*4+i] = 0; |
e08d65897115
VP8: clear DCT blocks in iDCT instead of using clear_blocks.
darkshikari
parents:
12194
diff
changeset
|
79 block[3*4+i] = 0; |
11921 | 80 |
81 tmp[i*4+0] = t0 + t3; | |
82 tmp[i*4+1] = t1 + t2; | |
83 tmp[i*4+2] = t1 - t2; | |
84 tmp[i*4+3] = t0 - t3; | |
85 } | |
86 | |
87 for (i = 0; i < 4; i++) { | |
88 t0 = tmp[0*4+i] + tmp[2*4+i]; | |
89 t1 = tmp[0*4+i] - tmp[2*4+i]; | |
90 t2 = MUL_35468(tmp[1*4+i]) - MUL_20091(tmp[3*4+i]); | |
91 t3 = MUL_20091(tmp[1*4+i]) + MUL_35468(tmp[3*4+i]); | |
92 | |
12007
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
93 dst[0] = cm[dst[0] + ((t0 + t3 + 4) >> 3)]; |
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
94 dst[1] = cm[dst[1] + ((t1 + t2 + 4) >> 3)]; |
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
95 dst[2] = cm[dst[2] + ((t1 - t2 + 4) >> 3)]; |
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
96 dst[3] = cm[dst[3] + ((t0 - t3 + 4) >> 3)]; |
11921 | 97 dst += stride; |
98 } | |
99 } | |
100 | |
101 static void vp8_idct_dc_add_c(uint8_t *dst, DCTELEM block[16], int stride) | |
102 { | |
103 int i, dc = (block[0] + 4) >> 3; | |
12007
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
104 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP + dc; |
12235
e08d65897115
VP8: clear DCT blocks in iDCT instead of using clear_blocks.
darkshikari
parents:
12194
diff
changeset
|
105 block[0] = 0; |
11921 | 106 |
107 for (i = 0; i < 4; i++) { | |
12007
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
108 dst[0] = cm[dst[0]]; |
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
109 dst[1] = cm[dst[1]]; |
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
110 dst[2] = cm[dst[2]]; |
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
111 dst[3] = cm[dst[3]]; |
11921 | 112 dst += stride; |
113 } | |
114 } | |
115 | |
12241
c7f6ddcc5c01
VP8: optimize DC-only chroma case in the same way as luma.
darkshikari
parents:
12238
diff
changeset
|
116 static void vp8_idct_dc_add4uv_c(uint8_t *dst, DCTELEM block[4][16], int stride) |
12238 | 117 { |
12241
c7f6ddcc5c01
VP8: optimize DC-only chroma case in the same way as luma.
darkshikari
parents:
12238
diff
changeset
|
118 vp8_idct_dc_add_c(dst+stride*0+0, block[0], stride); |
c7f6ddcc5c01
VP8: optimize DC-only chroma case in the same way as luma.
darkshikari
parents:
12238
diff
changeset
|
119 vp8_idct_dc_add_c(dst+stride*0+4, block[1], stride); |
c7f6ddcc5c01
VP8: optimize DC-only chroma case in the same way as luma.
darkshikari
parents:
12238
diff
changeset
|
120 vp8_idct_dc_add_c(dst+stride*4+0, block[2], stride); |
c7f6ddcc5c01
VP8: optimize DC-only chroma case in the same way as luma.
darkshikari
parents:
12238
diff
changeset
|
121 vp8_idct_dc_add_c(dst+stride*4+4, block[3], stride); |
c7f6ddcc5c01
VP8: optimize DC-only chroma case in the same way as luma.
darkshikari
parents:
12238
diff
changeset
|
122 } |
c7f6ddcc5c01
VP8: optimize DC-only chroma case in the same way as luma.
darkshikari
parents:
12238
diff
changeset
|
123 |
c7f6ddcc5c01
VP8: optimize DC-only chroma case in the same way as luma.
darkshikari
parents:
12238
diff
changeset
|
124 static void vp8_idct_dc_add4y_c(uint8_t *dst, DCTELEM block[4][16], int stride) |
c7f6ddcc5c01
VP8: optimize DC-only chroma case in the same way as luma.
darkshikari
parents:
12238
diff
changeset
|
125 { |
c7f6ddcc5c01
VP8: optimize DC-only chroma case in the same way as luma.
darkshikari
parents:
12238
diff
changeset
|
126 vp8_idct_dc_add_c(dst+ 0, block[0], stride); |
c7f6ddcc5c01
VP8: optimize DC-only chroma case in the same way as luma.
darkshikari
parents:
12238
diff
changeset
|
127 vp8_idct_dc_add_c(dst+ 4, block[1], stride); |
c7f6ddcc5c01
VP8: optimize DC-only chroma case in the same way as luma.
darkshikari
parents:
12238
diff
changeset
|
128 vp8_idct_dc_add_c(dst+ 8, block[2], stride); |
c7f6ddcc5c01
VP8: optimize DC-only chroma case in the same way as luma.
darkshikari
parents:
12238
diff
changeset
|
129 vp8_idct_dc_add_c(dst+12, block[3], stride); |
12238 | 130 } |
11921 | 131 |
/*
 * Declares the eight pixels straddling an edge as local ints: p3..p0 are
 * the four samples before position p, q0..q3 the four samples starting at
 * p. Relies on `p` and `stride` being in scope at the point of use, which
 * keeps the helper functions down to two pixel-related parameters.
 * Each variable is tagged av_unused because most users need only a subset.
 */
#define LOAD_PIXELS\
    int av_unused p3 = p[-4*stride];\
    int av_unused p2 = p[-3*stride];\
    int av_unused p1 = p[-2*stride];\
    int av_unused p0 = p[-1*stride];\
    int av_unused q0 = p[ 0*stride];\
    int av_unused q1 = p[ 1*stride];\
    int av_unused q2 = p[ 2*stride];\
    int av_unused q3 = p[ 3*stride];
142 | |
12007
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
/*
 * Clamp n to the signed 8-bit range by biasing it into the crop table.
 * Relies on a `cm` pointer being in scope at the point of use; assuming cm
 * clamps its index to 0..255, the result lies in [-128, 127].
 * The argument is parenthesized so that expressions using operators with
 * lower precedence than '+' (e.g. `a & b`, `c ? x : y`) are biased as a whole.
 */
#define clip_int8(n) (cm[(n)+0x80]-0x80)
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
144 |
11921 | 145 static av_always_inline void filter_common(uint8_t *p, int stride, int is4tap) |
146 { | |
147 LOAD_PIXELS | |
148 int a, f1, f2; | |
12007
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
149 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; |
11921 | 150 |
151 a = 3*(q0 - p0); | |
152 | |
153 if (is4tap) | |
12007
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
154 a += clip_int8(p1 - q1); |
11921 | 155 |
12007
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
156 a = clip_int8(a); |
11921 | 157 |
158 // We deviate from the spec here with c(a+3) >> 3 | |
159 // since that's what libvpx does. | |
160 f1 = FFMIN(a+4, 127) >> 3; | |
161 f2 = FFMIN(a+3, 127) >> 3; | |
162 | |
163 // Despite what the spec says, we do need to clamp here to | |
164 // be bitexact with libvpx. | |
12007
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
165 p[-1*stride] = cm[p0 + f2]; |
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
166 p[ 0*stride] = cm[q0 - f1]; |
11921 | 167 |
168 // only used for _inner on blocks without high edge variance | |
169 if (!is4tap) { | |
170 a = (f1+1)>>1; | |
12007
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
171 p[-2*stride] = cm[p1 + a]; |
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
172 p[ 1*stride] = cm[q1 - a]; |
11921 | 173 } |
174 } | |
175 | |
176 static av_always_inline int simple_limit(uint8_t *p, int stride, int flim) | |
177 { | |
178 LOAD_PIXELS | |
179 return 2*FFABS(p0-q0) + (FFABS(p1-q1) >> 1) <= flim; | |
180 } | |
181 | |
182 /** | |
183 * E - limit at the macroblock edge | |
184 * I - limit for interior difference | |
185 */ | |
186 static av_always_inline int normal_limit(uint8_t *p, int stride, int E, int I) | |
187 { | |
188 LOAD_PIXELS | |
12081
812e23197d64
VP8: Move calculation of outer filter limit out of dsp functions for normal
conrad
parents:
12011
diff
changeset
|
189 return simple_limit(p, stride, E) |
11921 | 190 && FFABS(p3-p2) <= I && FFABS(p2-p1) <= I && FFABS(p1-p0) <= I |
191 && FFABS(q3-q2) <= I && FFABS(q2-q1) <= I && FFABS(q1-q0) <= I; | |
192 } | |
193 | |
194 // high edge variance | |
195 static av_always_inline int hev(uint8_t *p, int stride, int thresh) | |
196 { | |
197 LOAD_PIXELS | |
198 return FFABS(p1-p0) > thresh || FFABS(q1-q0) > thresh; | |
199 } | |
200 | |
201 static av_always_inline void filter_mbedge(uint8_t *p, int stride) | |
202 { | |
203 int a0, a1, a2, w; | |
12007
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
204 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; |
11921 | 205 |
206 LOAD_PIXELS | |
207 | |
12007
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
208 w = clip_int8(p1-q1); |
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
209 w = clip_int8(w + 3*(q0-p0)); |
11921 | 210 |
211 a0 = (27*w + 63) >> 7; | |
212 a1 = (18*w + 63) >> 7; | |
213 a2 = ( 9*w + 63) >> 7; | |
214 | |
12007
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
215 p[-3*stride] = cm[p2 + a2]; |
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
216 p[-2*stride] = cm[p1 + a1]; |
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
217 p[-1*stride] = cm[p0 + a0]; |
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
218 p[ 0*stride] = cm[q0 - a0]; |
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
219 p[ 1*stride] = cm[q1 - a1]; |
ec7be1d7d5b4
Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents:
11985
diff
changeset
|
220 p[ 2*stride] = cm[q2 - a2]; |
11921 | 221 } |
222 | |
12194
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12081
diff
changeset
|
/*
 * Generates the normal loop filter pair for one direction and edge length:
 *   vp8_<dir>_loop_filter<size>_c        - macroblock-edge filter
 *   vp8_<dir>_loop_filter<size>_inner_c  - inner-edge filter
 *
 * stridea is the step between successive positions along the edge and
 * strideb the step across it; both are spelled in terms of the generated
 * function's `stride` parameter. Positions rejected by normal_limit() are
 * left untouched. With high edge variance both variants use
 * filter_common() in 4-tap mode; otherwise the edge variant uses
 * filter_mbedge() and the inner variant filter_common() in non-4-tap mode.
 */
#define LOOP_FILTER(dir, size, stridea, strideb, maybe_inline) \
static maybe_inline void vp8_ ## dir ## _loop_filter ## size ## _c(uint8_t *dst, int stride,\
                                     int flim_E, int flim_I, int hev_thresh)\
{\
    int n;\
\
    for (n = 0; n < size; n++) {\
        uint8_t *edge = dst + n*stridea;\
\
        if (!normal_limit(edge, strideb, flim_E, flim_I))\
            continue;\
        if (hev(edge, strideb, hev_thresh))\
            filter_common(edge, strideb, 1);\
        else\
            filter_mbedge(edge, strideb);\
    }\
}\
\
static maybe_inline void vp8_ ## dir ## _loop_filter ## size ## _inner_c(uint8_t *dst, int stride,\
                                      int flim_E, int flim_I, int hev_thresh)\
{\
    int n;\
\
    for (n = 0; n < size; n++) {\
        uint8_t *edge = dst + n*stridea;\
\
        if (!normal_limit(edge, strideb, flim_E, flim_I))\
            continue;\
        if (hev(edge, strideb, hev_thresh))\
            filter_common(edge, strideb, 1);\
        else\
            filter_common(edge, strideb, 0);\
    }\
}

LOOP_FILTER(v, 16, 1, stride,)
LOOP_FILTER(h, 16, stride, 1,)
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12081
diff
changeset
|
255 |
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12081
diff
changeset
|
256 #define UV_LOOP_FILTER(dir, stridea, strideb) \ |
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12081
diff
changeset
|
257 LOOP_FILTER(dir, 8, stridea, strideb, av_always_inline) \ |
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12081
diff
changeset
|
258 static void vp8_ ## dir ## _loop_filter8uv_c(uint8_t *dstU, uint8_t *dstV, int stride,\ |
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12081
diff
changeset
|
259 int fE, int fI, int hev_thresh)\ |
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12081
diff
changeset
|
260 {\ |
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12081
diff
changeset
|
261 vp8_ ## dir ## _loop_filter8_c(dstU, stride, fE, fI, hev_thresh);\ |
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12081
diff
changeset
|
262 vp8_ ## dir ## _loop_filter8_c(dstV, stride, fE, fI, hev_thresh);\ |
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12081
diff
changeset
|
263 }\ |
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12081
diff
changeset
|
264 static void vp8_ ## dir ## _loop_filter8uv_inner_c(uint8_t *dstU, uint8_t *dstV, int stride,\ |
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12081
diff
changeset
|
265 int fE, int fI, int hev_thresh)\ |
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12081
diff
changeset
|
266 {\ |
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12081
diff
changeset
|
267 vp8_ ## dir ## _loop_filter8_inner_c(dstU, stride, fE, fI, hev_thresh);\ |
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12081
diff
changeset
|
268 vp8_ ## dir ## _loop_filter8_inner_c(dstV, stride, fE, fI, hev_thresh);\ |
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12081
diff
changeset
|
269 } |
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12081
diff
changeset
|
270 |
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12081
diff
changeset
|
271 UV_LOOP_FILTER(v, 1, stride) |
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12081
diff
changeset
|
272 UV_LOOP_FILTER(h, stride, 1) |
11921 | 273 |
/**
 * Simple loop filter applied at 16 consecutive byte positions starting at
 * dst, using `stride` as the step across the edge.
 *
 * @param dst     first filtered position (q0 of the first column)
 * @param stride  distance in bytes between the rows straddling the edge
 * @param flim    limit passed to simple_limit(); positions failing it are skipped
 */
static void vp8_v_loop_filter_simple_c(uint8_t *dst, int stride, int flim)
{
    int col;

    for (col = 0; col < 16; col++) {
        uint8_t *edge = dst + col;

        if (!simple_limit(edge, stride, flim))
            continue;
        filter_common(edge, stride, 1);
    }
}
282 | |
/**
 * Simple loop filter applied at 16 positions spaced `stride` bytes apart
 * starting at dst, using a step of 1 byte across the edge.
 *
 * @param dst     first filtered position (q0 of the first row)
 * @param stride  distance in bytes between successive filtered positions
 * @param flim    limit passed to simple_limit(); positions failing it are skipped
 */
static void vp8_h_loop_filter_simple_c(uint8_t *dst, int stride, int flim)
{
    int row;

    for (row = 0; row < 16; row++) {
        uint8_t *edge = dst + row*stride;

        if (!simple_limit(edge, 1, flim))
            continue;
        filter_common(edge, 1, 1);
    }
}
291 | |
/*
 * Subpixel interpolation taps, one row per fractional position; the
 * filter functions index this table with (mx - 1) or (my - 1).
 * All taps are stored as magnitudes: FILTER_6TAP/FILTER_4TAP subtract the
 * taps at index 1 and 4 and add the others, so with those signs every row
 * sums to 128, matching the final (+64) >> 7 normalization.
 */
static const uint8_t subpel_filters[7][6] = {
    /*  -2   -1    0   +1   +2   +3   (sample offset of each tap) */
    {    0,   6, 123,  12,   1,   0 },
    {    2,  11, 108,  36,   8,   1 },
    {    0,   9,  93,  50,   6,   0 },
    {    3,  16,  77,  77,  16,   3 },
    {    0,   6,  50,  93,   9,   0 },
    {    1,   8,  36, 108,  11,   2 },
    {    0,   1,  12, 123,   6,   0 },
};
301 | |
/*
 * Generates put_vp8_pixels<WIDTH>_c(): copies h rows of WIDTH bytes from
 * src to dst, advancing each pointer by its own stride. The x and y
 * arguments are accepted for signature compatibility and not used.
 */
#define PUT_PIXELS(WIDTH) \
static void put_vp8_pixels ## WIDTH ##_c(uint8_t *dst, int dststride, uint8_t *src, int srcstride, int h, int x, int y) { \
    int row = 0; \
    while (row < h) { \
        memcpy(dst, src, WIDTH); \
        dst += dststride; \
        src += srcstride; \
        row++; \
    } \
}

PUT_PIXELS(16)
PUT_PIXELS(8)
PUT_PIXELS(4)
11921 | 313 |
/*
 * Subpixel filter kernels. Both expand to an expression and rely on two
 * names from the surrounding scope: `x`, the index of the sample being
 * produced, and `cm`, the crop-table pointer used for the final lookup.
 * F holds tap magnitudes; taps 1 and 4 are subtracted, the rest added,
 * and the sum is rounded with (+64) >> 7. The 4-tap form ignores F[0]
 * and F[5].
 */
#define FILTER_6TAP(src, F, stride) \
    cm[(F[0]*src[x-2*stride] - F[1]*src[x-1*stride] + F[2]*src[x+0*stride] + \
        F[3]*src[x+1*stride] - F[4]*src[x+2*stride] + F[5]*src[x+3*stride] + 64) >> 7]

#define FILTER_4TAP(src, F, stride) \
    cm[(F[3]*src[x+1*stride] - F[4]*src[x+2*stride] + \
        F[2]*src[x+0*stride] - F[1]*src[x-1*stride] + 64) >> 7]
11921 | 321 |
/*
 * Generates put_vp8_epel<SIZE>_<FILTERNAME>_c(): horizontal subpixel
 * filtering of an h-row, SIZE-column area. FILTER is FILTER_4TAP or
 * FILTER_6TAP, applied with a sample step of 1 and the taps selected by
 * mx - 1. The locals `x` and `cm` are consumed implicitly by FILTER.
 */
#define VP8_EPEL_H(SIZE, FILTER, FILTERNAME) \
static void put_vp8_epel ## SIZE ## _ ## FILTERNAME ## _c(uint8_t *dst, int dststride, uint8_t *src, int srcstride, int h, int mx, int my) \
{ \
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; \
    const uint8_t *taps = subpel_filters[mx-1]; \
    int x, rows_left; \
\
    for (rows_left = h; rows_left > 0; rows_left--) { \
        for (x = 0; x < SIZE; x++) \
            dst[x] = FILTER(src, taps, 1); \
        dst += dststride; \
        src += srcstride; \
    } \
}
/*
 * Generates put_vp8_epel<SIZE>_<FILTERNAME>_c(): vertical subpixel
 * filtering of an h-row, SIZE-column area. FILTER is FILTER_4TAP or
 * FILTER_6TAP, applied with a sample step of srcstride and the taps
 * selected by my - 1. The locals `x` and `cm` are consumed implicitly by
 * FILTER.
 */
#define VP8_EPEL_V(SIZE, FILTER, FILTERNAME) \
static void put_vp8_epel ## SIZE ## _ ## FILTERNAME ## _c(uint8_t *dst, int dststride, uint8_t *src, int srcstride, int h, int mx, int my) \
{ \
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; \
    const uint8_t *taps = subpel_filters[my-1]; \
    int x, rows_left; \
\
    for (rows_left = h; rows_left > 0; rows_left--) { \
        for (x = 0; x < SIZE; x++) \
            dst[x] = FILTER(src, taps, srcstride); \
        dst += dststride; \
        src += srcstride; \
    } \
}
/*
 * Generates put_vp8_epel<SIZE>_<FILTERNAME>_c(): separable 2-D subpixel
 * filtering. Pass one filters h + 5 source rows horizontally (starting two
 * rows above src, taps from mx - 1) into a SIZE-wide temporary; pass two
 * filters that temporary vertically (taps from my - 1) into dst, starting
 * at its third row so that two rows of context lie above the first output
 * row. The temporary holds 2*SIZE + 5 rows. The locals `x` and `cm` are
 * consumed implicitly by FILTERX/FILTERY.
 */
#define VP8_EPEL_HV(SIZE, FILTERX, FILTERY, FILTERNAME) \
static void put_vp8_epel ## SIZE ## _ ## FILTERNAME ## _c(uint8_t *dst, int dststride, uint8_t *src, int srcstride, int h, int mx, int my) \
{ \
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; \
    const uint8_t *taps = subpel_filters[mx-1]; \
    uint8_t tmp_array[(2*SIZE+5)*SIZE]; \
    uint8_t *tmp = tmp_array; \
    int x, row; \
\
    src -= 2*srcstride; \
\
    for (row = 0; row < h+5; row++) { \
        for (x = 0; x < SIZE; x++) \
            tmp[x] = FILTERX(src, taps, 1); \
        src += srcstride; \
        tmp += SIZE; \
    } \
\
    taps = subpel_filters[my-1]; \
    tmp  = tmp_array + 2*SIZE; \
\
    for (row = 0; row < h; row++) { \
        for (x = 0; x < SIZE; x++) \
            dst[x] = FILTERY(tmp, taps, SIZE); \
        tmp += SIZE; \
        dst += dststride; \
    } \
}
377 | |
378 VP8_EPEL_H(16, FILTER_4TAP, h4) | |
379 VP8_EPEL_H(8, FILTER_4TAP, h4) | |
380 VP8_EPEL_H(4, FILTER_4TAP, h4) | |
381 VP8_EPEL_H(16, FILTER_6TAP, h6) | |
382 VP8_EPEL_H(8, FILTER_6TAP, h6) | |
383 VP8_EPEL_H(4, FILTER_6TAP, h6) | |
384 VP8_EPEL_V(16, FILTER_4TAP, v4) | |
385 VP8_EPEL_V(8, FILTER_4TAP, v4) | |
386 VP8_EPEL_V(4, FILTER_4TAP, v4) | |
387 VP8_EPEL_V(16, FILTER_6TAP, v6) | |
388 VP8_EPEL_V(8, FILTER_6TAP, v6) | |
389 VP8_EPEL_V(4, FILTER_6TAP, v6) | |
390 VP8_EPEL_HV(16, FILTER_4TAP, FILTER_4TAP, h4v4) | |
391 VP8_EPEL_HV(8, FILTER_4TAP, FILTER_4TAP, h4v4) | |
392 VP8_EPEL_HV(4, FILTER_4TAP, FILTER_4TAP, h4v4) | |
393 VP8_EPEL_HV(16, FILTER_4TAP, FILTER_6TAP, h4v6) | |
394 VP8_EPEL_HV(8, FILTER_4TAP, FILTER_6TAP, h4v6) | |
395 VP8_EPEL_HV(4, FILTER_4TAP, FILTER_6TAP, h4v6) | |
396 VP8_EPEL_HV(16, FILTER_6TAP, FILTER_4TAP, h6v4) | |
397 VP8_EPEL_HV(8, FILTER_6TAP, FILTER_4TAP, h6v4) | |
398 VP8_EPEL_HV(4, FILTER_6TAP, FILTER_4TAP, h6v4) | |
399 VP8_EPEL_HV(16, FILTER_6TAP, FILTER_6TAP, h6v6) | |
400 VP8_EPEL_HV(8, FILTER_6TAP, FILTER_6TAP, h6v6) | |
401 VP8_EPEL_HV(4, FILTER_6TAP, FILTER_6TAP, h6v6) | |
402 | |
/*
 * Generates the bilinear motion-compensation kernels for one block width:
 *   put_vp8_bilinear<SIZE>_h_c   - horizontal interpolation, weights (8-mx, mx)
 *   put_vp8_bilinear<SIZE>_v_c   - vertical interpolation, weights (8-my, my)
 *   put_vp8_bilinear<SIZE>_hv_c  - horizontal pass over h + 1 rows into a
 *                                  SIZE-wide temporary, then vertical pass
 * Every output sample is rounded with (+4) >> 3.
 *
 * `stride` is the destination stride and `s2` the source stride. The
 * source pointer is stepped and indexed with s2, so the kernels are also
 * correct when the two strides differ; with equal strides the results are
 * identical to stepping both pointers by `stride`.
 * The temporary in the hv kernel holds 2*SIZE + 1 rows.
 */
#define VP8_BILINEAR(SIZE) \
static void put_vp8_bilinear ## SIZE ## _h_c(uint8_t *dst, int stride, uint8_t *src, int s2, int h, int mx, int my) \
{ \
    int a = 8-mx, b = mx; \
    int x, y; \
\
    for (y = 0; y < h; y++) { \
        for (x = 0; x < SIZE; x++) \
            dst[x] = (a*src[x] + b*src[x+1] + 4) >> 3; \
        dst += stride; \
        src += s2; \
    } \
} \
static void put_vp8_bilinear ## SIZE ## _v_c(uint8_t *dst, int stride, uint8_t *src, int s2, int h, int mx, int my) \
{ \
    int c = 8-my, d = my; \
    int x, y; \
\
    for (y = 0; y < h; y++) { \
        for (x = 0; x < SIZE; x++) \
            dst[x] = (c*src[x] + d*src[x+s2] + 4) >> 3; \
        dst += stride; \
        src += s2; \
    } \
} \
\
static void put_vp8_bilinear ## SIZE ## _hv_c(uint8_t *dst, int stride, uint8_t *src, int s2, int h, int mx, int my) \
{ \
    int a = 8-mx, b = mx; \
    int c = 8-my, d = my; \
    int x, y; \
    uint8_t tmp_array[(2*SIZE+1)*SIZE]; \
    uint8_t *tmp = tmp_array; \
\
    for (y = 0; y < h+1; y++) { \
        for (x = 0; x < SIZE; x++) \
            tmp[x] = (a*src[x] + b*src[x+1] + 4) >> 3; \
        tmp += SIZE; \
        src += s2; \
    } \
\
    tmp = tmp_array; \
\
    for (y = 0; y < h; y++) { \
        for (x = 0; x < SIZE; x++) \
            dst[x] = (c*tmp[x] + d*tmp[x+SIZE] + 4) >> 3; \
        dst += stride; \
        tmp += SIZE; \
    } \
}

VP8_BILINEAR(16)
VP8_BILINEAR(8)
VP8_BILINEAR(4)
457 | |
/*
 * Fills dsp->put_vp8_epel_pixels_tab[IDX] for block width SIZE.
 * The second index selects the vertical filter and the third the
 * horizontal one: 0 = none, 1 = 4-tap, 2 = 6-tap. Entry [0][0] is the
 * plain copy. Relies on `dsp` being in scope at the point of use.
 */
#define VP8_MC_FUNC(IDX, SIZE) \
    do { \
        dsp->put_vp8_epel_pixels_tab[IDX][0][0] = put_vp8_pixels ## SIZE ## _c; \
        dsp->put_vp8_epel_pixels_tab[IDX][0][1] = put_vp8_epel ## SIZE ## _h4_c; \
        dsp->put_vp8_epel_pixels_tab[IDX][0][2] = put_vp8_epel ## SIZE ## _h6_c; \
        dsp->put_vp8_epel_pixels_tab[IDX][1][0] = put_vp8_epel ## SIZE ## _v4_c; \
        dsp->put_vp8_epel_pixels_tab[IDX][1][1] = put_vp8_epel ## SIZE ## _h4v4_c; \
        dsp->put_vp8_epel_pixels_tab[IDX][1][2] = put_vp8_epel ## SIZE ## _h6v4_c; \
        dsp->put_vp8_epel_pixels_tab[IDX][2][0] = put_vp8_epel ## SIZE ## _v6_c; \
        dsp->put_vp8_epel_pixels_tab[IDX][2][1] = put_vp8_epel ## SIZE ## _h4v6_c; \
        dsp->put_vp8_epel_pixels_tab[IDX][2][2] = put_vp8_epel ## SIZE ## _h6v6_c; \
    } while (0)
468 | |
/*
 * Fills dsp->put_vp8_bilinear_pixels_tab[IDX] for block width SIZE.
 * The table has the same 3x3 layout as the epel table, but indices 1 and
 * 2 of each dimension map to the same bilinear kernel: [0][0] is the
 * plain copy, [0][n] horizontal, [n][0] vertical, everything else hv.
 * Relies on `dsp` being in scope at the point of use.
 */
#define VP8_BILINEAR_MC_FUNC(IDX, SIZE) \
    do { \
        dsp->put_vp8_bilinear_pixels_tab[IDX][0][0] = put_vp8_pixels   ## SIZE ## _c; \
        dsp->put_vp8_bilinear_pixels_tab[IDX][0][1] = put_vp8_bilinear ## SIZE ## _h_c; \
        dsp->put_vp8_bilinear_pixels_tab[IDX][0][2] = put_vp8_bilinear ## SIZE ## _h_c; \
        dsp->put_vp8_bilinear_pixels_tab[IDX][1][0] = put_vp8_bilinear ## SIZE ## _v_c; \
        dsp->put_vp8_bilinear_pixels_tab[IDX][1][1] = put_vp8_bilinear ## SIZE ## _hv_c; \
        dsp->put_vp8_bilinear_pixels_tab[IDX][1][2] = put_vp8_bilinear ## SIZE ## _hv_c; \
        dsp->put_vp8_bilinear_pixels_tab[IDX][2][0] = put_vp8_bilinear ## SIZE ## _v_c; \
        dsp->put_vp8_bilinear_pixels_tab[IDX][2][1] = put_vp8_bilinear ## SIZE ## _hv_c; \
        dsp->put_vp8_bilinear_pixels_tab[IDX][2][2] = put_vp8_bilinear ## SIZE ## _hv_c; \
    } while (0)
479 | |
11921 | 480 av_cold void ff_vp8dsp_init(VP8DSPContext *dsp) |
481 { | |
12241
c7f6ddcc5c01
VP8: optimize DC-only chroma case in the same way as luma.
darkshikari
parents:
12238
diff
changeset
|
482 dsp->vp8_luma_dc_wht = vp8_luma_dc_wht_c; |
c7f6ddcc5c01
VP8: optimize DC-only chroma case in the same way as luma.
darkshikari
parents:
12238
diff
changeset
|
483 dsp->vp8_idct_add = vp8_idct_add_c; |
c7f6ddcc5c01
VP8: optimize DC-only chroma case in the same way as luma.
darkshikari
parents:
12238
diff
changeset
|
484 dsp->vp8_idct_dc_add = vp8_idct_dc_add_c; |
c7f6ddcc5c01
VP8: optimize DC-only chroma case in the same way as luma.
darkshikari
parents:
12238
diff
changeset
|
485 dsp->vp8_idct_dc_add4y = vp8_idct_dc_add4y_c; |
c7f6ddcc5c01
VP8: optimize DC-only chroma case in the same way as luma.
darkshikari
parents:
12238
diff
changeset
|
486 dsp->vp8_idct_dc_add4uv = vp8_idct_dc_add4uv_c; |
11921 | 487 |
12194
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12081
diff
changeset
|
488 dsp->vp8_v_loop_filter16y = vp8_v_loop_filter16_c; |
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12081
diff
changeset
|
489 dsp->vp8_h_loop_filter16y = vp8_h_loop_filter16_c; |
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12081
diff
changeset
|
490 dsp->vp8_v_loop_filter8uv = vp8_v_loop_filter8uv_c; |
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12081
diff
changeset
|
491 dsp->vp8_h_loop_filter8uv = vp8_h_loop_filter8uv_c; |
11921 | 492 |
12194
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12081
diff
changeset
|
493 dsp->vp8_v_loop_filter16y_inner = vp8_v_loop_filter16_inner_c; |
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12081
diff
changeset
|
494 dsp->vp8_h_loop_filter16y_inner = vp8_h_loop_filter16_inner_c; |
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12081
diff
changeset
|
495 dsp->vp8_v_loop_filter8uv_inner = vp8_v_loop_filter8uv_inner_c; |
80b142c2e9f7
Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents:
12081
diff
changeset
|
496 dsp->vp8_h_loop_filter8uv_inner = vp8_h_loop_filter8uv_inner_c; |
11921 | 497 |
498 dsp->vp8_v_loop_filter_simple = vp8_v_loop_filter_simple_c; | |
499 dsp->vp8_h_loop_filter_simple = vp8_h_loop_filter_simple_c; | |
500 | |
501 VP8_MC_FUNC(0, 16); | |
502 VP8_MC_FUNC(1, 8); | |
503 VP8_MC_FUNC(2, 4); | |
11974 | 504 |
505 VP8_BILINEAR_MC_FUNC(0, 16); | |
506 VP8_BILINEAR_MC_FUNC(1, 8); | |
507 VP8_BILINEAR_MC_FUNC(2, 4); | |
11975 | 508 |
11985 | 509 if (HAVE_MMX) |
11975 | 510 ff_vp8dsp_init_x86(dsp); |
12011 | 511 if (HAVE_ALTIVEC) |
512 ff_vp8dsp_init_altivec(dsp); | |
11921 | 513 } |