annotate vp8dsp.c @ 12194:80b142c2e9f7 libavcodec

Change the function prototypes for the width=8 inner and mbedge loopfilter functions so that they process both the U and V planes at the same time. This will have speed advantages when using SSE2 (or higher) optimizations, since we can do both the U and V rows together in a single xmm register. This also renames filter16 to filter16y and filter8 to filter8uv so that it's more obvious what each function is used for.
author rbultje
date Mon, 19 Jul 2010 21:18:04 +0000
parents 812e23197d64
children e08d65897115
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
11921
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
1 /**
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
2 * VP8 compatible video decoder
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
3 *
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
4 * Copyright (C) 2010 David Conrad
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
5 * Copyright (C) 2010 Ronald S. Bultje
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
6 *
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
7 * This file is part of FFmpeg.
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
8 *
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
9 * FFmpeg is free software; you can redistribute it and/or
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
10 * modify it under the terms of the GNU Lesser General Public
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
11 * License as published by the Free Software Foundation; either
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
12 * version 2.1 of the License, or (at your option) any later version.
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
13 *
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
14 * FFmpeg is distributed in the hope that it will be useful,
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
17 * Lesser General Public License for more details.
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
18 *
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
19 * You should have received a copy of the GNU Lesser General Public
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
20 * License along with FFmpeg; if not, write to the Free Software
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
22 */
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
23
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
24 #include "dsputil.h"
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
25 #include "vp8dsp.h"
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
26
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
27 // TODO: Maybe add dequant
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
28 static void vp8_luma_dc_wht_c(DCTELEM block[4][4][16], DCTELEM dc[16])
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
29 {
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
30 int i, t0, t1, t2, t3;
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
31
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
32 for (i = 0; i < 4; i++) {
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
33 t0 = dc[0*4+i] + dc[3*4+i];
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
34 t1 = dc[1*4+i] + dc[2*4+i];
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
35 t2 = dc[1*4+i] - dc[2*4+i];
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
36 t3 = dc[0*4+i] - dc[3*4+i];
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
37
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
38 dc[0*4+i] = t0 + t1;
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
39 dc[1*4+i] = t3 + t2;
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
40 dc[2*4+i] = t0 - t1;
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
41 dc[3*4+i] = t3 - t2;
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
42 }
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
43
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
44 for (i = 0; i < 4; i++) {
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
45 t0 = dc[i*4+0] + dc[i*4+3] + 3; // rounding
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
46 t1 = dc[i*4+1] + dc[i*4+2];
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
47 t2 = dc[i*4+1] - dc[i*4+2];
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
48 t3 = dc[i*4+0] - dc[i*4+3] + 3; // rounding
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
49
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
50 *block[i][0] = (t0 + t1) >> 3;
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
51 *block[i][1] = (t3 + t2) >> 3;
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
52 *block[i][2] = (t0 - t1) >> 3;
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
53 *block[i][3] = (t3 - t2) >> 3;
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
54 }
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
55 }
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
56
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
57
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
// 16.16 fixed-point multipliers used by the 4x4 inverse DCT below.
// MUL_20091(a) computes a * (1 + 20091/65536); the "+ (a)" supplies the
// integer part so the scaled constant still fits in 16 bits.
// MUL_35468(a) computes a * 35468/65536.
// These correspond to the sqrt(2)*cos(pi/8) and sqrt(2)*sin(pi/8) constants
// of the VP8 inverse DCT (RFC 6386, section 14.3).
// Both macros evaluate their argument more than once or rely on an
// arithmetic right shift for negative values; pass side-effect-free ints.
#define MUL_20091(a) ((((a)*20091) >> 16) + (a))
#define MUL_35468(a) (((a)*35468) >> 16)
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
60
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
61 static void vp8_idct_add_c(uint8_t *dst, DCTELEM block[16], int stride)
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
62 {
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
63 int i, t0, t1, t2, t3;
12007
ec7be1d7d5b4 Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents: 11985
diff changeset
64 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
11921
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
65 DCTELEM tmp[16];
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
66
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
67 for (i = 0; i < 4; i++) {
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
68 t0 = block[0*4+i] + block[2*4+i];
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
69 t1 = block[0*4+i] - block[2*4+i];
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
70 t2 = MUL_35468(block[1*4+i]) - MUL_20091(block[3*4+i]);
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
71 t3 = MUL_20091(block[1*4+i]) + MUL_35468(block[3*4+i]);
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
72
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
73 tmp[i*4+0] = t0 + t3;
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
74 tmp[i*4+1] = t1 + t2;
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
75 tmp[i*4+2] = t1 - t2;
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
76 tmp[i*4+3] = t0 - t3;
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
77 }
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
78
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
79 for (i = 0; i < 4; i++) {
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
80 t0 = tmp[0*4+i] + tmp[2*4+i];
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
81 t1 = tmp[0*4+i] - tmp[2*4+i];
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
82 t2 = MUL_35468(tmp[1*4+i]) - MUL_20091(tmp[3*4+i]);
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
83 t3 = MUL_20091(tmp[1*4+i]) + MUL_35468(tmp[3*4+i]);
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
84
12007
ec7be1d7d5b4 Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents: 11985
diff changeset
85 dst[0] = cm[dst[0] + ((t0 + t3 + 4) >> 3)];
ec7be1d7d5b4 Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents: 11985
diff changeset
86 dst[1] = cm[dst[1] + ((t1 + t2 + 4) >> 3)];
ec7be1d7d5b4 Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents: 11985
diff changeset
87 dst[2] = cm[dst[2] + ((t1 - t2 + 4) >> 3)];
ec7be1d7d5b4 Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents: 11985
diff changeset
88 dst[3] = cm[dst[3] + ((t0 - t3 + 4) >> 3)];
11921
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
89 dst += stride;
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
90 }
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
91 }
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
92
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
93 static void vp8_idct_dc_add_c(uint8_t *dst, DCTELEM block[16], int stride)
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
94 {
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
95 int i, dc = (block[0] + 4) >> 3;
12007
ec7be1d7d5b4 Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents: 11985
diff changeset
96 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP + dc;
11921
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
97
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
98 for (i = 0; i < 4; i++) {
12007
ec7be1d7d5b4 Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents: 11985
diff changeset
99 dst[0] = cm[dst[0]];
ec7be1d7d5b4 Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents: 11985
diff changeset
100 dst[1] = cm[dst[1]];
ec7be1d7d5b4 Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents: 11985
diff changeset
101 dst[2] = cm[dst[2]];
ec7be1d7d5b4 Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents: 11985
diff changeset
102 dst[3] = cm[dst[3]];
11921
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
103 dst += stride;
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
104 }
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
105 }
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
106
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
107
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
108 // because I like only having two parameters to pass functions...
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
// Declares the eight pixels straddling an edge as local ints, reading them
// from the identifiers `p` and `stride` of the enclosing function.
// p points at q0 (the first pixel past the edge); p3..p0 lie before it and
// q0..q3 at and after it, each `stride` bytes apart.
// Every variable is tagged av_unused because most users need only a subset.
// The expansion ends with its own ';', so it is used without one.
#define LOAD_PIXELS\
    int av_unused p3 = p[-4*stride];\
    int av_unused p2 = p[-3*stride];\
    int av_unused p1 = p[-2*stride];\
    int av_unused p0 = p[-1*stride];\
    int av_unused q0 = p[ 0*stride];\
    int av_unused q1 = p[ 1*stride];\
    int av_unused q2 = p[ 2*stride];\
    int av_unused q3 = p[ 3*stride];
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
118
12007
ec7be1d7d5b4 Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents: 11985
diff changeset
// Clamp n to the signed 8-bit range [-128, 127] through the crop table:
// bias by 0x80, look up the saturated unsigned value, remove the bias.
// Requires a `cm` crop-table pointer in the expanding scope.
// The argument is parenthesized so that low-precedence expressions
// (e.g. a conditional) bind as a whole before the bias is added.
#define clip_int8(n) (cm[(n)+0x80]-0x80)
ec7be1d7d5b4 Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents: 11985
diff changeset
120
11921
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
121 static av_always_inline void filter_common(uint8_t *p, int stride, int is4tap)
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
122 {
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
123 LOAD_PIXELS
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
124 int a, f1, f2;
12007
ec7be1d7d5b4 Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents: 11985
diff changeset
125 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
11921
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
126
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
127 a = 3*(q0 - p0);
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
128
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
129 if (is4tap)
12007
ec7be1d7d5b4 Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents: 11985
diff changeset
130 a += clip_int8(p1 - q1);
11921
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
131
12007
ec7be1d7d5b4 Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents: 11985
diff changeset
132 a = clip_int8(a);
11921
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
133
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
134 // We deviate from the spec here with c(a+3) >> 3
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
135 // since that's what libvpx does.
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
136 f1 = FFMIN(a+4, 127) >> 3;
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
137 f2 = FFMIN(a+3, 127) >> 3;
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
138
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
139 // Despite what the spec says, we do need to clamp here to
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
140 // be bitexact with libvpx.
12007
ec7be1d7d5b4 Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents: 11985
diff changeset
141 p[-1*stride] = cm[p0 + f2];
ec7be1d7d5b4 Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents: 11985
diff changeset
142 p[ 0*stride] = cm[q0 - f1];
11921
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
143
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
144 // only used for _inner on blocks without high edge variance
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
145 if (!is4tap) {
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
146 a = (f1+1)>>1;
12007
ec7be1d7d5b4 Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents: 11985
diff changeset
147 p[-2*stride] = cm[p1 + a];
ec7be1d7d5b4 Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents: 11985
diff changeset
148 p[ 1*stride] = cm[q1 - a];
11921
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
149 }
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
150 }
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
151
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
152 static av_always_inline int simple_limit(uint8_t *p, int stride, int flim)
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
153 {
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
154 LOAD_PIXELS
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
155 return 2*FFABS(p0-q0) + (FFABS(p1-q1) >> 1) <= flim;
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
156 }
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
157
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
158 /**
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
159 * E - limit at the macroblock edge
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
160 * I - limit for interior difference
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
161 */
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
162 static av_always_inline int normal_limit(uint8_t *p, int stride, int E, int I)
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
163 {
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
164 LOAD_PIXELS
12081
812e23197d64 VP8: Move calculation of outer filter limit out of dsp functions for normal
conrad
parents: 12011
diff changeset
165 return simple_limit(p, stride, E)
11921
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
166 && FFABS(p3-p2) <= I && FFABS(p2-p1) <= I && FFABS(p1-p0) <= I
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
167 && FFABS(q3-q2) <= I && FFABS(q2-q1) <= I && FFABS(q1-q0) <= I;
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
168 }
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
169
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
170 // high edge variance
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
171 static av_always_inline int hev(uint8_t *p, int stride, int thresh)
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
172 {
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
173 LOAD_PIXELS
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
174 return FFABS(p1-p0) > thresh || FFABS(q1-q0) > thresh;
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
175 }
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
176
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
177 static av_always_inline void filter_mbedge(uint8_t *p, int stride)
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
178 {
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
179 int a0, a1, a2, w;
12007
ec7be1d7d5b4 Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents: 11985
diff changeset
180 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
11921
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
181
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
182 LOAD_PIXELS
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
183
12007
ec7be1d7d5b4 Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents: 11985
diff changeset
184 w = clip_int8(p1-q1);
ec7be1d7d5b4 Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents: 11985
diff changeset
185 w = clip_int8(w + 3*(q0-p0));
11921
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
186
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
187 a0 = (27*w + 63) >> 7;
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
188 a1 = (18*w + 63) >> 7;
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
189 a2 = ( 9*w + 63) >> 7;
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
190
12007
ec7be1d7d5b4 Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents: 11985
diff changeset
191 p[-3*stride] = cm[p2 + a2];
ec7be1d7d5b4 Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents: 11985
diff changeset
192 p[-2*stride] = cm[p1 + a1];
ec7be1d7d5b4 Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents: 11985
diff changeset
193 p[-1*stride] = cm[p0 + a0];
ec7be1d7d5b4 Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents: 11985
diff changeset
194 p[ 0*stride] = cm[q0 - a0];
ec7be1d7d5b4 Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents: 11985
diff changeset
195 p[ 1*stride] = cm[q1 - a1];
ec7be1d7d5b4 Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents: 11985
diff changeset
196 p[ 2*stride] = cm[q2 - a2];
11921
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
197 }
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
198
12194
80b142c2e9f7 Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents: 12081
diff changeset
/*
 * LOOP_FILTER(dir, size, stridea, strideb, maybe_inline) defines two
 * functions:
 *
 *   vp8_<dir>_loop_filter<size>_c        macroblock-edge filter
 *   vp8_<dir>_loop_filter<size>_inner_c  inner-edge filter
 *
 * Both visit `size` positions along the edge, stepping by `stridea`, and
 * treat `strideb` as the step across the edge. stridea/strideb are expanded
 * inside the function bodies, so they may name the `stride` parameter.
 * A position is filtered only when normal_limit() accepts it:
 *   - edge variant:  hev() true -> filter_common(.., 1), else filter_mbedge()
 *   - inner variant: hev() true -> filter_common(.., 1),
 *                    else filter_common(.., 0)
 * `maybe_inline` is placed between `static` and the return type; it may be
 * left empty.
 */
#define LOOP_FILTER(dir, size, stridea, strideb, maybe_inline) \
static maybe_inline void vp8_ ## dir ## _loop_filter ## size ## _c(uint8_t *dst, int stride,\
                                     int flim_E, int flim_I, int hev_thresh)\
{\
    int i;\
\
    for (i = 0; i < size; i++)\
        if (normal_limit(dst+i*stridea, strideb, flim_E, flim_I)) {\
            if (hev(dst+i*stridea, strideb, hev_thresh))\
                filter_common(dst+i*stridea, strideb, 1);\
            else\
                filter_mbedge(dst+i*stridea, strideb);\
        }\
}\
\
static maybe_inline void vp8_ ## dir ## _loop_filter ## size ## _inner_c(uint8_t *dst, int stride,\
                                      int flim_E, int flim_I, int hev_thresh)\
{\
    int i;\
\
    for (i = 0; i < size; i++)\
        if (normal_limit(dst+i*stridea, strideb, flim_E, flim_I)) {\
            int hv = hev(dst+i*stridea, strideb, hev_thresh);\
            if (hv) \
                filter_common(dst+i*stridea, strideb, 1);\
            else \
                filter_common(dst+i*stridea, strideb, 0);\
        }\
}

// 16-position variants: "v" walks along a row and filters across rows,
// "h" walks down a column and filters across columns.
LOOP_FILTER(v, 16, 1, stride,)
LOOP_FILTER(h, 16, stride, 1,)
80b142c2e9f7 Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents: 12081
diff changeset
231
80b142c2e9f7 Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents: 12081
diff changeset
232 #define UV_LOOP_FILTER(dir, stridea, strideb) \
80b142c2e9f7 Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents: 12081
diff changeset
233 LOOP_FILTER(dir, 8, stridea, strideb, av_always_inline) \
80b142c2e9f7 Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents: 12081
diff changeset
234 static void vp8_ ## dir ## _loop_filter8uv_c(uint8_t *dstU, uint8_t *dstV, int stride,\
80b142c2e9f7 Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents: 12081
diff changeset
235 int fE, int fI, int hev_thresh)\
80b142c2e9f7 Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents: 12081
diff changeset
236 {\
80b142c2e9f7 Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents: 12081
diff changeset
237 vp8_ ## dir ## _loop_filter8_c(dstU, stride, fE, fI, hev_thresh);\
80b142c2e9f7 Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents: 12081
diff changeset
238 vp8_ ## dir ## _loop_filter8_c(dstV, stride, fE, fI, hev_thresh);\
80b142c2e9f7 Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents: 12081
diff changeset
239 }\
80b142c2e9f7 Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents: 12081
diff changeset
240 static void vp8_ ## dir ## _loop_filter8uv_inner_c(uint8_t *dstU, uint8_t *dstV, int stride,\
80b142c2e9f7 Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents: 12081
diff changeset
241 int fE, int fI, int hev_thresh)\
80b142c2e9f7 Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents: 12081
diff changeset
242 {\
80b142c2e9f7 Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents: 12081
diff changeset
243 vp8_ ## dir ## _loop_filter8_inner_c(dstU, stride, fE, fI, hev_thresh);\
80b142c2e9f7 Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents: 12081
diff changeset
244 vp8_ ## dir ## _loop_filter8_inner_c(dstV, stride, fE, fI, hev_thresh);\
80b142c2e9f7 Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents: 12081
diff changeset
245 }
80b142c2e9f7 Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents: 12081
diff changeset
246
80b142c2e9f7 Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents: 12081
diff changeset
247 UV_LOOP_FILTER(v, 1, stride)
80b142c2e9f7 Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents: 12081
diff changeset
248 UV_LOOP_FILTER(h, stride, 1)
11921
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
249
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
/**
 * Simple loop filter, "v" direction: for each of 16 consecutive bytes
 * starting at dst, filters across rows (step `stride`) wherever
 * simple_limit() accepts the position.
 *
 * @param dst    first pixel past the edge at the first of the 16 positions
 * @param stride distance in bytes between consecutive rows
 * @param flim   filter limit passed to simple_limit()
 */
static void vp8_v_loop_filter_simple_c(uint8_t *dst, int stride, int flim)
{
    int i;

    for (i = 0; i < 16; i++)
        if (simple_limit(dst+i, stride, flim))
            filter_common(dst+i, stride, 1);
}
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
258
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
/**
 * Simple loop filter, "h" direction: for each of 16 consecutive rows
 * starting at dst, filters across adjacent bytes (step 1) wherever
 * simple_limit() accepts the position.
 *
 * @param dst    first pixel past the edge in the first of the 16 rows
 * @param stride distance in bytes between consecutive rows
 * @param flim   filter limit passed to simple_limit()
 */
static void vp8_h_loop_filter_simple_c(uint8_t *dst, int stride, int flim)
{
    int i;

    for (i = 0; i < 16; i++)
        if (simple_limit(dst+i*stride, 1, flim))
            filter_common(dst+i*stride, 1, 1);
}
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
267
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
// Sub-pixel interpolation taps, one row per fractional position; users
// index it with (position - 1).
// Taps are stored as magnitudes: FILTER_6TAP / FILTER_4TAP subtract the
// products of taps [1] and [4] and add the others, so every row sums to
// 128 with those signs applied (results are scaled back with >> 7).
// Rows whose outer taps [0] and [5] are zero need only four taps.
static const uint8_t subpel_filters[7][6] = {
    { 0,   6, 123,  12,   1,   0 },
    { 2,  11, 108,  36,   8,   1 },
    { 0,   9,  93,  50,   6,   0 },
    { 3,  16,  77,  77,  16,   3 },
    { 0,   6,  50,  93,   9,   0 },
    { 1,   8,  36, 108,  11,   2 },
    { 0,   1,  12, 123,   6,   0 },
};
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
277
11950
56aba5a9761c Make VP8 DSP functions take two strides
darkshikari
parents: 11921
diff changeset
/*
 * PUT_PIXELS(WIDTH) defines put_vp8_pixels<WIDTH>_c(), a plain block copy:
 * h rows of WIDTH bytes are copied from src to dst, advancing by srcstride
 * and dststride bytes per row. The trailing x and y arguments are not used;
 * they are part of the signature only. Rows are copied with memcpy(), so
 * source and destination rows must not overlap.
 */
#define PUT_PIXELS(WIDTH) \
static void put_vp8_pixels ## WIDTH ##_c(uint8_t *dst, int dststride, uint8_t *src, int srcstride, int h, int x, int y) { \
    int i; \
    for (i = 0; i < h; i++, dst+= dststride, src+= srcstride) { \
        memcpy(dst, src, WIDTH); \
    } \
}

PUT_PIXELS(16)
PUT_PIXELS(8)
PUT_PIXELS(4)
11921
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
289
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
// Interpolation kernels. Both expect `x` (current sample index) and `cm`
// (crop-table pointer) in the expanding scope. F holds tap magnitudes; the
// products of F[1] and F[4] are subtracted, the others added. The sum is
// rounded with +64, scaled by >> 7 and passed through the crop table.
// FILTER_4TAP omits the two outermost taps F[0] and F[5].
// Arguments are parenthesized so compound expressions expand correctly.
#define FILTER_6TAP(src, F, stride) \
    cm[((F)[2]*(src)[x+0*(stride)] - (F)[1]*(src)[x-1*(stride)] + (F)[0]*(src)[x-2*(stride)] + \
        (F)[3]*(src)[x+1*(stride)] - (F)[4]*(src)[x+2*(stride)] + (F)[5]*(src)[x+3*(stride)] + 64) >> 7]

#define FILTER_4TAP(src, F, stride) \
    cm[((F)[2]*(src)[x+0*(stride)] - (F)[1]*(src)[x-1*(stride)] + \
        (F)[3]*(src)[x+1*(stride)] - (F)[4]*(src)[x+2*(stride)] + 64) >> 7]
11921
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
297
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
/**
 * Generate put_vp8_epel<SIZE>_<FILTERNAME>_c(): horizontal-only filtering.
 *
 * The generated function selects the coefficient row subpel_filters[mx-1]
 * and, for each of h rows, writes SIZE output samples computed by FILTER
 * with a tap spacing of 1 (neighbouring samples in the same row).
 * dst advances by dststride and src by srcstride per row; my is not read.
 *
 * FILTER is expected to be FILTER_4TAP or FILTER_6TAP, which pick up the
 * local x and cm variables declared here.
 */
#define VP8_EPEL_H(SIZE, FILTER, FILTERNAME) \
static void put_vp8_epel ## SIZE ## _ ## FILTERNAME ## _c(uint8_t *dst, int dststride, uint8_t *src, int srcstride, int h, int mx, int my) \
{ \
    const uint8_t *filter = subpel_filters[mx-1]; \
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; \
    int x, y; \
\
    for (y = 0; y < h; y++) { \
        for (x = 0; x < SIZE; x++) \
            dst[x] = FILTER(src, filter, 1); \
        dst += dststride; \
        src += srcstride; \
    } \
}
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
/**
 * Generate put_vp8_epel<SIZE>_<FILTERNAME>_c(): vertical-only filtering.
 *
 * The generated function selects the coefficient row subpel_filters[my-1]
 * and, for each of h rows, writes SIZE output samples computed by FILTER
 * with a tap spacing of srcstride (samples in the same column of
 * neighbouring rows). dst advances by dststride and src by srcstride per
 * row; mx is not read.
 *
 * FILTER is expected to be FILTER_4TAP or FILTER_6TAP, which pick up the
 * local x and cm variables declared here.
 */
#define VP8_EPEL_V(SIZE, FILTER, FILTERNAME) \
static void put_vp8_epel ## SIZE ## _ ## FILTERNAME ## _c(uint8_t *dst, int dststride, uint8_t *src, int srcstride, int h, int mx, int my) \
{ \
    const uint8_t *filter = subpel_filters[my-1]; \
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; \
    int x, y; \
\
    for (y = 0; y < h; y++) { \
        for (x = 0; x < SIZE; x++) \
            dst[x] = FILTER(src, filter, srcstride); \
        dst += dststride; \
        src += srcstride; \
    } \
}
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
/**
 * Generate put_vp8_epel<SIZE>_<FILTERNAME>_c(): horizontal + vertical
 * filtering done in two passes through a stack buffer.
 *
 * Pass 1 starts two rows above src and filters h+5 rows horizontally with
 * FILTERX / subpel_filters[mx-1], storing SIZE samples per row in tmp_array.
 * Pass 2 starts at row 2 of tmp_array (the row matching the original src)
 * and filters vertically with FILTERY / subpel_filters[my-1], using a tap
 * spacing of SIZE, writing h rows to dst.
 *
 * tmp_array holds 2*SIZE+5 rows, so h must not exceed 2*SIZE; this is not
 * checked here.
 *
 * NOTE(review): when FILTERY is FILTER_4TAP only tmp rows 1 .. h+3 are
 * consumed, so the first and last rows produced by pass 1 look unused in
 * that case - confirm before trimming the source reads.
 */
#define VP8_EPEL_HV(SIZE, FILTERX, FILTERY, FILTERNAME) \
static void put_vp8_epel ## SIZE ## _ ## FILTERNAME ## _c(uint8_t *dst, int dststride, uint8_t *src, int srcstride, int h, int mx, int my) \
{ \
    const uint8_t *filter = subpel_filters[mx-1]; \
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; \
    int x, y; \
    uint8_t tmp_array[(2*SIZE+5)*SIZE]; \
    uint8_t *tmp = tmp_array; \
    src -= 2*srcstride; \
\
    for (y = 0; y < h+5; y++) { \
        for (x = 0; x < SIZE; x++) \
            tmp[x] = FILTERX(src, filter, 1); \
        tmp += SIZE; \
        src += srcstride; \
    } \
\
    tmp = tmp_array + 2*SIZE; \
    filter = subpel_filters[my-1]; \
\
    for (y = 0; y < h; y++) { \
        for (x = 0; x < SIZE; x++) \
            dst[x] = FILTERY(tmp, filter, SIZE); \
        dst += dststride; \
        tmp += SIZE; \
    } \
}
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
353
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
354 VP8_EPEL_H(16, FILTER_4TAP, h4)
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
355 VP8_EPEL_H(8, FILTER_4TAP, h4)
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
356 VP8_EPEL_H(4, FILTER_4TAP, h4)
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
357 VP8_EPEL_H(16, FILTER_6TAP, h6)
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
358 VP8_EPEL_H(8, FILTER_6TAP, h6)
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
359 VP8_EPEL_H(4, FILTER_6TAP, h6)
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
360 VP8_EPEL_V(16, FILTER_4TAP, v4)
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
361 VP8_EPEL_V(8, FILTER_4TAP, v4)
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
362 VP8_EPEL_V(4, FILTER_4TAP, v4)
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
363 VP8_EPEL_V(16, FILTER_6TAP, v6)
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
364 VP8_EPEL_V(8, FILTER_6TAP, v6)
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
365 VP8_EPEL_V(4, FILTER_6TAP, v6)
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
366 VP8_EPEL_HV(16, FILTER_4TAP, FILTER_4TAP, h4v4)
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
367 VP8_EPEL_HV(8, FILTER_4TAP, FILTER_4TAP, h4v4)
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
368 VP8_EPEL_HV(4, FILTER_4TAP, FILTER_4TAP, h4v4)
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
369 VP8_EPEL_HV(16, FILTER_4TAP, FILTER_6TAP, h4v6)
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
370 VP8_EPEL_HV(8, FILTER_4TAP, FILTER_6TAP, h4v6)
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
371 VP8_EPEL_HV(4, FILTER_4TAP, FILTER_6TAP, h4v6)
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
372 VP8_EPEL_HV(16, FILTER_6TAP, FILTER_4TAP, h6v4)
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
373 VP8_EPEL_HV(8, FILTER_6TAP, FILTER_4TAP, h6v4)
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
374 VP8_EPEL_HV(4, FILTER_6TAP, FILTER_4TAP, h6v4)
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
375 VP8_EPEL_HV(16, FILTER_6TAP, FILTER_6TAP, h6v6)
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
376 VP8_EPEL_HV(8, FILTER_6TAP, FILTER_6TAP, h6v6)
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
377 VP8_EPEL_HV(4, FILTER_6TAP, FILTER_6TAP, h6v6)
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
378
11974
356b20a6566d VP8 bilinear filter
conrad
parents: 11956
diff changeset
/**
 * Generate the three bilinear functions for a block width of SIZE:
 *   put_vp8_bilinear<SIZE>_h_c   - horizontal blend, weights (8-mx, mx)
 *   put_vp8_bilinear<SIZE>_v_c   - vertical blend,   weights (8-my, my)
 *   put_vp8_bilinear<SIZE>_hv_c  - horizontal blend into a stack buffer,
 *                                  then vertical blend of that buffer
 *
 * Every blend is (w0*p0 + w1*p1 + 4) >> 3.
 *
 * Parameters of the generated functions:
 *   dst, stride - destination and its row stride
 *   src, s2     - source and its row stride
 *   h           - number of output rows
 *   mx, my      - horizontal / vertical weights (the _h variant does not
 *                 read my, the _v variant does not read mx)
 *
 * All source addressing uses s2, so the source and destination may have
 * different strides. The horizontal blends read one sample to the right of
 * the block and the vertical blends read one row below it.
 *
 * In the _hv variant tmp_array holds 2*SIZE+1 rows, so h must not exceed
 * 2*SIZE; this is not checked here.
 */
#define VP8_BILINEAR(SIZE) \
static void put_vp8_bilinear ## SIZE ## _h_c(uint8_t *dst, int stride, uint8_t *src, int s2, int h, int mx, int my) \
{ \
    int a = 8-mx, b = mx; \
    int x, y; \
\
    for (y = 0; y < h; y++) { \
        for (x = 0; x < SIZE; x++) \
            dst[x] = (a*src[x] + b*src[x+1] + 4) >> 3; \
        dst += stride; \
        src += s2; \
    } \
} \
static void put_vp8_bilinear ## SIZE ## _v_c(uint8_t *dst, int stride, uint8_t *src, int s2, int h, int mx, int my) \
{ \
    int c = 8-my, d = my; \
    int x, y; \
\
    for (y = 0; y < h; y++) { \
        for (x = 0; x < SIZE; x++) \
            dst[x] = (c*src[x] + d*src[x+s2] + 4) >> 3; \
        dst += stride; \
        src += s2; \
    } \
} \
\
static void put_vp8_bilinear ## SIZE ## _hv_c(uint8_t *dst, int stride, uint8_t *src, int s2, int h, int mx, int my) \
{ \
    int a = 8-mx, b = mx; \
    int c = 8-my, d = my; \
    int x, y; \
    uint8_t tmp_array[(2*SIZE+1)*SIZE]; \
    uint8_t *tmp = tmp_array; \
\
    for (y = 0; y < h+1; y++) { \
        for (x = 0; x < SIZE; x++) \
            tmp[x] = (a*src[x] + b*src[x+1] + 4) >> 3; \
        tmp += SIZE; \
        src += s2; \
    } \
\
    tmp = tmp_array; \
\
    for (y = 0; y < h; y++) { \
        for (x = 0; x < SIZE; x++) \
            dst[x] = (c*tmp[x] + d*tmp[x+SIZE] + 4) >> 3; \
        dst += stride; \
        tmp += SIZE; \
    } \
}

VP8_BILINEAR(16)
VP8_BILINEAR(8)
VP8_BILINEAR(4)
356b20a6566d VP8 bilinear filter
conrad
parents: 11956
diff changeset
433
11921
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
/**
 * Fill dsp->put_vp8_epel_pixels_tab[IDX] with the C functions generated
 * for block width SIZE.
 *
 * The second index selects the vertical variant (0: none, 1: v4, 2: v6)
 * and the third the horizontal variant (0: none, 1: h4, 2: h6), as shown
 * by the function names assigned below; entry [0][0] is the plain copy.
 *
 * Expects a variable named dsp in the expanding scope. The expansion is a
 * single statement, so it is safe under an unbraced if/else; terminate the
 * invocation with a semicolon.
 */
#define VP8_MC_FUNC(IDX, SIZE) \
    do { \
        dsp->put_vp8_epel_pixels_tab[IDX][0][0] = put_vp8_pixels ## SIZE ## _c; \
        dsp->put_vp8_epel_pixels_tab[IDX][0][1] = put_vp8_epel ## SIZE ## _h4_c; \
        dsp->put_vp8_epel_pixels_tab[IDX][0][2] = put_vp8_epel ## SIZE ## _h6_c; \
        dsp->put_vp8_epel_pixels_tab[IDX][1][0] = put_vp8_epel ## SIZE ## _v4_c; \
        dsp->put_vp8_epel_pixels_tab[IDX][1][1] = put_vp8_epel ## SIZE ## _h4v4_c; \
        dsp->put_vp8_epel_pixels_tab[IDX][1][2] = put_vp8_epel ## SIZE ## _h6v4_c; \
        dsp->put_vp8_epel_pixels_tab[IDX][2][0] = put_vp8_epel ## SIZE ## _v6_c; \
        dsp->put_vp8_epel_pixels_tab[IDX][2][1] = put_vp8_epel ## SIZE ## _h4v6_c; \
        dsp->put_vp8_epel_pixels_tab[IDX][2][2] = put_vp8_epel ## SIZE ## _h6v6_c; \
    } while (0)
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
444
11974
356b20a6566d VP8 bilinear filter
conrad
parents: 11956
diff changeset
/**
 * Fill dsp->put_vp8_bilinear_pixels_tab[IDX] with the C bilinear functions
 * generated for block width SIZE.
 *
 * Entry [0][0] is the plain copy. Third-index values 1 and 2 both map to
 * the same horizontal function, and second-index values 1 and 2 both map
 * to the same vertical function; every entry with both indices non-zero
 * uses the _hv function.
 *
 * Expects a variable named dsp in the expanding scope. The expansion is a
 * single statement, so it is safe under an unbraced if/else; terminate the
 * invocation with a semicolon.
 */
#define VP8_BILINEAR_MC_FUNC(IDX, SIZE) \
    do { \
        dsp->put_vp8_bilinear_pixels_tab[IDX][0][0] = put_vp8_pixels   ## SIZE ## _c; \
        dsp->put_vp8_bilinear_pixels_tab[IDX][0][1] = put_vp8_bilinear ## SIZE ## _h_c; \
        dsp->put_vp8_bilinear_pixels_tab[IDX][0][2] = put_vp8_bilinear ## SIZE ## _h_c; \
        dsp->put_vp8_bilinear_pixels_tab[IDX][1][0] = put_vp8_bilinear ## SIZE ## _v_c; \
        dsp->put_vp8_bilinear_pixels_tab[IDX][1][1] = put_vp8_bilinear ## SIZE ## _hv_c; \
        dsp->put_vp8_bilinear_pixels_tab[IDX][1][2] = put_vp8_bilinear ## SIZE ## _hv_c; \
        dsp->put_vp8_bilinear_pixels_tab[IDX][2][0] = put_vp8_bilinear ## SIZE ## _v_c; \
        dsp->put_vp8_bilinear_pixels_tab[IDX][2][1] = put_vp8_bilinear ## SIZE ## _hv_c; \
        dsp->put_vp8_bilinear_pixels_tab[IDX][2][2] = put_vp8_bilinear ## SIZE ## _hv_c; \
    } while (0)
356b20a6566d VP8 bilinear filter
conrad
parents: 11956
diff changeset
455
11921
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
456 av_cold void ff_vp8dsp_init(VP8DSPContext *dsp)
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
457 {
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
458 dsp->vp8_luma_dc_wht = vp8_luma_dc_wht_c;
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
459 dsp->vp8_idct_add = vp8_idct_add_c;
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
460 dsp->vp8_idct_dc_add = vp8_idct_dc_add_c;
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
461
12194
80b142c2e9f7 Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents: 12081
diff changeset
462 dsp->vp8_v_loop_filter16y = vp8_v_loop_filter16_c;
80b142c2e9f7 Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents: 12081
diff changeset
463 dsp->vp8_h_loop_filter16y = vp8_h_loop_filter16_c;
80b142c2e9f7 Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents: 12081
diff changeset
464 dsp->vp8_v_loop_filter8uv = vp8_v_loop_filter8uv_c;
80b142c2e9f7 Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents: 12081
diff changeset
465 dsp->vp8_h_loop_filter8uv = vp8_h_loop_filter8uv_c;
11921
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
466
12194
80b142c2e9f7 Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents: 12081
diff changeset
467 dsp->vp8_v_loop_filter16y_inner = vp8_v_loop_filter16_inner_c;
80b142c2e9f7 Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents: 12081
diff changeset
468 dsp->vp8_h_loop_filter16y_inner = vp8_h_loop_filter16_inner_c;
80b142c2e9f7 Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents: 12081
diff changeset
469 dsp->vp8_v_loop_filter8uv_inner = vp8_v_loop_filter8uv_inner_c;
80b142c2e9f7 Change function prototypes for width=8 inner and mbedge loopfilter functions
rbultje
parents: 12081
diff changeset
470 dsp->vp8_h_loop_filter8uv_inner = vp8_h_loop_filter8uv_inner_c;
11921
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
471
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
472 dsp->vp8_v_loop_filter_simple = vp8_v_loop_filter_simple_c;
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
473 dsp->vp8_h_loop_filter_simple = vp8_h_loop_filter_simple_c;
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
474
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
475 VP8_MC_FUNC(0, 16);
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
476 VP8_MC_FUNC(1, 8);
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
477 VP8_MC_FUNC(2, 4);
11974
356b20a6566d VP8 bilinear filter
conrad
parents: 11956
diff changeset
478
356b20a6566d VP8 bilinear filter
conrad
parents: 11956
diff changeset
479 VP8_BILINEAR_MC_FUNC(0, 16);
356b20a6566d VP8 bilinear filter
conrad
parents: 11956
diff changeset
480 VP8_BILINEAR_MC_FUNC(1, 8);
356b20a6566d VP8 bilinear filter
conrad
parents: 11956
diff changeset
481 VP8_BILINEAR_MC_FUNC(2, 4);
11975
c3afb5be0d9b First shot at VP8 optimizations:
rbultje
parents: 11974
diff changeset
482
11985
1cffcc7b1470 Fix linking if MMX is disabled.
stefano
parents: 11975
diff changeset
483 if (HAVE_MMX)
11975
c3afb5be0d9b First shot at VP8 optimizations:
rbultje
parents: 11974
diff changeset
484 ff_vp8dsp_init_x86(dsp);
12011
f96187e79438 Altivec VP8 MC functions
conrad
parents: 12008
diff changeset
485 if (HAVE_ALTIVEC)
f96187e79438 Altivec VP8 MC functions
conrad
parents: 12008
diff changeset
486 ff_vp8dsp_init_altivec(dsp);
11921
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
487 }