annotate vp8dsp.c @ 12043:f9a0bd0888a4 libavcodec

mpegaudio: call ff_mpegaudiodec_init_mmx() only from float decoder The mmx code is floating-point only, and this function does not know from which decoder it is called. Without this change, the integer decoder only "works" because the size of the context struct is smaller in this case, and the mmx init function writes the function pointer outside the allocated context.
author mru
date Thu, 01 Jul 2010 23:21:17 +0000
parents f96187e79438
children 812e23197d64
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
11921
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
1 /**
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
2 * VP8 compatible video decoder
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
3 *
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
4 * Copyright (C) 2010 David Conrad
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
5 * Copyright (C) 2010 Ronald S. Bultje
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
6 *
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
7 * This file is part of FFmpeg.
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
8 *
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
9 * FFmpeg is free software; you can redistribute it and/or
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
10 * modify it under the terms of the GNU Lesser General Public
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
11 * License as published by the Free Software Foundation; either
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
12 * version 2.1 of the License, or (at your option) any later version.
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
13 *
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
14 * FFmpeg is distributed in the hope that it will be useful,
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
17 * Lesser General Public License for more details.
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
18 *
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
19 * You should have received a copy of the GNU Lesser General Public
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
20 * License along with FFmpeg; if not, write to the Free Software
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
22 */
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
23
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
24 #include "dsputil.h"
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
25 #include "vp8dsp.h"
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
26
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
27 // TODO: Maybe add dequant
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
28 static void vp8_luma_dc_wht_c(DCTELEM block[4][4][16], DCTELEM dc[16])
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
29 {
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
30 int i, t0, t1, t2, t3;
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
31
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
32 for (i = 0; i < 4; i++) {
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
33 t0 = dc[0*4+i] + dc[3*4+i];
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
34 t1 = dc[1*4+i] + dc[2*4+i];
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
35 t2 = dc[1*4+i] - dc[2*4+i];
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
36 t3 = dc[0*4+i] - dc[3*4+i];
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
37
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
38 dc[0*4+i] = t0 + t1;
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
39 dc[1*4+i] = t3 + t2;
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
40 dc[2*4+i] = t0 - t1;
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
41 dc[3*4+i] = t3 - t2;
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
42 }
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
43
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
44 for (i = 0; i < 4; i++) {
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
45 t0 = dc[i*4+0] + dc[i*4+3] + 3; // rounding
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
46 t1 = dc[i*4+1] + dc[i*4+2];
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
47 t2 = dc[i*4+1] - dc[i*4+2];
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
48 t3 = dc[i*4+0] - dc[i*4+3] + 3; // rounding
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
49
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
50 *block[i][0] = (t0 + t1) >> 3;
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
51 *block[i][1] = (t3 + t2) >> 3;
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
52 *block[i][2] = (t0 - t1) >> 3;
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
53 *block[i][3] = (t3 - t2) >> 3;
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
54 }
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
55 }
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
56
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
57
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
/*
 * Fixed-point multipliers with 16 fractional bits, used by the inverse DCT.
 *
 * 20091/65536 is approximately sqrt(2)*cos(pi/8) - 1; the operand is added
 * back so the effective factor is about 1.30656.
 * 35468/65536 is approximately sqrt(2)*sin(pi/8), about 0.54120.
 *
 * MUL_20091 evaluates its argument twice, so pass side-effect-free
 * expressions only.
 */
#define MUL_20091(a) ((a) + (((a) * 20091) >> 16))
#define MUL_35468(a) (((a) * 35468) >> 16)
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
60
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
61 static void vp8_idct_add_c(uint8_t *dst, DCTELEM block[16], int stride)
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
62 {
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
63 int i, t0, t1, t2, t3;
12007
ec7be1d7d5b4 Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents: 11985
diff changeset
64 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
11921
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
65 DCTELEM tmp[16];
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
66
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
67 for (i = 0; i < 4; i++) {
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
68 t0 = block[0*4+i] + block[2*4+i];
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
69 t1 = block[0*4+i] - block[2*4+i];
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
70 t2 = MUL_35468(block[1*4+i]) - MUL_20091(block[3*4+i]);
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
71 t3 = MUL_20091(block[1*4+i]) + MUL_35468(block[3*4+i]);
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
72
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
73 tmp[i*4+0] = t0 + t3;
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
74 tmp[i*4+1] = t1 + t2;
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
75 tmp[i*4+2] = t1 - t2;
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
76 tmp[i*4+3] = t0 - t3;
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
77 }
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
78
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
79 for (i = 0; i < 4; i++) {
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
80 t0 = tmp[0*4+i] + tmp[2*4+i];
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
81 t1 = tmp[0*4+i] - tmp[2*4+i];
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
82 t2 = MUL_35468(tmp[1*4+i]) - MUL_20091(tmp[3*4+i]);
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
83 t3 = MUL_20091(tmp[1*4+i]) + MUL_35468(tmp[3*4+i]);
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
84
12007
ec7be1d7d5b4 Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents: 11985
diff changeset
85 dst[0] = cm[dst[0] + ((t0 + t3 + 4) >> 3)];
ec7be1d7d5b4 Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents: 11985
diff changeset
86 dst[1] = cm[dst[1] + ((t1 + t2 + 4) >> 3)];
ec7be1d7d5b4 Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents: 11985
diff changeset
87 dst[2] = cm[dst[2] + ((t1 - t2 + 4) >> 3)];
ec7be1d7d5b4 Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents: 11985
diff changeset
88 dst[3] = cm[dst[3] + ((t0 - t3 + 4) >> 3)];
11921
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
89 dst += stride;
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
90 }
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
91 }
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
92
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
93 static void vp8_idct_dc_add_c(uint8_t *dst, DCTELEM block[16], int stride)
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
94 {
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
95 int i, dc = (block[0] + 4) >> 3;
12007
ec7be1d7d5b4 Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents: 11985
diff changeset
96 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP + dc;
11921
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
97
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
98 for (i = 0; i < 4; i++) {
12007
ec7be1d7d5b4 Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents: 11985
diff changeset
99 dst[0] = cm[dst[0]];
ec7be1d7d5b4 Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents: 11985
diff changeset
100 dst[1] = cm[dst[1]];
ec7be1d7d5b4 Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents: 11985
diff changeset
101 dst[2] = cm[dst[2]];
ec7be1d7d5b4 Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents: 11985
diff changeset
102 dst[3] = cm[dst[3]];
11921
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
103 dst += stride;
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
104 }
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
105 }
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
106
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
107
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
/*
 * Declare the locals p3..p0 and q0..q3 and load them with the eight pixels
 * around the position being filtered: p0..p3 come from offsets -1..-4 times
 * stride relative to p, q0..q3 from offsets 0..3 times stride.
 *
 * The macro takes no arguments; it picks up the identifiers `p` and
 * `stride` from the scope it is expanded in, which keeps the helper
 * functions down to two positional parameters. It must appear where
 * declarations are allowed. Every variable is tagged av_unused because the
 * individual helpers read only some of them.
 */
#define LOAD_PIXELS\
    int av_unused p3 = p[-4*stride];\
    int av_unused p2 = p[-3*stride];\
    int av_unused p1 = p[-2*stride];\
    int av_unused p0 = p[-1*stride];\
    int av_unused q0 = p[ 0*stride];\
    int av_unused q1 = p[ 1*stride];\
    int av_unused q2 = p[ 2*stride];\
    int av_unused q3 = p[ 3*stride];
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
118
12007
ec7be1d7d5b4 Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents: 11985
diff changeset
/*
 * Clamp n to the signed 8-bit range by biasing it into the crop table and
 * removing the bias again. Relies on a pointer named `cm` being in scope at
 * the expansion site; the result is in [-128, 127] provided cm[v] yields v
 * clamped to 0..255.
 *
 * The argument is parenthesized so that operators binding more loosely than
 * `+` (shifts, comparisons, ?:) are evaluated before the bias is added.
 */
#define clip_int8(n) (cm[(n)+0x80]-0x80)
ec7be1d7d5b4 Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents: 11985
diff changeset
120
11921
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
121 static av_always_inline void filter_common(uint8_t *p, int stride, int is4tap)
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
122 {
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
123 LOAD_PIXELS
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
124 int a, f1, f2;
12007
ec7be1d7d5b4 Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents: 11985
diff changeset
125 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
11921
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
126
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
127 a = 3*(q0 - p0);
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
128
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
129 if (is4tap)
12007
ec7be1d7d5b4 Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents: 11985
diff changeset
130 a += clip_int8(p1 - q1);
11921
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
131
12007
ec7be1d7d5b4 Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents: 11985
diff changeset
132 a = clip_int8(a);
11921
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
133
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
134 // We deviate from the spec here with c(a+3) >> 3
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
135 // since that's what libvpx does.
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
136 f1 = FFMIN(a+4, 127) >> 3;
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
137 f2 = FFMIN(a+3, 127) >> 3;
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
138
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
139 // Despite what the spec says, we do need to clamp here to
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
140 // be bitexact with libvpx.
12007
ec7be1d7d5b4 Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents: 11985
diff changeset
141 p[-1*stride] = cm[p0 + f2];
ec7be1d7d5b4 Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents: 11985
diff changeset
142 p[ 0*stride] = cm[q0 - f1];
11921
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
143
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
144 // only used for _inner on blocks without high edge variance
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
145 if (!is4tap) {
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
146 a = (f1+1)>>1;
12007
ec7be1d7d5b4 Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents: 11985
diff changeset
147 p[-2*stride] = cm[p1 + a];
ec7be1d7d5b4 Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents: 11985
diff changeset
148 p[ 1*stride] = cm[q1 - a];
11921
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
149 }
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
150 }
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
151
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
152 static av_always_inline int simple_limit(uint8_t *p, int stride, int flim)
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
153 {
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
154 LOAD_PIXELS
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
155 return 2*FFABS(p0-q0) + (FFABS(p1-q1) >> 1) <= flim;
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
156 }
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
157
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
158 /**
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
159 * E - limit at the macroblock edge
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
160 * I - limit for interior difference
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
161 */
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
162 static av_always_inline int normal_limit(uint8_t *p, int stride, int E, int I)
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
163 {
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
164 LOAD_PIXELS
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
165 return simple_limit(p, stride, 2*E+I)
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
166 && FFABS(p3-p2) <= I && FFABS(p2-p1) <= I && FFABS(p1-p0) <= I
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
167 && FFABS(q3-q2) <= I && FFABS(q2-q1) <= I && FFABS(q1-q0) <= I;
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
168 }
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
169
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
170 // high edge variance
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
171 static av_always_inline int hev(uint8_t *p, int stride, int thresh)
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
172 {
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
173 LOAD_PIXELS
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
174 return FFABS(p1-p0) > thresh || FFABS(q1-q0) > thresh;
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
175 }
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
176
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
177 static av_always_inline void filter_mbedge(uint8_t *p, int stride)
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
178 {
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
179 int a0, a1, a2, w;
12007
ec7be1d7d5b4 Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents: 11985
diff changeset
180 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
11921
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
181
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
182 LOAD_PIXELS
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
183
12007
ec7be1d7d5b4 Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents: 11985
diff changeset
184 w = clip_int8(p1-q1);
ec7be1d7d5b4 Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents: 11985
diff changeset
185 w = clip_int8(w + 3*(q0-p0));
11921
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
186
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
187 a0 = (27*w + 63) >> 7;
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
188 a1 = (18*w + 63) >> 7;
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
189 a2 = ( 9*w + 63) >> 7;
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
190
12007
ec7be1d7d5b4 Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents: 11985
diff changeset
191 p[-3*stride] = cm[p2 + a2];
ec7be1d7d5b4 Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents: 11985
diff changeset
192 p[-2*stride] = cm[p1 + a1];
ec7be1d7d5b4 Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents: 11985
diff changeset
193 p[-1*stride] = cm[p0 + a0];
ec7be1d7d5b4 Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents: 11985
diff changeset
194 p[ 0*stride] = cm[q0 - a0];
ec7be1d7d5b4 Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents: 11985
diff changeset
195 p[ 1*stride] = cm[q1 - a1];
ec7be1d7d5b4 Use crop table in C implementations of VP8 DSP functions.
darkshikari
parents: 11985
diff changeset
196 p[ 2*stride] = cm[q2 - a2];
11921
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
197 }
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
198
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
/*
 * Generate the two normal loop filter functions for one edge direction and
 * edge length:
 *
 *   vp8_<dir>_loop_filter<size>_c        filter for macroblock edges
 *   vp8_<dir>_loop_filter<size>_inner_c  filter for inner edges
 *
 * dir      direction tag pasted into the function names (v or h)
 * size     number of positions along the edge; also pasted into the names,
 *          so it has to be a single token
 * stridea  step from one position along the edge to the next
 * strideb  step from one pixel to the next across the edge
 *
 * stridea and strideb may refer to the generated function's `stride`
 * parameter. Both are parenthesized wherever they are expanded so that an
 * expression argument keeps its meaning inside `dst + i*...`.
 *
 * Each position is filtered only if normal_limit() accepts it. The edge
 * variant then uses filter_common(..., 1) on high edge variance and
 * filter_mbedge() otherwise. The inner variant always uses filter_common(),
 * called with a literal 1 or 0 in separate branches so that each call of
 * the always-inline helper sees a constant is4tap.
 */
#define LOOP_FILTER(dir, size, stridea, strideb) \
static void vp8_ ## dir ## _loop_filter ## size ## _c(uint8_t *dst, int stride,\
                                     int flim_E, int flim_I, int hev_thresh)\
{\
    int i;\
\
    for (i = 0; i < size; i++) {\
        uint8_t *edge = dst + i*(stridea);\
\
        if (normal_limit(edge, (strideb), flim_E, flim_I)) {\
            if (hev(edge, (strideb), hev_thresh))\
                filter_common(edge, (strideb), 1);\
            else\
                filter_mbedge(edge, (strideb));\
        }\
    }\
}\
\
static void vp8_ ## dir ## _loop_filter ## size ## _inner_c(uint8_t *dst, int stride,\
                                      int flim_E, int flim_I, int hev_thresh)\
{\
    int i;\
\
    for (i = 0; i < size; i++) {\
        uint8_t *edge = dst + i*(stridea);\
\
        if (normal_limit(edge, (strideb), flim_E, flim_I)) {\
            int hv = hev(edge, (strideb), hev_thresh);\
            if (hv) \
                filter_common(edge, (strideb), 1);\
            else \
                filter_common(edge, (strideb), 0);\
        }\
    }\
}

/* v: step 1 along the edge, `stride` across it; h: the other way round. */
LOOP_FILTER(v, 16, 1, stride)
LOOP_FILTER(h, 16, stride, 1)
LOOP_FILTER(v, 8, 1, stride)
LOOP_FILTER(h, 8, stride, 1)
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
233
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
/**
 * Simple loop filter, v direction: visits 16 consecutive bytes starting at
 * dst and filters across them using `stride` as the pixel step.
 *
 * @param dst    first of the 16 positions along the edge
 * @param stride distance between successive pixels across the edge
 * @param flim   filter limit passed to simple_limit()
 */
static void vp8_v_loop_filter_simple_c(uint8_t *dst, int stride, int flim)
{
    int col;

    for (col = 0; col < 16; col++) {
        uint8_t *edge = dst + col;

        if (!simple_limit(edge, stride, flim))
            continue;
        filter_common(edge, stride, 1);
    }
}
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
242
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
/**
 * Simple loop filter, h direction: visits 16 positions spaced `stride`
 * bytes apart starting at dst and filters across them with a pixel step
 * of 1.
 *
 * @param dst    first of the 16 positions along the edge
 * @param stride distance between successive positions along the edge
 * @param flim   filter limit passed to simple_limit()
 */
static void vp8_h_loop_filter_simple_c(uint8_t *dst, int stride, int flim)
{
    int row;

    for (row = 0; row < 16; row++) {
        uint8_t *edge = dst + row*stride;

        if (!simple_limit(edge, 1, flim))
            continue;
        filter_common(edge, 1, 1);
    }
}
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
251
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
/*
 * Interpolation kernels for the epel functions, one row per nonzero
 * fractional offset (the users index this table with mx-1 or my-1).
 *
 * Each row holds six tap magnitudes for the pixels at offsets -2..+3.
 * Taps 1 and 4 are stored as positive numbers but are subtracted by the
 * FILTER_6TAP/FILTER_4TAP macros, so with those signs applied every row
 * sums to 128, matching the (sum + 64) >> 7 normalization.
 * Rows whose outer taps (0 and 5) are zero need only four taps.
 * Row r is the mirror image of row 6-r.
 */
static const uint8_t subpel_filters[7][6] = {
    /* tap:   0    1    2    3    4    5 */
    {         0,   6, 123,  12,   1,   0 },   /* offset 1 */
    {         2,  11, 108,  36,   8,   1 },   /* offset 2 */
    {         0,   9,  93,  50,   6,   0 },   /* offset 3 */
    {         3,  16,  77,  77,  16,   3 },   /* offset 4 */
    {         0,   6,  50,  93,   9,   0 },   /* offset 5 */
    {         1,   8,  36, 108,  11,   2 },   /* offset 6 */
    {         0,   1,  12, 123,   6,   0 },   /* offset 7 */
};
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
261
11950
56aba5a9761c Make VP8 DSP functions take two strides
darkshikari
parents: 11921
diff changeset
/*
 * Generate put_vp8_pixels<WIDTH>_c(): copy h rows of WIDTH bytes from src
 * to dst. After each row dst advances by dststride and src by srcstride.
 * The x and y parameters are accepted but not used by the copy. Nothing is
 * copied when h is not positive.
 */
#define PUT_PIXELS(WIDTH) \
static void put_vp8_pixels ## WIDTH ##_c(uint8_t *dst, int dststride, uint8_t *src, int srcstride, int h, int x, int y) { \
    int row = 0; \
    while (row < h) { \
        memcpy(dst, src, WIDTH); \
        dst += dststride; \
        src += srcstride; \
        row++; \
    } \
}

PUT_PIXELS(16)
PUT_PIXELS(8)
PUT_PIXELS(4)
11921
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
273
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
/*
 * Six-tap filter around src[x]: taps F[0]..F[5] apply to the samples at
 * offsets -2..+3 times stride. F[1] and F[4] are subtracted, the others
 * added. The sum is rounded with +64, shifted right by 7 and passed
 * through the crop table.
 *
 * Relies on `x` (sample index) and `cm` (crop table pointer) being in scope
 * at the expansion site. All three parameters are parenthesized so that
 * expression arguments such as `buf + n` or `a + b` keep their meaning.
 */
#define FILTER_6TAP(src, F, stride) \
    cm[((F)[2]*(src)[x+0*(stride)] - (F)[1]*(src)[x-1*(stride)] + (F)[0]*(src)[x-2*(stride)] + \
        (F)[3]*(src)[x+1*(stride)] - (F)[4]*(src)[x+2*(stride)] + (F)[5]*(src)[x+3*(stride)] + 64) >> 7]
11921
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
277
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
/*
 * Four-tap filter around src[x]: uses only F[1]..F[4], applied to the
 * samples at offsets -1..+2 times stride. F[1] and F[4] are subtracted,
 * F[2] and F[3] added. The sum is rounded with +64, shifted right by 7 and
 * passed through the crop table.
 *
 * Relies on `x` (sample index) and `cm` (crop table pointer) being in scope
 * at the expansion site. All three parameters are parenthesized so that
 * expression arguments such as `buf + n` or `a + b` keep their meaning.
 */
#define FILTER_4TAP(src, F, stride) \
    cm[((F)[2]*(src)[x+0*(stride)] - (F)[1]*(src)[x-1*(stride)] + \
        (F)[3]*(src)[x+1*(stride)] - (F)[4]*(src)[x+2*(stride)] + 64) >> 7]
11921
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
281
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
/*
 * Generate put_vp8_epel<SIZE>_<FILTERNAME>_c(): horizontal-only subpel
 * interpolation of an h-row, SIZE-column block.
 *
 * SIZE        block width in pixels
 * FILTER      tap macro applied per pixel; it is invoked with a stride of 1
 *             and reads the locals `x` and `cm`
 * FILTERNAME  suffix pasted into the function name
 *
 * The kernel is subpel_filters[mx-1]; `my` is not used.
 */
#define VP8_EPEL_H(SIZE, FILTER, FILTERNAME) \
static void put_vp8_epel ## SIZE ## _ ## FILTERNAME ## _c(uint8_t *dst, int dststride, uint8_t *src, int srcstride, int h, int mx, int my) \
{ \
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; \
    const uint8_t *taps = subpel_filters[mx-1]; \
    int x, row; \
\
    for (row = 0; row < h; row++) { \
        for (x = 0; x < SIZE; x++) { \
            dst[x] = FILTER(src, taps, 1); \
        } \
        dst += dststride; \
        src += srcstride; \
    } \
}
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
/*
 * Generate put_vp8_epel<SIZE>_<FILTERNAME>_c(): vertical-only subpel
 * interpolation of an h-row, SIZE-column block.
 *
 * SIZE        block width in pixels
 * FILTER      tap macro applied per pixel; it is invoked with srcstride as
 *             the tap spacing and reads the locals `x` and `cm`
 * FILTERNAME  suffix pasted into the function name
 *
 * The kernel is subpel_filters[my-1]; `mx` is not used.
 */
#define VP8_EPEL_V(SIZE, FILTER, FILTERNAME) \
static void put_vp8_epel ## SIZE ## _ ## FILTERNAME ## _c(uint8_t *dst, int dststride, uint8_t *src, int srcstride, int h, int mx, int my) \
{ \
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; \
    const uint8_t *taps = subpel_filters[my-1]; \
    int x, row; \
\
    for (row = 0; row < h; row++) { \
        for (x = 0; x < SIZE; x++) { \
            dst[x] = FILTER(src, taps, srcstride); \
        } \
        dst += dststride; \
        src += srcstride; \
    } \
}
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
/**
 * Generate put_vp8_epel<SIZE>_<FILTERNAME>_c(): two-pass (horizontal, then
 * vertical) subpel interpolation of a block SIZE pixels wide and h rows tall.
 *
 * Pass 1 starts two rows above src and runs FILTERX with the taps
 * subpel_filters[mx-1] over h+5 rows, packing the results into tmp_array
 * with a row pitch of SIZE.
 * Pass 2 starts at row 2 of tmp_array (the row that corresponds to the
 * original src position) and runs FILTERY with the taps
 * subpel_filters[my-1], using SIZE as the step between rows.
 *
 * tmp_array holds 2*SIZE+5 rows, so h must not exceed 2*SIZE.
 *
 * NOTE(review): x and cm are never referenced explicitly here; the FILTER
 * macros presumably capture them by name -- confirm before renaming.
 */
#define VP8_EPEL_HV(SIZE, FILTERX, FILTERY, FILTERNAME) \
static void put_vp8_epel ## SIZE ## _ ## FILTERNAME ## _c(uint8_t *dst, int dststride, uint8_t *src, int srcstride, int h, int mx, int my) \
{ \
    uint8_t tmp_array[(2*SIZE+5)*SIZE]; \
    uint8_t *tmp = tmp_array; \
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; \
    const uint8_t *filter = subpel_filters[mx-1]; \
    int x, y; \
\
    src -= 2*srcstride; \
    for (y = 0; y < h+5; y++, tmp += SIZE, src += srcstride) { \
        for (x = 0; x < SIZE; x++) { \
            tmp[x] = FILTERX(src, filter, 1); \
        } \
    } \
\
    filter = subpel_filters[my-1]; \
    tmp = tmp_array + 2*SIZE; \
    for (y = 0; y < h; y++, dst += dststride, tmp += SIZE) { \
        for (x = 0; x < SIZE; x++) { \
            dst[x] = FILTERY(tmp, filter, SIZE); \
        } \
    } \
}
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
337
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
338 VP8_EPEL_H(16, FILTER_4TAP, h4)
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
339 VP8_EPEL_H(8, FILTER_4TAP, h4)
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
340 VP8_EPEL_H(4, FILTER_4TAP, h4)
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
341 VP8_EPEL_H(16, FILTER_6TAP, h6)
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
342 VP8_EPEL_H(8, FILTER_6TAP, h6)
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
343 VP8_EPEL_H(4, FILTER_6TAP, h6)
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
344 VP8_EPEL_V(16, FILTER_4TAP, v4)
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
345 VP8_EPEL_V(8, FILTER_4TAP, v4)
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
346 VP8_EPEL_V(4, FILTER_4TAP, v4)
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
347 VP8_EPEL_V(16, FILTER_6TAP, v6)
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
348 VP8_EPEL_V(8, FILTER_6TAP, v6)
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
349 VP8_EPEL_V(4, FILTER_6TAP, v6)
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
350 VP8_EPEL_HV(16, FILTER_4TAP, FILTER_4TAP, h4v4)
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
351 VP8_EPEL_HV(8, FILTER_4TAP, FILTER_4TAP, h4v4)
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
352 VP8_EPEL_HV(4, FILTER_4TAP, FILTER_4TAP, h4v4)
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
353 VP8_EPEL_HV(16, FILTER_4TAP, FILTER_6TAP, h4v6)
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
354 VP8_EPEL_HV(8, FILTER_4TAP, FILTER_6TAP, h4v6)
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
355 VP8_EPEL_HV(4, FILTER_4TAP, FILTER_6TAP, h4v6)
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
356 VP8_EPEL_HV(16, FILTER_6TAP, FILTER_4TAP, h6v4)
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
357 VP8_EPEL_HV(8, FILTER_6TAP, FILTER_4TAP, h6v4)
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
358 VP8_EPEL_HV(4, FILTER_6TAP, FILTER_4TAP, h6v4)
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
359 VP8_EPEL_HV(16, FILTER_6TAP, FILTER_6TAP, h6v6)
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
360 VP8_EPEL_HV(8, FILTER_6TAP, FILTER_6TAP, h6v6)
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
361 VP8_EPEL_HV(4, FILTER_6TAP, FILTER_6TAP, h6v6)
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
362
11974
356b20a6566d VP8 bilinear filter
conrad
parents: 11956
diff changeset
/**
 * Generate the C bilinear motion-compensation functions for blocks that are
 * SIZE pixels wide:
 *   put_vp8_bilinear<SIZE>_h_c   horizontal blend only (uses mx)
 *   put_vp8_bilinear<SIZE>_v_c   vertical blend only   (uses my)
 *   put_vp8_bilinear<SIZE>_hv_c  horizontal pass into a scratch buffer,
 *                                then vertical pass     (uses mx and my)
 *
 * Common parameters:
 *   dst, stride  destination pointer and destination line size
 *   src, s2      source pointer and source line size
 *   h            number of output rows
 *   mx, my       blend weights in eighths: each output is
 *                ((8-m)*near + m*next + 4) >> 3
 *
 * The source is always walked with s2 and the destination with stride, so
 * the two buffers may have different line sizes.
 *
 * The hv variant reads h+1 source rows and stores them in a scratch buffer
 * of 2*SIZE+1 rows, so h must not exceed 2*SIZE.
 */
#define VP8_BILINEAR(SIZE) \
static void put_vp8_bilinear ## SIZE ## _h_c(uint8_t *dst, int stride, uint8_t *src, int s2, int h, int mx, int my) \
{ \
    int a = 8-mx, b = mx; \
    int x, y; \
\
    for (y = 0; y < h; y++) { \
        for (x = 0; x < SIZE; x++) \
            dst[x] = (a*src[x] + b*src[x+1] + 4) >> 3; \
        dst += stride; \
        src += s2; \
    } \
} \
static void put_vp8_bilinear ## SIZE ## _v_c(uint8_t *dst, int stride, uint8_t *src, int s2, int h, int mx, int my) \
{ \
    int c = 8-my, d = my; \
    int x, y; \
\
    for (y = 0; y < h; y++) { \
        for (x = 0; x < SIZE; x++) \
            dst[x] = (c*src[x] + d*src[x+s2] + 4) >> 3; /* next row of src */ \
        dst += stride; \
        src += s2; \
    } \
} \
\
static void put_vp8_bilinear ## SIZE ## _hv_c(uint8_t *dst, int stride, uint8_t *src, int s2, int h, int mx, int my) \
{ \
    int a = 8-mx, b = mx; \
    int c = 8-my, d = my; \
    int x, y; \
    uint8_t tmp_array[(2*SIZE+1)*SIZE]; \
    uint8_t *tmp = tmp_array; \
\
    /* horizontal pass: one extra row feeds the vertical blend of the last output row */ \
    for (y = 0; y < h+1; y++) { \
        for (x = 0; x < SIZE; x++) \
            tmp[x] = (a*src[x] + b*src[x+1] + 4) >> 3; \
        tmp += SIZE; \
        src += s2; \
    } \
\
    tmp = tmp_array; \
\
    /* vertical pass over the packed scratch rows (row pitch is SIZE) */ \
    for (y = 0; y < h; y++) { \
        for (x = 0; x < SIZE; x++) \
            dst[x] = (c*tmp[x] + d*tmp[x+SIZE] + 4) >> 3; \
        dst += stride; \
        tmp += SIZE; \
    } \
}

VP8_BILINEAR(16)
VP8_BILINEAR(8)
VP8_BILINEAR(4)
356b20a6566d VP8 bilinear filter
conrad
parents: 11956
diff changeset
417
11921
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
/**
 * Install the nine C subpel MC functions for one block width into
 * dsp->put_vp8_epel_pixels_tab[IDX].
 *
 * The second index selects the vertical filter and the third the horizontal
 * filter: 0 = none, 1 = 4-tap, 2 = 6-tap. Entry [0][0] is the plain copy.
 *
 * Expects a pointer named dsp in the calling scope. The body is wrapped in
 * do { } while (0) so that the macro behaves as a single statement (for
 * example under an unbraced if); invoke it with a trailing semicolon.
 */
#define VP8_MC_FUNC(IDX, SIZE) \
    do { \
        dsp->put_vp8_epel_pixels_tab[IDX][0][0] = put_vp8_pixels ## SIZE ## _c; \
        dsp->put_vp8_epel_pixels_tab[IDX][0][1] = put_vp8_epel ## SIZE ## _h4_c; \
        dsp->put_vp8_epel_pixels_tab[IDX][0][2] = put_vp8_epel ## SIZE ## _h6_c; \
        dsp->put_vp8_epel_pixels_tab[IDX][1][0] = put_vp8_epel ## SIZE ## _v4_c; \
        dsp->put_vp8_epel_pixels_tab[IDX][1][1] = put_vp8_epel ## SIZE ## _h4v4_c; \
        dsp->put_vp8_epel_pixels_tab[IDX][1][2] = put_vp8_epel ## SIZE ## _h6v4_c; \
        dsp->put_vp8_epel_pixels_tab[IDX][2][0] = put_vp8_epel ## SIZE ## _v6_c; \
        dsp->put_vp8_epel_pixels_tab[IDX][2][1] = put_vp8_epel ## SIZE ## _h4v6_c; \
        dsp->put_vp8_epel_pixels_tab[IDX][2][2] = put_vp8_epel ## SIZE ## _h6v6_c; \
    } while (0)
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
428
11974
356b20a6566d VP8 bilinear filter
conrad
parents: 11956
diff changeset
/**
 * Install the C bilinear MC functions for one block width into
 * dsp->put_vp8_bilinear_pixels_tab[IDX].
 *
 * The table has the same 3x3 layout as the subpel table. Entry [0][0] is
 * the plain copy; a non-zero third index selects horizontal blending and a
 * non-zero second index selects vertical blending. Index values 1 and 2 map
 * to the same bilinear function.
 *
 * Expects a pointer named dsp in the calling scope. The body is wrapped in
 * do { } while (0) so that the macro behaves as a single statement (for
 * example under an unbraced if); invoke it with a trailing semicolon.
 */
#define VP8_BILINEAR_MC_FUNC(IDX, SIZE) \
    do { \
        dsp->put_vp8_bilinear_pixels_tab[IDX][0][0] = put_vp8_pixels ## SIZE ## _c; \
        dsp->put_vp8_bilinear_pixels_tab[IDX][0][1] = put_vp8_bilinear ## SIZE ## _h_c; \
        dsp->put_vp8_bilinear_pixels_tab[IDX][0][2] = put_vp8_bilinear ## SIZE ## _h_c; \
        dsp->put_vp8_bilinear_pixels_tab[IDX][1][0] = put_vp8_bilinear ## SIZE ## _v_c; \
        dsp->put_vp8_bilinear_pixels_tab[IDX][1][1] = put_vp8_bilinear ## SIZE ## _hv_c; \
        dsp->put_vp8_bilinear_pixels_tab[IDX][1][2] = put_vp8_bilinear ## SIZE ## _hv_c; \
        dsp->put_vp8_bilinear_pixels_tab[IDX][2][0] = put_vp8_bilinear ## SIZE ## _v_c; \
        dsp->put_vp8_bilinear_pixels_tab[IDX][2][1] = put_vp8_bilinear ## SIZE ## _hv_c; \
        dsp->put_vp8_bilinear_pixels_tab[IDX][2][2] = put_vp8_bilinear ## SIZE ## _hv_c; \
    } while (0)
356b20a6566d VP8 bilinear filter
conrad
parents: 11956
diff changeset
439
11921
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
440 av_cold void ff_vp8dsp_init(VP8DSPContext *dsp)
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
441 {
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
442 dsp->vp8_luma_dc_wht = vp8_luma_dc_wht_c;
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
443 dsp->vp8_idct_add = vp8_idct_add_c;
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
444 dsp->vp8_idct_dc_add = vp8_idct_dc_add_c;
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
445
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
446 dsp->vp8_v_loop_filter16 = vp8_v_loop_filter16_c;
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
447 dsp->vp8_h_loop_filter16 = vp8_h_loop_filter16_c;
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
448 dsp->vp8_v_loop_filter8 = vp8_v_loop_filter8_c;
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
449 dsp->vp8_h_loop_filter8 = vp8_h_loop_filter8_c;
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
450
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
451 dsp->vp8_v_loop_filter16_inner = vp8_v_loop_filter16_inner_c;
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
452 dsp->vp8_h_loop_filter16_inner = vp8_h_loop_filter16_inner_c;
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
453 dsp->vp8_v_loop_filter8_inner = vp8_v_loop_filter8_inner_c;
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
454 dsp->vp8_h_loop_filter8_inner = vp8_h_loop_filter8_inner_c;
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
455
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
456 dsp->vp8_v_loop_filter_simple = vp8_v_loop_filter_simple_c;
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
457 dsp->vp8_h_loop_filter_simple = vp8_h_loop_filter_simple_c;
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
458
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
459 VP8_MC_FUNC(0, 16);
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
460 VP8_MC_FUNC(1, 8);
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
461 VP8_MC_FUNC(2, 4);
11974
356b20a6566d VP8 bilinear filter
conrad
parents: 11956
diff changeset
462
356b20a6566d VP8 bilinear filter
conrad
parents: 11956
diff changeset
463 VP8_BILINEAR_MC_FUNC(0, 16);
356b20a6566d VP8 bilinear filter
conrad
parents: 11956
diff changeset
464 VP8_BILINEAR_MC_FUNC(1, 8);
356b20a6566d VP8 bilinear filter
conrad
parents: 11956
diff changeset
465 VP8_BILINEAR_MC_FUNC(2, 4);
11975
c3afb5be0d9b First shot at VP8 optimizations:
rbultje
parents: 11974
diff changeset
466
11985
1cffcc7b1470 Fix linking if MMX is disabled.
stefano
parents: 11975
diff changeset
467 if (HAVE_MMX)
11975
c3afb5be0d9b First shot at VP8 optimizations:
rbultje
parents: 11974
diff changeset
468 ff_vp8dsp_init_x86(dsp);
12011
f96187e79438 Altivec VP8 MC functions
conrad
parents: 12008
diff changeset
469 if (HAVE_ALTIVEC)
f96187e79438 Altivec VP8 MC functions
conrad
parents: 12008
diff changeset
470 ff_vp8dsp_init_altivec(dsp);
11921
f2007d7c3f1d Native VP8 decoder.
rbultje
parents:
diff changeset
471 }