Mercurial > libavcodec.hg
annotate dsputil.c @ 12391:4be72e19ab0e libavcodec
imc: fix undefined float to int conversion
Conversion of an out of range float to int is undefined. Clipping to
the final range first avoids such problems. This fixes decoding on
MIPS, which handles these conversions differently from many other CPUs.
author | mru |
---|---|
date | Thu, 19 Aug 2010 16:51:26 +0000 |
parents | 1241c824de46 |
children | 9f06475db098 |
rev | line source |
---|---|
0 | 1 /* |
2 * DSP utils | |
8629
04423b2f6e0b
cosmetics: Remove pointless period after copyright statement non-sentences.
diego
parents:
8627
diff
changeset
|
3 * Copyright (c) 2000, 2001 Fabrice Bellard |
1739
07a484280a82
copyright year update of the files i touched and remembered, things look annoyingly unmaintained otherwise
michael
parents:
1729
diff
changeset
|
4 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> |
0 | 5 * |
5214 | 6 * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at> |
7 * | |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3807
diff
changeset
|
8 * This file is part of FFmpeg. |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3807
diff
changeset
|
9 * |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3807
diff
changeset
|
10 * FFmpeg is free software; you can redistribute it and/or |
429 | 11 * modify it under the terms of the GNU Lesser General Public |
12 * License as published by the Free Software Foundation; either | |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3807
diff
changeset
|
13 * version 2.1 of the License, or (at your option) any later version. |
0 | 14 * |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3807
diff
changeset
|
15 * FFmpeg is distributed in the hope that it will be useful, |
0 | 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
429 | 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
18 * Lesser General Public License for more details. | |
0 | 19 * |
429 | 20 * You should have received a copy of the GNU Lesser General Public |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3807
diff
changeset
|
21 * License along with FFmpeg; if not, write to the Free Software |
3036
0b546eab515d
Update licensing information: The FSF changed postal address.
diego
parents:
3029
diff
changeset
|
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
0 | 23 */ |
2967 | 24 |
1106 | 25 /** |
11644
7dd2a45249a9
Remove explicit filename from Doxygen @file commands.
diego
parents:
11637
diff
changeset
|
26 * @file |
1106 | 27 * DSP utils |
28 */ | |
2967 | 29 |
0 | 30 #include "avcodec.h" |
31 #include "dsputil.h" | |
1092 | 32 #include "simple_idct.h" |
1557 | 33 #include "faandct.h" |
6407 | 34 #include "faanidct.h" |
8627
d6bab465b82c
moves mid_pred() into mathops.h (with arch specific code split by directory)
aurel
parents:
8596
diff
changeset
|
35 #include "mathops.h" |
10748
36611425fedb
Add required header #includes for mpegvideo.h and config.h.
diego
parents:
10644
diff
changeset
|
36 #include "mpegvideo.h" |
36611425fedb
Add required header #includes for mpegvideo.h and config.h.
diego
parents:
10644
diff
changeset
|
37 #include "config.h" |
11375
84963c795459
Move some prototypes from dsputil.c to reasonable header files
mru
parents:
11369
diff
changeset
|
38 #include "lpc.h" |
84963c795459
Move some prototypes from dsputil.c to reasonable header files
mru
parents:
11369
diff
changeset
|
39 #include "ac3dec.h" |
84963c795459
Move some prototypes from dsputil.c to reasonable header files
mru
parents:
11369
diff
changeset
|
40 #include "vorbis.h" |
84963c795459
Move some prototypes from dsputil.c to reasonable header files
mru
parents:
11369
diff
changeset
|
41 #include "png.h" |
11921 | 42 #include "vp8dsp.h" |
676 | 43 |
4176 | 44 uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0, }; |
4179 | 45 uint32_t ff_squareTbl[512] = {0, }; |
0 | 46 |
6387 | 47 // 0x7f7f7f7f or 0x7f7f7f7f7f7f7f7f or whatever, depending on the cpu's native arithmetic size |
48 #define pb_7f (~0UL/255 * 0x7f) | |
49 #define pb_80 (~0UL/255 * 0x80) | |
6385 | 50 |
/* Standard JPEG/MPEG zigzag scan order: entry n is the raster index
   (row*8 + column) of the n-th coefficient visited in an 8x8 block. */
const uint8_t ff_zigzag_direct[64] = {
     0,  1,  8, 16,  9,  2,  3, 10,
    17, 24, 32, 25, 18, 11,  4,  5,
    12, 19, 26, 33, 40, 48, 41, 34,
    27, 20, 13,  6,  7, 14, 21, 28,
    35, 42, 49, 56, 57, 50, 43, 36,
    29, 22, 15, 23, 30, 37, 44, 51,
    58, 59, 52, 45, 38, 31, 39, 46,
    53, 60, 61, 54, 47, 55, 62, 63
};
61 | |
/* Specific zigzag scan for 248 idct. NOTE that unlike the
   specification, we interleave the fields */
const uint8_t ff_zigzag248_direct[64] = {
     0,  8,  1,  9, 16, 24,  2, 10,
    17, 25, 32, 40, 48, 56, 33, 41,
    18, 26,  3, 11,  4, 12, 19, 27,
    34, 42, 49, 57, 50, 58, 35, 43,
    20, 28,  5, 13,  6, 14, 21, 29,
    36, 44, 51, 59, 52, 60, 37, 45,
    22, 30,  7, 15, 23, 31, 38, 46,
    53, 61, 54, 62, 39, 47, 55, 63,
};
74 | |
220 | 75 /* not permutated inverse zigzag_direct + 1 for MMX quantizer */ |
11369 | 76 DECLARE_ALIGNED(16, uint16_t, inv_zigzag_direct16)[64]; |
220 | 77 |
/* Alternate (horizontal-first) scan order for an 8x8 coefficient block. */
const uint8_t ff_alternate_horizontal_scan[64] = {
     0,  1,  2,  3,  8,  9, 16, 17,
    10, 11,  4,  5,  6,  7, 15, 14,
    13, 12, 19, 18, 24, 25, 32, 33,
    26, 27, 20, 21, 22, 23, 28, 29,
    30, 31, 34, 35, 40, 41, 48, 49,
    42, 43, 36, 37, 38, 39, 44, 45,
    46, 47, 50, 51, 56, 57, 58, 59,
    52, 53, 54, 55, 60, 61, 62, 63,
};
88 | |
/* Alternate (vertical-first) scan order for an 8x8 coefficient block. */
const uint8_t ff_alternate_vertical_scan[64] = {
     0,  8, 16, 24,  1,  9,  2, 10,
    17, 25, 32, 40, 48, 56, 57, 49,
    41, 33, 26, 18,  3, 11,  4, 12,
    19, 27, 34, 42, 50, 58, 35, 43,
    51, 59, 20, 28,  5, 13,  6, 14,
    21, 29, 36, 44, 52, 60, 37, 45,
    53, 61, 22, 30,  7, 15, 23, 31,
    38, 46, 54, 62, 39, 47, 55, 63,
};
99 | |
/* Input permutation for the simple_idct_mmx */
static const uint8_t simple_mmx_permutation[64]={
    0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
    0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
    0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
    0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
    0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
    0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
    0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
    0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
};
111 | |
/* Row permutation used by the SSE2 (xvid) IDCT and similar IDCTs. */
static const uint8_t idct_sse2_row_perm[8] = {0, 4, 1, 5, 2, 6, 3, 7};
113 |
6438 | 114 void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){ |
115 int i; | |
116 int end; | |
117 | |
118 st->scantable= src_scantable; | |
119 | |
120 for(i=0; i<64; i++){ | |
121 int j; | |
122 j = src_scantable[i]; | |
123 st->permutated[i] = permutation[j]; | |
8590 | 124 #if ARCH_PPC |
6438 | 125 st->inverse[j] = i; |
126 #endif | |
127 } | |
128 | |
129 end=-1; | |
130 for(i=0; i<64; i++){ | |
131 int j; | |
132 j = st->permutated[i]; | |
133 if(j>end) end=j; | |
134 st->raster_end[i]= end; | |
135 } | |
136 } | |
137 | |
/* Sum of all samples of a 16x16 pixel block. */
static int pix_sum_c(uint8_t * pix, int line_size)
{
    int sum = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        for (col = 0; col < 16; col++)
            sum += pix[col];
        pix += line_size;
    }
    return sum;
}
159 | |
1064 | 160 static int pix_norm1_c(uint8_t * pix, int line_size) |
612 | 161 { |
162 int s, i, j; | |
4179 | 163 uint32_t *sq = ff_squareTbl + 256; |
612 | 164 |
165 s = 0; | |
166 for (i = 0; i < 16; i++) { | |
2979 | 167 for (j = 0; j < 16; j += 8) { |
997
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
168 #if 0 |
2979 | 169 s += sq[pix[0]]; |
170 s += sq[pix[1]]; | |
171 s += sq[pix[2]]; | |
172 s += sq[pix[3]]; | |
173 s += sq[pix[4]]; | |
174 s += sq[pix[5]]; | |
175 s += sq[pix[6]]; | |
176 s += sq[pix[7]]; | |
997
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
177 #else |
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
178 #if LONG_MAX > 2147483647 |
2979 | 179 register uint64_t x=*(uint64_t*)pix; |
180 s += sq[x&0xff]; | |
181 s += sq[(x>>8)&0xff]; | |
182 s += sq[(x>>16)&0xff]; | |
183 s += sq[(x>>24)&0xff]; | |
997
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
184 s += sq[(x>>32)&0xff]; |
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
185 s += sq[(x>>40)&0xff]; |
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
186 s += sq[(x>>48)&0xff]; |
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
187 s += sq[(x>>56)&0xff]; |
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
188 #else |
2979 | 189 register uint32_t x=*(uint32_t*)pix; |
190 s += sq[x&0xff]; | |
191 s += sq[(x>>8)&0xff]; | |
192 s += sq[(x>>16)&0xff]; | |
193 s += sq[(x>>24)&0xff]; | |
997
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
194 x=*(uint32_t*)(pix+4); |
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
195 s += sq[x&0xff]; |
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
196 s += sq[(x>>8)&0xff]; |
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
197 s += sq[(x>>16)&0xff]; |
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
198 s += sq[(x>>24)&0xff]; |
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
199 #endif |
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
200 #endif |
2979 | 201 pix += 8; |
202 } | |
203 pix += line_size - 16; | |
612 | 204 } |
205 return s; | |
206 } | |
207 | |
/* Byte-swap w 32-bit words from src into dst (element-wise, so dst may
 * alias src). The main loop is unrolled 8x like the original. */
static void bswap_buf(uint32_t *dst, const uint32_t *src, int w){
    int n = w;

    while (n >= 8) {
        dst[0] = av_bswap32(src[0]);
        dst[1] = av_bswap32(src[1]);
        dst[2] = av_bswap32(src[2]);
        dst[3] = av_bswap32(src[3]);
        dst[4] = av_bswap32(src[4]);
        dst[5] = av_bswap32(src[5]);
        dst[6] = av_bswap32(src[6]);
        dst[7] = av_bswap32(src[7]);
        dst += 8;
        src += 8;
        n   -= 8;
    }
    while (n-- > 0)
        *dst++ = av_bswap32(*src++);
}
612 | 225 |
2184 | 226 static int sse4_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h) |
227 { | |
228 int s, i; | |
4179 | 229 uint32_t *sq = ff_squareTbl + 256; |
2184 | 230 |
231 s = 0; | |
232 for (i = 0; i < h; i++) { | |
233 s += sq[pix1[0] - pix2[0]]; | |
234 s += sq[pix1[1] - pix2[1]]; | |
235 s += sq[pix1[2] - pix2[2]]; | |
236 s += sq[pix1[3] - pix2[3]]; | |
237 pix1 += line_size; | |
238 pix2 += line_size; | |
239 } | |
240 return s; | |
241 } | |
242 | |
1708 | 243 static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h) |
936 | 244 { |
245 int s, i; | |
4179 | 246 uint32_t *sq = ff_squareTbl + 256; |
936 | 247 |
248 s = 0; | |
1708 | 249 for (i = 0; i < h; i++) { |
936 | 250 s += sq[pix1[0] - pix2[0]]; |
251 s += sq[pix1[1] - pix2[1]]; | |
252 s += sq[pix1[2] - pix2[2]]; | |
253 s += sq[pix1[3] - pix2[3]]; | |
254 s += sq[pix1[4] - pix2[4]]; | |
255 s += sq[pix1[5] - pix2[5]]; | |
256 s += sq[pix1[6] - pix2[6]]; | |
257 s += sq[pix1[7] - pix2[7]]; | |
258 pix1 += line_size; | |
259 pix2 += line_size; | |
260 } | |
261 return s; | |
262 } | |
263 | |
1708 | 264 static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) |
884 | 265 { |
1012
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
266 int s, i; |
4179 | 267 uint32_t *sq = ff_squareTbl + 256; |
884 | 268 |
269 s = 0; | |
1708 | 270 for (i = 0; i < h; i++) { |
1012
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
271 s += sq[pix1[ 0] - pix2[ 0]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
272 s += sq[pix1[ 1] - pix2[ 1]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
273 s += sq[pix1[ 2] - pix2[ 2]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
274 s += sq[pix1[ 3] - pix2[ 3]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
275 s += sq[pix1[ 4] - pix2[ 4]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
276 s += sq[pix1[ 5] - pix2[ 5]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
277 s += sq[pix1[ 6] - pix2[ 6]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
278 s += sq[pix1[ 7] - pix2[ 7]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
279 s += sq[pix1[ 8] - pix2[ 8]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
280 s += sq[pix1[ 9] - pix2[ 9]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
281 s += sq[pix1[10] - pix2[10]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
282 s += sq[pix1[11] - pix2[11]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
283 s += sq[pix1[12] - pix2[12]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
284 s += sq[pix1[13] - pix2[13]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
285 s += sq[pix1[14] - pix2[14]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
286 s += sq[pix1[15] - pix2[15]]; |
997
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
287 |
1012
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
288 pix1 += line_size; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
289 pix2 += line_size; |
884 | 290 } |
291 return s; | |
292 } | |
293 | |
/* draw the edges of width 'w' of an image of size width, height:
 * replicate the outermost rows/columns (and corners) of the image into
 * the surrounding border of the buffer. The buffer must have at least
 * w rows above/below and w columns left/right of 'buf' available.
 * (Fix: the bottom-corner comments previously said "top".) */
//FIXME check that this is ok for mpeg4 interlaced
static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w)
{
    uint8_t *ptr, *last_line;
    int i;

    last_line = buf + (height - 1) * wrap;
    for(i=0;i<w;i++) {
        /* top and bottom */
        memcpy(buf - (i + 1) * wrap, buf, width);
        memcpy(last_line + (i + 1) * wrap, last_line, width);
    }
    /* left and right */
    ptr = buf;
    for(i=0;i<height;i++) {
        memset(ptr - w, ptr[0], w);
        memset(ptr + width, ptr[width-1], w);
        ptr += wrap;
    }
    /* corners */
    for(i=0;i<w;i++) {
        memset(buf - (i + 1) * wrap - w, buf[0], w); /* top left */
        memset(buf - (i + 1) * wrap + width, buf[width-1], w); /* top right */
        memset(last_line + (i + 1) * wrap - w, last_line[0], w); /* bottom left */
        memset(last_line + (i + 1) * wrap + width, last_line[width-1], w); /* bottom right */
    }
}
322 | |
/**
 * Copy a rectangular area of samples to a temporary buffer and replicate the border samples.
 * @param buf destination buffer
 * @param src source buffer
 * @param linesize number of bytes between 2 vertically adjacent samples in both the source and destination buffers
 * @param block_w width of block
 * @param block_h height of block
 * @param src_x x coordinate of the top left sample of the block in the source buffer
 * @param src_y y coordinate of the top left sample of the block in the source buffer
 * @param w width of the source buffer
 * @param h height of the source buffer
 */
void ff_emulated_edge_mc(uint8_t *buf, const uint8_t *src, int linesize, int block_w, int block_h,
                    int src_x, int src_y, int w, int h){
    int x, y;
    int start_y, start_x, end_y, end_x;

    /* clamp a block lying entirely outside the picture onto its edge */
    if (src_y >= h) {
        src  += (h - 1 - src_y) * linesize;
        src_y = h - 1;
    } else if (src_y <= -block_h) {
        src  += (1 - block_h - src_y) * linesize;
        src_y = 1 - block_h;
    }
    if (src_x >= w) {
        src  += w - 1 - src_x;
        src_x = w - 1;
    } else if (src_x <= -block_w) {
        src  += 1 - block_w - src_x;
        src_x = 1 - block_w;
    }

    /* region of the block covered by real samples */
    start_y = -src_y > 0       ? -src_y     : 0;
    start_x = -src_x > 0       ? -src_x     : 0;
    end_y   = h - src_y < block_h ? h - src_y : block_h;
    end_x   = w - src_x < block_w ? w - src_x : block_w;

    /* copy the existing part */
    for (y = start_y; y < end_y; y++)
        for (x = start_x; x < end_x; x++)
            buf[x + y * linesize] = src[x + y * linesize];

    /* replicate upwards */
    for (y = 0; y < start_y; y++)
        for (x = start_x; x < end_x; x++)
            buf[x + y * linesize] = buf[x + start_y * linesize];

    /* replicate downwards */
    for (y = end_y; y < block_h; y++)
        for (x = start_x; x < end_x; x++)
            buf[x + y * linesize] = buf[x + (end_y - 1) * linesize];

    /* replicate the left and right columns */
    for (y = 0; y < block_h; y++) {
        for (x = 0; x < start_x; x++)
            buf[x + y * linesize] = buf[start_x + y * linesize];
        for (x = end_x; x < block_w; x++)
            buf[x + y * linesize] = buf[end_x - 1 + y * linesize];
    }
}
393 | |
1064 | 394 static void get_pixels_c(DCTELEM *restrict block, const uint8_t *pixels, int line_size) |
0 | 395 { |
396 int i; | |
397 | |
398 /* read the pixels */ | |
399 for(i=0;i<8;i++) { | |
516 | 400 block[0] = pixels[0]; |
401 block[1] = pixels[1]; | |
402 block[2] = pixels[2]; | |
403 block[3] = pixels[3]; | |
404 block[4] = pixels[4]; | |
405 block[5] = pixels[5]; | |
406 block[6] = pixels[6]; | |
407 block[7] = pixels[7]; | |
408 pixels += line_size; | |
409 block += 8; | |
0 | 410 } |
411 } | |
412 | |
1064 | 413 static void diff_pixels_c(DCTELEM *restrict block, const uint8_t *s1, |
2979 | 414 const uint8_t *s2, int stride){ |
324 | 415 int i; |
416 | |
417 /* read the pixels */ | |
418 for(i=0;i<8;i++) { | |
516 | 419 block[0] = s1[0] - s2[0]; |
420 block[1] = s1[1] - s2[1]; | |
421 block[2] = s1[2] - s2[2]; | |
422 block[3] = s1[3] - s2[3]; | |
423 block[4] = s1[4] - s2[4]; | |
424 block[5] = s1[5] - s2[5]; | |
425 block[6] = s1[6] - s2[6]; | |
426 block[7] = s1[7] - s2[7]; | |
324 | 427 s1 += stride; |
428 s2 += stride; | |
516 | 429 block += 8; |
324 | 430 } |
431 } | |
432 | |
433 | |
1064 | 434 static void put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels, |
2979 | 435 int line_size) |
0 | 436 { |
437 int i; | |
4176 | 438 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; |
2967 | 439 |
0 | 440 /* read the pixels */ |
441 for(i=0;i<8;i++) { | |
516 | 442 pixels[0] = cm[block[0]]; |
443 pixels[1] = cm[block[1]]; | |
444 pixels[2] = cm[block[2]]; | |
445 pixels[3] = cm[block[3]]; | |
446 pixels[4] = cm[block[4]]; | |
447 pixels[5] = cm[block[5]]; | |
448 pixels[6] = cm[block[6]]; | |
449 pixels[7] = cm[block[7]]; | |
450 | |
451 pixels += line_size; | |
452 block += 8; | |
0 | 453 } |
454 } | |
455 | |
2256 | 456 static void put_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels, |
2979 | 457 int line_size) |
2256 | 458 { |
459 int i; | |
4176 | 460 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; |
2967 | 461 |
2256 | 462 /* read the pixels */ |
463 for(i=0;i<4;i++) { | |
464 pixels[0] = cm[block[0]]; | |
465 pixels[1] = cm[block[1]]; | |
466 pixels[2] = cm[block[2]]; | |
467 pixels[3] = cm[block[3]]; | |
468 | |
469 pixels += line_size; | |
470 block += 8; | |
471 } | |
472 } | |
473 | |
2257 | 474 static void put_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels, |
2979 | 475 int line_size) |
2257 | 476 { |
477 int i; | |
4176 | 478 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; |
2967 | 479 |
2257 | 480 /* read the pixels */ |
481 for(i=0;i<2;i++) { | |
482 pixels[0] = cm[block[0]]; | |
483 pixels[1] = cm[block[1]]; | |
484 | |
485 pixels += line_size; | |
486 block += 8; | |
487 } | |
488 } | |
489 | |
2967 | 490 static void put_signed_pixels_clamped_c(const DCTELEM *block, |
1984
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
491 uint8_t *restrict pixels, |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
492 int line_size) |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
493 { |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
494 int i, j; |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
495 |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
496 for (i = 0; i < 8; i++) { |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
497 for (j = 0; j < 8; j++) { |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
498 if (*block < -128) |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
499 *pixels = 0; |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
500 else if (*block > 127) |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
501 *pixels = 255; |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
502 else |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
503 *pixels = (uint8_t)(*block + 128); |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
504 block++; |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
505 pixels++; |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
506 } |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
507 pixels += (line_size - 8); |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
508 } |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
509 } |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
510 |
11231 | 511 static void put_pixels_nonclamped_c(const DCTELEM *block, uint8_t *restrict pixels, |
512 int line_size) | |
513 { | |
514 int i; | |
515 | |
516 /* read the pixels */ | |
517 for(i=0;i<8;i++) { | |
518 pixels[0] = block[0]; | |
519 pixels[1] = block[1]; | |
520 pixels[2] = block[2]; | |
521 pixels[3] = block[3]; | |
522 pixels[4] = block[4]; | |
523 pixels[5] = block[5]; | |
524 pixels[6] = block[6]; | |
525 pixels[7] = block[7]; | |
526 | |
527 pixels += line_size; | |
528 block += 8; | |
529 } | |
530 } | |
531 | |
1064 | 532 static void add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels, |
516 | 533 int line_size) |
0 | 534 { |
535 int i; | |
4176 | 536 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; |
2967 | 537 |
0 | 538 /* read the pixels */ |
539 for(i=0;i<8;i++) { | |
516 | 540 pixels[0] = cm[pixels[0] + block[0]]; |
541 pixels[1] = cm[pixels[1] + block[1]]; | |
542 pixels[2] = cm[pixels[2] + block[2]]; | |
543 pixels[3] = cm[pixels[3] + block[3]]; | |
544 pixels[4] = cm[pixels[4] + block[4]]; | |
545 pixels[5] = cm[pixels[5] + block[5]]; | |
546 pixels[6] = cm[pixels[6] + block[6]]; | |
547 pixels[7] = cm[pixels[7] + block[7]]; | |
548 pixels += line_size; | |
549 block += 8; | |
0 | 550 } |
551 } | |
2256 | 552 |
553 static void add_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels, | |
554 int line_size) | |
555 { | |
556 int i; | |
4176 | 557 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; |
2967 | 558 |
2256 | 559 /* read the pixels */ |
560 for(i=0;i<4;i++) { | |
561 pixels[0] = cm[pixels[0] + block[0]]; | |
562 pixels[1] = cm[pixels[1] + block[1]]; | |
563 pixels[2] = cm[pixels[2] + block[2]]; | |
564 pixels[3] = cm[pixels[3] + block[3]]; | |
565 pixels += line_size; | |
566 block += 8; | |
567 } | |
568 } | |
2257 | 569 |
570 static void add_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels, | |
571 int line_size) | |
572 { | |
573 int i; | |
4176 | 574 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; |
2967 | 575 |
2257 | 576 /* read the pixels */ |
577 for(i=0;i<2;i++) { | |
578 pixels[0] = cm[pixels[0] + block[0]]; | |
579 pixels[1] = cm[pixels[1] + block[1]]; | |
580 pixels += line_size; | |
581 block += 8; | |
582 } | |
583 } | |
2763 | 584 |
585 static void add_pixels8_c(uint8_t *restrict pixels, DCTELEM *block, int line_size) | |
586 { | |
587 int i; | |
588 for(i=0;i<8;i++) { | |
589 pixels[0] += block[0]; | |
590 pixels[1] += block[1]; | |
591 pixels[2] += block[2]; | |
592 pixels[3] += block[3]; | |
593 pixels[4] += block[4]; | |
594 pixels[5] += block[5]; | |
595 pixels[6] += block[6]; | |
596 pixels[7] += block[7]; | |
597 pixels += line_size; | |
598 block += 8; | |
599 } | |
600 } | |
601 | |
602 static void add_pixels4_c(uint8_t *restrict pixels, DCTELEM *block, int line_size) | |
603 { | |
604 int i; | |
605 for(i=0;i<4;i++) { | |
606 pixels[0] += block[0]; | |
607 pixels[1] += block[1]; | |
608 pixels[2] += block[2]; | |
609 pixels[3] += block[3]; | |
610 pixels += line_size; | |
611 block += 4; | |
612 } | |
613 } | |
614 | |
4988
689490842cf5
factor sum_abs_dctelem out of dct_sad, and simd it.
lorenm
parents:
4749
diff
changeset
|
615 static int sum_abs_dctelem_c(DCTELEM *block) |
689490842cf5
factor sum_abs_dctelem out of dct_sad, and simd it.
lorenm
parents:
4749
diff
changeset
|
616 { |
689490842cf5
factor sum_abs_dctelem out of dct_sad, and simd it.
lorenm
parents:
4749
diff
changeset
|
617 int sum=0, i; |
689490842cf5
factor sum_abs_dctelem out of dct_sad, and simd it.
lorenm
parents:
4749
diff
changeset
|
618 for(i=0; i<64; i++) |
689490842cf5
factor sum_abs_dctelem out of dct_sad, and simd it.
lorenm
parents:
4749
diff
changeset
|
619 sum+= FFABS(block[i]); |
689490842cf5
factor sum_abs_dctelem out of dct_sad, and simd it.
lorenm
parents:
4749
diff
changeset
|
620 return sum; |
689490842cf5
factor sum_abs_dctelem out of dct_sad, and simd it.
lorenm
parents:
4749
diff
changeset
|
621 } |
689490842cf5
factor sum_abs_dctelem out of dct_sad, and simd it.
lorenm
parents:
4749
diff
changeset
|
622 |
/* Fill a 16-pixel-wide, h-row-tall block with a constant byte value. */
static void fill_block16_c(uint8_t *block, uint8_t value, int line_size, int h)
{
    while (h-- > 0) {
        memset(block, value, 16);
        block += line_size;
    }
}
632 | |
/* Fill an 8-pixel-wide, h-row-tall block with a constant byte value. */
static void fill_block8_c(uint8_t *block, uint8_t value, int line_size, int h)
{
    while (h-- > 0) {
        memset(block, value, 8);
        block += line_size;
    }
}
642 | |
/* Upscale an 8x8 block 2x in both directions: each source sample is
 * written as a 16-bit pair (the value duplicated in both bytes) into two
 * vertically adjacent destination rows.
 * NOTE(review): the row pointers advance by linesize uint16_t elements,
 * i.e. 2*linesize bytes per source row — preserved exactly from the
 * original. */
static void scale_block_c(const uint8_t src[64]/*align 8*/, uint8_t *dst/*align 8*/, int linesize)
{
    uint16_t *even_row = (uint16_t *) dst;
    uint16_t *odd_row  = (uint16_t *)(dst + linesize);
    int r, c;

    for (r = 0; r < 8; r++) {
        for (c = 0; c < 8; c++) {
            uint16_t v = src[c] * 0x0101;
            even_row[c] = v;
            odd_row[c]  = v;
        }
        src      += 8;
        even_row += linesize;
        odd_row  += linesize;
    }
}
658 | |
/*
 * NOTE: this #if 0 branch is the 64-bit-word variant of the PIXOP2
 * pixel-operation generator — it moves 8 pixels per load/store through
 * uint64_t.  It is disabled; the active 32-bit variant follows the
 * #else below.  Kept for reference only.
 *
 * NOTE(review): the plain copy below is generated as OPNAME ## _pixels,
 * but the CALL_2X_PIXELS line at the bottom references
 * OPNAME ## _pixels_c — this branch would not link if re-enabled;
 * confirm before resurrecting it.
 */
#if 0

/*
 * PIXOP2(OPNAME, OP) instantiates a family of halfpel motion-compensation
 * primitives named OPNAME ## _pixels*:
 *   - _pixels      : plain copy/average of 8-byte rows
 *   - _x2 / _y2    : horizontal / vertical halfpel interpolation
 *   - _xy2         : diagonal halfpel interpolation
 *   - _no_rnd_*    : variants that round down instead of up
 * OP is the write operation applied to the destination word.
 * Per-byte averages are computed inside one 64-bit word:
 *   round-up   average: (a|b) - (((a^b) & 0xFE..FE) >> 1)
 *   round-down average: (a&b) + (((a^b) & 0xFE..FE) >> 1)
 * (the 0xFE mask keeps the shifted carry bits from bleeding across bytes)
 */
#define PIXOP2(OPNAME, OP) \
static void OPNAME ## _pixels(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    for(i=0; i<h; i++){\
        OP(*((uint64_t*)block), AV_RN64(pixels));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static void OPNAME ## _no_rnd_pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    for(i=0; i<h; i++){\
        const uint64_t a= AV_RN64(pixels  );\
        const uint64_t b= AV_RN64(pixels+1);\
        OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static void OPNAME ## _pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    for(i=0; i<h; i++){\
        const uint64_t a= AV_RN64(pixels  );\
        const uint64_t b= AV_RN64(pixels+1);\
        OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static void OPNAME ## _no_rnd_pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    for(i=0; i<h; i++){\
        const uint64_t a= AV_RN64(pixels          );\
        const uint64_t b= AV_RN64(pixels+line_size);\
        OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static void OPNAME ## _pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    for(i=0; i<h; i++){\
        const uint64_t a= AV_RN64(pixels          );\
        const uint64_t b= AV_RN64(pixels+line_size);\
        OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
/* Diagonal halfpel: 4-tap (a+b+c+d+2)>>2 average.  Each byte's sum is \
 * split into its low 2 bits (l0/l1, which also carry the rounding \
 * constant 2) and high 6 bits pre-shifted down (h0/h1) so that the \
 * per-byte partial sums cannot overflow into the neighbouring byte. \
 * The loop processes two rows per iteration, reusing the previous \
 * row's partial sums. */ \
static void OPNAME ## _pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    const uint64_t a= AV_RN64(pixels  );\
    const uint64_t b= AV_RN64(pixels+1);\
    uint64_t l0= (a&0x0303030303030303ULL)\
               + (b&0x0303030303030303ULL)\
               + 0x0202020202020202ULL;\
    uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
               + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
    uint64_t l1,h1;\
\
    pixels+=line_size;\
    for(i=0; i<h; i+=2){\
        uint64_t a= AV_RN64(pixels  );\
        uint64_t b= AV_RN64(pixels+1);\
        l1= (a&0x0303030303030303ULL)\
          + (b&0x0303030303030303ULL);\
        h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
          + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
        OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
        pixels+=line_size;\
        block +=line_size;\
        a= AV_RN64(pixels  );\
        b= AV_RN64(pixels+1);\
        l0= (a&0x0303030303030303ULL)\
          + (b&0x0303030303030303ULL)\
          + 0x0202020202020202ULL;\
        h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
          + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
        OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
/* Same as _pixels_xy2_c above, but with rounding constant 1 instead \
 * of 2 (round down). */ \
static void OPNAME ## _no_rnd_pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    const uint64_t a= AV_RN64(pixels  );\
    const uint64_t b= AV_RN64(pixels+1);\
    uint64_t l0= (a&0x0303030303030303ULL)\
               + (b&0x0303030303030303ULL)\
               + 0x0101010101010101ULL;\
    uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
               + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
    uint64_t l1,h1;\
\
    pixels+=line_size;\
    for(i=0; i<h; i+=2){\
        uint64_t a= AV_RN64(pixels  );\
        uint64_t b= AV_RN64(pixels+1);\
        l1= (a&0x0303030303030303ULL)\
          + (b&0x0303030303030303ULL);\
        h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
          + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
        OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
        pixels+=line_size;\
        block +=line_size;\
        a= AV_RN64(pixels  );\
        b= AV_RN64(pixels+1);\
        l0= (a&0x0303030303030303ULL)\
          + (b&0x0303030303030303ULL)\
          + 0x0101010101010101ULL;\
        h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
          + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
        OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
CALL_2X_PIXELS(OPNAME ## _pixels16_c    , OPNAME ## _pixels_c    , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels_x2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels_y2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels_xy2_c, 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels_x2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels_y2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels_xy2_c, 8)

/* round-up per-byte average, used as the "avg" write op for PIXOP2 */
#define op_avg(a, b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEFEFEFEFEULL)>>1) )
#else // 64 bit variant
802 | |
803 #define PIXOP2(OPNAME, OP) \ | |
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
804 static void OPNAME ## _pixels2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
805 int i;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
806 for(i=0; i<h; i++){\ |
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
807 OP(*((uint16_t*)(block )), AV_RN16(pixels ));\ |
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
808 pixels+=line_size;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
809 block +=line_size;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
810 }\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
811 }\ |
1168 | 812 static void OPNAME ## _pixels4_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
813 int i;\ | |
814 for(i=0; i<h; i++){\ | |
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
815 OP(*((uint32_t*)(block )), AV_RN32(pixels ));\ |
1168 | 816 pixels+=line_size;\ |
817 block +=line_size;\ | |
818 }\ | |
819 }\ | |
859 | 820 static void OPNAME ## _pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
385 | 821 int i;\ |
822 for(i=0; i<h; i++){\ | |
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
823 OP(*((uint32_t*)(block )), AV_RN32(pixels ));\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
824 OP(*((uint32_t*)(block+4)), AV_RN32(pixels+4));\ |
385 | 825 pixels+=line_size;\ |
826 block +=line_size;\ | |
827 }\ | |
828 }\ | |
859 | 829 static inline void OPNAME ## _no_rnd_pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
830 OPNAME ## _pixels8_c(block, pixels, line_size, h);\ | |
651 | 831 }\ |
385 | 832 \ |
651 | 833 static inline void OPNAME ## _no_rnd_pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ |
834 int src_stride1, int src_stride2, int h){\ | |
385 | 835 int i;\ |
836 for(i=0; i<h; i++){\ | |
651 | 837 uint32_t a,b;\ |
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
838 a= AV_RN32(&src1[i*src_stride1 ]);\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
839 b= AV_RN32(&src2[i*src_stride2 ]);\ |
1264 | 840 OP(*((uint32_t*)&dst[i*dst_stride ]), no_rnd_avg32(a, b));\ |
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
841 a= AV_RN32(&src1[i*src_stride1+4]);\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
842 b= AV_RN32(&src2[i*src_stride2+4]);\ |
1264 | 843 OP(*((uint32_t*)&dst[i*dst_stride+4]), no_rnd_avg32(a, b));\ |
385 | 844 }\ |
845 }\ | |
846 \ | |
651 | 847 static inline void OPNAME ## _pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ |
848 int src_stride1, int src_stride2, int h){\ | |
385 | 849 int i;\ |
850 for(i=0; i<h; i++){\ | |
651 | 851 uint32_t a,b;\ |
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
852 a= AV_RN32(&src1[i*src_stride1 ]);\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
853 b= AV_RN32(&src2[i*src_stride2 ]);\ |
1264 | 854 OP(*((uint32_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\ |
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
855 a= AV_RN32(&src1[i*src_stride1+4]);\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
856 b= AV_RN32(&src2[i*src_stride2+4]);\ |
1264 | 857 OP(*((uint32_t*)&dst[i*dst_stride+4]), rnd_avg32(a, b));\ |
385 | 858 }\ |
859 }\ | |
860 \ | |
1168 | 861 static inline void OPNAME ## _pixels4_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ |
862 int src_stride1, int src_stride2, int h){\ | |
863 int i;\ | |
864 for(i=0; i<h; i++){\ | |
865 uint32_t a,b;\ | |
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
866 a= AV_RN32(&src1[i*src_stride1 ]);\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
867 b= AV_RN32(&src2[i*src_stride2 ]);\ |
1264 | 868 OP(*((uint32_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\ |
1168 | 869 }\ |
870 }\ | |
871 \ | |
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
872 static inline void OPNAME ## _pixels2_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
873 int src_stride1, int src_stride2, int h){\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
874 int i;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
875 for(i=0; i<h; i++){\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
876 uint32_t a,b;\ |
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
877 a= AV_RN16(&src1[i*src_stride1 ]);\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
878 b= AV_RN16(&src2[i*src_stride2 ]);\ |
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
879 OP(*((uint16_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
880 }\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
881 }\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
882 \ |
651 | 883 static inline void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ |
884 int src_stride1, int src_stride2, int h){\ | |
885 OPNAME ## _pixels8_l2(dst , src1 , src2 , dst_stride, src_stride1, src_stride2, h);\ | |
886 OPNAME ## _pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\ | |
887 }\ | |
888 \ | |
889 static inline void OPNAME ## _no_rnd_pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ | |
890 int src_stride1, int src_stride2, int h){\ | |
891 OPNAME ## _no_rnd_pixels8_l2(dst , src1 , src2 , dst_stride, src_stride1, src_stride2, h);\ | |
892 OPNAME ## _no_rnd_pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\ | |
893 }\ | |
894 \ | |
859 | 895 static inline void OPNAME ## _no_rnd_pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
651 | 896 OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\ |
897 }\ | |
898 \ | |
859 | 899 static inline void OPNAME ## _pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
651 | 900 OPNAME ## _pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\ |
901 }\ | |
902 \ | |
859 | 903 static inline void OPNAME ## _no_rnd_pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
651 | 904 OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\ |
905 }\ | |
906 \ | |
859 | 907 static inline void OPNAME ## _pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
651 | 908 OPNAME ## _pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\ |
385 | 909 }\ |
910 \ | |
11783 | 911 static inline void OPNAME ## _pixels8_l4(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\ |
651 | 912 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ |
913 int i;\ | |
914 for(i=0; i<h; i++){\ | |
915 uint32_t a, b, c, d, l0, l1, h0, h1;\ | |
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
916 a= AV_RN32(&src1[i*src_stride1]);\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
917 b= AV_RN32(&src2[i*src_stride2]);\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
918 c= AV_RN32(&src3[i*src_stride3]);\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
919 d= AV_RN32(&src4[i*src_stride4]);\ |
651 | 920 l0= (a&0x03030303UL)\ |
921 + (b&0x03030303UL)\ | |
922 + 0x02020202UL;\ | |
923 h0= ((a&0xFCFCFCFCUL)>>2)\ | |
924 + ((b&0xFCFCFCFCUL)>>2);\ | |
925 l1= (c&0x03030303UL)\ | |
926 + (d&0x03030303UL);\ | |
927 h1= ((c&0xFCFCFCFCUL)>>2)\ | |
928 + ((d&0xFCFCFCFCUL)>>2);\ | |
929 OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ | |
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
930 a= AV_RN32(&src1[i*src_stride1+4]);\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
931 b= AV_RN32(&src2[i*src_stride2+4]);\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
932 c= AV_RN32(&src3[i*src_stride3+4]);\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
933 d= AV_RN32(&src4[i*src_stride4+4]);\ |
651 | 934 l0= (a&0x03030303UL)\ |
935 + (b&0x03030303UL)\ | |
936 + 0x02020202UL;\ | |
937 h0= ((a&0xFCFCFCFCUL)>>2)\ | |
938 + ((b&0xFCFCFCFCUL)>>2);\ | |
939 l1= (c&0x03030303UL)\ | |
940 + (d&0x03030303UL);\ | |
941 h1= ((c&0xFCFCFCFCUL)>>2)\ | |
942 + ((d&0xFCFCFCFCUL)>>2);\ | |
943 OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ | |
944 }\ | |
945 }\ | |
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
946 \ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
947 static inline void OPNAME ## _pixels4_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
948 OPNAME ## _pixels4_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
949 }\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
950 \ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
951 static inline void OPNAME ## _pixels4_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
952 OPNAME ## _pixels4_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
953 }\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
954 \ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
955 static inline void OPNAME ## _pixels2_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
956 OPNAME ## _pixels2_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
957 }\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
958 \ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
959 static inline void OPNAME ## _pixels2_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
960 OPNAME ## _pixels2_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
961 }\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
962 \ |
11783 | 963 static inline void OPNAME ## _no_rnd_pixels8_l4(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\ |
651 | 964 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ |
385 | 965 int i;\ |
966 for(i=0; i<h; i++){\ | |
651 | 967 uint32_t a, b, c, d, l0, l1, h0, h1;\ |
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
968 a= AV_RN32(&src1[i*src_stride1]);\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
969 b= AV_RN32(&src2[i*src_stride2]);\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
970 c= AV_RN32(&src3[i*src_stride3]);\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
971 d= AV_RN32(&src4[i*src_stride4]);\ |
651 | 972 l0= (a&0x03030303UL)\ |
973 + (b&0x03030303UL)\ | |
974 + 0x01010101UL;\ | |
975 h0= ((a&0xFCFCFCFCUL)>>2)\ | |
976 + ((b&0xFCFCFCFCUL)>>2);\ | |
977 l1= (c&0x03030303UL)\ | |
978 + (d&0x03030303UL);\ | |
979 h1= ((c&0xFCFCFCFCUL)>>2)\ | |
980 + ((d&0xFCFCFCFCUL)>>2);\ | |
981 OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ | |
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
982 a= AV_RN32(&src1[i*src_stride1+4]);\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
983 b= AV_RN32(&src2[i*src_stride2+4]);\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
984 c= AV_RN32(&src3[i*src_stride3+4]);\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
985 d= AV_RN32(&src4[i*src_stride4+4]);\ |
651 | 986 l0= (a&0x03030303UL)\ |
987 + (b&0x03030303UL)\ | |
988 + 0x01010101UL;\ | |
989 h0= ((a&0xFCFCFCFCUL)>>2)\ | |
990 + ((b&0xFCFCFCFCUL)>>2);\ | |
991 l1= (c&0x03030303UL)\ | |
992 + (d&0x03030303UL);\ | |
993 h1= ((c&0xFCFCFCFCUL)>>2)\ | |
994 + ((d&0xFCFCFCFCUL)>>2);\ | |
995 OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ | |
385 | 996 }\ |
997 }\ | |
11783 | 998 static inline void OPNAME ## _pixels16_l4(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\ |
651 | 999 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ |
1000 OPNAME ## _pixels8_l4(dst , src1 , src2 , src3 , src4 , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\ | |
1001 OPNAME ## _pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\ | |
1002 }\ | |
11783 | 1003 static inline void OPNAME ## _no_rnd_pixels16_l4(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\ |
651 | 1004 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ |
1005 OPNAME ## _no_rnd_pixels8_l4(dst , src1 , src2 , src3 , src4 , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\ | |
1006 OPNAME ## _no_rnd_pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\ | |
1007 }\ | |
385 | 1008 \ |
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1009 static inline void OPNAME ## _pixels2_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1010 {\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1011 int i, a0, b0, a1, b1;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1012 a0= pixels[0];\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1013 b0= pixels[1] + 2;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1014 a0 += b0;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1015 b0 += pixels[2];\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1016 \ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1017 pixels+=line_size;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1018 for(i=0; i<h; i+=2){\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1019 a1= pixels[0];\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1020 b1= pixels[1];\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1021 a1 += b1;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1022 b1 += pixels[2];\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1023 \ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1024 block[0]= (a1+a0)>>2; /* FIXME non put */\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1025 block[1]= (b1+b0)>>2;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1026 \ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1027 pixels+=line_size;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1028 block +=line_size;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1029 \ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1030 a0= pixels[0];\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1031 b0= pixels[1] + 2;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1032 a0 += b0;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1033 b0 += pixels[2];\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1034 \ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1035 block[0]= (a1+a0)>>2;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1036 block[1]= (b1+b0)>>2;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1037 pixels+=line_size;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1038 block +=line_size;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1039 }\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1040 }\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1041 \ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1042 static inline void OPNAME ## _pixels4_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1043 {\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1044 int i;\ |
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
1045 const uint32_t a= AV_RN32(pixels );\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
1046 const uint32_t b= AV_RN32(pixels+1);\ |
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1047 uint32_t l0= (a&0x03030303UL)\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1048 + (b&0x03030303UL)\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1049 + 0x02020202UL;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1050 uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1051 + ((b&0xFCFCFCFCUL)>>2);\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1052 uint32_t l1,h1;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1053 \ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1054 pixels+=line_size;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1055 for(i=0; i<h; i+=2){\ |
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
1056 uint32_t a= AV_RN32(pixels );\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
1057 uint32_t b= AV_RN32(pixels+1);\ |
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1058 l1= (a&0x03030303UL)\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1059 + (b&0x03030303UL);\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1060 h1= ((a&0xFCFCFCFCUL)>>2)\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1061 + ((b&0xFCFCFCFCUL)>>2);\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1062 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1063 pixels+=line_size;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1064 block +=line_size;\ |
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
1065 a= AV_RN32(pixels );\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
1066 b= AV_RN32(pixels+1);\ |
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1067 l0= (a&0x03030303UL)\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1068 + (b&0x03030303UL)\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1069 + 0x02020202UL;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1070 h0= ((a&0xFCFCFCFCUL)>>2)\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1071 + ((b&0xFCFCFCFCUL)>>2);\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1072 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1073 pixels+=line_size;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1074 block +=line_size;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1075 }\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1076 }\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1077 \ |
859 | 1078 static inline void OPNAME ## _pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ |
385 | 1079 {\ |
1080 int j;\ | |
1081 for(j=0; j<2; j++){\ | |
1082 int i;\ | |
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
1083 const uint32_t a= AV_RN32(pixels );\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
1084 const uint32_t b= AV_RN32(pixels+1);\ |
385 | 1085 uint32_t l0= (a&0x03030303UL)\ |
1086 + (b&0x03030303UL)\ | |
1087 + 0x02020202UL;\ | |
1088 uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\ | |
1089 + ((b&0xFCFCFCFCUL)>>2);\ | |
1090 uint32_t l1,h1;\ | |
1091 \ | |
1092 pixels+=line_size;\ | |
1093 for(i=0; i<h; i+=2){\ | |
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
1094 uint32_t a= AV_RN32(pixels );\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
1095 uint32_t b= AV_RN32(pixels+1);\ |
385 | 1096 l1= (a&0x03030303UL)\ |
1097 + (b&0x03030303UL);\ | |
1098 h1= ((a&0xFCFCFCFCUL)>>2)\ | |
1099 + ((b&0xFCFCFCFCUL)>>2);\ | |
1100 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ | |
1101 pixels+=line_size;\ | |
1102 block +=line_size;\ | |
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
1103 a= AV_RN32(pixels );\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
1104 b= AV_RN32(pixels+1);\ |
385 | 1105 l0= (a&0x03030303UL)\ |
1106 + (b&0x03030303UL)\ | |
1107 + 0x02020202UL;\ | |
1108 h0= ((a&0xFCFCFCFCUL)>>2)\ | |
1109 + ((b&0xFCFCFCFCUL)>>2);\ | |
1110 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ | |
1111 pixels+=line_size;\ | |
1112 block +=line_size;\ | |
1113 }\ | |
1114 pixels+=4-line_size*(h+1);\ | |
1115 block +=4-line_size*h;\ | |
1116 }\ | |
1117 }\ | |
1118 \ | |
859 | 1119 static inline void OPNAME ## _no_rnd_pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ |
385 | 1120 {\ |
1121 int j;\ | |
1122 for(j=0; j<2; j++){\ | |
1123 int i;\ | |
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
1124 const uint32_t a= AV_RN32(pixels );\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
1125 const uint32_t b= AV_RN32(pixels+1);\ |
385 | 1126 uint32_t l0= (a&0x03030303UL)\ |
1127 + (b&0x03030303UL)\ | |
1128 + 0x01010101UL;\ | |
1129 uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\ | |
1130 + ((b&0xFCFCFCFCUL)>>2);\ | |
1131 uint32_t l1,h1;\ | |
1132 \ | |
1133 pixels+=line_size;\ | |
1134 for(i=0; i<h; i+=2){\ | |
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
1135 uint32_t a= AV_RN32(pixels );\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
1136 uint32_t b= AV_RN32(pixels+1);\ |
385 | 1137 l1= (a&0x03030303UL)\ |
1138 + (b&0x03030303UL);\ | |
1139 h1= ((a&0xFCFCFCFCUL)>>2)\ | |
1140 + ((b&0xFCFCFCFCUL)>>2);\ | |
1141 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ | |
1142 pixels+=line_size;\ | |
1143 block +=line_size;\ | |
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
1144 a= AV_RN32(pixels );\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
1145 b= AV_RN32(pixels+1);\ |
385 | 1146 l0= (a&0x03030303UL)\ |
1147 + (b&0x03030303UL)\ | |
1148 + 0x01010101UL;\ | |
1149 h0= ((a&0xFCFCFCFCUL)>>2)\ | |
1150 + ((b&0xFCFCFCFCUL)>>2);\ | |
1151 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ | |
1152 pixels+=line_size;\ | |
1153 block +=line_size;\ | |
1154 }\ | |
1155 pixels+=4-line_size*(h+1);\ | |
1156 block +=4-line_size*h;\ | |
1157 }\ | |
1158 }\ | |
1159 \ | |
859 | 1160 CALL_2X_PIXELS(OPNAME ## _pixels16_c , OPNAME ## _pixels8_c , 8)\ |
1161 CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels8_x2_c , 8)\ | |
1162 CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels8_y2_c , 8)\ | |
1163 CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels8_xy2_c, 8)\ | |
1164 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_c , OPNAME ## _pixels8_c , 8)\ | |
1165 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels8_x2_c , 8)\ | |
1166 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels8_y2_c , 8)\ | |
1167 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels8_xy2_c, 8)\ | |
651 | 1168 |
1264 | 1169 #define op_avg(a, b) a = rnd_avg32(a, b) |
385 | 1170 #endif |
1171 #define op_put(a, b) a = b | |
1172 | |
1173 PIXOP2(avg, op_avg) | |
1174 PIXOP2(put, op_put) | |
1175 #undef op_avg | |
1176 #undef op_put | |
1177 | |
0 | 1178 #define avg2(a,b) ((a+b+1)>>1) |
1179 #define avg4(a,b,c,d) ((a+b+c+d+2)>>2) | |
1180 | |
static void put_no_rnd_pixels16_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h){
    /* Thin wrapper: 16-wide no-rounding two-source average, forwarding to
     * put_no_rnd_pixels16_l2() with all three strides equal. */
    put_no_rnd_pixels16_l2(dst, a, b, stride, stride, stride, h);
}
1184 | |
static void put_no_rnd_pixels8_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h){
    /* Thin wrapper: 8-wide no-rounding two-source average, forwarding to
     * put_no_rnd_pixels8_l2() with all three strides equal. */
    put_no_rnd_pixels8_l2(dst, a, b, stride, stride, stride, h);
}
753 | 1188 |
static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder)
{
    /* One-vector GMC: 8-pixel-wide bilinear interpolation at a fixed
     * 1/16-pel offset (x16, y16).  The four 2x2 tap weights sum to 256,
     * so the result is normalized with `rounder` and a >>8. */
    const int A = (16 - x16) * (16 - y16);  /* top-left weight     */
    const int B = (     x16) * (16 - y16);  /* top-right weight    */
    const int C = (16 - x16) * (     y16);  /* bottom-left weight  */
    const int D = (     x16) * (     y16);  /* bottom-right weight */
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++) {
            dst[col] = (A * src[col]
                      + B * src[col + 1]
                      + C * src[stride + col]
                      + D * src[stride + col + 1]
                      + rounder) >> 8;
        }
        dst += stride;
        src += stride;
    }
}
1211 | |
3248
7aa9f80e7954
mmx implementation of 3-point GMC. (5x faster than C)
lorenm
parents:
3245
diff
changeset
|
void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
              int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
{
    /* Affine global motion compensation for an 8-pixel-wide band of h rows.
     * (ox, oy) is the 16.16 fixed-point source position of the first output
     * pixel; (dxx, dyx) step it per output column, (dxy, dyy) per output row.
     * Samples falling outside the width x height source area are clamped to
     * the nearest edge, and the bilinear interpolation collapses to 1-D or
     * to a plain copy accordingly. */
    const int s = 1 << shift;   /* sub-pel denominator */
    int row;

    width--;                    /* from here on: index of last valid column */
    height--;                   /* from here on: index of last valid row    */

    for (row = 0; row < h; row++) {
        int col;
        int vx = ox;
        int vy = oy;

        for (col = 0; col < 8; col++) { //XXX FIXME optimize
            int ix = vx >> 16;
            int iy = vy >> 16;
            /* fractions are taken before the integer-part shift, as in the
             * original: low `shift` bits of the 16.16 integer part */
            int fx = ix & (s - 1);
            int fy = iy & (s - 1);
            int idx;

            ix >>= shift;
            iy >>= shift;

            if ((unsigned)ix < width) {       /* unsigned compare also rejects ix < 0 */
                if ((unsigned)iy < height) {
                    /* fully inside: 2-D bilinear interpolation */
                    idx = ix + iy * stride;
                    dst[row * stride + col] = ((src[idx]              * (s - fx)
                                              + src[idx + 1]          *      fx) * (s - fy)
                                             + (src[idx + stride]     * (s - fx)
                                              + src[idx + stride + 1] *      fx) *      fy
                                             + r) >> (shift * 2);
                } else {
                    /* vertically outside: clamp row, interpolate horizontally only */
                    idx = ix + av_clip(iy, 0, height) * stride;
                    dst[row * stride + col] = ((src[idx]     * (s - fx)
                                              + src[idx + 1] *      fx) * s
                                             + r) >> (shift * 2);
                }
            } else {
                if ((unsigned)iy < height) {
                    /* horizontally outside: clamp column, interpolate vertically only */
                    idx = av_clip(ix, 0, width) + iy * stride;
                    dst[row * stride + col] = ((src[idx]          * (s - fy)
                                              + src[idx + stride] *      fy) * s
                                             + r) >> (shift * 2);
                } else {
                    /* outside both ways: nearest edge sample */
                    idx = av_clip(ix, 0, width) + av_clip(iy, 0, height) * stride;
                    dst[row * stride + col] = src[idx];
                }
            }

            vx += dxx;
            vy += dyx;
        }
        ox += dxy;
        oy += dyy;
    }
}
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1269 |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    /* Third-pel MC, zero offset: plain block copy dispatched to the
     * fixed-width put_pixels routine matching `width`.
     * NOTE(review): widths other than 2/4/8/16 are silently ignored,
     * exactly as in the original switch. */
    if      (width ==  2) put_pixels2_c (dst, src, stride, height);
    else if (width ==  4) put_pixels4_c (dst, src, stride, height);
    else if (width ==  8) put_pixels8_c (dst, src, stride, height);
    else if (width == 16) put_pixels16_c(dst, src, stride, height);
}
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1278 |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    /* Third-pel MC, horizontal offset 1/3: dst ~ (2*cur + right)/3.
     * 683/2048 is the fixed-point approximation of 1/3. */
    int y;
    for (y = 0; y < height; y++) {
        int x;
        for (x = 0; x < width; x++)
            dst[x] = (683 * (2 * src[x] + src[x + 1] + 1)) >> 11;
        src += stride;
        dst += stride;
    }
}
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1289 |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    /* Third-pel MC, horizontal offset 2/3: dst ~ (cur + 2*right)/3.
     * 683/2048 is the fixed-point approximation of 1/3. */
    int y;
    for (y = 0; y < height; y++) {
        int x;
        for (x = 0; x < width; x++)
            dst[x] = (683 * (src[x] + 2 * src[x + 1] + 1)) >> 11;
        src += stride;
        dst += stride;
    }
}
2967 | 1300 |
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    /* Third-pel MC, vertical offset 1/3: dst ~ (2*cur + below)/3.
     * 683/2048 is the fixed-point approximation of 1/3. */
    int y;
    for (y = 0; y < height; y++) {
        int x;
        for (x = 0; x < width; x++)
            dst[x] = (683 * (2 * src[x] + src[x + stride] + 1)) >> 11;
        src += stride;
        dst += stride;
    }
}
2967 | 1311 |
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    /* Third-pel MC at offset (1/3, 1/3): 2x2 weighted average with
     * weights 4,3,3,2 (sum 12); 2731/32768 approximates 1/12. */
    int y;
    for (y = 0; y < height; y++) {
        int x;
        for (x = 0; x < width; x++)
            dst[x] = (2731 * (4 * src[x]
                            + 3 * src[x + 1]
                            + 3 * src[x + stride]
                            + 2 * src[x + stride + 1] + 6)) >> 15;
        src += stride;
        dst += stride;
    }
}
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1322 |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    /* Third-pel MC at offset (1/3, 2/3): 2x2 weighted average with
     * weights 3,2,4,3 (sum 12); 2731/32768 approximates 1/12. */
    int y;
    for (y = 0; y < height; y++) {
        int x;
        for (x = 0; x < width; x++)
            dst[x] = (2731 * (3 * src[x]
                            + 2 * src[x + 1]
                            + 4 * src[x + stride]
                            + 3 * src[x + stride + 1] + 6)) >> 15;
        src += stride;
        dst += stride;
    }
}
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1333 |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    /* Third-pel MC, vertical offset 2/3: dst ~ (cur + 2*below)/3.
     * 683/2048 is the fixed-point approximation of 1/3. */
    int y;
    for (y = 0; y < height; y++) {
        int x;
        for (x = 0; x < width; x++)
            dst[x] = (683 * (src[x] + 2 * src[x + stride] + 1)) >> 11;
        src += stride;
        dst += stride;
    }
}
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1344 |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    /* Third-pel MC at offset (2/3, 1/3): 2x2 weighted average with
     * weights 3,4,2,3 (sum 12); 2731/32768 approximates 1/12. */
    int y;
    for (y = 0; y < height; y++) {
        int x;
        for (x = 0; x < width; x++)
            dst[x] = (2731 * (3 * src[x]
                            + 4 * src[x + 1]
                            + 2 * src[x + stride]
                            + 3 * src[x + stride + 1] + 6)) >> 15;
        src += stride;
        dst += stride;
    }
}
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1355 |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    /* Thirdpel interpolation over the 2x2 neighbourhood with weights
     * 2:3:3:4 (sum 12); 2731 ~= 2^15/12 is the fixed-point reciprocal
     * of the weight sum, +6 is the rounding bias. */
    int row, col;
    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            const int sum = 2*src[col]          + 3*src[col+1]
                          + 3*src[col+stride]   + 4*src[col+stride+1] + 6;
            dst[col] = (2731*sum) >> 15;
        }
        src += stride;
        dst += stride;
    }
}
1319 | 1366 |
static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    /* Integer-position case: no filtering, just dispatch to the
     * fixed-width averaging copy helpers defined earlier in this file. */
    switch (width) {
    case 2:  avg_pixels2_c (dst, src, stride, height); break;
    case 4:  avg_pixels4_c (dst, src, stride, height); break;
    case 8:  avg_pixels8_c (dst, src, stride, height); break;
    case 16: avg_pixels16_c(dst, src, stride, height); break;
    /* other widths are silently ignored, matching the original table */
    }
}
1375 | |
static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    /* Horizontal thirdpel filter (weights 2:1, 683 ~= 2^11/3, +1 bias),
     * then rounding-average the filtered value into dst. */
    int row, col;
    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            const int tmp = (683*(2*src[col] + src[col+1] + 1)) >> 11;
            dst[col] = (dst[col] + tmp + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
1386 | |
static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    /* Horizontal thirdpel filter (weights 1:2, 683 ~= 2^11/3, +1 bias),
     * then rounding-average into dst. */
    int row, col;
    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            const int tmp = (683*(src[col] + 2*src[col+1] + 1)) >> 11;
            dst[col] = (dst[col] + tmp + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
2967 | 1397 |
static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    /* Vertical thirdpel filter (weights 2:1 on the two rows,
     * 683 ~= 2^11/3, +1 bias), then rounding-average into dst. */
    int row, col;
    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            const int tmp = (683*(2*src[col] + src[col+stride] + 1)) >> 11;
            dst[col] = (dst[col] + tmp + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
2967 | 1408 |
static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    /* 2x2 thirdpel filter with weights 4:3:3:2 (sum 12, 2731 ~= 2^15/12,
     * +6 bias), then rounding-average into dst. */
    int row, col;
    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            const int sum = 4*src[col]        + 3*src[col+1]
                          + 3*src[col+stride] + 2*src[col+stride+1] + 6;
            dst[col] = (dst[col] + ((2731*sum) >> 15) + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
1419 | |
static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    /* 2x2 thirdpel filter with weights 3:2:4:3 (sum 12, 2731 ~= 2^15/12,
     * +6 bias), then rounding-average into dst. */
    int row, col;
    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            const int sum = 3*src[col]        + 2*src[col+1]
                          + 4*src[col+stride] + 3*src[col+stride+1] + 6;
            dst[col] = (dst[col] + ((2731*sum) >> 15) + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
1430 | |
static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    /* Vertical thirdpel filter (weights 1:2 on the two rows,
     * 683 ~= 2^11/3, +1 bias), then rounding-average into dst. */
    int row, col;
    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            const int tmp = (683*(src[col] + 2*src[col+stride] + 1)) >> 11;
            dst[col] = (dst[col] + tmp + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
1441 | |
static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    /* 2x2 thirdpel filter with weights 3:4:2:3 (sum 12, 2731 ~= 2^15/12,
     * +6 bias), then rounding-average into dst. */
    int row, col;
    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            const int sum = 3*src[col]        + 4*src[col+1]
                          + 2*src[col+stride] + 3*src[col+stride+1] + 6;
            dst[col] = (dst[col] + ((2731*sum) >> 15) + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
1452 | |
static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    /* 2x2 thirdpel filter with weights 2:3:3:4 (sum 12, 2731 ~= 2^15/12,
     * +6 bias), then rounding-average into dst. */
    int row, col;
    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            const int sum = 2*src[col]        + 3*src[col+1]
                          + 3*src[col+stride] + 4*src[col+stride+1] + 6;
            dst[col] = (dst[col] + ((2731*sum) >> 15) + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
#if 0
/* NOTE(review): disabled dead code, preserved unchanged.  It was meant to
 * generate fixed-width wrappers around the variable-width tpel helpers
 * above, but as written each body contains a stray leading "void" so it
 * would not even compile -- which is presumably why it is under #if 0. */
#define TPEL_WIDTH(width)\
static void put_tpel_pixels ## width ## _mc00_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    void put_tpel_pixels_mc00_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc10_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    void put_tpel_pixels_mc10_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc20_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    void put_tpel_pixels_mc20_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc01_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    void put_tpel_pixels_mc01_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc11_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    void put_tpel_pixels_mc11_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc21_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    void put_tpel_pixels_mc21_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc02_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    void put_tpel_pixels_mc02_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc12_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    void put_tpel_pixels_mc12_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc22_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    void put_tpel_pixels_mc22_c(dst, src, stride, width, height);}
#endif
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1484 |
/* Generator for H.264 chroma motion compensation at widths 2/4/8.
 * A..D are the standard bilinear weights built from the eighth-pel
 * offsets x,y in [0,8): A=(8-x)(8-y), B=x(8-y), C=(8-x)y, D=xy.
 * When D==0 one offset is zero, so the 2x2 kernel degenerates to a
 * 2-tap filter E=B+C applied along the row (step=1) or the column
 * (step=stride); this is faster and avoids reading samples that get
 * multiplied by zero.  OP performs the rounding/store step. */
#define H264_CHROMA_MC(OPNAME, OP)\
static void OPNAME ## h264_chroma_mc2_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
    const int A=(8-x)*(8-y);\
    const int B=(  x)*(8-y);\
    const int C=(8-x)*(  y);\
    const int D=(  x)*(  y);\
    int i, j;\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
    \
    if(D){\
        for(i=0; i<h; i++){\
            for(j=0; j<2; j++)\
                OP(dst[j], (A*src[j] + B*src[j+1] + C*src[stride+j] + D*src[stride+j+1]));\
            dst+= stride;\
            src+= stride;\
        }\
    }else{\
        const int E= B+C;\
        const int step= C ? stride : 1;\
        for(i=0; i<h; i++){\
            for(j=0; j<2; j++)\
                OP(dst[j], (A*src[j] + E*src[step+j]));\
            dst+= stride;\
            src+= stride;\
        }\
    }\
}\
\
static void OPNAME ## h264_chroma_mc4_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
    const int A=(8-x)*(8-y);\
    const int B=(  x)*(8-y);\
    const int C=(8-x)*(  y);\
    const int D=(  x)*(  y);\
    int i, j;\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
    \
    if(D){\
        for(i=0; i<h; i++){\
            for(j=0; j<4; j++)\
                OP(dst[j], (A*src[j] + B*src[j+1] + C*src[stride+j] + D*src[stride+j+1]));\
            dst+= stride;\
            src+= stride;\
        }\
    }else{\
        const int E= B+C;\
        const int step= C ? stride : 1;\
        for(i=0; i<h; i++){\
            for(j=0; j<4; j++)\
                OP(dst[j], (A*src[j] + E*src[step+j]));\
            dst+= stride;\
            src+= stride;\
        }\
    }\
}\
\
static void OPNAME ## h264_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
    const int A=(8-x)*(8-y);\
    const int B=(  x)*(8-y);\
    const int C=(8-x)*(  y);\
    const int D=(  x)*(  y);\
    int i, j;\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
    \
    if(D){\
        for(i=0; i<h; i++){\
            for(j=0; j<8; j++)\
                OP(dst[j], (A*src[j] + B*src[j+1] + C*src[stride+j] + D*src[stride+j+1]));\
            dst+= stride;\
            src+= stride;\
        }\
    }else{\
        const int E= B+C;\
        const int step= C ? stride : 1;\
        for(i=0; i<h; i++){\
            for(j=0; j<8; j++)\
                OP(dst[j], (A*src[j] + E*src[step+j]));\
            dst+= stride;\
            src+= stride;\
        }\
    }\
}

/* Rounding/store variants: op_put stores (v+32)>>6, op_avg additionally
 * rounding-averages with the existing destination sample. */
#define op_avg(a, b) a = (((a)+(((b) + 32)>>6)+1)>>1)
#define op_put(a, b) a = (((b) + 32)>>6)

H264_CHROMA_MC(put_ , op_put)
H264_CHROMA_MC(avg_ , op_avg)
#undef op_avg
#undef op_put
1593 | |
9439
ef3a7b711cc0
Rename put_no_rnd_h264_chroma* to reflect its usage in VC1 only
conrad
parents:
9437
diff
changeset
|
static void put_no_rnd_vc1_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){
    /* VC-1 8-wide chroma MC: same bilinear kernel as the H.264 chroma
     * code above, but with the reduced "no rounding" bias 32-4. */
    const int A=(8-x)*(8-y);
    const int B=(  x)*(8-y);
    const int C=(8-x)*(  y);
    const int D=(  x)*(  y);
    int i, j;

    assert(x<8 && y<8 && x>=0 && y>=0);

    for(i=0; i<h; i++){
        for(j=0; j<8; j++)
            dst[j] = (A*src[j] + B*src[j+1] + C*src[stride+j] + D*src[stride+j+1] + 32 - 4) >> 6;
        dst+= stride;
        src+= stride;
    }
}
1617 | |
static void avg_no_rnd_vc1_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){
    /* Averaging variant of the VC-1 "no rounding" chroma MC above:
     * the filtered sample (bias 32-4) is merged with dst via avg2(). */
    const int A=(8-x)*(8-y);
    const int B=(  x)*(8-y);
    const int C=(8-x)*(  y);
    const int D=(  x)*(  y);
    int i, j;

    assert(x<8 && y<8 && x>=0 && y>=0);

    for(i=0; i<h; i++){
        for(j=0; j<8; j++)
            dst[j] = avg2(dst[j], ((A*src[j] + B*src[j+1] + C*src[stride+j] + D*src[stride+j+1] + 32 - 4) >> 6));
        dst+= stride;
        src+= stride;
    }
}
1641 | |
651 | 1642 #define QPEL_MC(r, OPNAME, RND, OP) \ |
1064 | 1643 static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\ |
4176 | 1644 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ |
651 | 1645 int i;\ |
1646 for(i=0; i<h; i++)\ | |
1647 {\ | |
1648 OP(dst[0], (src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]));\ | |
1649 OP(dst[1], (src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]));\ | |
1650 OP(dst[2], (src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]));\ | |
1651 OP(dst[3], (src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]));\ | |
1652 OP(dst[4], (src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]));\ | |
1653 OP(dst[5], (src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]));\ | |
1654 OP(dst[6], (src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]));\ | |
1655 OP(dst[7], (src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]));\ | |
1656 dst+=dstStride;\ | |
1657 src+=srcStride;\ | |
1658 }\ | |
1659 }\ | |
1660 \ | |
1064 | 1661 static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ |
984 | 1662 const int w=8;\ |
4176 | 1663 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ |
651 | 1664 int i;\ |
1665 for(i=0; i<w; i++)\ | |
1666 {\ | |
1667 const int src0= src[0*srcStride];\ | |
1668 const int src1= src[1*srcStride];\ | |
1669 const int src2= src[2*srcStride];\ | |
1670 const int src3= src[3*srcStride];\ | |
1671 const int src4= src[4*srcStride];\ | |
1672 const int src5= src[5*srcStride];\ | |
1673 const int src6= src[6*srcStride];\ | |
1674 const int src7= src[7*srcStride];\ | |
1675 const int src8= src[8*srcStride];\ | |
1676 OP(dst[0*dstStride], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\ | |
1677 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\ | |
1678 OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\ | |
1679 OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\ | |
1680 OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\ | |
1681 OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\ | |
1682 OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\ | |
1683 OP(dst[7*dstStride], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\ | |
1684 dst++;\ | |
1685 src++;\ | |
1686 }\ | |
1687 }\ | |
1688 \ | |
1064 | 1689 static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\ |
4176 | 1690 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ |
651 | 1691 int i;\ |
954 | 1692 \ |
651 | 1693 for(i=0; i<h; i++)\ |
1694 {\ | |
1695 OP(dst[ 0], (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]));\ | |
1696 OP(dst[ 1], (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]));\ | |
1697 OP(dst[ 2], (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]));\ | |
1698 OP(dst[ 3], (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]));\ | |
1699 OP(dst[ 4], (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]));\ | |
1700 OP(dst[ 5], (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 9]));\ | |
1701 OP(dst[ 6], (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 9])*3 - (src[ 3]+src[10]));\ | |
1702 OP(dst[ 7], (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 9])*6 + (src[ 5]+src[10])*3 - (src[ 4]+src[11]));\ | |
1703 OP(dst[ 8], (src[ 8]+src[ 9])*20 - (src[ 7]+src[10])*6 + (src[ 6]+src[11])*3 - (src[ 5]+src[12]));\ | |
1704 OP(dst[ 9], (src[ 9]+src[10])*20 - (src[ 8]+src[11])*6 + (src[ 7]+src[12])*3 - (src[ 6]+src[13]));\ | |
1705 OP(dst[10], (src[10]+src[11])*20 - (src[ 9]+src[12])*6 + (src[ 8]+src[13])*3 - (src[ 7]+src[14]));\ | |
1706 OP(dst[11], (src[11]+src[12])*20 - (src[10]+src[13])*6 + (src[ 9]+src[14])*3 - (src[ 8]+src[15]));\ | |
1707 OP(dst[12], (src[12]+src[13])*20 - (src[11]+src[14])*6 + (src[10]+src[15])*3 - (src[ 9]+src[16]));\ | |
1708 OP(dst[13], (src[13]+src[14])*20 - (src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]));\ | |
1709 OP(dst[14], (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]));\ | |
1710 OP(dst[15], (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]));\ | |
1711 dst+=dstStride;\ | |
1712 src+=srcStride;\ | |
1713 }\ | |
255 | 1714 }\ |
1715 \ | |
1064 | 1716 static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ |
4176 | 1717 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ |
651 | 1718 int i;\ |
954 | 1719 const int w=16;\ |
651 | 1720 for(i=0; i<w; i++)\ |
1721 {\ | |
1722 const int src0= src[0*srcStride];\ | |
1723 const int src1= src[1*srcStride];\ | |
1724 const int src2= src[2*srcStride];\ | |
1725 const int src3= src[3*srcStride];\ | |
1726 const int src4= src[4*srcStride];\ | |
1727 const int src5= src[5*srcStride];\ | |
1728 const int src6= src[6*srcStride];\ | |
1729 const int src7= src[7*srcStride];\ | |
1730 const int src8= src[8*srcStride];\ | |
1731 const int src9= src[9*srcStride];\ | |
1732 const int src10= src[10*srcStride];\ | |
1733 const int src11= src[11*srcStride];\ | |
1734 const int src12= src[12*srcStride];\ | |
1735 const int src13= src[13*srcStride];\ | |
1736 const int src14= src[14*srcStride];\ | |
1737 const int src15= src[15*srcStride];\ | |
1738 const int src16= src[16*srcStride];\ | |
1739 OP(dst[ 0*dstStride], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\ | |
1740 OP(dst[ 1*dstStride], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\ | |
1741 OP(dst[ 2*dstStride], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\ | |
1742 OP(dst[ 3*dstStride], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\ | |
1743 OP(dst[ 4*dstStride], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\ | |
1744 OP(dst[ 5*dstStride], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\ | |
1745 OP(dst[ 6*dstStride], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\ | |
1746 OP(dst[ 7*dstStride], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\ | |
1747 OP(dst[ 8*dstStride], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\ | |
1748 OP(dst[ 9*dstStride], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\ | |
1749 OP(dst[10*dstStride], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\ | |
1750 OP(dst[11*dstStride], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\ | |
1751 OP(dst[12*dstStride], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\ | |
1752 OP(dst[13*dstStride], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\ | |
1753 OP(dst[14*dstStride], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\ | |
1754 OP(dst[15*dstStride], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\ | |
1755 dst++;\ | |
1756 src++;\ | |
1757 }\ | |
255 | 1758 }\ |
1759 \ | |
1064 | 1760 static void OPNAME ## qpel8_mc00_c (uint8_t *dst, uint8_t *src, int stride){\ |
859 | 1761 OPNAME ## pixels8_c(dst, src, stride, 8);\ |
255 | 1762 }\ |
1763 \ | |
1064 | 1764 static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){\ |
1765 uint8_t half[64];\ | |
651 | 1766 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\ |
1767 OPNAME ## pixels8_l2(dst, src, half, stride, stride, 8, 8);\ | |
1768 }\ | |
1769 \ | |
1064 | 1770 static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){\ |
651 | 1771 OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\ |
255 | 1772 }\ |
1773 \ | |
1064 | 1774 static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){\ |
1775 uint8_t half[64];\ | |
651 | 1776 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\ |
1777 OPNAME ## pixels8_l2(dst, src+1, half, stride, stride, 8, 8);\ | |
1778 }\ | |
1779 \ | |
1064 | 1780 static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride){\ |
1781 uint8_t full[16*9];\ | |
1782 uint8_t half[64];\ | |
651 | 1783 copy_block9(full, src, 16, stride, 9);\ |
984 | 1784 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\ |
651 | 1785 OPNAME ## pixels8_l2(dst, full, half, stride, 16, 8, 8);\ |
1786 }\ | |
1787 \ | |
1064 | 1788 static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){\ |
1789 uint8_t full[16*9];\ | |
651 | 1790 copy_block9(full, src, 16, stride, 9);\ |
984 | 1791 OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\ |
255 | 1792 }\ |
1793 \ | |
1064 | 1794 static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, int stride){\ |
1795 uint8_t full[16*9];\ | |
1796 uint8_t half[64];\ | |
651 | 1797 copy_block9(full, src, 16, stride, 9);\ |
984 | 1798 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\ |
651 | 1799 OPNAME ## pixels8_l2(dst, full+16, half, stride, 16, 8, 8);\ |
1800 }\ | |
1064 | 1801 void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\ |
1802 uint8_t full[16*9];\ | |
1803 uint8_t halfH[72];\ | |
1804 uint8_t halfV[64];\ | |
1805 uint8_t halfHV[64];\ | |
651 | 1806 copy_block9(full, src, 16, stride, 9);\ |
1807 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ | |
984 | 1808 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\ |
1809 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ | |
651 | 1810 OPNAME ## pixels8_l4(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\ |
255 | 1811 }\ |
1064 | 1812 static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride){\ |
1813 uint8_t full[16*9];\ | |
1814 uint8_t halfH[72];\ | |
1815 uint8_t halfHV[64];\ | |
984 | 1816 copy_block9(full, src, 16, stride, 9);\ |
1817 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ | |
1818 put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\ | |
1819 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ | |
1820 OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\ | |
1821 }\ | |
1064 | 1822 void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\ |
1823 uint8_t full[16*9];\ | |
1824 uint8_t halfH[72];\ | |
1825 uint8_t halfV[64];\ | |
1826 uint8_t halfHV[64];\ | |
651 | 1827 copy_block9(full, src, 16, stride, 9);\ |
1828 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ | |
984 | 1829 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\ |
1830 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ | |
651 | 1831 OPNAME ## pixels8_l4(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\ |
255 | 1832 }\ |
1064 | 1833 static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride){\ |
1834 uint8_t full[16*9];\ | |
1835 uint8_t halfH[72];\ | |
1836 uint8_t halfHV[64];\ | |
984 | 1837 copy_block9(full, src, 16, stride, 9);\ |
1838 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ | |
1839 put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\ | |
1840 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ | |
1841 OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\ | |
1842 }\ | |
1064 | 1843 void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\ |
1844 uint8_t full[16*9];\ | |
1845 uint8_t halfH[72];\ | |
1846 uint8_t halfV[64];\ | |
1847 uint8_t halfHV[64];\ | |
651 | 1848 copy_block9(full, src, 16, stride, 9);\ |
1849 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ | |
984 | 1850 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\ |
1851 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ | |
651 | 1852 OPNAME ## pixels8_l4(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\ |
1853 }\ | |
1064 | 1854 static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride){\ |
1855 uint8_t full[16*9];\ | |
1856 uint8_t halfH[72];\ | |
1857 uint8_t halfHV[64];\ | |
984 | 1858 copy_block9(full, src, 16, stride, 9);\ |
1859 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ | |
1860 put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\ | |
1861 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ | |
1862 OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\ | |
1863 }\ | |
1064 | 1864 void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\ |
1865 uint8_t full[16*9];\ | |
1866 uint8_t halfH[72];\ | |
1867 uint8_t halfV[64];\ | |
1868 uint8_t halfHV[64];\ | |
651 | 1869 copy_block9(full, src, 16, stride, 9);\ |
1870 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full , 8, 16, 9);\ | |
984 | 1871 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\ |
1872 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ | |
651 | 1873 OPNAME ## pixels8_l4(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\ |
255 | 1874 }\ |
1064 | 1875 static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){\ |
1876 uint8_t full[16*9];\ | |
1877 uint8_t halfH[72];\ | |
1878 uint8_t halfHV[64];\ | |
984 | 1879 copy_block9(full, src, 16, stride, 9);\ |
1880 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ | |
1881 put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\ | |
1882 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ | |
1883 OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\ | |
1884 }\ | |
1064 | 1885 static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, int stride){\ |
1886 uint8_t halfH[72];\ | |
1887 uint8_t halfHV[64];\ | |
651 | 1888 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\ |
984 | 1889 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ |
651 | 1890 OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\ |
1891 }\ | |
1064 | 1892 static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, int stride){\ |
1893 uint8_t halfH[72];\ | |
1894 uint8_t halfHV[64];\ | |
651 | 1895 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\ |
984 | 1896 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ |
651 | 1897 OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\ |
1898 }\ | |
1064 | 1899 void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\ |
1900 uint8_t full[16*9];\ | |
1901 uint8_t halfH[72];\ | |
1902 uint8_t halfV[64];\ | |
1903 uint8_t halfHV[64];\ | |
651 | 1904 copy_block9(full, src, 16, stride, 9);\ |
1905 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ | |
984 | 1906 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\ |
1907 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ | |
651 | 1908 OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\ |
255 | 1909 }\ |
1064 | 1910 static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){\ |
1911 uint8_t full[16*9];\ | |
1912 uint8_t halfH[72];\ | |
984 | 1913 copy_block9(full, src, 16, stride, 9);\ |
1914 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ | |
1915 put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\ | |
1916 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\ | |
1917 }\ | |
1064 | 1918 void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\ |
1919 uint8_t full[16*9];\ | |
1920 uint8_t halfH[72];\ | |
1921 uint8_t halfV[64];\ | |
1922 uint8_t halfHV[64];\ | |
651 | 1923 copy_block9(full, src, 16, stride, 9);\ |
1924 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ | |
984 | 1925 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\ |
1926 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ | |
651 | 1927 OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\ |
1928 }\ | |
1064 | 1929 static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){\ |
1930 uint8_t full[16*9];\ | |
1931 uint8_t halfH[72];\ | |
984 | 1932 copy_block9(full, src, 16, stride, 9);\ |
1933 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ | |
1934 put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\ | |
1935 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\ | |
1936 }\ | |
1064 | 1937 static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){\ |
1938 uint8_t halfH[72];\ | |
651 | 1939 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\ |
984 | 1940 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\ |
651 | 1941 }\ |
1064 | 1942 static void OPNAME ## qpel16_mc00_c (uint8_t *dst, uint8_t *src, int stride){\ |
859 | 1943 OPNAME ## pixels16_c(dst, src, stride, 16);\ |
255 | 1944 }\ |
651 | 1945 \ |
1064 | 1946 static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, int stride){\ |
1947 uint8_t half[256];\ | |
651 | 1948 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\ |
1949 OPNAME ## pixels16_l2(dst, src, half, stride, stride, 16, 16);\ | |
1950 }\ | |
1951 \ | |
1064 | 1952 static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride){\ |
651 | 1953 OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\ |
1954 }\ | |
1955 \ | |
1064 | 1956 static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, int stride){\ |
1957 uint8_t half[256];\ | |
651 | 1958 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\ |
1959 OPNAME ## pixels16_l2(dst, src+1, half, stride, stride, 16, 16);\ | |
1960 }\ | |
1961 \ | |
1064 | 1962 static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride){\ |
1963 uint8_t full[24*17];\ | |
1964 uint8_t half[256];\ | |
651 | 1965 copy_block17(full, src, 24, stride, 17);\ |
954 | 1966 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\ |
651 | 1967 OPNAME ## pixels16_l2(dst, full, half, stride, 24, 16, 16);\ |
255 | 1968 }\ |
651 | 1969 \ |
1064 | 1970 static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, int stride){\ |
1971 uint8_t full[24*17];\ | |
651 | 1972 copy_block17(full, src, 24, stride, 17);\ |
954 | 1973 OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\ |
651 | 1974 }\ |
1975 \ | |
1064 | 1976 static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, int stride){\ |
1977 uint8_t full[24*17];\ | |
1978 uint8_t half[256];\ | |
651 | 1979 copy_block17(full, src, 24, stride, 17);\ |
954 | 1980 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\ |
651 | 1981 OPNAME ## pixels16_l2(dst, full+24, half, stride, 24, 16, 16);\ |
255 | 1982 }\ |
1064 | 1983 void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\ |
1984 uint8_t full[24*17];\ | |
1985 uint8_t halfH[272];\ | |
1986 uint8_t halfV[256];\ | |
1987 uint8_t halfHV[256];\ | |
651 | 1988 copy_block17(full, src, 24, stride, 17);\ |
1989 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ | |
954 | 1990 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\ |
1991 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ | |
651 | 1992 OPNAME ## pixels16_l4(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\ |
1993 }\ | |
1064 | 1994 static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride){\ |
1995 uint8_t full[24*17];\ | |
1996 uint8_t halfH[272];\ | |
1997 uint8_t halfHV[256];\ | |
984 | 1998 copy_block17(full, src, 24, stride, 17);\ |
1999 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ | |
2000 put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\ | |
2001 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ | |
2002 OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\ | |
2003 }\ | |
1064 | 2004 void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\ |
2005 uint8_t full[24*17];\ | |
2006 uint8_t halfH[272];\ | |
2007 uint8_t halfV[256];\ | |
2008 uint8_t halfHV[256];\ | |
651 | 2009 copy_block17(full, src, 24, stride, 17);\ |
2010 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ | |
954 | 2011 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\ |
2012 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ | |
651 | 2013 OPNAME ## pixels16_l4(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\ |
2014 }\ | |
1064 | 2015 static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride){\ |
2016 uint8_t full[24*17];\ | |
2017 uint8_t halfH[272];\ | |
2018 uint8_t halfHV[256];\ | |
984 | 2019 copy_block17(full, src, 24, stride, 17);\ |
2020 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ | |
2021 put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\ | |
2022 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ | |
2023 OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\ | |
2024 }\ | |
1064 | 2025 void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\ |
2026 uint8_t full[24*17];\ | |
2027 uint8_t halfH[272];\ | |
2028 uint8_t halfV[256];\ | |
2029 uint8_t halfHV[256];\ | |
651 | 2030 copy_block17(full, src, 24, stride, 17);\ |
2031 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ | |
954 | 2032 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\ |
2033 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ | |
651 | 2034 OPNAME ## pixels16_l4(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\ |
255 | 2035 }\ |
1064 | 2036 static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride){\ |
2037 uint8_t full[24*17];\ | |
2038 uint8_t halfH[272];\ | |
2039 uint8_t halfHV[256];\ | |
984 | 2040 copy_block17(full, src, 24, stride, 17);\ |
2041 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ | |
2042 put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\ | |
2043 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ | |
2044 OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\ | |
2045 }\ | |
1064 | 2046 void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\ |
2047 uint8_t full[24*17];\ | |
2048 uint8_t halfH[272];\ | |
2049 uint8_t halfV[256];\ | |
2050 uint8_t halfHV[256];\ | |
651 | 2051 copy_block17(full, src, 24, stride, 17);\ |
2052 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full , 16, 24, 17);\ | |
954 | 2053 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\ |
2054 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ | |
651 | 2055 OPNAME ## pixels16_l4(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\ |
2056 }\ | |
1064 | 2057 static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){\ |
2058 uint8_t full[24*17];\ | |
2059 uint8_t halfH[272];\ | |
2060 uint8_t halfHV[256];\ | |
984 | 2061 copy_block17(full, src, 24, stride, 17);\ |
2062 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ | |
2063 put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\ | |
2064 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ | |
2065 OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\ | |
2066 }\ | |
1064 | 2067 static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, int stride){\ |
2068 uint8_t halfH[272];\ | |
2069 uint8_t halfHV[256];\ | |
651 | 2070 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\ |
954 | 2071 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ |
651 | 2072 OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\ |
255 | 2073 }\ |
1064 | 2074 static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, int stride){\ |
2075 uint8_t halfH[272];\ | |
2076 uint8_t halfHV[256];\ | |
651 | 2077 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\ |
954 | 2078 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ |
651 | 2079 OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\ |
2080 }\ | |
1064 | 2081 void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\ |
2082 uint8_t full[24*17];\ | |
2083 uint8_t halfH[272];\ | |
2084 uint8_t halfV[256];\ | |
2085 uint8_t halfHV[256];\ | |
651 | 2086 copy_block17(full, src, 24, stride, 17);\ |
2087 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ | |
954 | 2088 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\ |
2089 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ | |
651 | 2090 OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\ |
255 | 2091 }\ |
1064 | 2092 static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, int stride){\ |
2093 uint8_t full[24*17];\ | |
2094 uint8_t halfH[272];\ | |
984 | 2095 copy_block17(full, src, 24, stride, 17);\ |
2096 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ | |
2097 put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\ | |
2098 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\ | |
2099 }\ | |
1064 | 2100 void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\ |
2101 uint8_t full[24*17];\ | |
2102 uint8_t halfH[272];\ | |
2103 uint8_t halfV[256];\ | |
2104 uint8_t halfHV[256];\ | |
651 | 2105 copy_block17(full, src, 24, stride, 17);\ |
2106 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ | |
954 | 2107 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\ |
2108 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ | |
651 | 2109 OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\ |
2110 }\ | |
1064 | 2111 static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, int stride){\ |
2112 uint8_t full[24*17];\ | |
2113 uint8_t halfH[272];\ | |
984 | 2114 copy_block17(full, src, 24, stride, 17);\ |
2115 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ | |
2116 put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\ | |
2117 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\ | |
2118 }\ | |
1064 | 2119 static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, int stride){\ |
2120 uint8_t halfH[272];\ | |
651 | 2121 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\ |
954 | 2122 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\ |
859 | 2123 } |
255 | 2124 |
/* Store/average operators used to instantiate the MPEG-4 qpel MC functions.
 * 'b' is the raw lowpass filter sum, scaled by 32 (the filter taps
 * 20,20,-6,-6,3,3,-1,-1 sum to 32 — see the *20/*6/*3 rows above); it is
 * rounded (+16 = round to nearest; the no_rnd variants use +15, biasing
 * ties downward), divided by 32 via >>5, and clipped through the crop
 * table 'cm'.  NOTE(review): 'cm' is assumed to be in scope at every
 * expansion site (cm = ff_cropTbl + MAX_NEG_CROP, as in the h264 lowpass
 * functions further down) — the QPEL_MC body defining it is above this
 * view; confirm there. */
651 | 2125 #define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
2126 #define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1)
2127 #define op_put(a, b) a = cm[((b) + 16)>>5]
2128 #define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5]
2129 
/* Instantiate the put_, put_no_rnd_ and avg_ flavours of the qpel8/qpel16
 * motion-compensation function set defined by the QPEL_MC macro above. */
2130 QPEL_MC(0, put_ , _ , op_put)
2131 QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd)
2132 QPEL_MC(0, avg_ , _ , op_avg)
/* avg_no_rnd flavour deliberately left disabled. */
2133 //QPEL_MC(1, avg_no_rnd , _ , op_avg)
/* The operator macros are local to this instantiation block. */
2134 #undef op_avg
2135 #undef op_avg_no_rnd
2136 #undef op_put
2137 #undef op_put_no_rnd
255 | 2138 |
1168 | 2139 #if 1 |
2140 #define H264_LOWPASS(OPNAME, OP, OP2) \ | |
5151 | 2141 static av_unused void OPNAME ## h264_qpel2_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ |
3020 c75fb0747e74 | use h264 MC functions for 2xX Xx2 blocks in snow too (michael) — parents: 3013 | diff | changeset |
2142 const int h=2;\ |
4176 | 2143 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ |
3020 c75fb0747e74 | use h264 MC functions for 2xX Xx2 blocks in snow too (michael) — parents: 3013 | diff | changeset |
2144 int i;\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2145 for(i=0; i<h; i++)\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2146 {\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2147 OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2148 OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2149 dst+=dstStride;\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2150 src+=srcStride;\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2151 }\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2152 }\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2153 \ |
5151 | 2154 static av_unused void OPNAME ## h264_qpel2_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ |
3020
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2155 const int w=2;\ |
4176 | 2156 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ |
3020
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2157 int i;\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2158 for(i=0; i<w; i++)\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2159 {\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2160 const int srcB= src[-2*srcStride];\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2161 const int srcA= src[-1*srcStride];\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2162 const int src0= src[0 *srcStride];\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2163 const int src1= src[1 *srcStride];\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2164 const int src2= src[2 *srcStride];\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2165 const int src3= src[3 *srcStride];\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2166 const int src4= src[4 *srcStride];\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2167 OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2168 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2169 dst++;\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2170 src++;\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2171 }\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2172 }\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2173 \ |
5151 | 2174 static av_unused void OPNAME ## h264_qpel2_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\ |
3020
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2175 const int h=2;\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2176 const int w=2;\ |
4176 | 2177 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ |
3020
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2178 int i;\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2179 src -= 2*srcStride;\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2180 for(i=0; i<h+5; i++)\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2181 {\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2182 tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]);\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2183 tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]);\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2184 tmp+=tmpStride;\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2185 src+=srcStride;\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2186 }\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2187 tmp -= tmpStride*(h+5-2);\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2188 for(i=0; i<w; i++)\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2189 {\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2190 const int tmpB= tmp[-2*tmpStride];\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2191 const int tmpA= tmp[-1*tmpStride];\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2192 const int tmp0= tmp[0 *tmpStride];\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2193 const int tmp1= tmp[1 *tmpStride];\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2194 const int tmp2= tmp[2 *tmpStride];\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2195 const int tmp3= tmp[3 *tmpStride];\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2196 const int tmp4= tmp[4 *tmpStride];\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2197 OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2198 OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2199 dst++;\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2200 tmp++;\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2201 }\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2202 }\ |
1168 | 2203 static void OPNAME ## h264_qpel4_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ |
2204 const int h=4;\ | |
4176 | 2205 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ |
1168 | 2206 int i;\ |
2207 for(i=0; i<h; i++)\ | |
2208 {\ | |
2209 OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\ | |
2210 OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\ | |
2211 OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]));\ | |
2212 OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]));\ | |
2213 dst+=dstStride;\ | |
2214 src+=srcStride;\ | |
2215 }\ | |
2216 }\ | |
2217 \ | |
2218 static void OPNAME ## h264_qpel4_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ | |
2219 const int w=4;\ | |
4176 | 2220 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ |
1168 | 2221 int i;\ |
2222 for(i=0; i<w; i++)\ | |
2223 {\ | |
2224 const int srcB= src[-2*srcStride];\ | |
2225 const int srcA= src[-1*srcStride];\ | |
2226 const int src0= src[0 *srcStride];\ | |
2227 const int src1= src[1 *srcStride];\ | |
2228 const int src2= src[2 *srcStride];\ | |
2229 const int src3= src[3 *srcStride];\ | |
2230 const int src4= src[4 *srcStride];\ | |
2231 const int src5= src[5 *srcStride];\ | |
2232 const int src6= src[6 *srcStride];\ | |
2233 OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\ | |
2234 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\ | |
2235 OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\ | |
2236 OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\ | |
2237 dst++;\ | |
2238 src++;\ | |
2239 }\ | |
2240 }\ | |
2241 \ | |
2242 static void OPNAME ## h264_qpel4_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\ | |
2243 const int h=4;\ | |
2244 const int w=4;\ | |
4176 | 2245 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ |
1168 | 2246 int i;\ |
2247 src -= 2*srcStride;\ | |
2248 for(i=0; i<h+5; i++)\ | |
2249 {\ | |
2250 tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]);\ | |
2251 tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]);\ | |
2252 tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]);\ | |
2253 tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]);\ | |
2254 tmp+=tmpStride;\ | |
2255 src+=srcStride;\ | |
2256 }\ | |
2257 tmp -= tmpStride*(h+5-2);\ | |
2258 for(i=0; i<w; i++)\ | |
2259 {\ | |
2260 const int tmpB= tmp[-2*tmpStride];\ | |
2261 const int tmpA= tmp[-1*tmpStride];\ | |
2262 const int tmp0= tmp[0 *tmpStride];\ | |
2263 const int tmp1= tmp[1 *tmpStride];\ | |
2264 const int tmp2= tmp[2 *tmpStride];\ | |
2265 const int tmp3= tmp[3 *tmpStride];\ | |
2266 const int tmp4= tmp[4 *tmpStride];\ | |
2267 const int tmp5= tmp[5 *tmpStride];\ | |
2268 const int tmp6= tmp[6 *tmpStride];\ | |
2269 OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\ | |
2270 OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\ | |
2271 OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\ | |
2272 OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\ | |
2273 dst++;\ | |
2274 tmp++;\ | |
2275 }\ | |
2276 }\ | |
2277 \ | |
2278 static void OPNAME ## h264_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ | |
2279 const int h=8;\ | |
4176 | 2280 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ |
1168 | 2281 int i;\ |
2282 for(i=0; i<h; i++)\ | |
2283 {\ | |
2284 OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]));\ | |
2285 OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]));\ | |
2286 OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]));\ | |
2287 OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]));\ | |
2288 OP(dst[4], (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]));\ | |
2289 OP(dst[5], (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]));\ | |
2290 OP(dst[6], (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]));\ | |
2291 OP(dst[7], (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]));\ | |
2292 dst+=dstStride;\ | |
2293 src+=srcStride;\ | |
2294 }\ | |
2295 }\ | |
2296 \ | |
2297 static void OPNAME ## h264_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ | |
2298 const int w=8;\ | |
4176 | 2299 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ |
1168 | 2300 int i;\ |
2301 for(i=0; i<w; i++)\ | |
2302 {\ | |
2303 const int srcB= src[-2*srcStride];\ | |
2304 const int srcA= src[-1*srcStride];\ | |
2305 const int src0= src[0 *srcStride];\ | |
2306 const int src1= src[1 *srcStride];\ | |
2307 const int src2= src[2 *srcStride];\ | |
2308 const int src3= src[3 *srcStride];\ | |
2309 const int src4= src[4 *srcStride];\ | |
2310 const int src5= src[5 *srcStride];\ | |
2311 const int src6= src[6 *srcStride];\ | |
2312 const int src7= src[7 *srcStride];\ | |
2313 const int src8= src[8 *srcStride];\ | |
2314 const int src9= src[9 *srcStride];\ | |
2315 const int src10=src[10*srcStride];\ | |
2316 OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\ | |
2317 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\ | |
2318 OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\ | |
2319 OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\ | |
2320 OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*5 + (src2+src7));\ | |
2321 OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*5 + (src3+src8));\ | |
2322 OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*5 + (src4+src9));\ | |
2323 OP(dst[7*dstStride], (src7+src8)*20 - (src6+src9)*5 + (src5+src10));\ | |
2324 dst++;\ | |
2325 src++;\ | |
2326 }\ | |
2327 }\ | |
2328 \ | |
2329 static void OPNAME ## h264_qpel8_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\ | |
2330 const int h=8;\ | |
2331 const int w=8;\ | |
4176 | 2332 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ |
1168 | 2333 int i;\ |
2334 src -= 2*srcStride;\ | |
2335 for(i=0; i<h+5; i++)\ | |
2336 {\ | |
2337 tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]);\ | |
2338 tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]);\ | |
2339 tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]);\ | |
2340 tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]);\ | |
2341 tmp[4]= (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]);\ | |
2342 tmp[5]= (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]);\ | |
2343 tmp[6]= (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]);\ | |
2344 tmp[7]= (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]);\ | |
2345 tmp+=tmpStride;\ | |
2346 src+=srcStride;\ | |
2347 }\ | |
2348 tmp -= tmpStride*(h+5-2);\ | |
2349 for(i=0; i<w; i++)\ | |
2350 {\ | |
2351 const int tmpB= tmp[-2*tmpStride];\ | |
2352 const int tmpA= tmp[-1*tmpStride];\ | |
2353 const int tmp0= tmp[0 *tmpStride];\ | |
2354 const int tmp1= tmp[1 *tmpStride];\ | |
2355 const int tmp2= tmp[2 *tmpStride];\ | |
2356 const int tmp3= tmp[3 *tmpStride];\ | |
2357 const int tmp4= tmp[4 *tmpStride];\ | |
2358 const int tmp5= tmp[5 *tmpStride];\ | |
2359 const int tmp6= tmp[6 *tmpStride];\ | |
2360 const int tmp7= tmp[7 *tmpStride];\ | |
2361 const int tmp8= tmp[8 *tmpStride];\ | |
2362 const int tmp9= tmp[9 *tmpStride];\ | |
2363 const int tmp10=tmp[10*tmpStride];\ | |
2364 OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\ | |
2365 OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\ | |
2366 OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\ | |
2367 OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\ | |
2368 OP2(dst[4*dstStride], (tmp4+tmp5)*20 - (tmp3+tmp6)*5 + (tmp2+tmp7));\ | |
2369 OP2(dst[5*dstStride], (tmp5+tmp6)*20 - (tmp4+tmp7)*5 + (tmp3+tmp8));\ | |
2370 OP2(dst[6*dstStride], (tmp6+tmp7)*20 - (tmp5+tmp8)*5 + (tmp4+tmp9));\ | |
2371 OP2(dst[7*dstStride], (tmp7+tmp8)*20 - (tmp6+tmp9)*5 + (tmp5+tmp10));\ | |
2372 dst++;\ | |
2373 tmp++;\ | |
2374 }\ | |
2375 }\ | |
2376 \ | |
2377 static void OPNAME ## h264_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ | |
2378 OPNAME ## h264_qpel8_v_lowpass(dst , src , dstStride, srcStride);\ | |
2379 OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\ | |
2380 src += 8*srcStride;\ | |
2381 dst += 8*dstStride;\ | |
2382 OPNAME ## h264_qpel8_v_lowpass(dst , src , dstStride, srcStride);\ | |
2383 OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\ | |
2384 }\ | |
2385 \ | |
2386 static void OPNAME ## h264_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ | |
2387 OPNAME ## h264_qpel8_h_lowpass(dst , src , dstStride, srcStride);\ | |
2388 OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\ | |
2389 src += 8*srcStride;\ | |
2390 dst += 8*dstStride;\ | |
2391 OPNAME ## h264_qpel8_h_lowpass(dst , src , dstStride, srcStride);\ | |
2392 OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\ | |
2393 }\ | |
2394 \ | |
2395 static void OPNAME ## h264_qpel16_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\ | |
2396 OPNAME ## h264_qpel8_hv_lowpass(dst , tmp , src , dstStride, tmpStride, srcStride);\ | |
2397 OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\ | |
2398 src += 8*srcStride;\ | |
2399 dst += 8*dstStride;\ | |
2400 OPNAME ## h264_qpel8_hv_lowpass(dst , tmp , src , dstStride, tmpStride, srcStride);\ | |
2401 OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\ | |
2402 }\ | |
2403 | |
2404 #define H264_MC(OPNAME, SIZE) \ | |
2405 static void OPNAME ## h264_qpel ## SIZE ## _mc00_c (uint8_t *dst, uint8_t *src, int stride){\ | |
2406 OPNAME ## pixels ## SIZE ## _c(dst, src, stride, SIZE);\ | |
2407 }\ | |
2408 \ | |
2409 static void OPNAME ## h264_qpel ## SIZE ## _mc10_c(uint8_t *dst, uint8_t *src, int stride){\ | |
2410 uint8_t half[SIZE*SIZE];\ | |
2411 put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\ | |
2412 OPNAME ## pixels ## SIZE ## _l2(dst, src, half, stride, stride, SIZE, SIZE);\ | |
2413 }\ | |
2414 \ | |
2415 static void OPNAME ## h264_qpel ## SIZE ## _mc20_c(uint8_t *dst, uint8_t *src, int stride){\ | |
2416 OPNAME ## h264_qpel ## SIZE ## _h_lowpass(dst, src, stride, stride);\ | |
2417 }\ | |
2418 \ | |
2419 static void OPNAME ## h264_qpel ## SIZE ## _mc30_c(uint8_t *dst, uint8_t *src, int stride){\ | |
2420 uint8_t half[SIZE*SIZE];\ | |
2421 put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\ | |
2422 OPNAME ## pixels ## SIZE ## _l2(dst, src+1, half, stride, stride, SIZE, SIZE);\ | |
2423 }\ | |
2424 \ | |
2425 static void OPNAME ## h264_qpel ## SIZE ## _mc01_c(uint8_t *dst, uint8_t *src, int stride){\ | |
2426 uint8_t full[SIZE*(SIZE+5)];\ | |
2427 uint8_t * const full_mid= full + SIZE*2;\ | |
2428 uint8_t half[SIZE*SIZE];\ | |
2429 copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\ | |
2430 put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\ | |
2431 OPNAME ## pixels ## SIZE ## _l2(dst, full_mid, half, stride, SIZE, SIZE, SIZE);\ | |
2432 }\ | |
2433 \ | |
2434 static void OPNAME ## h264_qpel ## SIZE ## _mc02_c(uint8_t *dst, uint8_t *src, int stride){\ | |
2435 uint8_t full[SIZE*(SIZE+5)];\ | |
2436 uint8_t * const full_mid= full + SIZE*2;\ | |
2437 copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\ | |
2438 OPNAME ## h264_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE);\ | |
2439 }\ | |
2440 \ | |
2441 static void OPNAME ## h264_qpel ## SIZE ## _mc03_c(uint8_t *dst, uint8_t *src, int stride){\ | |
2442 uint8_t full[SIZE*(SIZE+5)];\ | |
2443 uint8_t * const full_mid= full + SIZE*2;\ | |
2444 uint8_t half[SIZE*SIZE];\ | |
2445 copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\ | |
2446 put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\ | |
2447 OPNAME ## pixels ## SIZE ## _l2(dst, full_mid+SIZE, half, stride, SIZE, SIZE, SIZE);\ | |
2448 }\ | |
2449 \ | |
2450 static void OPNAME ## h264_qpel ## SIZE ## _mc11_c(uint8_t *dst, uint8_t *src, int stride){\ | |
2451 uint8_t full[SIZE*(SIZE+5)];\ | |
2452 uint8_t * const full_mid= full + SIZE*2;\ | |
2453 uint8_t halfH[SIZE*SIZE];\ | |
2454 uint8_t halfV[SIZE*SIZE];\ | |
2455 put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\ | |
2456 copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\ | |
2457 put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\ | |
2458 OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\ | |
2459 }\ | |
2460 \ | |
2461 static void OPNAME ## h264_qpel ## SIZE ## _mc31_c(uint8_t *dst, uint8_t *src, int stride){\ | |
2462 uint8_t full[SIZE*(SIZE+5)];\ | |
2463 uint8_t * const full_mid= full + SIZE*2;\ | |
2464 uint8_t halfH[SIZE*SIZE];\ | |
2465 uint8_t halfV[SIZE*SIZE];\ | |
2466 put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\ | |
2467 copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\ | |
2468 put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\ | |
2469 OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\ | |
2470 }\ | |
2471 \ | |
2472 static void OPNAME ## h264_qpel ## SIZE ## _mc13_c(uint8_t *dst, uint8_t *src, int stride){\ | |
2473 uint8_t full[SIZE*(SIZE+5)];\ | |
2474 uint8_t * const full_mid= full + SIZE*2;\ | |
2475 uint8_t halfH[SIZE*SIZE];\ | |
2476 uint8_t halfV[SIZE*SIZE];\ | |
2477 put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\ | |
2478 copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\ | |
2479 put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\ | |
2480 OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\ | |
2481 }\ | |
2482 \ | |
2483 static void OPNAME ## h264_qpel ## SIZE ## _mc33_c(uint8_t *dst, uint8_t *src, int stride){\ | |
2484 uint8_t full[SIZE*(SIZE+5)];\ | |
2485 uint8_t * const full_mid= full + SIZE*2;\ | |
2486 uint8_t halfH[SIZE*SIZE];\ | |
2487 uint8_t halfV[SIZE*SIZE];\ | |
2488 put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\ | |
2489 copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\ | |
2490 put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\ | |
2491 OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\ | |
2492 }\ | |
2493 \ | |
2494 static void OPNAME ## h264_qpel ## SIZE ## _mc22_c(uint8_t *dst, uint8_t *src, int stride){\ | |
2495 int16_t tmp[SIZE*(SIZE+5)];\ | |
2496 OPNAME ## h264_qpel ## SIZE ## _hv_lowpass(dst, tmp, src, stride, SIZE, stride);\ | |
2497 }\ | |
2498 \ | |
2499 static void OPNAME ## h264_qpel ## SIZE ## _mc21_c(uint8_t *dst, uint8_t *src, int stride){\ | |
2500 int16_t tmp[SIZE*(SIZE+5)];\ | |
2501 uint8_t halfH[SIZE*SIZE];\ | |
2502 uint8_t halfHV[SIZE*SIZE];\ | |
2503 put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\ | |
2504 put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\ | |
2505 OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\ | |
2506 }\ | |
2507 \ | |
2508 static void OPNAME ## h264_qpel ## SIZE ## _mc23_c(uint8_t *dst, uint8_t *src, int stride){\ | |
2509 int16_t tmp[SIZE*(SIZE+5)];\ | |
2510 uint8_t halfH[SIZE*SIZE];\ | |
2511 uint8_t halfHV[SIZE*SIZE];\ | |
2512 put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\ | |
2513 put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\ | |
2514 OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\ | |
2515 }\ | |
2516 \ | |
2517 static void OPNAME ## h264_qpel ## SIZE ## _mc12_c(uint8_t *dst, uint8_t *src, int stride){\ | |
2518 uint8_t full[SIZE*(SIZE+5)];\ | |
2519 uint8_t * const full_mid= full + SIZE*2;\ | |
2520 int16_t tmp[SIZE*(SIZE+5)];\ | |
2521 uint8_t halfV[SIZE*SIZE];\ | |
2522 uint8_t halfHV[SIZE*SIZE];\ | |
2523 copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\ | |
2524 put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\ | |
2525 put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\ | |
2526 OPNAME ## pixels ## SIZE ## _l2(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\ | |
2527 }\ | |
2528 \ | |
2529 static void OPNAME ## h264_qpel ## SIZE ## _mc32_c(uint8_t *dst, uint8_t *src, int stride){\ | |
2530 uint8_t full[SIZE*(SIZE+5)];\ | |
2531 uint8_t * const full_mid= full + SIZE*2;\ | |
2532 int16_t tmp[SIZE*(SIZE+5)];\ | |
2533 uint8_t halfV[SIZE*SIZE];\ | |
2534 uint8_t halfHV[SIZE*SIZE];\ | |
2535 copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\ | |
2536 put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\ | |
2537 put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\ | |
2538 OPNAME ## pixels ## SIZE ## _l2(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\ | |
2539 }\ | |
2540 | |
2541 #define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1) | |
2542 //#define op_avg2(a, b) a = (((a)*w1+cm[((b) + 16)>>5]*w2 + o + 64)>>7) | |
2543 #define op_put(a, b) a = cm[((b) + 16)>>5] | |
2544 #define op2_avg(a, b) a = (((a)+cm[((b) + 512)>>10]+1)>>1) | |
2545 #define op2_put(a, b) a = cm[((b) + 512)>>10] | |
2546 | |
2547 H264_LOWPASS(put_ , op_put, op2_put) | |
2548 H264_LOWPASS(avg_ , op_avg, op2_avg) | |
3020
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2549 H264_MC(put_, 2) |
1168 | 2550 H264_MC(put_, 4) |
2551 H264_MC(put_, 8) | |
2552 H264_MC(put_, 16) | |
2553 H264_MC(avg_, 4) | |
2554 H264_MC(avg_, 8) | |
2555 H264_MC(avg_, 16) | |
2556 | |
2557 #undef op_avg | |
2558 #undef op_put | |
2559 #undef op2_avg | |
2560 #undef op2_put | |
2561 #endif | |
2562 | |
936 | 2563 static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){ |
4176 | 2564 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; |
936 | 2565 int i; |
2566 | |
2567 for(i=0; i<h; i++){ | |
2568 dst[0]= cm[(9*(src[0] + src[1]) - (src[-1] + src[2]) + 8)>>4]; | |
2569 dst[1]= cm[(9*(src[1] + src[2]) - (src[ 0] + src[3]) + 8)>>4]; | |
2570 dst[2]= cm[(9*(src[2] + src[3]) - (src[ 1] + src[4]) + 8)>>4]; | |
2571 dst[3]= cm[(9*(src[3] + src[4]) - (src[ 2] + src[5]) + 8)>>4]; | |
2572 dst[4]= cm[(9*(src[4] + src[5]) - (src[ 3] + src[6]) + 8)>>4]; | |
2573 dst[5]= cm[(9*(src[5] + src[6]) - (src[ 4] + src[7]) + 8)>>4]; | |
2574 dst[6]= cm[(9*(src[6] + src[7]) - (src[ 5] + src[8]) + 8)>>4]; | |
2575 dst[7]= cm[(9*(src[7] + src[8]) - (src[ 6] + src[9]) + 8)>>4]; | |
2576 dst+=dstStride; | |
2967 | 2577 src+=srcStride; |
936 | 2578 } |
2579 } | |
2580 | |
8590 | 2581 #if CONFIG_CAVS_DECODER |
3395
adccbf4a1040
CAVS decoder by (Stefan Gehrer stefan.gehrer gmx.de)
michael
parents:
3373
diff
changeset
|
2582 /* AVS specific */ |
adccbf4a1040
CAVS decoder by (Stefan Gehrer stefan.gehrer gmx.de)
michael
parents:
3373
diff
changeset
|
2583 void ff_put_cavs_qpel8_mc00_c(uint8_t *dst, uint8_t *src, int stride) { |
adccbf4a1040
CAVS decoder by (Stefan Gehrer stefan.gehrer gmx.de)
michael
parents:
3373
diff
changeset
|
2584 put_pixels8_c(dst, src, stride, 8); |
adccbf4a1040
CAVS decoder by (Stefan Gehrer stefan.gehrer gmx.de)
michael
parents:
3373
diff
changeset
|
2585 } |
adccbf4a1040
CAVS decoder by (Stefan Gehrer stefan.gehrer gmx.de)
michael
parents:
3373
diff
changeset
|
2586 void ff_avg_cavs_qpel8_mc00_c(uint8_t *dst, uint8_t *src, int stride) { |
adccbf4a1040
CAVS decoder by (Stefan Gehrer stefan.gehrer gmx.de)
michael
parents:
3373
diff
changeset
|
2587 avg_pixels8_c(dst, src, stride, 8); |
adccbf4a1040
CAVS decoder by (Stefan Gehrer stefan.gehrer gmx.de)
michael
parents:
3373
diff
changeset
|
2588 } |
adccbf4a1040
CAVS decoder by (Stefan Gehrer stefan.gehrer gmx.de)
michael
parents:
3373
diff
changeset
|
2589 void ff_put_cavs_qpel16_mc00_c(uint8_t *dst, uint8_t *src, int stride) { |
adccbf4a1040
CAVS decoder by (Stefan Gehrer stefan.gehrer gmx.de)
michael
parents:
3373
diff
changeset
|
2590 put_pixels16_c(dst, src, stride, 16); |
adccbf4a1040
CAVS decoder by (Stefan Gehrer stefan.gehrer gmx.de)
michael
parents:
3373
diff
changeset
|
2591 } |
adccbf4a1040
CAVS decoder by (Stefan Gehrer stefan.gehrer gmx.de)
michael
parents:
3373
diff
changeset
|
2592 void ff_avg_cavs_qpel16_mc00_c(uint8_t *dst, uint8_t *src, int stride) { |
adccbf4a1040
CAVS decoder by (Stefan Gehrer stefan.gehrer gmx.de)
michael
parents:
3373
diff
changeset
|
2593 avg_pixels16_c(dst, src, stride, 16); |
adccbf4a1040
CAVS decoder by (Stefan Gehrer stefan.gehrer gmx.de)
michael
parents:
3373
diff
changeset
|
2594 } |
3432 | 2595 #endif /* CONFIG_CAVS_DECODER */ |
3395
adccbf4a1040
CAVS decoder by (Stefan Gehrer stefan.gehrer gmx.de)
michael
parents:
3373
diff
changeset
|
2596 |
9995
3141f69e3905
Do not check for both CONFIG_VC1_DECODER and CONFIG_WMV3_DECODER,
diego
parents:
9975
diff
changeset
|
2597 #if CONFIG_VC1_DECODER |
3526 | 2598 /* VC-1 specific */ |
11378
f46b68960464
Move some VC1 dsp prototypes to dsputil.h; they are defined in dsputil.c
mru
parents:
11376
diff
changeset
|
2599 void ff_put_vc1_mspel_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int rnd) { |
3526 | 2600 put_pixels8_c(dst, src, stride, 8); |
2601 } | |
11378
f46b68960464
Move some VC1 dsp prototypes to dsputil.h; they are defined in dsputil.c
mru
parents:
11376
diff
changeset
|
2602 void ff_avg_vc1_mspel_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int rnd) { |
9437 | 2603 avg_pixels8_c(dst, src, stride, 8); |
2604 } | |
9995
3141f69e3905
Do not check for both CONFIG_VC1_DECODER and CONFIG_WMV3_DECODER,
diego
parents:
9975
diff
changeset
|
2605 #endif /* CONFIG_VC1_DECODER */ |
3526 | 2606 |
8590 | 2607 #if CONFIG_RV40_DECODER |
8232 | 2608 static void put_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){ |
2609 put_pixels16_xy2_c(dst, src, stride, 16); | |
2610 } | |
2611 static void avg_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){ | |
2612 avg_pixels16_xy2_c(dst, src, stride, 16); | |
2613 } | |
2614 static void put_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){ | |
2615 put_pixels8_xy2_c(dst, src, stride, 8); | |
2616 } | |
2617 static void avg_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){ | |
2618 avg_pixels8_xy2_c(dst, src, stride, 8); | |
2619 } | |
2620 #endif /* CONFIG_RV40_DECODER */ | |
2621 | |
936 | 2622 static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){ |
4176 | 2623 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; |
936 | 2624 int i; |
2625 | |
2626 for(i=0; i<w; i++){ | |
2627 const int src_1= src[ -srcStride]; | |
2628 const int src0 = src[0 ]; | |
2629 const int src1 = src[ srcStride]; | |
2630 const int src2 = src[2*srcStride]; | |
2631 const int src3 = src[3*srcStride]; | |
2632 const int src4 = src[4*srcStride]; | |
2633 const int src5 = src[5*srcStride]; | |
2634 const int src6 = src[6*srcStride]; | |
2635 const int src7 = src[7*srcStride]; | |
2636 const int src8 = src[8*srcStride]; | |
2637 const int src9 = src[9*srcStride]; | |
2638 dst[0*dstStride]= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4]; | |
2639 dst[1*dstStride]= cm[(9*(src1 + src2) - (src0 + src3) + 8)>>4]; | |
2640 dst[2*dstStride]= cm[(9*(src2 + src3) - (src1 + src4) + 8)>>4]; | |
2641 dst[3*dstStride]= cm[(9*(src3 + src4) - (src2 + src5) + 8)>>4]; | |
2642 dst[4*dstStride]= cm[(9*(src4 + src5) - (src3 + src6) + 8)>>4]; | |
2643 dst[5*dstStride]= cm[(9*(src5 + src6) - (src4 + src7) + 8)>>4]; | |
2644 dst[6*dstStride]= cm[(9*(src6 + src7) - (src5 + src8) + 8)>>4]; | |
2645 dst[7*dstStride]= cm[(9*(src7 + src8) - (src6 + src9) + 8)>>4]; | |
2646 src++; | |
2647 dst++; | |
2648 } | |
2649 } | |
2650 | |
2651 static void put_mspel8_mc00_c (uint8_t *dst, uint8_t *src, int stride){ | |
2652 put_pixels8_c(dst, src, stride, 8); | |
2653 } | |
2654 | |
2655 static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){ | |
2656 uint8_t half[64]; | |
2657 wmv2_mspel8_h_lowpass(half, src, 8, stride, 8); | |
2658 put_pixels8_l2(dst, src, half, stride, stride, 8, 8); | |
2659 } | |
2660 | |
2661 static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){ | |
2662 wmv2_mspel8_h_lowpass(dst, src, stride, stride, 8); | |
2663 } | |
2664 | |
2665 static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){ | |
2666 uint8_t half[64]; | |
2667 wmv2_mspel8_h_lowpass(half, src, 8, stride, 8); | |
2668 put_pixels8_l2(dst, src+1, half, stride, stride, 8, 8); | |
2669 } | |
2670 | |
2671 static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){ | |
2672 wmv2_mspel8_v_lowpass(dst, src, stride, stride, 8); | |
2673 } | |
2674 | |
2675 static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){ | |
2676 uint8_t halfH[88]; | |
2677 uint8_t halfV[64]; | |
2678 uint8_t halfHV[64]; | |
2679 wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11); | |
2680 wmv2_mspel8_v_lowpass(halfV, src, 8, stride, 8); | |
2681 wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8); | |
2682 put_pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8); | |
2683 } | |
2684 static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){ | |
2685 uint8_t halfH[88]; | |
2686 uint8_t halfV[64]; | |
2687 uint8_t halfHV[64]; | |
2688 wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11); | |
2689 wmv2_mspel8_v_lowpass(halfV, src+1, 8, stride, 8); | |
2690 wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8); | |
2691 put_pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8); | |
2692 } | |
2693 static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){ | |
2694 uint8_t halfH[88]; | |
2695 wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11); | |
2696 wmv2_mspel8_v_lowpass(dst, halfH+8, stride, 8, 8); | |
2697 } | |
2698 | |
1644 | 2699 static void h263_v_loop_filter_c(uint8_t *src, int stride, int qscale){ |
10749
5cca4b6c459d
Get rid of pointless CONFIG_ANY_H263 preprocessor definition.
diego
parents:
10748
diff
changeset
|
2700 if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) { |
1644 | 2701 int x; |
2702 const int strength= ff_h263_loop_filter_strength[qscale]; | |
2967 | 2703 |
1644 | 2704 for(x=0; x<8; x++){ |
2705 int d1, d2, ad1; | |
2706 int p0= src[x-2*stride]; | |
2707 int p1= src[x-1*stride]; | |
2708 int p2= src[x+0*stride]; | |
2709 int p3= src[x+1*stride]; | |
2710 int d = (p0 - p3 + 4*(p2 - p1)) / 8; | |
2711 | |
2712 if (d<-2*strength) d1= 0; | |
2713 else if(d<- strength) d1=-2*strength - d; | |
2714 else if(d< strength) d1= d; | |
2715 else if(d< 2*strength) d1= 2*strength - d; | |
2716 else d1= 0; | |
2967 | 2717 |
1644 | 2718 p1 += d1; |
2719 p2 -= d1; | |
2720 if(p1&256) p1= ~(p1>>31); | |
2721 if(p2&256) p2= ~(p2>>31); | |
2967 | 2722 |
1644 | 2723 src[x-1*stride] = p1; |
2724 src[x+0*stride] = p2; | |
2725 | |
4001 | 2726 ad1= FFABS(d1)>>1; |
2967 | 2727 |
4594 | 2728 d2= av_clip((p0-p3)/4, -ad1, ad1); |
2967 | 2729 |
1644 | 2730 src[x-2*stride] = p0 - d2; |
2731 src[x+ stride] = p3 + d2; | |
2732 } | |
5394
e9a6215f4e3a
help some gcc version to optimize out those functions
aurel
parents:
5291
diff
changeset
|
2733 } |
1644 | 2734 } |
2735 | |
2736 static void h263_h_loop_filter_c(uint8_t *src, int stride, int qscale){ | |
10749
5cca4b6c459d
Get rid of pointless CONFIG_ANY_H263 preprocessor definition.
diego
parents:
10748
diff
changeset
|
2737 if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) { |
1644 | 2738 int y; |
2739 const int strength= ff_h263_loop_filter_strength[qscale]; | |
2967 | 2740 |
1644 | 2741 for(y=0; y<8; y++){ |
2742 int d1, d2, ad1; | |
2743 int p0= src[y*stride-2]; | |
2744 int p1= src[y*stride-1]; | |
2745 int p2= src[y*stride+0]; | |
2746 int p3= src[y*stride+1]; | |
2747 int d = (p0 - p3 + 4*(p2 - p1)) / 8; | |
2748 | |
2749 if (d<-2*strength) d1= 0; | |
2750 else if(d<- strength) d1=-2*strength - d; | |
2751 else if(d< strength) d1= d; | |
2752 else if(d< 2*strength) d1= 2*strength - d; | |
2753 else d1= 0; | |
2967 | 2754 |
1644 | 2755 p1 += d1; |
2756 p2 -= d1; | |
2757 if(p1&256) p1= ~(p1>>31); | |
2758 if(p2&256) p2= ~(p2>>31); | |
2967 | 2759 |
1644 | 2760 src[y*stride-1] = p1; |
2761 src[y*stride+0] = p2; | |
2762 | |
4001 | 2763 ad1= FFABS(d1)>>1; |
2967 | 2764 |
4594 | 2765 d2= av_clip((p0-p3)/4, -ad1, ad1); |
2967 | 2766 |
1644 | 2767 src[y*stride-2] = p0 - d2; |
2768 src[y*stride+1] = p3 + d2; | |
2769 } | |
5394
e9a6215f4e3a
help some gcc version to optimize out those functions
aurel
parents:
5291
diff
changeset
|
2770 } |
1644 | 2771 } |
936 | 2772 |
2045 | 2773 static void h261_loop_filter_c(uint8_t *src, int stride){ |
2774 int x,y,xy,yz; | |
2775 int temp[64]; | |
2776 | |
2777 for(x=0; x<8; x++){ | |
2778 temp[x ] = 4*src[x ]; | |
2779 temp[x + 7*8] = 4*src[x + 7*stride]; | |
2780 } | |
2781 for(y=1; y<7; y++){ | |
2782 for(x=0; x<8; x++){ | |
2783 xy = y * stride + x; | |
2784 yz = y * 8 + x; | |
2785 temp[yz] = src[xy - stride] + 2*src[xy] + src[xy + stride]; | |
2044
b6f2add2511e
h261 decoder by (Maarten Daniels <maarten.daniels at student dot luc dot ac dot be>)
michael
parents:
1984
diff
changeset
|
2786 } |
b6f2add2511e
h261 decoder by (Maarten Daniels <maarten.daniels at student dot luc dot ac dot be>)
michael
parents:
1984
diff
changeset
|
2787 } |
2967 | 2788 |
2045 | 2789 for(y=0; y<8; y++){ |
2790 src[ y*stride] = (temp[ y*8] + 2)>>2; | |
2791 src[7+y*stride] = (temp[7+y*8] + 2)>>2; | |
2792 for(x=1; x<7; x++){ | |
2793 xy = y * stride + x; | |
2794 yz = y * 8 + x; | |
2795 src[xy] = (temp[yz-1] + 2*temp[yz] + temp[yz+1] + 8)>>4; | |
2044
b6f2add2511e
h261 decoder by (Maarten Daniels <maarten.daniels at student dot luc dot ac dot be>)
michael
parents:
1984
diff
changeset
|
2796 } |
b6f2add2511e
h261 decoder by (Maarten Daniels <maarten.daniels at student dot luc dot ac dot be>)
michael
parents:
1984
diff
changeset
|
2797 } |
b6f2add2511e
h261 decoder by (Maarten Daniels <maarten.daniels at student dot luc dot ac dot be>)
michael
parents:
1984
diff
changeset
|
2798 } |
b6f2add2511e
h261 decoder by (Maarten Daniels <maarten.daniels at student dot luc dot ac dot be>)
michael
parents:
1984
diff
changeset
|
2799 |
1708 | 2800 static inline int pix_abs16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) |
0 | 2801 { |
2802 int s, i; | |
2803 | |
2804 s = 0; | |
1708 | 2805 for(i=0;i<h;i++) { |
0 | 2806 s += abs(pix1[0] - pix2[0]); |
2807 s += abs(pix1[1] - pix2[1]); | |
2808 s += abs(pix1[2] - pix2[2]); | |
2809 s += abs(pix1[3] - pix2[3]); | |
2810 s += abs(pix1[4] - pix2[4]); | |
2811 s += abs(pix1[5] - pix2[5]); | |
2812 s += abs(pix1[6] - pix2[6]); | |
2813 s += abs(pix1[7] - pix2[7]); | |
2814 s += abs(pix1[8] - pix2[8]); | |
2815 s += abs(pix1[9] - pix2[9]); | |
2816 s += abs(pix1[10] - pix2[10]); | |
2817 s += abs(pix1[11] - pix2[11]); | |
2818 s += abs(pix1[12] - pix2[12]); | |
2819 s += abs(pix1[13] - pix2[13]); | |
2820 s += abs(pix1[14] - pix2[14]); | |
2821 s += abs(pix1[15] - pix2[15]); | |
2822 pix1 += line_size; | |
2823 pix2 += line_size; | |
2824 } | |
2825 return s; | |
2826 } | |
2827 | |
/**
 * SAD between pix1 and pix2 half-pel-shifted horizontally
 * (each reference pixel is the average of two neighbours; reads pix2[0..16]).
 */
static int pix_abs16_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sum = 0;
    int x, y;

    for (y = 0; y < h; y++) {
        for (x = 0; x < 16; x++)
            sum += abs(pix1[x] - avg2(pix2[x], pix2[x + 1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return sum;
}
2855 | |
/**
 * SAD between pix1 and pix2 half-pel-shifted vertically
 * (each reference pixel is averaged with the pixel one row below).
 */
static int pix_abs16_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sum = 0;
    int x, y;
    uint8_t *below = pix2 + line_size;

    for (y = 0; y < h; y++) {
        for (x = 0; x < 16; x++)
            sum += abs(pix1[x] - avg2(pix2[x], below[x]));
        pix1  += line_size;
        pix2  += line_size;
        below += line_size;
    }
    return sum;
}
2885 | |
/**
 * SAD between pix1 and pix2 half-pel-shifted both horizontally and vertically
 * (each reference pixel is the average of a 2x2 neighbourhood; reads pix2[0..16]).
 */
static int pix_abs16_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sum = 0;
    int x, y;
    uint8_t *below = pix2 + line_size;

    for (y = 0; y < h; y++) {
        for (x = 0; x < 16; x++)
            sum += abs(pix1[x] - avg4(pix2[x], pix2[x + 1], below[x], below[x + 1]));
        pix1  += line_size;
        pix2  += line_size;
        below += line_size;
    }
    return sum;
}
2915 | |
/**
 * Sum of absolute differences between two 8-pixel-wide blocks.
 *
 * @param v         unused context pointer (me_cmp_func signature)
 * @param line_size byte stride between rows
 * @param h         number of rows to compare
 * @return the SAD over 8*h pixels
 */
static inline int pix_abs8_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sum = 0;
    int x, y;

    for (y = 0; y < h; y++) {
        for (x = 0; x < 8; x++)
            sum += abs(pix1[x] - pix2[x]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return sum;
}
2935 | |
/**
 * 8-wide SAD against a horizontally half-pel-shifted reference
 * (reads pix2[0..8] on each row).
 */
static int pix_abs8_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sum = 0;
    int x, y;

    for (y = 0; y < h; y++) {
        for (x = 0; x < 8; x++)
            sum += abs(pix1[x] - avg2(pix2[x], pix2[x + 1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return sum;
}
2955 | |
/**
 * 8-wide SAD against a vertically half-pel-shifted reference
 * (each reference pixel is averaged with the pixel one row below).
 */
static int pix_abs8_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sum = 0;
    int x, y;
    uint8_t *below = pix2 + line_size;

    for (y = 0; y < h; y++) {
        for (x = 0; x < 8; x++)
            sum += abs(pix1[x] - avg2(pix2[x], below[x]));
        pix1  += line_size;
        pix2  += line_size;
        below += line_size;
    }
    return sum;
}
2977 | |
/**
 * 8-wide SAD against a reference half-pel-shifted in both directions
 * (2x2 neighbourhood average; reads pix2[0..8] on each of two rows).
 */
static int pix_abs8_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sum = 0;
    int x, y;
    uint8_t *below = pix2 + line_size;

    for (y = 0; y < h; y++) {
        for (x = 0; x < 8; x++)
            sum += abs(pix1[x] - avg4(pix2[x], pix2[x + 1], below[x], below[x + 1]));
        pix1  += line_size;
        pix2  += line_size;
        below += line_size;
    }
    return sum;
}
2999 | |
2834 | 3000 static int nsse16_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){ |
3001 MpegEncContext *c = v; | |
2065
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3002 int score1=0; |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3003 int score2=0; |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3004 int x,y; |
2066 | 3005 |
2065
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3006 for(y=0; y<h; y++){ |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3007 for(x=0; x<16; x++){ |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3008 score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]); |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3009 } |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3010 if(y+1<h){ |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3011 for(x=0; x<15; x++){ |
4001 | 3012 score2+= FFABS( s1[x ] - s1[x +stride] |
2065
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3013 - s1[x+1] + s1[x+1+stride]) |
4001 | 3014 -FFABS( s2[x ] - s2[x +stride] |
2065
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3015 - s2[x+1] + s2[x+1+stride]); |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3016 } |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3017 } |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3018 s1+= stride; |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3019 s2+= stride; |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3020 } |
2066 | 3021 |
4001 | 3022 if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight; |
3023 else return score1 + FFABS(score2)*8; | |
2065
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3024 } |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3025 |
2834 | 3026 static int nsse8_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){ |
3027 MpegEncContext *c = v; | |
2065
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3028 int score1=0; |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3029 int score2=0; |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3030 int x,y; |
2967 | 3031 |
2065
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3032 for(y=0; y<h; y++){ |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3033 for(x=0; x<8; x++){ |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3034 score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]); |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3035 } |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3036 if(y+1<h){ |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3037 for(x=0; x<7; x++){ |
4001 | 3038 score2+= FFABS( s1[x ] - s1[x +stride] |
2065
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3039 - s1[x+1] + s1[x+1+stride]) |
4001 | 3040 -FFABS( s2[x ] - s2[x +stride] |
2065
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3041 - s2[x+1] + s2[x+1+stride]); |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3042 } |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3043 } |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3044 s1+= stride; |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3045 s2+= stride; |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3046 } |
2967 | 3047 |
4001 | 3048 if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight; |
3049 else return score1 + FFABS(score2)*8; | |
2065
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3050 } |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3051 |
1784 | 3052 static int try_8x8basis_c(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale){ |
3053 int i; | |
3054 unsigned int sum=0; | |
3055 | |
3056 for(i=0; i<8*8; i++){ | |
3057 int b= rem[i] + ((basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT)); | |
3058 int w= weight[i]; | |
3059 b>>= RECON_SHIFT; | |
3060 assert(-512<b && b<512); | |
3061 | |
3062 sum += (w*b)*(w*b)>>4; | |
3063 } | |
3064 return sum>>2; | |
3065 } | |
3066 | |
3067 static void add_8x8basis_c(int16_t rem[64], int16_t basis[64], int scale){ | |
3068 int i; | |
3069 | |
3070 for(i=0; i<8*8; i++){ | |
3071 rem[i] += (basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT); | |
2967 | 3072 } |
1784 | 3073 } |
3074 | |
1100 | 3075 /** |
3076 * permutes an 8x8 block. | |
1101 | 3077 * @param block the block which will be permuted according to the given permutation vector |
1100 | 3078 * @param permutation the permutation vector |
3079 * @param last the last non zero coefficient in scantable order, used to speed the permutation up | |
2967 | 3080 * @param scantable the used scantable, this is only used to speed the permutation up, the block is not |
1101 | 3081 * (inverse) permutated to scantable order! |
1100 | 3082 */ |
1064 | 3083 void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last) |
174
ac5075a55488
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
88
diff
changeset
|
3084 { |
764 | 3085 int i; |
945 | 3086 DCTELEM temp[64]; |
2967 | 3087 |
764 | 3088 if(last<=0) return; |
5129 | 3089 //if(permutation[1]==1) return; //FIXME it is ok but not clean and might fail for some permutations |
174
ac5075a55488
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
88
diff
changeset
|
3090 |
764 | 3091 for(i=0; i<=last; i++){ |
3092 const int j= scantable[i]; | |
3093 temp[j]= block[j]; | |
3094 block[j]=0; | |
3095 } | |
2967 | 3096 |
764 | 3097 for(i=0; i<=last; i++){ |
3098 const int j= scantable[i]; | |
3099 const int perm_j= permutation[j]; | |
3100 block[perm_j]= temp[j]; | |
3101 } | |
174
ac5075a55488
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
88
diff
changeset
|
3102 } |
34 | 3103 |
/* No-op compare used for FF_CMP_ZERO: every candidate scores 0. */
static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h){
    return 0;
}
3107 | |
3108 void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type){ | |
3109 int i; | |
2967 | 3110 |
8976
e7d87561b42b
Making the arrays accomodate an extra intra 8x8 cmp function
romansh
parents:
8785
diff
changeset
|
3111 memset(cmp, 0, sizeof(void*)*6); |
e7d87561b42b
Making the arrays accomodate an extra intra 8x8 cmp function
romansh
parents:
8785
diff
changeset
|
3112 |
e7d87561b42b
Making the arrays accomodate an extra intra 8x8 cmp function
romansh
parents:
8785
diff
changeset
|
3113 for(i=0; i<6; i++){ |
1729 | 3114 switch(type&0xFF){ |
3115 case FF_CMP_SAD: | |
3116 cmp[i]= c->sad[i]; | |
3117 break; | |
3118 case FF_CMP_SATD: | |
3119 cmp[i]= c->hadamard8_diff[i]; | |
3120 break; | |
3121 case FF_CMP_SSE: | |
3122 cmp[i]= c->sse[i]; | |
3123 break; | |
3124 case FF_CMP_DCT: | |
3125 cmp[i]= c->dct_sad[i]; | |
3126 break; | |
3010
533c6386eca9
8x8 integer dct from x264 as cmp function (under CONFIG_GPL)
michael
parents:
2979
diff
changeset
|
3127 case FF_CMP_DCT264: |
533c6386eca9
8x8 integer dct from x264 as cmp function (under CONFIG_GPL)
michael
parents:
2979
diff
changeset
|
3128 cmp[i]= c->dct264_sad[i]; |
533c6386eca9
8x8 integer dct from x264 as cmp function (under CONFIG_GPL)
michael
parents:
2979
diff
changeset
|
3129 break; |
2382 | 3130 case FF_CMP_DCTMAX: |
3131 cmp[i]= c->dct_max[i]; | |
3132 break; | |
1729 | 3133 case FF_CMP_PSNR: |
3134 cmp[i]= c->quant_psnr[i]; | |
3135 break; | |
3136 case FF_CMP_BIT: | |
3137 cmp[i]= c->bit[i]; | |
3138 break; | |
3139 case FF_CMP_RD: | |
3140 cmp[i]= c->rd[i]; | |
3141 break; | |
3142 case FF_CMP_VSAD: | |
3143 cmp[i]= c->vsad[i]; | |
3144 break; | |
3145 case FF_CMP_VSSE: | |
3146 cmp[i]= c->vsse[i]; | |
3147 break; | |
3148 case FF_CMP_ZERO: | |
3149 cmp[i]= zero_cmp; | |
3150 break; | |
2065
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3151 case FF_CMP_NSSE: |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3152 cmp[i]= c->nsse[i]; |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3153 break; |
11485 | 3154 #if CONFIG_DWT |
2184 | 3155 case FF_CMP_W53: |
3156 cmp[i]= c->w53[i]; | |
3157 break; | |
3158 case FF_CMP_W97: | |
3159 cmp[i]= c->w97[i]; | |
3160 break; | |
3373
b8996cc5ccae
Disable w53 and w97 cmp methods when snow encoder is disabled
gpoirier
parents:
3323
diff
changeset
|
3161 #endif |
1729 | 3162 default: |
3163 av_log(NULL, AV_LOG_ERROR,"internal error in cmp function selection\n"); | |
3164 } | |
3165 } | |
3166 } | |
3167 | |
8288 | 3168 static void clear_block_c(DCTELEM *block) |
3169 { | |
3170 memset(block, 0, sizeof(DCTELEM)*64); | |
3171 } | |
3172 | |
1101 | 3173 /** |
3174 * memset(blocks, 0, sizeof(DCTELEM)*6*64) | |
3175 */ | |
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
764
diff
changeset
|
3176 static void clear_blocks_c(DCTELEM *blocks) |
296 | 3177 { |
3178 memset(blocks, 0, sizeof(DCTELEM)*6*64); | |
3179 } | |
3180 | |
866 | 3181 static void add_bytes_c(uint8_t *dst, uint8_t *src, int w){ |
6385 | 3182 long i; |
3183 for(i=0; i<=w-sizeof(long); i+=sizeof(long)){ | |
3184 long a = *(long*)(src+i); | |
3185 long b = *(long*)(dst+i); | |
3186 *(long*)(dst+i) = ((a&pb_7f) + (b&pb_7f)) ^ ((a^b)&pb_80); | |
866 | 3187 } |
3188 for(; i<w; i++) | |
3189 dst[i+0] += src[i+0]; | |
3190 } | |
3191 | |
6384 | 3192 static void add_bytes_l2_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){ |
6385 | 3193 long i; |
6384 | 3194 for(i=0; i<=w-sizeof(long); i+=sizeof(long)){ |
3195 long a = *(long*)(src1+i); | |
3196 long b = *(long*)(src2+i); | |
6385 | 3197 *(long*)(dst+i) = ((a&pb_7f) + (b&pb_7f)) ^ ((a^b)&pb_80); |
6384 | 3198 } |
3199 for(; i<w; i++) | |
3200 dst[i] = src1[i]+src2[i]; | |
3201 } | |
3202 | |
866 | 3203 static void diff_bytes_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){ |
6385 | 3204 long i; |
8590 | 3205 #if !HAVE_FAST_UNALIGNED |
6385 | 3206 if((long)src2 & (sizeof(long)-1)){ |
6386 | 3207 for(i=0; i+7<w; i+=8){ |
3208 dst[i+0] = src1[i+0]-src2[i+0]; | |
3209 dst[i+1] = src1[i+1]-src2[i+1]; | |
3210 dst[i+2] = src1[i+2]-src2[i+2]; | |
3211 dst[i+3] = src1[i+3]-src2[i+3]; | |
3212 dst[i+4] = src1[i+4]-src2[i+4]; | |
3213 dst[i+5] = src1[i+5]-src2[i+5]; | |
3214 dst[i+6] = src1[i+6]-src2[i+6]; | |
3215 dst[i+7] = src1[i+7]-src2[i+7]; | |
3216 } | |
6385 | 3217 }else |
3218 #endif | |
3219 for(i=0; i<=w-sizeof(long); i+=sizeof(long)){ | |
3220 long a = *(long*)(src1+i); | |
3221 long b = *(long*)(src2+i); | |
3222 *(long*)(dst+i) = ((a|pb_80) - (b&pb_7f)) ^ ((a^b^pb_80)&pb_80); | |
3223 } | |
866 | 3224 for(; i<w; i++) |
3225 dst[i+0] = src1[i+0]-src2[i+0]; | |
3226 } | |
3227 | |
/**
 * Undo HuffYUV median prediction over one row: reconstruct each pixel from
 * the decoded residual 'diff', the row above 'src1', and the running
 * left/left-top state.  Updates *left and *left_top for the next row.
 */
static void add_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *diff, int w, int *left, int *left_top){
    uint8_t pred_left    = *left;
    uint8_t pred_topleft = *left_top;
    int i;

    for (i = 0; i < w; i++) {
        const int top = src1[i];
        pred_left    = mid_pred(pred_left, top, (pred_left + top - pred_topleft) & 0xFF) + diff[i];
        pred_topleft = top;
        dst[i]       = pred_left;
    }

    *left     = pred_left;
    *left_top = pred_topleft;
}
3244 | |
/**
 * Apply HuffYUV median prediction over one row: emit the residual between
 * the current row 'src2' and the median predictor built from the row above
 * 'src1' and the running left/left-top state.  Updates *left and *left_top.
 */
static void sub_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int w, int *left, int *left_top){
    uint8_t l  = *left;
    uint8_t lt = *left_top;
    int i;

    for (i = 0; i < w; i++) {
        const int top  = src1[i];
        const int cur  = src2[i];
        const int pred = mid_pred(l, top, (l + top - lt) & 0xFF);
        dst[i] = cur - pred;
        lt = top;
        l  = cur;
    }

    *left     = l;
    *left_top = lt;
}
3262 | |
10420
442ab0c41eae
Huffyuv: Add missing const to src pointers in dsputil functions.
astrange
parents:
10370
diff
changeset
|
/**
 * Undo HuffYUV left prediction: running sum of the residuals, truncated to
 * 8 bits on store.  Returns the (untruncated) accumulator for the next call.
 */
static int add_hfyu_left_prediction_c(uint8_t *dst, const uint8_t *src, int w, int acc){
    int i;

    for (i = 0; i < w; i++) {
        acc   += src[i];
        dst[i] = acc;  /* low 8 bits only */
    }

    return acc;
}
3281 | |
/* Byte offsets of the colour channels inside a packed BGR32 pixel,
 * chosen so that memory order matches on either endianness. */
#if HAVE_BIGENDIAN
#define B 3
#define G 2
#define R 1
#define A 0
#else
#define B 0
#define G 1
#define R 2
#define A 3
#endif
/**
 * Undo HuffYUV left prediction on packed BGR32 pixels: each channel is an
 * independent running sum, truncated to 8 bits on store.  The running values
 * are carried in/out through *red, *green, *blue, *alpha.
 */
static void add_hfyu_left_prediction_bgr32_c(uint8_t *dst, const uint8_t *src, int w, int *red, int *green, int *blue, int *alpha){
    int r = *red;
    int g = *green;
    int b = *blue;
    int a = *alpha;
    int i;

    for (i = 0; i < w; i++) {
        const uint8_t *sp = src + 4 * i;
        uint8_t       *dp = dst + 4 * i;

        b += sp[B];
        g += sp[G];
        r += sp[R];
        a += sp[A];

        dp[B] = b;
        dp[G] = g;
        dp[R] = r;
        dp[A] = a;
    }

    *red   = r;
    *green = g;
    *blue  = b;
    *alpha = a;
}
#undef B
#undef G
#undef R
#undef A
10370 | 3322 |
/* Butterfly writing to two fresh outputs: o1 = i1+i2, o2 = i1-i2. */
#define BUTTERFLY2(o1,o2,i1,i2) \
    o1= (i1)+(i2);\
    o2= (i1)-(i2);

/* In-place butterfly: x,y <- x+y, x-y (a,b shield against aliasing). */
#define BUTTERFLY1(x,y) \
{\
    int a,b;\
    a= x;\
    b= y;\
    x= a+b;\
    y= a-b;\
}

/* Final butterfly stage fused with the absolute-value sum: |x+y| + |x-y|. */
#define BUTTERFLYA(x,y) (FFABS((x)+(y)) + FFABS((x)-(y)))
/**
 * SATD: sum of absolute values of the 8x8 Hadamard transform of the residual
 * between two stride-spaced 8x8 blocks (h must be 8).
 * Rows are transformed first into temp[], then the columns, with the last
 * column butterfly fused into the absolute-value accumulation (BUTTERFLYA).
 */
static int hadamard8_diff8x8_c(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){
    int i;
    int temp[64];
    int sum=0;

    assert(h==8);

    for(i=0; i<8; i++){
        //FIXME try pointer walks
        /* load the residual row and apply the first horizontal stage */
        BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0]-dst[stride*i+0],src[stride*i+1]-dst[stride*i+1]);
        BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2]-dst[stride*i+2],src[stride*i+3]-dst[stride*i+3]);
        BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4]-dst[stride*i+4],src[stride*i+5]-dst[stride*i+5]);
        BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6]-dst[stride*i+6],src[stride*i+7]-dst[stride*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
        BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+5], temp[8*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
        BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
    }

    for(i=0; i<8; i++){
        /* vertical transform of column i; last stage fused with |.| sum */
        BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
        BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
        BUTTERFLY1(temp[8*6+i], temp[8*7+i]);

        BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
        BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
        BUTTERFLY1(temp[8*5+i], temp[8*7+i]);

        sum +=
             BUTTERFLYA(temp[8*0+i], temp[8*4+i])
            +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
            +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
            +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
    }
    /* removed a dead '#if 0' debug block that declared a static local and
     * printed the running maximum — commented-out code, never compiled */
    return sum;
}
3389 | |
/**
 * Intra SATD: sum of absolute values of the 8x8 Hadamard transform of the
 * source block itself (dummy is ignored), minus the DC ("mean") term
 * (h must be 8).
 */
static int hadamard8_intra8x8_c(/*MpegEncContext*/ void *s, uint8_t *src, uint8_t *dummy, int stride, int h){
    int butterfly[64];
    int total = 0;
    int i;

    assert(h==8);

    /* horizontal pass: transform each pixel row into one row of butterfly[] */
    for (i = 0; i < 8; i++) {
        int *row = butterfly + 8 * i;
        const uint8_t *pix = src + stride * i;

        BUTTERFLY2(row[0], row[1], pix[0], pix[1]);
        BUTTERFLY2(row[2], row[3], pix[2], pix[3]);
        BUTTERFLY2(row[4], row[5], pix[4], pix[5]);
        BUTTERFLY2(row[6], row[7], pix[6], pix[7]);

        BUTTERFLY1(row[0], row[2]);
        BUTTERFLY1(row[1], row[3]);
        BUTTERFLY1(row[4], row[6]);
        BUTTERFLY1(row[5], row[7]);

        BUTTERFLY1(row[0], row[4]);
        BUTTERFLY1(row[1], row[5]);
        BUTTERFLY1(row[2], row[6]);
        BUTTERFLY1(row[3], row[7]);
    }

    /* vertical pass: last butterfly stage fused with the |.| accumulation */
    for (i = 0; i < 8; i++) {
        int *col = butterfly + i;

        BUTTERFLY1(col[8*0], col[8*1]);
        BUTTERFLY1(col[8*2], col[8*3]);
        BUTTERFLY1(col[8*4], col[8*5]);
        BUTTERFLY1(col[8*6], col[8*7]);

        BUTTERFLY1(col[8*0], col[8*2]);
        BUTTERFLY1(col[8*1], col[8*3]);
        BUTTERFLY1(col[8*4], col[8*6]);
        BUTTERFLY1(col[8*5], col[8*7]);

        total += BUTTERFLYA(col[8*0], col[8*4])
               + BUTTERFLYA(col[8*1], col[8*5])
               + BUTTERFLYA(col[8*2], col[8*6])
               + BUTTERFLYA(col[8*3], col[8*7]);
    }

    total -= FFABS(butterfly[8*0] + butterfly[8*4]); // -mean
    return total;
}
3437 | |
1708 | 3438 static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){ |
936 | 3439 MpegEncContext * const s= (MpegEncContext *)c; |
11195 | 3440 LOCAL_ALIGNED_16(DCTELEM, temp, [64]); |
2967 | 3441 |
1708 | 3442 assert(h==8); |
936 | 3443 |
3444 s->dsp.diff_pixels(temp, src1, src2, stride); | |
1092 | 3445 s->dsp.fdct(temp); |
4988
689490842cf5
factor sum_abs_dctelem out of dct_sad, and simd it.
lorenm
parents:
4749
diff
changeset
|
3446 return s->dsp.sum_abs_dctelem(temp); |
936 | 3447 } |
3448 | |
8590 | 3449 #if CONFIG_GPL |
3010
533c6386eca9
8x8 integer dct from x264 as cmp function (under CONFIG_GPL)
michael
parents:
2979
diff
changeset
|
/* One 8-point pass of the 8x8 integer DCT taken from x264 (per the commit
 * message above).  Reads through SRC(i) and writes through DST(i,v) so the
 * same macro serves both the row and the column pass — callers redefine
 * SRC/DST between passes.  x + (x>>1) forms approximate a *1.5 multiply
 * without an actual multiplication. */
#define DCT8_1D {\
    /* even half: sums of mirrored sample pairs */\
    const int s07 = SRC(0) + SRC(7);\
    const int s16 = SRC(1) + SRC(6);\
    const int s25 = SRC(2) + SRC(5);\
    const int s34 = SRC(3) + SRC(4);\
    const int a0 = s07 + s34;\
    const int a1 = s16 + s25;\
    const int a2 = s07 - s34;\
    const int a3 = s16 - s25;\
    /* odd half: differences of mirrored sample pairs */\
    const int d07 = SRC(0) - SRC(7);\
    const int d16 = SRC(1) - SRC(6);\
    const int d25 = SRC(2) - SRC(5);\
    const int d34 = SRC(3) - SRC(4);\
    const int a4 = d16 + d25 + (d07 + (d07>>1));\
    const int a5 = d07 - d34 - (d25 + (d25>>1));\
    const int a6 = d07 + d34 - (d16 + (d16>>1));\
    const int a7 = d16 - d25 + (d34 + (d34>>1));\
    DST(0, a0 + a1 ) ;\
    DST(1, a4 + (a7>>2)) ;\
    DST(2, a2 + (a3>>1)) ;\
    DST(3, a5 + (a6>>2)) ;\
    DST(4, a0 - a1 ) ;\
    DST(5, a6 - (a5>>2)) ;\
    DST(6, (a2>>1) - a3 ) ;\
    DST(7, (a4>>2) - a7 ) ;\
}
533c6386eca9
8x8 integer dct from x264 as cmp function (under CONFIG_GPL)
michael
parents:
2979
diff
changeset
|
3476 |
533c6386eca9
8x8 integer dct from x264 as cmp function (under CONFIG_GPL)
michael
parents:
2979
diff
changeset
|
3477 static int dct264_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){ |
533c6386eca9
8x8 integer dct from x264 as cmp function (under CONFIG_GPL)
michael
parents:
2979
diff
changeset
|
3478 MpegEncContext * const s= (MpegEncContext *)c; |
5256 | 3479 DCTELEM dct[8][8]; |
3010
533c6386eca9
8x8 integer dct from x264 as cmp function (under CONFIG_GPL)
michael
parents:
2979
diff
changeset
|
3480 int i; |
533c6386eca9
8x8 integer dct from x264 as cmp function (under CONFIG_GPL)
michael
parents:
2979
diff
changeset
|
3481 int sum=0; |
533c6386eca9
8x8 integer dct from x264 as cmp function (under CONFIG_GPL)
michael
parents:
2979
diff
changeset
|
3482 |
5256 | 3483 s->dsp.diff_pixels(dct[0], src1, src2, stride); |
3010
533c6386eca9
8x8 integer dct from x264 as cmp function (under CONFIG_GPL)
michael
parents:
2979
diff
changeset
|
3484 |
533c6386eca9
8x8 integer dct from x264 as cmp function (under CONFIG_GPL)
michael
parents:
2979
diff
changeset
|
3485 #define SRC(x) dct[i][x] |
533c6386eca9
8x8 integer dct from x264 as cmp function (under CONFIG_GPL)
michael
parents:
2979
diff
changeset
|
3486 #define DST(x,v) dct[i][x]= v |
533c6386eca9
8x8 integer dct from x264 as cmp function (under CONFIG_GPL)
michael
parents:
2979
diff
changeset
|
3487 for( i = 0; i < 8; i++ ) |
533c6386eca9
8x8 integer dct from x264 as cmp function (under CONFIG_GPL)
michael
parents:
2979
diff
changeset
|
3488 DCT8_1D |
533c6386eca9
8x8 integer dct from x264 as cmp function (under CONFIG_GPL)
michael
parents:
2979
diff
changeset
|
3489 #undef SRC |
533c6386eca9
8x8 integer dct from x264 as cmp function (under CONFIG_GPL)
michael
parents:
2979
diff
changeset
|
3490 #undef DST |
533c6386eca9
8x8 integer dct from x264 as cmp function (under CONFIG_GPL)
michael
parents:
2979
diff
changeset
|
3491 |
533c6386eca9
8x8 integer dct from x264 as cmp function (under CONFIG_GPL)
michael
parents:
2979
diff
changeset
|
3492 #define SRC(x) dct[x][i] |
4001 | 3493 #define DST(x,v) sum += FFABS(v) |
3010
533c6386eca9
8x8 integer dct from x264 as cmp function (under CONFIG_GPL)
michael
parents:
2979
diff
changeset
|
3494 for( i = 0; i < 8; i++ ) |
533c6386eca9
8x8 integer dct from x264 as cmp function (under CONFIG_GPL)
michael
parents:
2979
diff
changeset
|
3495 DCT8_1D |
533c6386eca9
8x8 integer dct from x264 as cmp function (under CONFIG_GPL)
michael
parents:
2979
diff
changeset
|
3496 #undef SRC |
533c6386eca9
8x8 integer dct from x264 as cmp function (under CONFIG_GPL)
michael
parents:
2979
diff
changeset
|
3497 #undef DST |
533c6386eca9
8x8 integer dct from x264 as cmp function (under CONFIG_GPL)
michael
parents:
2979
diff
changeset
|
3498 return sum; |
533c6386eca9
8x8 integer dct from x264 as cmp function (under CONFIG_GPL)
michael
parents:
2979
diff
changeset
|
3499 } |
533c6386eca9
8x8 integer dct from x264 as cmp function (under CONFIG_GPL)
michael
parents:
2979
diff
changeset
|
3500 #endif |
533c6386eca9
8x8 integer dct from x264 as cmp function (under CONFIG_GPL)
michael
parents:
2979
diff
changeset
|
3501 |
2382 | 3502 static int dct_max8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){ |
3503 MpegEncContext * const s= (MpegEncContext *)c; | |
11195 | 3504 LOCAL_ALIGNED_16(DCTELEM, temp, [64]); |
2382 | 3505 int sum=0, i; |
2967 | 3506 |
2382 | 3507 assert(h==8); |
3508 | |
3509 s->dsp.diff_pixels(temp, src1, src2, stride); | |
3510 s->dsp.fdct(temp); | |
3511 | |
3512 for(i=0; i<64; i++) | |
4001 | 3513 sum= FFMAX(sum, FFABS(temp[i])); |
2967 | 3514 |
2382 | 3515 return sum; |
3516 } | |
3517 | |
1708 | 3518 static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){ |
936 | 3519 MpegEncContext * const s= (MpegEncContext *)c; |
11195 | 3520 LOCAL_ALIGNED_16(DCTELEM, temp, [64*2]); |
11193 | 3521 DCTELEM * const bak = temp+64; |
936 | 3522 int sum=0, i; |
3523 | |
1708 | 3524 assert(h==8); |
936 | 3525 s->mb_intra=0; |
2967 | 3526 |
936 | 3527 s->dsp.diff_pixels(temp, src1, src2, stride); |
2967 | 3528 |
936 | 3529 memcpy(bak, temp, 64*sizeof(DCTELEM)); |
2967 | 3530 |
1013 | 3531 s->block_last_index[0/*FIXME*/]= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i); |
1689 | 3532 s->dct_unquantize_inter(s, temp, 0, s->qscale); |
6001 | 3533 ff_simple_idct(temp); //FIXME |
2967 | 3534 |
936 | 3535 for(i=0; i<64; i++) |
3536 sum+= (temp[i]-bak[i])*(temp[i]-bak[i]); | |
2967 | 3537 |
936 | 3538 return sum; |
3539 } | |
3540 | |
1708 | 3541 static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){ |
1007 | 3542 MpegEncContext * const s= (MpegEncContext *)c; |
1064 | 3543 const uint8_t *scantable= s->intra_scantable.permutated; |
11195 | 3544 LOCAL_ALIGNED_16(DCTELEM, temp, [64]); |
3545 LOCAL_ALIGNED_16(uint8_t, lsrc1, [64]); | |
3546 LOCAL_ALIGNED_16(uint8_t, lsrc2, [64]); | |
6719 | 3547 int i, last, run, bits, level, distortion, start_i; |
1007 | 3548 const int esc_length= s->ac_esc_length; |
3549 uint8_t * length; | |
3550 uint8_t * last_length; | |
2967 | 3551 |
1708 | 3552 assert(h==8); |
3553 | |
10068 | 3554 copy_block8(lsrc1, src1, 8, stride, 8); |
3555 copy_block8(lsrc2, src2, 8, stride, 8); | |
3556 | |
3557 s->dsp.diff_pixels(temp, lsrc1, lsrc2, 8); | |
1007 | 3558 |
1013 | 3559 s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i); |
3560 | |
3561 bits=0; | |
2967 | 3562 |
1013 | 3563 if (s->mb_intra) { |
2967 | 3564 start_i = 1; |
1013 | 3565 length = s->intra_ac_vlc_length; |
3566 last_length= s->intra_ac_vlc_last_length; | |
3567 bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma | |
3568 } else { | |
3569 start_i = 0; | |
3570 length = s->inter_ac_vlc_length; | |
3571 last_length= s->inter_ac_vlc_last_length; | |
3572 } | |
2967 | 3573 |
1013 | 3574 if(last>=start_i){ |
1007 | 3575 run=0; |
3576 for(i=start_i; i<last; i++){ | |
3577 int j= scantable[i]; | |
3578 level= temp[j]; | |
2967 | 3579 |
1007 | 3580 if(level){ |
3581 level+=64; | |
3582 if((level&(~127)) == 0){ | |
3583 bits+= length[UNI_AC_ENC_INDEX(run, level)]; | |
3584 }else | |
3585 bits+= esc_length; | |
3586 run=0; | |
3587 }else | |
3588 run++; | |
3589 } | |
3590 i= scantable[last]; | |
2967 | 3591 |
1011 | 3592 level= temp[i] + 64; |
3593 | |
3594 assert(level - 64); | |
2967 | 3595 |
1007 | 3596 if((level&(~127)) == 0){ |
3597 bits+= last_length[UNI_AC_ENC_INDEX(run, level)]; | |
3598 }else | |
3599 bits+= esc_length; | |
2967 | 3600 |
1013 | 3601 } |
3602 | |
3603 if(last>=0){ | |
1689 | 3604 if(s->mb_intra) |
3605 s->dct_unquantize_intra(s, temp, 0, s->qscale); | |
3606 else | |
3607 s->dct_unquantize_inter(s, temp, 0, s->qscale); | |
1007 | 3608 } |
2967 | 3609 |
10068 | 3610 s->dsp.idct_add(lsrc2, 8, temp); |
3611 | |
3612 distortion= s->dsp.sse[1](NULL, lsrc2, lsrc1, 8, 8); | |
6719 | 3613 |
3614 return distortion + ((bits*s->qscale*s->qscale*109 + 64)>>7); | |
1007 | 3615 } |
3616 | |
1708 | 3617 static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){ |
1007 | 3618 MpegEncContext * const s= (MpegEncContext *)c; |
1064 | 3619 const uint8_t *scantable= s->intra_scantable.permutated; |
11195 | 3620 LOCAL_ALIGNED_16(DCTELEM, temp, [64]); |
1007 | 3621 int i, last, run, bits, level, start_i; |
3622 const int esc_length= s->ac_esc_length; | |
3623 uint8_t * length; | |
3624 uint8_t * last_length; | |
1708 | 3625 |
3626 assert(h==8); | |
2967 | 3627 |
1013 | 3628 s->dsp.diff_pixels(temp, src1, src2, stride); |
1007 | 3629 |
1013 | 3630 s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i); |
3631 | |
3632 bits=0; | |
2967 | 3633 |
1007 | 3634 if (s->mb_intra) { |
2967 | 3635 start_i = 1; |
1007 | 3636 length = s->intra_ac_vlc_length; |
3637 last_length= s->intra_ac_vlc_last_length; | |
1013 | 3638 bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma |
1007 | 3639 } else { |
3640 start_i = 0; | |
3641 length = s->inter_ac_vlc_length; | |
3642 last_length= s->inter_ac_vlc_last_length; | |
3643 } | |
2967 | 3644 |
1013 | 3645 if(last>=start_i){ |
1007 | 3646 run=0; |
3647 for(i=start_i; i<last; i++){ | |
3648 int j= scantable[i]; | |
3649 level= temp[j]; | |
2967 | 3650 |
1007 | 3651 if(level){ |
3652 level+=64; | |
3653 if((level&(~127)) == 0){ | |
3654 bits+= length[UNI_AC_ENC_INDEX(run, level)]; | |
3655 }else | |
3656 bits+= esc_length; | |
3657 run=0; | |
3658 }else | |
3659 run++; | |
3660 } | |
3661 i= scantable[last]; | |
2967 | 3662 |
1013 | 3663 level= temp[i] + 64; |
2967 | 3664 |
1013 | 3665 assert(level - 64); |
2967 | 3666 |
1007 | 3667 if((level&(~127)) == 0){ |
3668 bits+= last_length[UNI_AC_ENC_INDEX(run, level)]; | |
3669 }else | |
3670 bits+= esc_length; | |
3671 } | |
3672 | |
3673 return bits; | |
3674 } | |
3675 | |
/* Intra "vertical SAD": sum of absolute differences between vertically
 * adjacent pixels of a single plane, unrolled 4-wide. */
#define VSAD_INTRA(size) \
static int vsad_intra##size##_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
    int sum = 0; \
    int i, j; \
\
    for (j = 1; j < h; j++) { \
        for (i = 0; i < size; i += 4) { \
            sum += FFABS(s[i    ] - s[i    +stride]) + FFABS(s[i + 1] - s[i + 1+stride]) \
                 + FFABS(s[i + 2] - s[i + 2+stride]) + FFABS(s[i + 3] - s[i + 3+stride]); \
        } \
        s += stride; \
    } \
\
    return sum; \
}
VSAD_INTRA(8)
VSAD_INTRA(16)
1729 | 3693 |
/* Inter "vertical SAD": absolute vertical gradient of the residual s1 - s2
 * over a 16-pixel-wide block. */
static int vsad16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
    int sum = 0;
    int x, y;

    for (y = 1; y < h; y++) {
        for (x = 0; x < 16; x++) {
            const int d = s1[x] - s2[x] - s1[x + stride] + s2[x + stride];
            sum += d >= 0 ? d : -d;   /* equivalent to FFABS(d) */
        }
        s1 += stride;
        s2 += stride;
    }

    return sum;
}
3708 | |
#define SQ(a) ((a)*(a))

/* Intra "vertical SSE": like VSAD_INTRA but with a squared (L2) metric. */
#define VSSE_INTRA(size) \
static int vsse_intra##size##_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
    int sum = 0; \
    int i, j; \
\
    for (j = 1; j < h; j++) { \
        for (i = 0; i < size; i += 4) { \
            sum += SQ(s[i    ] - s[i    +stride]) + SQ(s[i + 1] - s[i + 1+stride]) \
                 + SQ(s[i + 2] - s[i + 2+stride]) + SQ(s[i + 3] - s[i + 3+stride]); \
        } \
        s += stride; \
    } \
\
    return sum; \
}
VSSE_INTRA(8)
VSSE_INTRA(16)
1729 | 3727 |
/* Inter "vertical SSE": squared vertical gradient of the residual s1 - s2
 * over a 16-pixel-wide block. */
static int vsse16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
    int sum = 0;
    int x, y;

    for (y = 1; y < h; y++) {
        for (x = 0; x < 16; x++) {
            const int d = s1[x] - s2[x] - s1[x + stride] + s2[x + stride];
            sum += d * d;             /* equivalent to SQ(d) */
        }
        s1 += stride;
        s2 += stride;
    }

    return sum;
}
3742 | |
/* Sum of squared differences between an int8 vector and an int16 vector. */
static int ssd_int8_vs_int16_c(const int8_t *pix1, const int16_t *pix2,
                               int size){
    int err = 0, i;
    for (i = 0; i < size; i++) {
        const int d = pix1[i] - pix2[i];
        err += d * d;
    }
    return err;
}
3751 | |
6056
558c1fd0ee72
Fix typo in macro name: WARPER8_16_SQ --> WRAPPER8_16_SQ.
diego
parents:
6054
diff
changeset
|
3752 WRAPPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c) |
558c1fd0ee72
Fix typo in macro name: WARPER8_16_SQ --> WRAPPER8_16_SQ.
diego
parents:
6054
diff
changeset
|
3753 WRAPPER8_16_SQ(hadamard8_intra8x8_c, hadamard8_intra16_c) |
558c1fd0ee72
Fix typo in macro name: WARPER8_16_SQ --> WRAPPER8_16_SQ.
diego
parents:
6054
diff
changeset
|
3754 WRAPPER8_16_SQ(dct_sad8x8_c, dct_sad16_c) |
8590 | 3755 #if CONFIG_GPL |
6056
558c1fd0ee72
Fix typo in macro name: WARPER8_16_SQ --> WRAPPER8_16_SQ.
diego
parents:
6054
diff
changeset
|
3756 WRAPPER8_16_SQ(dct264_sad8x8_c, dct264_sad16_c) |
3013 | 3757 #endif |
6056
558c1fd0ee72
Fix typo in macro name: WARPER8_16_SQ --> WRAPPER8_16_SQ.
diego
parents:
6054
diff
changeset
|
3758 WRAPPER8_16_SQ(dct_max8x8_c, dct_max16_c) |
558c1fd0ee72
Fix typo in macro name: WARPER8_16_SQ --> WRAPPER8_16_SQ.
diego
parents:
6054
diff
changeset
|
3759 WRAPPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c) |
558c1fd0ee72
Fix typo in macro name: WARPER8_16_SQ --> WRAPPER8_16_SQ.
diego
parents:
6054
diff
changeset
|
3760 WRAPPER8_16_SQ(rd8x8_c, rd16_c) |
558c1fd0ee72
Fix typo in macro name: WARPER8_16_SQ --> WRAPPER8_16_SQ.
diego
parents:
6054
diff
changeset
|
3761 WRAPPER8_16_SQ(bit8x8_c, bit16_c) |
936 | 3762 |
3568
945caa35ee9a
sse and 3dnow implementations of float->int conversion and mdct windowing.
lorenm
parents:
3536
diff
changeset
|
/* In-place element-wise multiply: dst[i] *= src[i]. */
static void vector_fmul_c(float *dst, const float *src, int len){
    int k;
    for (k = 0; k < len; k++)
        dst[k] = dst[k] * src[k];
}
945caa35ee9a
sse and 3dnow implementations of float->int conversion and mdct windowing.
lorenm
parents:
3536
diff
changeset
|
3768 |
945caa35ee9a
sse and 3dnow implementations of float->int conversion and mdct windowing.
lorenm
parents:
3536
diff
changeset
|
/* dst[i] = src0[i] * src1[len-1-i]: multiply by src1 traversed backwards. */
static void vector_fmul_reverse_c(float *dst, const float *src0, const float *src1, int len){
    int k;
    for (k = 0; k < len; k++)
        dst[k] = src0[k] * src1[len - 1 - k];
}
945caa35ee9a
sse and 3dnow implementations of float->int conversion and mdct windowing.
lorenm
parents:
3536
diff
changeset
|
3775 |
10300
4d1b9ca628fc
Drop unused args from vector_fmul_add_add, simpify code, and rename
mru
parents:
10219
diff
changeset
|
/* dst[i] = src0[i] * src1[i] + src2[i]: element-wise multiply-add. */
static void vector_fmul_add_c(float *dst, const float *src0, const float *src1, const float *src2, int len){
    while (len-- > 0)
        *dst++ = *src0++ * *src1++ + *src2++;
}
945caa35ee9a
sse and 3dnow implementations of float->int conversion and mdct windowing.
lorenm
parents:
3536
diff
changeset
|
3781 |
/* MDCT overlap-add windowing:
 *   dst[k]          = src0[k]*win[2*len-1-k] - src1[len-1-k]*win[k]       + add_bias
 *   dst[2*len-1-k]  = src0[k]*win[k]        + src1[len-1-k]*win[2*len-1-k] + add_bias
 * for k in [0, len).  dst and win cover 2*len samples, src0/src1 len each. */
void ff_vector_fmul_window_c(float *dst, const float *src0, const float *src1, const float *win, float add_bias, int len){
    int k;
    for (k = 0; k < len; k++) {
        const float s0 = src0[k];
        const float s1 = src1[len - 1 - k];
        const float w0 = win[k];
        const float w1 = win[2*len - 1 - k];
        dst[k]             = s0 * w1 - s1 * w0 + add_bias;
        dst[2*len - 1 - k] = s0 * w0 + s1 * w1 + add_bias;
    }
}
3796 | |
/* dst[i] = src[i] * mul: scale a float vector by a scalar. */
static void vector_fmul_scalar_c(float *dst, const float *src, float mul,
                                 int len)
{
    while (len-- > 0)
        *dst++ = *src++ * mul;
}
3804 | |
/* Multiply src by a sequence of 2-element sub-vectors and a scalar:
 * one sub-vector from sv is consumed per output pair. */
static void vector_fmul_sv_scalar_2_c(float *dst, const float *src,
                                      const float **sv, float mul, int len)
{
    int i;
    for (i = 0; i < len; i += 2) {
        const float *v = *sv++;
        dst[i    ] = src[i    ] * v[0] * mul;
        dst[i + 1] = src[i + 1] * v[1] * mul;
    }
}
3814 | |
/* Multiply src by a sequence of 4-element sub-vectors and a scalar:
 * one sub-vector from sv is consumed per output quad. */
static void vector_fmul_sv_scalar_4_c(float *dst, const float *src,
                                      const float **sv, float mul, int len)
{
    int i;
    for (i = 0; i < len; i += 4) {
        const float *v = *sv++;
        dst[i    ] = src[i    ] * v[0] * mul;
        dst[i + 1] = src[i + 1] * v[1] * mul;
        dst[i + 2] = src[i + 2] * v[2] * mul;
        dst[i + 3] = src[i + 3] * v[3] * mul;
    }
}
3826 | |
/* Expand a sequence of 2-element sub-vectors, scaled by mul, into dst. */
static void sv_fmul_scalar_2_c(float *dst, const float **sv, float mul,
                               int len)
{
    int i;
    for (i = 0; i < len; i += 2) {
        const float *v = *sv++;
        dst[i    ] = v[0] * mul;
        dst[i + 1] = v[1] * mul;
    }
}
3836 | |
/* Expand a sequence of 4-element sub-vectors, scaled by mul, into dst. */
static void sv_fmul_scalar_4_c(float *dst, const float **sv, float mul,
                               int len)
{
    int i;
    for (i = 0; i < len; i += 4) {
        const float *v = *sv++;
        dst[i    ] = v[0] * mul;
        dst[i + 1] = v[1] * mul;
        dst[i + 2] = v[2] * mul;
        dst[i + 3] = v[3] * mul;
    }
}
3848 | |
/* In-place butterfly: (v1[i], v2[i]) <- (v1[i]+v2[i], v1[i]-v2[i]). */
static void butterflies_float_c(float *restrict v1, float *restrict v2,
                                int len)
{
    int i;
    for (i = 0; i < len; i++) {
        const float a = v1[i];
        const float b = v2[i];
        v1[i] = a + b;
        v2[i] = a - b;
    }
}
3859 | |
/* Dot product of two float vectors, accumulated in order. */
static float scalarproduct_float_c(const float *v1, const float *v2, int len)
{
    float acc = 0.0;
    int i;

    for (i = 0; i < len; i++)
        acc += v1[i] * v2[i];

    return acc;
}
3870 | |
/* Convert an int vector to float while scaling by mul. */
static void int32_to_float_fmul_scalar_c(float *dst, const int *src, float mul, int len){
    while (len-- > 0)
        *dst++ = *src++ * mul;
}
3876 | |
10104
0fa3d21b317e
SSE optimized vector_clipf(). 10% faster TwinVQ decoding.
vitor
parents:
10094
diff
changeset
|
/**
 * Clip one float, operating on its raw IEEE-754 bit pattern.
 * Valid only when the clip range straddles zero (min < 0 < max):
 * positive floats compare below `mini` (which holds min's bits with the
 * sign bit set), and `maxisign` is max's bits with the sign bit flipped.
 *
 * Fix: the sign-bit mask is written 1U<<31 — the previous 1<<31 left-shifts
 * into the sign bit of a signed int, which is undefined behavior in C.
 */
static inline uint32_t clipf_c_one(uint32_t a, uint32_t mini,
                                   uint32_t maxi, uint32_t maxisign)
{
    if (a > mini)
        return mini;                          /* negative and below min */
    else if ((a ^ (1U << 31)) > maxisign)
        return maxi;                          /* positive and above max */
    else
        return a;                             /* already inside [min, max] */
}
0fa3d21b317e
SSE optimized vector_clipf(). 10% faster TwinVQ decoding.
vitor
parents:
10094
diff
changeset
|
3885 |
/* Fast path of vector_clipf_c for ranges straddling zero: clips by comparing
 * raw IEEE-754 bit patterns via clipf_c_one, unrolled by 8 (len is assumed
 * to be a multiple of 8, matching the caller's contract).
 * NOTE(review): the float*/
/* -> uint32_t* casts are type-punning inherited from
 * the original; kept as-is to preserve the tuned code shape.
 * Fix: sign-bit mask is 1U<<31 — 1<<31 is signed-shift undefined behavior. */
static void vector_clipf_c_opposite_sign(float *dst, const float *src, float *min, float *max, int len){
    int i;
    uint32_t mini = *(uint32_t*)min;
    uint32_t maxi = *(uint32_t*)max;
    uint32_t maxisign = maxi ^ (1U << 31);
    uint32_t *dsti = (uint32_t*)dst;
    const uint32_t *srci = (const uint32_t*)src;
    for (i = 0; i < len; i += 8) {
        dsti[i + 0] = clipf_c_one(srci[i + 0], mini, maxi, maxisign);
        dsti[i + 1] = clipf_c_one(srci[i + 1], mini, maxi, maxisign);
        dsti[i + 2] = clipf_c_one(srci[i + 2], mini, maxi, maxisign);
        dsti[i + 3] = clipf_c_one(srci[i + 3], mini, maxi, maxisign);
        dsti[i + 4] = clipf_c_one(srci[i + 4], mini, maxi, maxisign);
        dsti[i + 5] = clipf_c_one(srci[i + 5], mini, maxi, maxisign);
        dsti[i + 6] = clipf_c_one(srci[i + 6], mini, maxi, maxisign);
        dsti[i + 7] = clipf_c_one(srci[i + 7], mini, maxi, maxisign);
    }
}
/* Clip each element of src into [min, max], unrolled by 8 (len is assumed to
 * be a multiple of 8).  A bit-twiddling fast path handles ranges that
 * straddle zero. */
static void vector_clipf_c(float *dst, const float *src, float min, float max, int len){
    int i;

    if (min < 0 && max > 0) {
        vector_clipf_c_opposite_sign(dst, src, &min, &max, len);
        return;
    }

    for (i = 0; i < len; i += 8) {
        dst[i    ] = av_clipf(src[i    ], min, max);
        dst[i + 1] = av_clipf(src[i + 1], min, max);
        dst[i + 2] = av_clipf(src[i + 2], min, max);
        dst[i + 3] = av_clipf(src[i + 3], min, max);
        dst[i + 4] = av_clipf(src[i + 4], min, max);
        dst[i + 5] = av_clipf(src[i + 5], min, max);
        dst[i + 6] = av_clipf(src[i + 6], min, max);
        dst[i + 7] = av_clipf(src[i + 7], min, max);
    }
}
0fa3d21b317e
SSE optimized vector_clipf(). 10% faster TwinVQ decoding.
vitor
parents:
10094
diff
changeset
|
3921 |
7261 | 3922 static av_always_inline int float_to_int16_one(const float *src){ |
3923 int_fast32_t tmp = *(const int32_t*)src; | |
3924 if(tmp & 0xf0000){ | |
3925 tmp = (0x43c0ffff - tmp)>>31; | |
3926 // is this faster on some gcc/cpu combinations? | |
3927 // if(tmp > 0x43c0ffff) tmp = 0xFFFF; | |
3928 // else tmp = 0; | |
3929 } | |
3930 return tmp - 0x8000; | |
3931 } | |
3932 | |
/* Convert a float buffer to clipped int16 (see float_to_int16_one).
 * Fix: loop counter is long to match len — an int counter overflows
 * (undefined behavior) for len > INT_MAX. */
void ff_float_to_int16_c(int16_t *dst, const float *src, long len){
    long i;
    for (i = 0; i < len; i++)
        dst[i] = float_to_int16_one(src + i);
}
3938 | |
7286
e267f2519248
float_to_int16_interleave: change src to an array of pointers instead of assuming it's contiguous.
lorenm
parents:
7263
diff
changeset
|
/* Interleave `channels` planar float buffers into one clipped int16 stream.
 * The stereo case is special-cased for speed.
 * Fix: sample counters are long to match len — int counters overflow
 * (undefined behavior) for len > INT_MAX. */
void ff_float_to_int16_interleave_c(int16_t *dst, const float **src, long len, int channels){
    long i, j;
    int c;
    if (channels == 2) {
        for (i = 0; i < len; i++) {
            dst[2*i    ] = float_to_int16_one(src[0] + i);
            dst[2*i + 1] = float_to_int16_one(src[1] + i);
        }
    } else {
        for (c = 0; c < channels; c++)
            for (i = 0, j = c; i < len; i++, j += channels)
                dst[j] = float_to_int16_one(src[c] + i);
    }
}
945caa35ee9a
sse and 3dnow implementations of float->int conversion and mdct windowing.
lorenm
parents:
3536
diff
changeset
|
3952 |
/* Dot product of two int16 vectors; each product is right-shifted by `shift`
 * before being accumulated.  (Right shift of a negative product is
 * implementation-defined — arithmetic shift everywhere FFmpeg runs —
 * and is preserved exactly as in the original.) */
static int32_t scalarproduct_int16_c(const int16_t *v1, const int16_t *v2, int order, int shift)
{
    int32_t sum = 0;
    int k;

    for (k = 0; k < order; k++)
        sum += (v1[k] * v2[k]) >> shift;

    return sum;
}
87b1dfb5a98d
Add several vector functions used by Monkey's Audio decoder to dsputil
kostya
parents:
6719
diff
changeset
|
3962 |
/* Returns dot(v1, v2) computed on the pre-update values of v1, while also
 * updating v1 in place: v1[i] += mul * v3[i]. */
static int32_t scalarproduct_and_madd_int16_c(int16_t *v1, const int16_t *v2, const int16_t *v3, int order, int mul)
{
    int32_t sum = 0;
    int k;

    for (k = 0; k < order; k++) {
        sum += v1[k] * v2[k];   /* uses v1 before the update below */
        v1[k] += mul * v3[k];
    }

    return sum;
}
3972 | |
#define W0 2048
#define W1 2841 /* 2048*sqrt (2)*cos (1*pi/16) */
#define W2 2676 /* 2048*sqrt (2)*cos (2*pi/16) */
#define W3 2408 /* 2048*sqrt (2)*cos (3*pi/16) */
#define W4 2048 /* 2048*sqrt (2)*cos (4*pi/16) */
#define W5 1609 /* 2048*sqrt (2)*cos (5*pi/16) */
#define W6 1108 /* 2048*sqrt (2)*cos (6*pi/16) */
#define W7 565  /* 2048*sqrt (2)*cos (7*pi/16) */

/* One horizontal pass of the WMV2 IDCT over a row of 8 coefficients. */
static void wmv2_idct_row(short * b)
{
    int t1, t2;
    int a0, a1, a2, a3, a4, a5, a6, a7;

    /* step 1: odd/even butterflies */
    a1 = W1*b[1] + W7*b[7];
    a7 = W7*b[1] - W1*b[7];
    a5 = W5*b[5] + W3*b[3];
    a3 = W3*b[5] - W5*b[3];
    a2 = W2*b[2] + W6*b[6];
    a6 = W6*b[2] - W2*b[6];
    a0 = W0*b[0] + W0*b[4];
    a4 = W0*b[0] - W0*b[4];
    /* step 2: 181/256 ~= sqrt(2)/2 rotation of the odd part (coeffs 1,3,5,7) */
    t1 = (181*(a1 - a5 + a7 - a3) + 128) >> 8;
    t2 = (181*(a1 - a5 - a7 + a3) + 128) >> 8;
    /* step 3: output butterflies, rounded back by 8 fractional bits */
    b[0] = (a0 + a2 + a1 + a5 + 128) >> 8;
    b[1] = (a4 + a6      + t1 + 128) >> 8;
    b[2] = (a4 - a6      + t2 + 128) >> 8;
    b[3] = (a0 - a2 + a7 + a3 + 128) >> 8;
    b[4] = (a0 - a2 - a7 - a3 + 128) >> 8;
    b[5] = (a4 - a6      - t2 + 128) >> 8;
    b[6] = (a4 + a6      - t1 + 128) >> 8;
    b[7] = (a0 + a2 - a1 - a5 + 128) >> 8;
}

/* One vertical pass of the WMV2 IDCT (stride 8), with extended
 * intermediate precision: step-1 terms are pre-scaled by >>3. */
static void wmv2_idct_col(short * b)
{
    int t1, t2;
    int a0, a1, a2, a3, a4, a5, a6, a7;

    a1 = (W1*b[8*1] + W7*b[8*7] + 4) >> 3;
    a7 = (W7*b[8*1] - W1*b[8*7] + 4) >> 3;
    a5 = (W5*b[8*5] + W3*b[8*3] + 4) >> 3;
    a3 = (W3*b[8*5] - W5*b[8*3] + 4) >> 3;
    a2 = (W2*b[8*2] + W6*b[8*6] + 4) >> 3;
    a6 = (W6*b[8*2] - W2*b[8*6] + 4) >> 3;
    a0 = (W0*b[8*0] + W0*b[8*4]    ) >> 3;
    a4 = (W0*b[8*0] - W0*b[8*4]    ) >> 3;

    t1 = (181*(a1 - a5 + a7 - a3) + 128) >> 8;
    t2 = (181*(a1 - a5 - a7 + a3) + 128) >> 8;

    b[8*0] = (a0 + a2 + a1 + a5 + 8192) >> 14;
    b[8*1] = (a4 + a6      + t1 + 8192) >> 14;
    b[8*2] = (a4 - a6      + t2 + 8192) >> 14;
    b[8*3] = (a0 - a2 + a7 + a3 + 8192) >> 14;
    b[8*4] = (a0 - a2 - a7 - a3 + 8192) >> 14;
    b[8*5] = (a4 - a6      - t2 + 8192) >> 14;
    b[8*6] = (a4 + a6      - t1 + 8192) >> 14;
    b[8*7] = (a0 + a2 - a1 - a5 + 8192) >> 14;
}

/* Full in-place 8x8 WMV2 IDCT: all rows first, then all columns. */
void ff_wmv2_idct_c(short * block){
    int i;

    for (i = 0; i < 64; i += 8)
        wmv2_idct_row(block + i);
    for (i = 0; i < 8; i++)
        wmv2_idct_col(block + i);
}
1092 | 4045 /* XXX: those functions should be suppressed ASAP when all IDCTs are |
4046 converted */ | |
5887 | 4047 static void ff_wmv2_idct_put_c(uint8_t *dest, int line_size, DCTELEM *block) |
4048 { | |
4049 ff_wmv2_idct_c(block); | |
4050 put_pixels_clamped_c(block, dest, line_size); | |
4051 } | |
4052 static void ff_wmv2_idct_add_c(uint8_t *dest, int line_size, DCTELEM *block) | |
4053 { | |
4054 ff_wmv2_idct_c(block); | |
4055 add_pixels_clamped_c(block, dest, line_size); | |
4056 } | |
1092 | 4057 static void ff_jref_idct_put(uint8_t *dest, int line_size, DCTELEM *block) |
4058 { | |
4059 j_rev_dct (block); | |
4060 put_pixels_clamped_c(block, dest, line_size); | |
4061 } | |
4062 static void ff_jref_idct_add(uint8_t *dest, int line_size, DCTELEM *block) | |
4063 { | |
4064 j_rev_dct (block); | |
4065 add_pixels_clamped_c(block, dest, line_size); | |
4066 } | |
4067 | |
2256 | 4068 static void ff_jref_idct4_put(uint8_t *dest, int line_size, DCTELEM *block) |
4069 { | |
4070 j_rev_dct4 (block); | |
4071 put_pixels_clamped4_c(block, dest, line_size); | |
4072 } | |
4073 static void ff_jref_idct4_add(uint8_t *dest, int line_size, DCTELEM *block) | |
4074 { | |
4075 j_rev_dct4 (block); | |
4076 add_pixels_clamped4_c(block, dest, line_size); | |
4077 } | |
4078 | |
2257 | 4079 static void ff_jref_idct2_put(uint8_t *dest, int line_size, DCTELEM *block) |
4080 { | |
4081 j_rev_dct2 (block); | |
4082 put_pixels_clamped2_c(block, dest, line_size); | |
4083 } | |
4084 static void ff_jref_idct2_add(uint8_t *dest, int line_size, DCTELEM *block) | |
4085 { | |
4086 j_rev_dct2 (block); | |
4087 add_pixels_clamped2_c(block, dest, line_size); | |
4088 } | |
4089 | |
2259 | 4090 static void ff_jref_idct1_put(uint8_t *dest, int line_size, DCTELEM *block) |
4091 { | |
4176 | 4092 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; |
2259 | 4093 |
4094 dest[0] = cm[(block[0] + 4)>>3]; | |
4095 } | |
4096 static void ff_jref_idct1_add(uint8_t *dest, int line_size, DCTELEM *block) | |
4097 { | |
4176 | 4098 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; |
2259 | 4099 |
4100 dest[0] = cm[dest[0] + ((block[0] + 4)>>3)]; | |
4101 } | |
4102 | |
5143 | 4103 static void just_return(void *mem av_unused, int stride av_unused, int h av_unused) { return; } |
3215
06f98047ff26
prefetch pixels for future motion compensation. 2-5% faster h264.
lorenm
parents:
3199
diff
changeset
|
4104 |
1201 | 4105 /* init static data */ |
10867 | 4106 av_cold void dsputil_static_init(void) |
0 | 4107 { |
751 | 4108 int i; |
0 | 4109 |
4176 | 4110 for(i=0;i<256;i++) ff_cropTbl[i + MAX_NEG_CROP] = i; |
1201 | 4111 for(i=0;i<MAX_NEG_CROP;i++) { |
4176 | 4112 ff_cropTbl[i] = 0; |
4113 ff_cropTbl[i + MAX_NEG_CROP + 256] = 255; | |
1201 | 4114 } |
2967 | 4115 |
1201 | 4116 for(i=0;i<512;i++) { |
4179 | 4117 ff_squareTbl[i] = (i - 256) * (i - 256); |
1201 | 4118 } |
2967 | 4119 |
4197 | 4120 for(i=0; i<64; i++) inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1; |
1201 | 4121 } |
0 | 4122 |
4281
de525a2b41db
ff_check_alignment to warn the user about a missaligned stack
michael
parents:
4240
diff
changeset
|
4123 int ff_check_alignment(void){ |
de525a2b41db
ff_check_alignment to warn the user about a missaligned stack
michael
parents:
4240
diff
changeset
|
4124 static int did_fail=0; |
11369 | 4125 DECLARE_ALIGNED(16, int, aligned); |
4281
de525a2b41db
ff_check_alignment to warn the user about a missaligned stack
michael
parents:
4240
diff
changeset
|
4126 |
9259 | 4127 if((intptr_t)&aligned & 15){ |
4281
de525a2b41db
ff_check_alignment to warn the user about a missaligned stack
michael
parents:
4240
diff
changeset
|
4128 if(!did_fail){ |
8590 | 4129 #if HAVE_MMX || HAVE_ALTIVEC |
4281
de525a2b41db
ff_check_alignment to warn the user about a missaligned stack
michael
parents:
4240
diff
changeset
|
4130 av_log(NULL, AV_LOG_ERROR, |
4292 | 4131 "Compiler did not align stack variables. Libavcodec has been miscompiled\n" |
4132 "and may be very slow or crash. This is not a bug in libavcodec,\n" | |
5542
b0a566346fb1
Add attribute that forces alignment of stack to functions that need it.
ramiro
parents:
5520
diff
changeset
|
4133 "but in the compiler. You may try recompiling using gcc >= 4.2.\n" |
b0a566346fb1
Add attribute that forces alignment of stack to functions that need it.
ramiro
parents:
5520
diff
changeset
|
4134 "Do not report crashes to FFmpeg developers.\n"); |
4281
de525a2b41db
ff_check_alignment to warn the user about a missaligned stack
michael
parents:
4240
diff
changeset
|
4135 #endif |
de525a2b41db
ff_check_alignment to warn the user about a missaligned stack
michael
parents:
4240
diff
changeset
|
4136 did_fail=1; |
de525a2b41db
ff_check_alignment to warn the user about a missaligned stack
michael
parents:
4240
diff
changeset
|
4137 } |
de525a2b41db
ff_check_alignment to warn the user about a missaligned stack
michael
parents:
4240
diff
changeset
|
4138 return -1; |
de525a2b41db
ff_check_alignment to warn the user about a missaligned stack
michael
parents:
4240
diff
changeset
|
4139 } |
de525a2b41db
ff_check_alignment to warn the user about a missaligned stack
michael
parents:
4240
diff
changeset
|
4140 return 0; |
de525a2b41db
ff_check_alignment to warn the user about a missaligned stack
michael
parents:
4240
diff
changeset
|
4141 } |
861 | 4142 |
10867 | 4143 av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx) |
1201 | 4144 { |
4145 int i; | |
0 | 4146 |
4281
de525a2b41db
ff_check_alignment to warn the user about a missaligned stack
michael
parents:
4240
diff
changeset
|
4147 ff_check_alignment(); |
de525a2b41db
ff_check_alignment to warn the user about a missaligned stack
michael
parents:
4240
diff
changeset
|
4148 |
8590 | 4149 #if CONFIG_ENCODERS |
1567 | 4150 if(avctx->dct_algo==FF_DCT_FASTINT) { |
1092 | 4151 c->fdct = fdct_ifast; |
2979 | 4152 c->fdct248 = fdct_ifast248; |
2967 | 4153 } |
1567 | 4154 else if(avctx->dct_algo==FF_DCT_FAAN) { |
1557 | 4155 c->fdct = ff_faandct; |
2979 | 4156 c->fdct248 = ff_faandct248; |
2967 | 4157 } |
1567 | 4158 else { |
1092 | 4159 c->fdct = ff_jpeg_fdct_islow; //slow/accurate/default |
2979 | 4160 c->fdct248 = ff_fdct248_islow; |
1567 | 4161 } |
1092 | 4162 #endif //CONFIG_ENCODERS |
4163 | |
2256 | 4164 if(avctx->lowres==1){ |
8596
68e959302527
replace all occurrence of ENABLE_ by the corresponding CONFIG_, HAVE_ or ARCH_
aurel
parents:
8590
diff
changeset
|
4165 if(avctx->idct_algo==FF_IDCT_INT || avctx->idct_algo==FF_IDCT_AUTO || !CONFIG_H264_DECODER){ |
2272
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
2259
diff
changeset
|
4166 c->idct_put= ff_jref_idct4_put; |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
2259
diff
changeset
|
4167 c->idct_add= ff_jref_idct4_add; |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
2259
diff
changeset
|
4168 }else{ |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
2259
diff
changeset
|
4169 c->idct_put= ff_h264_lowres_idct_put_c; |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
2259
diff
changeset
|
4170 c->idct_add= ff_h264_lowres_idct_add_c; |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
2259
diff
changeset
|
4171 } |
2256 | 4172 c->idct = j_rev_dct4; |
1092 | 4173 c->idct_permutation_type= FF_NO_IDCT_PERM; |
2257 | 4174 }else if(avctx->lowres==2){ |
4175 c->idct_put= ff_jref_idct2_put; | |
4176 c->idct_add= ff_jref_idct2_add; | |
4177 c->idct = j_rev_dct2; | |
4178 c->idct_permutation_type= FF_NO_IDCT_PERM; | |
2259 | 4179 }else if(avctx->lowres==3){ |
4180 c->idct_put= ff_jref_idct1_put; | |
4181 c->idct_add= ff_jref_idct1_add; | |
4182 c->idct = j_rev_dct1; | |
4183 c->idct_permutation_type= FF_NO_IDCT_PERM; | |
2256 | 4184 }else{ |
4185 if(avctx->idct_algo==FF_IDCT_INT){ | |
4186 c->idct_put= ff_jref_idct_put; | |
4187 c->idct_add= ff_jref_idct_add; | |
4188 c->idct = j_rev_dct; | |
4189 c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM; | |
9975
d6d7e8d4a04d
Do not redundantly check for both CONFIG_THEORA_DECODER and CONFIG_VP3_DECODER.
diego
parents:
9586
diff
changeset
|
4190 }else if((CONFIG_VP3_DECODER || CONFIG_VP5_DECODER || CONFIG_VP6_DECODER ) && |
5007 | 4191 avctx->idct_algo==FF_IDCT_VP3){ |
2693 | 4192 c->idct_put= ff_vp3_idct_put_c; |
4193 c->idct_add= ff_vp3_idct_add_c; | |
4194 c->idct = ff_vp3_idct_c; | |
4195 c->idct_permutation_type= FF_NO_IDCT_PERM; | |
5887 | 4196 }else if(avctx->idct_algo==FF_IDCT_WMV2){ |
4197 c->idct_put= ff_wmv2_idct_put_c; | |
4198 c->idct_add= ff_wmv2_idct_add_c; | |
4199 c->idct = ff_wmv2_idct_c; | |
4200 c->idct_permutation_type= FF_NO_IDCT_PERM; | |
6407 | 4201 }else if(avctx->idct_algo==FF_IDCT_FAAN){ |
4202 c->idct_put= ff_faanidct_put; | |
4203 c->idct_add= ff_faanidct_add; | |
4204 c->idct = ff_faanidct; | |
4205 c->idct_permutation_type= FF_NO_IDCT_PERM; | |
8596
68e959302527
replace all occurrence of ENABLE_ by the corresponding CONFIG_, HAVE_ or ARCH_
aurel
parents:
8590
diff
changeset
|
4206 }else if(CONFIG_EATGQ_DECODER && avctx->idct_algo==FF_IDCT_EA) { |
8120 | 4207 c->idct_put= ff_ea_idct_put_c; |
4208 c->idct_permutation_type= FF_NO_IDCT_PERM; | |
11231 | 4209 }else if(CONFIG_BINK_DECODER && avctx->idct_algo==FF_IDCT_BINK) { |
4210 c->idct = ff_bink_idct_c; | |
4211 c->idct_add = ff_bink_idct_add_c; | |
4212 c->idct_put = ff_bink_idct_put_c; | |
4213 c->idct_permutation_type = FF_NO_IDCT_PERM; | |
2256 | 4214 }else{ //accurate/default |
6001 | 4215 c->idct_put= ff_simple_idct_put; |
4216 c->idct_add= ff_simple_idct_add; | |
4217 c->idct = ff_simple_idct; | |
2256 | 4218 c->idct_permutation_type= FF_NO_IDCT_PERM; |
4219 } | |
1092 | 4220 } |
4221 | |
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
764
diff
changeset
|
4222 c->get_pixels = get_pixels_c; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
764
diff
changeset
|
4223 c->diff_pixels = diff_pixels_c; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
764
diff
changeset
|
4224 c->put_pixels_clamped = put_pixels_clamped_c; |
1984
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
4225 c->put_signed_pixels_clamped = put_signed_pixels_clamped_c; |
11231 | 4226 c->put_pixels_nonclamped = put_pixels_nonclamped_c; |
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
764
diff
changeset
|
4227 c->add_pixels_clamped = add_pixels_clamped_c; |
2763 | 4228 c->add_pixels8 = add_pixels8_c; |
4229 c->add_pixels4 = add_pixels4_c; | |
4988
689490842cf5
factor sum_abs_dctelem out of dct_sad, and simd it.
lorenm
parents:
4749
diff
changeset
|
4230 c->sum_abs_dctelem = sum_abs_dctelem_c; |
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
764
diff
changeset
|
4231 c->gmc1 = gmc1_c; |
3248
7aa9f80e7954
mmx implementation of 3-point GMC. (5x faster than C)
lorenm
parents:
3245
diff
changeset
|
4232 c->gmc = ff_gmc_c; |
8288 | 4233 c->clear_block = clear_block_c; |
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
764
diff
changeset
|
4234 c->clear_blocks = clear_blocks_c; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
764
diff
changeset
|
4235 c->pix_sum = pix_sum_c; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
764
diff
changeset
|
4236 c->pix_norm1 = pix_norm1_c; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
764
diff
changeset
|
4237 |
11231 | 4238 c->fill_block_tab[0] = fill_block16_c; |
4239 c->fill_block_tab[1] = fill_block8_c; | |
4240 c->scale_block = scale_block_c; | |
4241 | |
859 | 4242 /* TODO [0] 16 [1] 8 */ |
1708 | 4243 c->pix_abs[0][0] = pix_abs16_c; |
4244 c->pix_abs[0][1] = pix_abs16_x2_c; | |
4245 c->pix_abs[0][2] = pix_abs16_y2_c; | |
4246 c->pix_abs[0][3] = pix_abs16_xy2_c; | |
4247 c->pix_abs[1][0] = pix_abs8_c; | |
4248 c->pix_abs[1][1] = pix_abs8_x2_c; | |
4249 c->pix_abs[1][2] = pix_abs8_y2_c; | |
4250 c->pix_abs[1][3] = pix_abs8_xy2_c; | |
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
764
diff
changeset
|
4251 |
859 | 4252 #define dspfunc(PFX, IDX, NUM) \ |
4253 c->PFX ## _pixels_tab[IDX][0] = PFX ## _pixels ## NUM ## _c; \ | |
4254 c->PFX ## _pixels_tab[IDX][1] = PFX ## _pixels ## NUM ## _x2_c; \ | |
4255 c->PFX ## _pixels_tab[IDX][2] = PFX ## _pixels ## NUM ## _y2_c; \ | |
4256 c->PFX ## _pixels_tab[IDX][3] = PFX ## _pixels ## NUM ## _xy2_c | |
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
764
diff
changeset
|
4257 |
859 | 4258 dspfunc(put, 0, 16); |
4259 dspfunc(put_no_rnd, 0, 16); | |
4260 dspfunc(put, 1, 8); | |
4261 dspfunc(put_no_rnd, 1, 8); | |
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
4262 dspfunc(put, 2, 4); |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
4263 dspfunc(put, 3, 2); |
0 | 4264 |
859 | 4265 dspfunc(avg, 0, 16); |
4266 dspfunc(avg_no_rnd, 0, 16); | |
4267 dspfunc(avg, 1, 8); | |
4268 dspfunc(avg_no_rnd, 1, 8); | |
1319 | 4269 dspfunc(avg, 2, 4); |
4270 dspfunc(avg, 3, 2); | |
859 | 4271 #undef dspfunc |
857 | 4272 |
1864 | 4273 c->put_no_rnd_pixels_l2[0]= put_no_rnd_pixels16_l2_c; |
4274 c->put_no_rnd_pixels_l2[1]= put_no_rnd_pixels8_l2_c; | |
4275 | |
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
4276 c->put_tpel_pixels_tab[ 0] = put_tpel_pixels_mc00_c; |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
4277 c->put_tpel_pixels_tab[ 1] = put_tpel_pixels_mc10_c; |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
4278 c->put_tpel_pixels_tab[ 2] = put_tpel_pixels_mc20_c; |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
4279 c->put_tpel_pixels_tab[ 4] = put_tpel_pixels_mc01_c; |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
4280 c->put_tpel_pixels_tab[ 5] = put_tpel_pixels_mc11_c; |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
4281 c->put_tpel_pixels_tab[ 6] = put_tpel_pixels_mc21_c; |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
4282 c->put_tpel_pixels_tab[ 8] = put_tpel_pixels_mc02_c; |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
4283 c->put_tpel_pixels_tab[ 9] = put_tpel_pixels_mc12_c; |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
4284 c->put_tpel_pixels_tab[10] = put_tpel_pixels_mc22_c; |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
4285 |
1319 | 4286 c->avg_tpel_pixels_tab[ 0] = avg_tpel_pixels_mc00_c; |
4287 c->avg_tpel_pixels_tab[ 1] = avg_tpel_pixels_mc10_c; | |
4288 c->avg_tpel_pixels_tab[ 2] = avg_tpel_pixels_mc20_c; | |
4289 c->avg_tpel_pixels_tab[ 4] = avg_tpel_pixels_mc01_c; | |
4290 c->avg_tpel_pixels_tab[ 5] = avg_tpel_pixels_mc11_c; | |
4291 c->avg_tpel_pixels_tab[ 6] = avg_tpel_pixels_mc21_c; | |
4292 c->avg_tpel_pixels_tab[ 8] = avg_tpel_pixels_mc02_c; | |
4293 c->avg_tpel_pixels_tab[ 9] = avg_tpel_pixels_mc12_c; | |
4294 c->avg_tpel_pixels_tab[10] = avg_tpel_pixels_mc22_c; | |
4295 | |
859 | 4296 #define dspfunc(PFX, IDX, NUM) \ |
4297 c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_c; \ | |
4298 c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_c; \ | |
4299 c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_c; \ | |
4300 c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_c; \ | |
4301 c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_c; \ | |
4302 c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_c; \ | |
4303 c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_c; \ | |
4304 c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_c; \ | |
4305 c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_c; \ | |
4306 c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_c; \ | |
4307 c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_c; \ | |
4308 c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_c; \ | |
4309 c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_c; \ | |
4310 c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_c; \ | |
4311 c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_c; \ | |
4312 c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_c | |
857 | 4313 |
859 | 4314 dspfunc(put_qpel, 0, 16); |
4315 dspfunc(put_no_rnd_qpel, 0, 16); | |
4316 | |
4317 dspfunc(avg_qpel, 0, 16); | |
4318 /* dspfunc(avg_no_rnd_qpel, 0, 16); */ | |
857 | 4319 |
859 | 4320 dspfunc(put_qpel, 1, 8); |
4321 dspfunc(put_no_rnd_qpel, 1, 8); | |
4322 | |
4323 dspfunc(avg_qpel, 1, 8); | |
4324 /* dspfunc(avg_no_rnd_qpel, 1, 8); */ | |
1168 | 4325 |
4326 dspfunc(put_h264_qpel, 0, 16); | |
4327 dspfunc(put_h264_qpel, 1, 8); | |
4328 dspfunc(put_h264_qpel, 2, 4); | |
3020
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
4329 dspfunc(put_h264_qpel, 3, 2); |
1168 | 4330 dspfunc(avg_h264_qpel, 0, 16); |
4331 dspfunc(avg_h264_qpel, 1, 8); | |
4332 dspfunc(avg_h264_qpel, 2, 4); | |
4333 | |
859 | 4334 #undef dspfunc |
1168 | 4335 c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_c; |
4336 c->put_h264_chroma_pixels_tab[1]= put_h264_chroma_mc4_c; | |
4337 c->put_h264_chroma_pixels_tab[2]= put_h264_chroma_mc2_c; | |
4338 c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_c; | |
4339 c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_c; | |
4340 c->avg_h264_chroma_pixels_tab[2]= avg_h264_chroma_mc2_c; | |
9439
ef3a7b711cc0
Rename put_no_rnd_h264_chroma* to reflect its usage in VC1 only
conrad
parents:
9437
diff
changeset
|
4341 c->put_no_rnd_vc1_chroma_pixels_tab[0]= put_no_rnd_vc1_chroma_mc8_c; |
9440 | 4342 c->avg_no_rnd_vc1_chroma_pixels_tab[0]= avg_no_rnd_vc1_chroma_mc8_c; |
857 | 4343 |
6437 | 4344 c->draw_edges = draw_edges_c; |
4345 | |
9585 | 4346 #if CONFIG_MLP_DECODER || CONFIG_TRUEHD_DECODER |
4347 ff_mlp_init(c, avctx); | |
4348 #endif | |
9995
3141f69e3905
Do not check for both CONFIG_VC1_DECODER and CONFIG_WMV3_DECODER,
diego
parents:
9975
diff
changeset
|
4349 #if CONFIG_VC1_DECODER |
3526 | 4350 ff_vc1dsp_init(c,avctx); |
4351 #endif | |
9995
3141f69e3905
Do not check for both CONFIG_VC1_DECODER and CONFIG_WMV3_DECODER,
diego
parents:
9975
diff
changeset
|
4352 #if CONFIG_WMV2_DECODER || CONFIG_VC1_DECODER |
5887 | 4353 ff_intrax8dsp_init(c,avctx); |
4354 #endif | |
8590 | 4355 #if CONFIG_RV30_DECODER |
8410 | 4356 ff_rv30dsp_init(c,avctx); |
4357 #endif | |
8590 | 4358 #if CONFIG_RV40_DECODER |
8232 | 4359 ff_rv40dsp_init(c,avctx); |
4360 c->put_rv40_qpel_pixels_tab[0][15] = put_rv40_qpel16_mc33_c; | |
4361 c->avg_rv40_qpel_pixels_tab[0][15] = avg_rv40_qpel16_mc33_c; | |
4362 c->put_rv40_qpel_pixels_tab[1][15] = put_rv40_qpel8_mc33_c; | |
4363 c->avg_rv40_qpel_pixels_tab[1][15] = avg_rv40_qpel8_mc33_c; | |
4364 #endif | |
3395
adccbf4a1040
CAVS decoder by (Stefan Gehrer stefan.gehrer gmx.de)
michael
parents:
3373
diff
changeset
|
4365 |
936 | 4366 c->put_mspel_pixels_tab[0]= put_mspel8_mc00_c; |
4367 c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c; | |
4368 c->put_mspel_pixels_tab[2]= put_mspel8_mc20_c; | |
4369 c->put_mspel_pixels_tab[3]= put_mspel8_mc30_c; | |
4370 c->put_mspel_pixels_tab[4]= put_mspel8_mc02_c; | |
4371 c->put_mspel_pixels_tab[5]= put_mspel8_mc12_c; | |
4372 c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c; | |
4373 c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c; | |
2967 | 4374 |
1708 | 4375 #define SET_CMP_FUNC(name) \ |
4376 c->name[0]= name ## 16_c;\ | |
4377 c->name[1]= name ## 8x8_c; | |
2967 | 4378 |
1708 | 4379 SET_CMP_FUNC(hadamard8_diff) |
1729 | 4380 c->hadamard8_diff[4]= hadamard8_intra16_c; |
8978 | 4381 c->hadamard8_diff[5]= hadamard8_intra8x8_c; |
1708 | 4382 SET_CMP_FUNC(dct_sad) |
2382 | 4383 SET_CMP_FUNC(dct_max) |
8590 | 4384 #if CONFIG_GPL |
3010
533c6386eca9
8x8 integer dct from x264 as cmp function (under CONFIG_GPL)
michael
parents:
2979
diff
changeset
|
4385 SET_CMP_FUNC(dct264_sad) |
3013 | 4386 #endif |
1708 | 4387 c->sad[0]= pix_abs16_c; |
4388 c->sad[1]= pix_abs8_c; | |
4389 c->sse[0]= sse16_c; | |
4390 c->sse[1]= sse8_c; | |
2184 | 4391 c->sse[2]= sse4_c; |
1708 | 4392 SET_CMP_FUNC(quant_psnr) |
4393 SET_CMP_FUNC(rd) | |
4394 SET_CMP_FUNC(bit) | |
1729 | 4395 c->vsad[0]= vsad16_c; |
4396 c->vsad[4]= vsad_intra16_c; | |
8978 | 4397 c->vsad[5]= vsad_intra8_c; |
1729 | 4398 c->vsse[0]= vsse16_c; |
4399 c->vsse[4]= vsse_intra16_c; | |
8978 | 4400 c->vsse[5]= vsse_intra8_c; |
2065
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
4401 c->nsse[0]= nsse16_c; |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
4402 c->nsse[1]= nsse8_c; |
11485 | 4403 #if CONFIG_DWT |
4404 ff_dsputil_init_dwt(c); | |
3373
b8996cc5ccae
Disable w53 and w97 cmp methods when snow encoder is disabled
gpoirier
parents:
3323
diff
changeset
|
4405 #endif |
2184 | 4406 |
4749 | 4407 c->ssd_int8_vs_int16 = ssd_int8_vs_int16_c; |
4408 | |
866 | 4409 c->add_bytes= add_bytes_c; |
6384 | 4410 c->add_bytes_l2= add_bytes_l2_c; |
866 | 4411 c->diff_bytes= diff_bytes_c; |
8760 | 4412 c->add_hfyu_median_prediction= add_hfyu_median_prediction_c; |
1527 | 4413 c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_c; |
10370 | 4414 c->add_hfyu_left_prediction = add_hfyu_left_prediction_c; |
4415 c->add_hfyu_left_prediction_bgr32 = add_hfyu_left_prediction_bgr32_c; | |
1273 | 4416 c->bswap_buf= bswap_buf; |
8590 | 4417 #if CONFIG_PNG_DECODER |
6384 | 4418 c->add_png_paeth_prediction= ff_add_png_paeth_prediction; |
4419 #endif | |
2633 | 4420 |
10749
5cca4b6c459d
Get rid of pointless CONFIG_ANY_H263 preprocessor definition.
diego
parents:
10748
diff
changeset
|
4421 if (CONFIG_H263_DECODER || CONFIG_H263_ENCODER) { |
5278 | 4422 c->h263_h_loop_filter= h263_h_loop_filter_c; |
4423 c->h263_v_loop_filter= h263_v_loop_filter_c; | |
5277
7b3fcb7c61ce
Avoid linking with h263.c functions when the relevant codecs
aurel
parents:
5256
diff
changeset
|
4424 } |
2967 | 4425 |
9975
d6d7e8d4a04d
Do not redundantly check for both CONFIG_THEORA_DECODER and CONFIG_VP3_DECODER.
diego
parents:
9586
diff
changeset
|
4426 if (CONFIG_VP3_DECODER) { |
7995 | 4427 c->vp3_h_loop_filter= ff_vp3_h_loop_filter_c; |
4428 c->vp3_v_loop_filter= ff_vp3_v_loop_filter_c; | |
11637 | 4429 c->vp3_idct_dc_add= ff_vp3_idct_dc_add_c; |
7995 | 4430 } |
8785
bee83b3f9a6b
move vp6_filter_diag4() to a new vp6dsp.c file and use it throught dsputil
aurel
parents:
8760
diff
changeset
|
4431 if (CONFIG_VP6_DECODER) { |
bee83b3f9a6b
move vp6_filter_diag4() to a new vp6dsp.c file and use it throught dsputil
aurel
parents:
8760
diff
changeset
|
4432 c->vp6_filter_diag4= ff_vp6_filter_diag4_c; |
bee83b3f9a6b
move vp6_filter_diag4() to a new vp6dsp.c file and use it throught dsputil
aurel
parents:
8760
diff
changeset
|
4433 } |
7995 | 4434 |
2045 | 4435 c->h261_loop_filter= h261_loop_filter_c; |
2967 | 4436 |
1784 | 4437 c->try_8x8basis= try_8x8basis_c; |
4438 c->add_8x8basis= add_8x8basis_c; | |
866 | 4439 |
8590 | 4440 #if CONFIG_VORBIS_DECODER |
3536
545a15c19c91
sse & sse2 implementations of vorbis channel coupling.
lorenm
parents:
3526
diff
changeset
|
4441 c->vorbis_inverse_coupling = vorbis_inverse_coupling; |
545a15c19c91
sse & sse2 implementations of vorbis channel coupling.
lorenm
parents:
3526
diff
changeset
|
4442 #endif |
8590 | 4443 #if CONFIG_AC3_DECODER |
7563 | 4444 c->ac3_downmix = ff_ac3_downmix_c; |
4445 #endif | |
10429
289dd8daf4ee
add CONFIG_LPC to the build system for lpc dsputil functions. fixes build
jbr
parents:
10424
diff
changeset
|
4446 #if CONFIG_LPC |
10424
94595d0e617c
Move autocorrelation function from flacenc.c to lpc.c. Also rename the
jbr
parents:
10421
diff
changeset
|
4447 c->lpc_compute_autocorr = ff_lpc_compute_autocorr; |
10429
289dd8daf4ee
add CONFIG_LPC to the build system for lpc dsputil functions. fixes build
jbr
parents:
10424
diff
changeset
|
4448 #endif |
3568
945caa35ee9a
sse and 3dnow implementations of float->int conversion and mdct windowing.
lorenm
parents:
3536
diff
changeset
|
4449 c->vector_fmul = vector_fmul_c; |
945caa35ee9a
sse and 3dnow implementations of float->int conversion and mdct windowing.
lorenm
parents:
3536
diff
changeset
|
4450 c->vector_fmul_reverse = vector_fmul_reverse_c; |
10300
4d1b9ca628fc
Drop unused args from vector_fmul_add_add, simpify code, and rename
mru
parents:
10219
diff
changeset
|
4451 c->vector_fmul_add = vector_fmul_add_c; |
7261 | 4452 c->vector_fmul_window = ff_vector_fmul_window_c; |
7564 | 4453 c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_c; |
10104
0fa3d21b317e
SSE optimized vector_clipf(). 10% faster TwinVQ decoding.
vitor
parents:
10094
diff
changeset
|
4454 c->vector_clipf = vector_clipf_c; |
3568
945caa35ee9a
sse and 3dnow implementations of float->int conversion and mdct windowing.
lorenm
parents:
3536
diff
changeset
|
4455 c->float_to_int16 = ff_float_to_int16_c; |
7261 | 4456 c->float_to_int16_interleave = ff_float_to_int16_interleave_c; |
7203
87b1dfb5a98d
Add several vector functions used by Monkey's Audio decoder to dsputil
kostya
parents:
6719
diff
changeset
|
4457 c->scalarproduct_int16 = scalarproduct_int16_c; |
10644 | 4458 c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_c; |
10219 | 4459 c->scalarproduct_float = scalarproduct_float_c; |
4460 c->butterflies_float = butterflies_float_c; | |
4461 c->vector_fmul_scalar = vector_fmul_scalar_c; | |
4462 | |
4463 c->vector_fmul_sv_scalar[0] = vector_fmul_sv_scalar_2_c; | |
4464 c->vector_fmul_sv_scalar[1] = vector_fmul_sv_scalar_4_c; | |
4465 | |
4466 c->sv_fmul_scalar[0] = sv_fmul_scalar_2_c; | |
4467 c->sv_fmul_scalar[1] = sv_fmul_scalar_4_c; | |
3536
545a15c19c91
sse & sse2 implementations of vorbis channel coupling.
lorenm
parents:
3526
diff
changeset
|
4468 |
3245 | 4469 c->shrink[0]= ff_img_copy_plane; |
4470 c->shrink[1]= ff_shrink22; | |
4471 c->shrink[2]= ff_shrink44; | |
4472 c->shrink[3]= ff_shrink88; | |
4473 | |
3215
06f98047ff26
prefetch pixels for future motion compensation. 2-5% faster h264.
lorenm
parents:
3199
diff
changeset
|
4474 c->prefetch= just_return; |
06f98047ff26
prefetch pixels for future motion compensation. 2-5% faster h264.
lorenm
parents:
3199
diff
changeset
|
4475 |
3807
6a40092eb9e6
approximate qpel functions: sacrifice some quality for some decoding speed. enabled on B-frames with -lavdopts fast.
lorenm
parents:
3728
diff
changeset
|
4476 memset(c->put_2tap_qpel_pixels_tab, 0, sizeof(c->put_2tap_qpel_pixels_tab)); |
6a40092eb9e6
approximate qpel functions: sacrifice some quality for some decoding speed. enabled on B-frames with -lavdopts fast.
lorenm
parents:
3728
diff
changeset
|
4477 memset(c->avg_2tap_qpel_pixels_tab, 0, sizeof(c->avg_2tap_qpel_pixels_tab)); |
6a40092eb9e6
approximate qpel functions: sacrifice some quality for some decoding speed. enabled on B-frames with -lavdopts fast.
lorenm
parents:
3728
diff
changeset
|
4478 |
8596
68e959302527
replace all occurrence of ENABLE_ by the corresponding CONFIG_, HAVE_ or ARCH_
aurel
parents:
8590
diff
changeset
|
4479 if (HAVE_MMX) dsputil_init_mmx (c, avctx); |
68e959302527
replace all occurrence of ENABLE_ by the corresponding CONFIG_, HAVE_ or ARCH_
aurel
parents:
8590
diff
changeset
|
4480 if (ARCH_ARM) dsputil_init_arm (c, avctx); |
68e959302527
replace all occurrence of ENABLE_ by the corresponding CONFIG_, HAVE_ or ARCH_
aurel
parents:
8590
diff
changeset
|
4481 if (CONFIG_MLIB) dsputil_init_mlib (c, avctx); |
68e959302527
replace all occurrence of ENABLE_ by the corresponding CONFIG_, HAVE_ or ARCH_
aurel
parents:
8590
diff
changeset
|
4482 if (HAVE_VIS) dsputil_init_vis (c, avctx); |
68e959302527
replace all occurrence of ENABLE_ by the corresponding CONFIG_, HAVE_ or ARCH_
aurel
parents:
8590
diff
changeset
|
4483 if (ARCH_ALPHA) dsputil_init_alpha (c, avctx); |
68e959302527
replace all occurrence of ENABLE_ by the corresponding CONFIG_, HAVE_ or ARCH_
aurel
parents:
8590
diff
changeset
|
4484 if (ARCH_PPC) dsputil_init_ppc (c, avctx); |
68e959302527
replace all occurrence of ENABLE_ by the corresponding CONFIG_, HAVE_ or ARCH_
aurel
parents:
8590
diff
changeset
|
4485 if (HAVE_MMI) dsputil_init_mmi (c, avctx); |
68e959302527
replace all occurrence of ENABLE_ by the corresponding CONFIG_, HAVE_ or ARCH_
aurel
parents:
8590
diff
changeset
|
4486 if (ARCH_SH4) dsputil_init_sh4 (c, avctx); |
68e959302527
replace all occurrence of ENABLE_ by the corresponding CONFIG_, HAVE_ or ARCH_
aurel
parents:
8590
diff
changeset
|
4487 if (ARCH_BFIN) dsputil_init_bfin (c, avctx); |
1092 | 4488 |
3807
6a40092eb9e6
approximate qpel functions: sacrifice some quality for some decoding speed. enabled on B-frames with -lavdopts fast.
lorenm
parents:
3728
diff
changeset
|
4489 for(i=0; i<64; i++){ |
6a40092eb9e6
approximate qpel functions: sacrifice some quality for some decoding speed. enabled on B-frames with -lavdopts fast.
lorenm
parents:
3728
diff
changeset
|
4490 if(!c->put_2tap_qpel_pixels_tab[0][i]) |
6a40092eb9e6
approximate qpel functions: sacrifice some quality for some decoding speed. enabled on B-frames with -lavdopts fast.
lorenm
parents:
3728
diff
changeset
|
4491 c->put_2tap_qpel_pixels_tab[0][i]= c->put_h264_qpel_pixels_tab[0][i]; |
6a40092eb9e6
approximate qpel functions: sacrifice some quality for some decoding speed. enabled on B-frames with -lavdopts fast.
lorenm
parents:
3728
diff
changeset
|
4492 if(!c->avg_2tap_qpel_pixels_tab[0][i]) |
6a40092eb9e6
approximate qpel functions: sacrifice some quality for some decoding speed. enabled on B-frames with -lavdopts fast.
lorenm
parents:
3728
diff
changeset
|
4493 c->avg_2tap_qpel_pixels_tab[0][i]= c->avg_h264_qpel_pixels_tab[0][i]; |
6a40092eb9e6
approximate qpel functions: sacrifice some quality for some decoding speed. enabled on B-frames with -lavdopts fast.
lorenm
parents:
3728
diff
changeset
|
4494 } |
6a40092eb9e6
approximate qpel functions: sacrifice some quality for some decoding speed. enabled on B-frames with -lavdopts fast.
lorenm
parents:
3728
diff
changeset
|
4495 |
11988
e382860b855f
Set rv34 (0,0) subpel mc functions to the optimised h264 ones
mru
parents:
11981
diff
changeset
|
4496 c->put_rv30_tpel_pixels_tab[0][0] = c->put_h264_qpel_pixels_tab[0][0]; |
e382860b855f
Set rv34 (0,0) subpel mc functions to the optimised h264 ones
mru
parents:
11981
diff
changeset
|
4497 c->put_rv30_tpel_pixels_tab[1][0] = c->put_h264_qpel_pixels_tab[1][0]; |
e382860b855f
Set rv34 (0,0) subpel mc functions to the optimised h264 ones
mru
parents:
11981
diff
changeset
|
4498 c->avg_rv30_tpel_pixels_tab[0][0] = c->avg_h264_qpel_pixels_tab[0][0]; |
e382860b855f
Set rv34 (0,0) subpel mc functions to the optimised h264 ones
mru
parents:
11981
diff
changeset
|
4499 c->avg_rv30_tpel_pixels_tab[1][0] = c->avg_h264_qpel_pixels_tab[1][0]; |
e382860b855f
Set rv34 (0,0) subpel mc functions to the optimised h264 ones
mru
parents:
11981
diff
changeset
|
4500 |
e382860b855f
Set rv34 (0,0) subpel mc functions to the optimised h264 ones
mru
parents:
11981
diff
changeset
|
4501 c->put_rv40_qpel_pixels_tab[0][0] = c->put_h264_qpel_pixels_tab[0][0]; |
e382860b855f
Set rv34 (0,0) subpel mc functions to the optimised h264 ones
mru
parents:
11981
diff
changeset
|
4502 c->put_rv40_qpel_pixels_tab[1][0] = c->put_h264_qpel_pixels_tab[1][0]; |
e382860b855f
Set rv34 (0,0) subpel mc functions to the optimised h264 ones
mru
parents:
11981
diff
changeset
|
4503 c->avg_rv40_qpel_pixels_tab[0][0] = c->avg_h264_qpel_pixels_tab[0][0]; |
e382860b855f
Set rv34 (0,0) subpel mc functions to the optimised h264 ones
mru
parents:
11981
diff
changeset
|
4504 c->avg_rv40_qpel_pixels_tab[1][0] = c->avg_h264_qpel_pixels_tab[1][0]; |
e382860b855f
Set rv34 (0,0) subpel mc functions to the optimised h264 ones
mru
parents:
11981
diff
changeset
|
4505 |
1092 | 4506 switch(c->idct_permutation_type){ |
4507 case FF_NO_IDCT_PERM: | |
4508 for(i=0; i<64; i++) | |
4509 c->idct_permutation[i]= i; | |
4510 break; | |
4511 case FF_LIBMPEG2_IDCT_PERM: | |
4512 for(i=0; i<64; i++) | |
4513 c->idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2); | |
4514 break; | |
4515 case FF_SIMPLE_IDCT_PERM: | |
4516 for(i=0; i<64; i++) | |
4517 c->idct_permutation[i]= simple_mmx_permutation[i]; | |
4518 break; | |
4519 case FF_TRANSPOSE_IDCT_PERM: | |
4520 for(i=0; i<64; i++) | |
4521 c->idct_permutation[i]= ((i&7)<<3) | (i>>3); | |
4522 break; | |
2696
9699d325049d
porting the mmx&sse2 (sse2 untested) vp3 idcts to the lavc idct API
michael
parents:
2693
diff
changeset
|
4523 case FF_PARTTRANS_IDCT_PERM: |
9699d325049d
porting the mmx&sse2 (sse2 untested) vp3 idcts to the lavc idct API
michael
parents:
2693
diff
changeset
|
4524 for(i=0; i<64; i++) |
9699d325049d
porting the mmx&sse2 (sse2 untested) vp3 idcts to the lavc idct API
michael
parents:
2693
diff
changeset
|
4525 c->idct_permutation[i]= (i&0x24) | ((i&3)<<3) | ((i>>3)&3); |
9699d325049d
porting the mmx&sse2 (sse2 untested) vp3 idcts to the lavc idct API
michael
parents:
2693
diff
changeset
|
4526 break; |
6600
c3213c91124c
Add a new IDCT permutation, used in xvid_sse2 and possibly future similar IDCTs.
astrange
parents:
6450
diff
changeset
|
4527 case FF_SSE2_IDCT_PERM: |
c3213c91124c
Add a new IDCT permutation, used in xvid_sse2 and possibly future similar IDCTs.
astrange
parents:
6450
diff
changeset
|
4528 for(i=0; i<64; i++) |
c3213c91124c
Add a new IDCT permutation, used in xvid_sse2 and possibly future similar IDCTs.
astrange
parents:
6450
diff
changeset
|
4529 c->idct_permutation[i]= (i&0x38) | idct_sse2_row_perm[i&7]; |
c3213c91124c
Add a new IDCT permutation, used in xvid_sse2 and possibly future similar IDCTs.
astrange
parents:
6450
diff
changeset
|
4530 break; |
1092 | 4531 default: |
1598
932d306bf1dc
av_log() patch by (Michel Bardiaux <mbardiaux at peaktime dot be>)
michael
parents:
1571
diff
changeset
|
4532 av_log(avctx, AV_LOG_ERROR, "Internal error, IDCT permutation not set\n"); |
1092 | 4533 } |
0 | 4534 } |
252
ddb1a0e94cf4
- Added PSNR feature to libavcodec and ffmpeg. By now just Y PSNR until I'm
pulento
parents:
220
diff
changeset
|
4535 |