Mercurial > libavcodec.hg
annotate dsputil.c @ 12494:94eaea836bf4 libavcodec
Check avctx width/height more thoroughly (e.g. all values 0 except width would
have been accepted before).
Also do not fail if they are invalid but instead override them to 0.
This allows decoding e.g. MPEG video when only the container values are corrupted.
For encoding a value of 0,0 of course makes no sense, but was allowed
through before and will be caught by an extra check in the encode function.
author | reimar |
---|---|
date | Wed, 15 Sep 2010 04:46:55 +0000 |
parents | 0a306a267dbf |
children |
rev | line source |
---|---|
0 | 1 /* |
2 * DSP utils | |
8629
04423b2f6e0b
cosmetics: Remove pointless period after copyright statement non-sentences.
diego
parents:
8627
diff
changeset
|
3 * Copyright (c) 2000, 2001 Fabrice Bellard |
1739
07a484280a82
copyright year update of the files i touched and remembered, things look annoyingly unmaintained otherwise
michael
parents:
1729
diff
changeset
|
4 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> |
0 | 5 * |
5214 | 6 * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at> |
7 * | |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3807
diff
changeset
|
8 * This file is part of FFmpeg. |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3807
diff
changeset
|
9 * |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3807
diff
changeset
|
10 * FFmpeg is free software; you can redistribute it and/or |
429 | 11 * modify it under the terms of the GNU Lesser General Public |
12 * License as published by the Free Software Foundation; either | |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3807
diff
changeset
|
13 * version 2.1 of the License, or (at your option) any later version. |
0 | 14 * |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3807
diff
changeset
|
15 * FFmpeg is distributed in the hope that it will be useful, |
0 | 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
429 | 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
18 * Lesser General Public License for more details. | |
0 | 19 * |
429 | 20 * You should have received a copy of the GNU Lesser General Public |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3807
diff
changeset
|
21 * License along with FFmpeg; if not, write to the Free Software |
3036
0b546eab515d
Update licensing information: The FSF changed postal address.
diego
parents:
3029
diff
changeset
|
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
0 | 23 */ |
2967 | 24 |
1106 | 25 /** |
11644
7dd2a45249a9
Remove explicit filename from Doxygen @file commands.
diego
parents:
11637
diff
changeset
|
26 * @file |
1106 | 27 * DSP utils |
28 */ | |
2967 | 29 |
12466
0a306a267dbf
Reimplement ff_img_copy_plane() as av_image_copy_plane() in libavcore,
stefano
parents:
12423
diff
changeset
|
30 #include "libavcore/imgutils.h" |
0 | 31 #include "avcodec.h" |
32 #include "dsputil.h" | |
1092 | 33 #include "simple_idct.h" |
1557 | 34 #include "faandct.h" |
6407 | 35 #include "faanidct.h" |
8627
d6bab465b82c
moves mid_pred() into mathops.h (with arch specific code split by directory)
aurel
parents:
8596
diff
changeset
|
36 #include "mathops.h" |
10748
36611425fedb
Add required header #includes for mpegvideo.h and config.h.
diego
parents:
10644
diff
changeset
|
37 #include "mpegvideo.h" |
36611425fedb
Add required header #includes for mpegvideo.h and config.h.
diego
parents:
10644
diff
changeset
|
38 #include "config.h" |
11375
84963c795459
Move some prototypes from dsputil.c to reasonable header files
mru
parents:
11369
diff
changeset
|
39 #include "lpc.h" |
84963c795459
Move some prototypes from dsputil.c to reasonable header files
mru
parents:
11369
diff
changeset
|
40 #include "ac3dec.h" |
84963c795459
Move some prototypes from dsputil.c to reasonable header files
mru
parents:
11369
diff
changeset
|
41 #include "vorbis.h" |
84963c795459
Move some prototypes from dsputil.c to reasonable header files
mru
parents:
11369
diff
changeset
|
42 #include "png.h" |
11921 | 43 #include "vp8dsp.h" |
676 | 44 |
4176 | 45 uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0, }; |
4179 | 46 uint32_t ff_squareTbl[512] = {0, }; |
0 | 47 |
/* Byte-replicated constants sized to the native unsigned long:
 * every byte is 0x7f / 0x80 respectively (0x7f7f7f7f on 32-bit,
 * 0x7f7f7f7f7f7f7f7f on 64-bit, etc.). */
#define pb_7f (~0UL/255 * 0x7f)
#define pb_80 (~0UL/255 * 0x80)
6385 | 51 |
/* Standard (progressive) zigzag scan order, not permutated for any IDCT. */
const uint8_t ff_zigzag_direct[64] = {
     0,  1,  8, 16,  9,  2,  3, 10,
    17, 24, 32, 25, 18, 11,  4,  5,
    12, 19, 26, 33, 40, 48, 41, 34,
    27, 20, 13,  6,  7, 14, 21, 28,
    35, 42, 49, 56, 57, 50, 43, 36,
    29, 22, 15, 23, 30, 37, 44, 51,
    58, 59, 52, 45, 38, 31, 39, 46,
    53, 60, 61, 54, 47, 55, 62, 63
};
62 | |
/* Specific zigzag scan for the 2-4-8 IDCT. NOTE: unlike the specification,
 * the two fields are interleaved here. */
const uint8_t ff_zigzag248_direct[64] = {
     0,  8,  1,  9, 16, 24,  2, 10,
    17, 25, 32, 40, 48, 56, 33, 41,
    18, 26,  3, 11,  4, 12, 19, 27,
    34, 42, 49, 57, 50, 58, 35, 43,
    20, 28,  5, 13,  6, 14, 21, 29,
    36, 44, 51, 59, 52, 60, 37, 45,
    22, 30,  7, 15, 23, 31, 38, 46,
    53, 61, 54, 62, 39, 47, 55, 63,
};
75 | |
220 | 76 /* not permutated inverse zigzag_direct + 1 for MMX quantizer */ |
11369 | 77 DECLARE_ALIGNED(16, uint16_t, inv_zigzag_direct16)[64]; |
220 | 78 |
/* Alternate horizontal scan order (MPEG-2 style). */
const uint8_t ff_alternate_horizontal_scan[64] = {
     0,  1,  2,  3,  8,  9, 16, 17,
    10, 11,  4,  5,  6,  7, 15, 14,
    13, 12, 19, 18, 24, 25, 32, 33,
    26, 27, 20, 21, 22, 23, 28, 29,
    30, 31, 34, 35, 40, 41, 48, 49,
    42, 43, 36, 37, 38, 39, 44, 45,
    46, 47, 50, 51, 56, 57, 58, 59,
    52, 53, 54, 55, 60, 61, 62, 63,
};
89 | |
/* Alternate vertical scan order (MPEG-2 interlaced style). */
const uint8_t ff_alternate_vertical_scan[64] = {
     0,  8, 16, 24,  1,  9,  2, 10,
    17, 25, 32, 40, 48, 56, 57, 49,
    41, 33, 26, 18,  3, 11,  4, 12,
    19, 27, 34, 42, 50, 58, 35, 43,
    51, 59, 20, 28,  5, 13,  6, 14,
    21, 29, 36, 44, 52, 60, 37, 45,
    53, 61, 22, 30,  7, 15, 23, 31,
    38, 46, 54, 62, 39, 47, 55, 63,
};
100 | |
/* Input permutation for the simple_idct_mmx. */
static const uint8_t simple_mmx_permutation[64] = {
    0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
    0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
    0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
    0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
    0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
    0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
    0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
    0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
};
112 | |
/* Row permutation used by the SSE2 (xvid-style) IDCTs. */
static const uint8_t idct_sse2_row_perm[8] = { 0, 4, 1, 5, 2, 6, 3, 7 };
c3213c91124c
Add a new IDCT permutation, used in xvid_sse2 and possibly future similar IDCTs.
astrange
parents:
6450
diff
changeset
|
114 |
6438 | 115 void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){ |
116 int i; | |
117 int end; | |
118 | |
119 st->scantable= src_scantable; | |
120 | |
121 for(i=0; i<64; i++){ | |
122 int j; | |
123 j = src_scantable[i]; | |
124 st->permutated[i] = permutation[j]; | |
8590 | 125 #if ARCH_PPC |
6438 | 126 st->inverse[j] = i; |
127 #endif | |
128 } | |
129 | |
130 end=-1; | |
131 for(i=0; i<64; i++){ | |
132 int j; | |
133 j = st->permutated[i]; | |
134 if(j>end) end=j; | |
135 st->raster_end[i]= end; | |
136 } | |
137 } | |
138 | |
/* Sum of all 256 samples of a 16x16 block; rows are line_size bytes apart. */
static int pix_sum_c(uint8_t * pix, int line_size)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        for (col = 0; col < 16; col++)
            total += pix[col];
        pix += line_size;
    }
    return total;
}
160 | |
1064 | 161 static int pix_norm1_c(uint8_t * pix, int line_size) |
612 | 162 { |
163 int s, i, j; | |
4179 | 164 uint32_t *sq = ff_squareTbl + 256; |
612 | 165 |
166 s = 0; | |
167 for (i = 0; i < 16; i++) { | |
2979 | 168 for (j = 0; j < 16; j += 8) { |
997
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
169 #if 0 |
2979 | 170 s += sq[pix[0]]; |
171 s += sq[pix[1]]; | |
172 s += sq[pix[2]]; | |
173 s += sq[pix[3]]; | |
174 s += sq[pix[4]]; | |
175 s += sq[pix[5]]; | |
176 s += sq[pix[6]]; | |
177 s += sq[pix[7]]; | |
997
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
178 #else |
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
179 #if LONG_MAX > 2147483647 |
2979 | 180 register uint64_t x=*(uint64_t*)pix; |
181 s += sq[x&0xff]; | |
182 s += sq[(x>>8)&0xff]; | |
183 s += sq[(x>>16)&0xff]; | |
184 s += sq[(x>>24)&0xff]; | |
997
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
185 s += sq[(x>>32)&0xff]; |
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
186 s += sq[(x>>40)&0xff]; |
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
187 s += sq[(x>>48)&0xff]; |
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
188 s += sq[(x>>56)&0xff]; |
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
189 #else |
2979 | 190 register uint32_t x=*(uint32_t*)pix; |
191 s += sq[x&0xff]; | |
192 s += sq[(x>>8)&0xff]; | |
193 s += sq[(x>>16)&0xff]; | |
194 s += sq[(x>>24)&0xff]; | |
997
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
195 x=*(uint32_t*)(pix+4); |
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
196 s += sq[x&0xff]; |
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
197 s += sq[(x>>8)&0xff]; |
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
198 s += sq[(x>>16)&0xff]; |
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
199 s += sq[(x>>24)&0xff]; |
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
200 #endif |
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
201 #endif |
2979 | 202 pix += 8; |
203 } | |
204 pix += line_size - 16; | |
612 | 205 } |
206 return s; | |
207 } | |
208 | |
/* Byte-swap w 32-bit words from src into dst (buffers may be the same). */
static void bswap_buf(uint32_t *dst, const uint32_t *src, int w){
    int i;

    /* The hand-unrolled x8 loop of the original is left to the compiler;
     * each element gets exactly one av_bswap32, as before. */
    for (i = 0; i < w; i++)
        dst[i] = av_bswap32(src[i]);
}
612 | 226 |
2184 | 227 static int sse4_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h) |
228 { | |
229 int s, i; | |
4179 | 230 uint32_t *sq = ff_squareTbl + 256; |
2184 | 231 |
232 s = 0; | |
233 for (i = 0; i < h; i++) { | |
234 s += sq[pix1[0] - pix2[0]]; | |
235 s += sq[pix1[1] - pix2[1]]; | |
236 s += sq[pix1[2] - pix2[2]]; | |
237 s += sq[pix1[3] - pix2[3]]; | |
238 pix1 += line_size; | |
239 pix2 += line_size; | |
240 } | |
241 return s; | |
242 } | |
243 | |
1708 | 244 static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h) |
936 | 245 { |
246 int s, i; | |
4179 | 247 uint32_t *sq = ff_squareTbl + 256; |
936 | 248 |
249 s = 0; | |
1708 | 250 for (i = 0; i < h; i++) { |
936 | 251 s += sq[pix1[0] - pix2[0]]; |
252 s += sq[pix1[1] - pix2[1]]; | |
253 s += sq[pix1[2] - pix2[2]]; | |
254 s += sq[pix1[3] - pix2[3]]; | |
255 s += sq[pix1[4] - pix2[4]]; | |
256 s += sq[pix1[5] - pix2[5]]; | |
257 s += sq[pix1[6] - pix2[6]]; | |
258 s += sq[pix1[7] - pix2[7]]; | |
259 pix1 += line_size; | |
260 pix2 += line_size; | |
261 } | |
262 return s; | |
263 } | |
264 | |
1708 | 265 static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) |
884 | 266 { |
1012
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
267 int s, i; |
4179 | 268 uint32_t *sq = ff_squareTbl + 256; |
884 | 269 |
270 s = 0; | |
1708 | 271 for (i = 0; i < h; i++) { |
1012
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
272 s += sq[pix1[ 0] - pix2[ 0]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
273 s += sq[pix1[ 1] - pix2[ 1]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
274 s += sq[pix1[ 2] - pix2[ 2]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
275 s += sq[pix1[ 3] - pix2[ 3]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
276 s += sq[pix1[ 4] - pix2[ 4]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
277 s += sq[pix1[ 5] - pix2[ 5]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
278 s += sq[pix1[ 6] - pix2[ 6]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
279 s += sq[pix1[ 7] - pix2[ 7]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
280 s += sq[pix1[ 8] - pix2[ 8]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
281 s += sq[pix1[ 9] - pix2[ 9]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
282 s += sq[pix1[10] - pix2[10]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
283 s += sq[pix1[11] - pix2[11]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
284 s += sq[pix1[12] - pix2[12]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
285 s += sq[pix1[13] - pix2[13]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
286 s += sq[pix1[14] - pix2[14]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
287 s += sq[pix1[15] - pix2[15]]; |
997
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
288 |
1012
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
289 pix1 += line_size; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
290 pix2 += line_size; |
884 | 291 } |
292 return s; | |
293 } | |
294 | |
/* draw the edges of width 'w' of an image of size width, height */
//FIXME check that this is ok for mpeg4 interlaced
static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w)
{
    uint8_t *ptr, *last_line;
    int i;

    /* top and bottom: replicate the first/last row into the margin */
    last_line = buf + (height - 1) * wrap;
    for(i=0;i<w;i++) {
        memcpy(buf - (i + 1) * wrap, buf, width);
        memcpy(last_line + (i + 1) * wrap, last_line, width);
    }
    /* left and right: replicate the edge sample of each row */
    ptr = buf;
    for(i=0;i<height;i++) {
        memset(ptr - w, ptr[0], w);
        memset(ptr + width, ptr[width-1], w);
        ptr += wrap;
    }
    /* corners: fill each with the nearest corner sample
     * (the last two comments previously mislabeled the bottom corners
     * as "top left"/"top right") */
    for(i=0;i<w;i++) {
        memset(buf - (i + 1) * wrap - w, buf[0], w); /* top left */
        memset(buf - (i + 1) * wrap + width, buf[width-1], w); /* top right */
        memset(last_line + (i + 1) * wrap - w, last_line[0], w); /* bottom left */
        memset(last_line + (i + 1) * wrap + width, last_line[width-1], w); /* bottom right */
    }
}
323 | |
/**
 * Copy a rectangular area of samples to a temporary buffer, replicating the
 * border samples for any part of the requested block lying outside the image.
 * @param buf destination buffer
 * @param src source buffer
 * @param linesize number of bytes between 2 vertically adjacent samples in both the source and destination buffers
 * @param block_w width of block
 * @param block_h height of block
 * @param src_x x coordinate of the top left sample of the block in the source buffer
 * @param src_y y coordinate of the top left sample of the block in the source buffer
 * @param w width of the source buffer
 * @param h height of the source buffer
 */
void ff_emulated_edge_mc(uint8_t *buf, const uint8_t *src, int linesize, int block_w, int block_h,
                    int src_x, int src_y, int w, int h){
    int x, y;
    int top, bottom, left, right;

    /* A block entirely outside the image is shifted so it touches the
     * nearest edge; the fully replicated result is identical. */
    if (src_y >= h) {
        src  += (h - 1 - src_y) * linesize;
        src_y = h - 1;
    } else if (src_y <= -block_h) {
        src  += (1 - block_h - src_y) * linesize;
        src_y = 1 - block_h;
    }
    if (src_x >= w) {
        src  += w - 1 - src_x;
        src_x = w - 1;
    } else if (src_x <= -block_w) {
        src  += 1 - block_w - src_x;
        src_x = 1 - block_w;
    }

    /* Sub-rectangle of the block that is covered by real samples. */
    left   = -src_x > 0 ? -src_x : 0;
    top    = -src_y > 0 ? -src_y : 0;
    right  = w - src_x < block_w ? w - src_x : block_w;
    bottom = h - src_y < block_h ? h - src_y : block_h;

    /* copy the part that exists in the source */
    for (y = top; y < bottom; y++)
        for (x = left; x < right; x++)
            buf[x + y * linesize] = src[x + y * linesize];

    /* replicate upwards from the first real row */
    for (y = 0; y < top; y++)
        for (x = left; x < right; x++)
            buf[x + y * linesize] = buf[x + top * linesize];

    /* replicate downwards from the last real row */
    for (y = bottom; y < block_h; y++)
        for (x = left; x < right; x++)
            buf[x + y * linesize] = buf[x + (bottom - 1) * linesize];

    /* replicate the left and right columns of every row */
    for (y = 0; y < block_h; y++) {
        for (x = 0; x < left; x++)
            buf[x + y * linesize] = buf[left + y * linesize];
        for (x = right; x < block_w; x++)
            buf[x + y * linesize] = buf[right - 1 + y * linesize];
    }
}
394 | |
1064 | 395 static void get_pixels_c(DCTELEM *restrict block, const uint8_t *pixels, int line_size) |
0 | 396 { |
397 int i; | |
398 | |
399 /* read the pixels */ | |
400 for(i=0;i<8;i++) { | |
516 | 401 block[0] = pixels[0]; |
402 block[1] = pixels[1]; | |
403 block[2] = pixels[2]; | |
404 block[3] = pixels[3]; | |
405 block[4] = pixels[4]; | |
406 block[5] = pixels[5]; | |
407 block[6] = pixels[6]; | |
408 block[7] = pixels[7]; | |
409 pixels += line_size; | |
410 block += 8; | |
0 | 411 } |
412 } | |
413 | |
1064 | 414 static void diff_pixels_c(DCTELEM *restrict block, const uint8_t *s1, |
2979 | 415 const uint8_t *s2, int stride){ |
324 | 416 int i; |
417 | |
418 /* read the pixels */ | |
419 for(i=0;i<8;i++) { | |
516 | 420 block[0] = s1[0] - s2[0]; |
421 block[1] = s1[1] - s2[1]; | |
422 block[2] = s1[2] - s2[2]; | |
423 block[3] = s1[3] - s2[3]; | |
424 block[4] = s1[4] - s2[4]; | |
425 block[5] = s1[5] - s2[5]; | |
426 block[6] = s1[6] - s2[6]; | |
427 block[7] = s1[7] - s2[7]; | |
324 | 428 s1 += stride; |
429 s2 += stride; | |
516 | 430 block += 8; |
324 | 431 } |
432 } | |
433 | |
434 | |
1064 | 435 static void put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels, |
2979 | 436 int line_size) |
0 | 437 { |
438 int i; | |
4176 | 439 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; |
2967 | 440 |
0 | 441 /* read the pixels */ |
442 for(i=0;i<8;i++) { | |
516 | 443 pixels[0] = cm[block[0]]; |
444 pixels[1] = cm[block[1]]; | |
445 pixels[2] = cm[block[2]]; | |
446 pixels[3] = cm[block[3]]; | |
447 pixels[4] = cm[block[4]]; | |
448 pixels[5] = cm[block[5]]; | |
449 pixels[6] = cm[block[6]]; | |
450 pixels[7] = cm[block[7]]; | |
451 | |
452 pixels += line_size; | |
453 block += 8; | |
0 | 454 } |
455 } | |
456 | |
2256 | 457 static void put_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels, |
2979 | 458 int line_size) |
2256 | 459 { |
460 int i; | |
4176 | 461 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; |
2967 | 462 |
2256 | 463 /* read the pixels */ |
464 for(i=0;i<4;i++) { | |
465 pixels[0] = cm[block[0]]; | |
466 pixels[1] = cm[block[1]]; | |
467 pixels[2] = cm[block[2]]; | |
468 pixels[3] = cm[block[3]]; | |
469 | |
470 pixels += line_size; | |
471 block += 8; | |
472 } | |
473 } | |
474 | |
2257 | 475 static void put_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels, |
2979 | 476 int line_size) |
2257 | 477 { |
478 int i; | |
4176 | 479 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; |
2967 | 480 |
2257 | 481 /* read the pixels */ |
482 for(i=0;i<2;i++) { | |
483 pixels[0] = cm[block[0]]; | |
484 pixels[1] = cm[block[1]]; | |
485 | |
486 pixels += line_size; | |
487 block += 8; | |
488 } | |
489 } | |
490 | |
2967 | 491 static void put_signed_pixels_clamped_c(const DCTELEM *block, |
1984
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
492 uint8_t *restrict pixels, |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
493 int line_size) |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
494 { |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
495 int i, j; |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
496 |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
497 for (i = 0; i < 8; i++) { |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
498 for (j = 0; j < 8; j++) { |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
499 if (*block < -128) |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
500 *pixels = 0; |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
501 else if (*block > 127) |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
502 *pixels = 255; |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
503 else |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
504 *pixels = (uint8_t)(*block + 128); |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
505 block++; |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
506 pixels++; |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
507 } |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
508 pixels += (line_size - 8); |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
509 } |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
510 } |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
511 |
11231 | 512 static void put_pixels_nonclamped_c(const DCTELEM *block, uint8_t *restrict pixels, |
513 int line_size) | |
514 { | |
515 int i; | |
516 | |
517 /* read the pixels */ | |
518 for(i=0;i<8;i++) { | |
519 pixels[0] = block[0]; | |
520 pixels[1] = block[1]; | |
521 pixels[2] = block[2]; | |
522 pixels[3] = block[3]; | |
523 pixels[4] = block[4]; | |
524 pixels[5] = block[5]; | |
525 pixels[6] = block[6]; | |
526 pixels[7] = block[7]; | |
527 | |
528 pixels += line_size; | |
529 block += 8; | |
530 } | |
531 } | |
532 | |
1064 | 533 static void add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels, |
516 | 534 int line_size) |
0 | 535 { |
536 int i; | |
4176 | 537 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; |
2967 | 538 |
0 | 539 /* read the pixels */ |
540 for(i=0;i<8;i++) { | |
516 | 541 pixels[0] = cm[pixels[0] + block[0]]; |
542 pixels[1] = cm[pixels[1] + block[1]]; | |
543 pixels[2] = cm[pixels[2] + block[2]]; | |
544 pixels[3] = cm[pixels[3] + block[3]]; | |
545 pixels[4] = cm[pixels[4] + block[4]]; | |
546 pixels[5] = cm[pixels[5] + block[5]]; | |
547 pixels[6] = cm[pixels[6] + block[6]]; | |
548 pixels[7] = cm[pixels[7] + block[7]]; | |
549 pixels += line_size; | |
550 block += 8; | |
0 | 551 } |
552 } | |
2256 | 553 |
554 static void add_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels, | |
555 int line_size) | |
556 { | |
557 int i; | |
4176 | 558 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; |
2967 | 559 |
2256 | 560 /* read the pixels */ |
561 for(i=0;i<4;i++) { | |
562 pixels[0] = cm[pixels[0] + block[0]]; | |
563 pixels[1] = cm[pixels[1] + block[1]]; | |
564 pixels[2] = cm[pixels[2] + block[2]]; | |
565 pixels[3] = cm[pixels[3] + block[3]]; | |
566 pixels += line_size; | |
567 block += 8; | |
568 } | |
569 } | |
2257 | 570 |
571 static void add_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels, | |
572 int line_size) | |
573 { | |
574 int i; | |
4176 | 575 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; |
2967 | 576 |
2257 | 577 /* read the pixels */ |
578 for(i=0;i<2;i++) { | |
579 pixels[0] = cm[pixels[0] + block[0]]; | |
580 pixels[1] = cm[pixels[1] + block[1]]; | |
581 pixels += line_size; | |
582 block += 8; | |
583 } | |
584 } | |
2763 | 585 |
586 static void add_pixels8_c(uint8_t *restrict pixels, DCTELEM *block, int line_size) | |
587 { | |
588 int i; | |
589 for(i=0;i<8;i++) { | |
590 pixels[0] += block[0]; | |
591 pixels[1] += block[1]; | |
592 pixels[2] += block[2]; | |
593 pixels[3] += block[3]; | |
594 pixels[4] += block[4]; | |
595 pixels[5] += block[5]; | |
596 pixels[6] += block[6]; | |
597 pixels[7] += block[7]; | |
598 pixels += line_size; | |
599 block += 8; | |
600 } | |
601 } | |
602 | |
603 static void add_pixels4_c(uint8_t *restrict pixels, DCTELEM *block, int line_size) | |
604 { | |
605 int i; | |
606 for(i=0;i<4;i++) { | |
607 pixels[0] += block[0]; | |
608 pixels[1] += block[1]; | |
609 pixels[2] += block[2]; | |
610 pixels[3] += block[3]; | |
611 pixels += line_size; | |
612 block += 4; | |
613 } | |
614 } | |
615 | |
4988
689490842cf5
factor sum_abs_dctelem out of dct_sad, and simd it.
lorenm
parents:
4749
diff
changeset
|
616 static int sum_abs_dctelem_c(DCTELEM *block) |
689490842cf5
factor sum_abs_dctelem out of dct_sad, and simd it.
lorenm
parents:
4749
diff
changeset
|
617 { |
689490842cf5
factor sum_abs_dctelem out of dct_sad, and simd it.
lorenm
parents:
4749
diff
changeset
|
618 int sum=0, i; |
689490842cf5
factor sum_abs_dctelem out of dct_sad, and simd it.
lorenm
parents:
4749
diff
changeset
|
619 for(i=0; i<64; i++) |
689490842cf5
factor sum_abs_dctelem out of dct_sad, and simd it.
lorenm
parents:
4749
diff
changeset
|
620 sum+= FFABS(block[i]); |
689490842cf5
factor sum_abs_dctelem out of dct_sad, and simd it.
lorenm
parents:
4749
diff
changeset
|
621 return sum; |
689490842cf5
factor sum_abs_dctelem out of dct_sad, and simd it.
lorenm
parents:
4749
diff
changeset
|
622 } |
689490842cf5
factor sum_abs_dctelem out of dct_sad, and simd it.
lorenm
parents:
4749
diff
changeset
|
623 |
/* Fill h rows of a 16-wide block with a constant byte value. */
static void fill_block16_c(uint8_t *block, uint8_t value, int line_size, int h)
{
    while (h-- > 0) {
        memset(block, value, 16);
        block += line_size;
    }
}
633 | |
/* Fill h rows of an 8-wide block with a constant byte value. */
static void fill_block8_c(uint8_t *block, uint8_t value, int line_size, int h)
{
    while (h-- > 0) {
        memset(block, value, 8);
        block += line_size;
    }
}
643 | |
/* Upscale an 8x8 block to 16x16 by pixel doubling: each source sample is
 * written as a 2x2 square of identical bytes (src[i] * 0x0101 stores the
 * same byte twice via a uint16_t, endian-independent). */
static void scale_block_c(const uint8_t src[64]/*align 8*/, uint8_t *dst/*align 8*/, int linesize)
{
    int row, col;

    for (row = 0; row < 8; row++) {
        uint16_t *line0 = (uint16_t *) (dst +  2 * row      * linesize);
        uint16_t *line1 = (uint16_t *) (dst + (2 * row + 1) * linesize);
        for (col = 0; col < 8; col++)
            line0[col] = line1[col] = src[col] * 0x0101;
        src += 8;
    }
}
659 | |
385 | 660 #if 0 |
661 | |
/* Dead code: scalar 64-bit-at-a-time ("SWAR") halfpel motion-compensation
 * primitives.  This entire branch is compiled out by the surrounding #if 0;
 * the live 32-bit variant is defined in the #else branch below.
 *
 * For a given OPNAME (e.g. put/avg) and store-operator OP, PIXOP2 expands
 * the family of 8-pixel-wide copy/average functions:
 *   - _pixels       : straight copy of 8 bytes per row
 *   - _x2 / _y2     : halfpel average with the pixel to the right / below
 *   - _xy2          : average of the 2x2 neighbourhood
 *   - no_rnd_*      : round-down variants (x2/y2 use (a&b)+((a^b)>>1)
 *                     instead of (a|b)-((a^b)>>1); xy2 uses bias 0x01 per
 *                     byte instead of 0x02)
 * The 0xFEFE.. mask drops each byte's LSB before the shift, and the
 * 0x0303../0xFCFC.. masks separate the low 2 carry bits from the high
 * 6 bits, so 8 bytes are averaged per integer op with no carry leaking
 * between bytes.  The 16-wide versions are built by CALL_2X_PIXELS.
 *
 * NOTE(review): the first function here is named OPNAME ## _pixels, but
 * the CALL_2X_PIXELS list at the end refers to OPNAME ## _pixels_c -- if
 * this branch were ever re-enabled it would not link as-is; confirm the
 * intended name before reviving. */
#define PIXOP2(OPNAME, OP) \
static void OPNAME ## _pixels(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    for(i=0; i<h; i++){\
        OP(*((uint64_t*)block), AV_RN64(pixels));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static void OPNAME ## _no_rnd_pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    for(i=0; i<h; i++){\
        const uint64_t a= AV_RN64(pixels  );\
        const uint64_t b= AV_RN64(pixels+1);\
        OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static void OPNAME ## _pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    for(i=0; i<h; i++){\
        const uint64_t a= AV_RN64(pixels  );\
        const uint64_t b= AV_RN64(pixels+1);\
        OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static void OPNAME ## _no_rnd_pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    for(i=0; i<h; i++){\
        const uint64_t a= AV_RN64(pixels          );\
        const uint64_t b= AV_RN64(pixels+line_size);\
        OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static void OPNAME ## _pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    for(i=0; i<h; i++){\
        const uint64_t a= AV_RN64(pixels          );\
        const uint64_t b= AV_RN64(pixels+line_size);\
        OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
/* 2x2 average, two rows per iteration; l0/h0 carry the previous row's
   low-bit and high-bit partial sums so each source row is loaded once */\
static void OPNAME ## _pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
        int i;\
        const uint64_t a= AV_RN64(pixels  );\
        const uint64_t b= AV_RN64(pixels+1);\
        uint64_t l0=  (a&0x0303030303030303ULL)\
                    + (b&0x0303030303030303ULL)\
                    + 0x0202020202020202ULL;\
        uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
                   + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
        uint64_t l1,h1;\
\
        pixels+=line_size;\
        for(i=0; i<h; i+=2){\
            uint64_t a= AV_RN64(pixels  );\
            uint64_t b= AV_RN64(pixels+1);\
            l1=  (a&0x0303030303030303ULL)\
               + (b&0x0303030303030303ULL);\
            h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
              + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
            OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
            pixels+=line_size;\
            block +=line_size;\
            a= AV_RN64(pixels  );\
            b= AV_RN64(pixels+1);\
            l0=  (a&0x0303030303030303ULL)\
               + (b&0x0303030303030303ULL)\
               + 0x0202020202020202ULL;\
            h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
              + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
            OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
            pixels+=line_size;\
            block +=line_size;\
        }\
}\
\
/* same as _pixels_xy2_c but with rounding bias 0x01 per byte (round down) */\
static void OPNAME ## _no_rnd_pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
        int i;\
        const uint64_t a= AV_RN64(pixels  );\
        const uint64_t b= AV_RN64(pixels+1);\
        uint64_t l0=  (a&0x0303030303030303ULL)\
                    + (b&0x0303030303030303ULL)\
                    + 0x0101010101010101ULL;\
        uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
                   + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
        uint64_t l1,h1;\
\
        pixels+=line_size;\
        for(i=0; i<h; i+=2){\
            uint64_t a= AV_RN64(pixels  );\
            uint64_t b= AV_RN64(pixels+1);\
            l1=  (a&0x0303030303030303ULL)\
               + (b&0x0303030303030303ULL);\
            h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
              + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
            OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
            pixels+=line_size;\
            block +=line_size;\
            a= AV_RN64(pixels  );\
            b= AV_RN64(pixels+1);\
            l0=  (a&0x0303030303030303ULL)\
               + (b&0x0303030303030303ULL)\
               + 0x0101010101010101ULL;\
            h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
              + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
            OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
            pixels+=line_size;\
            block +=line_size;\
        }\
}\
\
CALL_2X_PIXELS(OPNAME ## _pixels16_c    , OPNAME ## _pixels_c    , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels_x2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels_y2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels_xy2_c, 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels_x2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels_y2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels_xy2_c, 8)
385 | 800 |
801 #define op_avg(a, b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEFEFEFEFEULL)>>1) ) | |
802 #else // 64 bit variant | |
803 | |
804 #define PIXOP2(OPNAME, OP) \ | |
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
805 static void OPNAME ## _pixels2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
806 int i;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
807 for(i=0; i<h; i++){\ |
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
808 OP(*((uint16_t*)(block )), AV_RN16(pixels ));\ |
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
809 pixels+=line_size;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
810 block +=line_size;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
811 }\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
812 }\ |
1168 | 813 static void OPNAME ## _pixels4_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
814 int i;\ | |
815 for(i=0; i<h; i++){\ | |
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
816 OP(*((uint32_t*)(block )), AV_RN32(pixels ));\ |
1168 | 817 pixels+=line_size;\ |
818 block +=line_size;\ | |
819 }\ | |
820 }\ | |
859 | 821 static void OPNAME ## _pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
385 | 822 int i;\ |
823 for(i=0; i<h; i++){\ | |
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
824 OP(*((uint32_t*)(block )), AV_RN32(pixels ));\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
825 OP(*((uint32_t*)(block+4)), AV_RN32(pixels+4));\ |
385 | 826 pixels+=line_size;\ |
827 block +=line_size;\ | |
828 }\ | |
829 }\ | |
859 | 830 static inline void OPNAME ## _no_rnd_pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
831 OPNAME ## _pixels8_c(block, pixels, line_size, h);\ | |
651 | 832 }\ |
385 | 833 \ |
651 | 834 static inline void OPNAME ## _no_rnd_pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ |
835 int src_stride1, int src_stride2, int h){\ | |
385 | 836 int i;\ |
837 for(i=0; i<h; i++){\ | |
651 | 838 uint32_t a,b;\ |
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
839 a= AV_RN32(&src1[i*src_stride1 ]);\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
840 b= AV_RN32(&src2[i*src_stride2 ]);\ |
1264 | 841 OP(*((uint32_t*)&dst[i*dst_stride ]), no_rnd_avg32(a, b));\ |
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
842 a= AV_RN32(&src1[i*src_stride1+4]);\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
843 b= AV_RN32(&src2[i*src_stride2+4]);\ |
1264 | 844 OP(*((uint32_t*)&dst[i*dst_stride+4]), no_rnd_avg32(a, b));\ |
385 | 845 }\ |
846 }\ | |
847 \ | |
651 | 848 static inline void OPNAME ## _pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ |
849 int src_stride1, int src_stride2, int h){\ | |
385 | 850 int i;\ |
851 for(i=0; i<h; i++){\ | |
651 | 852 uint32_t a,b;\ |
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
853 a= AV_RN32(&src1[i*src_stride1 ]);\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
854 b= AV_RN32(&src2[i*src_stride2 ]);\ |
1264 | 855 OP(*((uint32_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\ |
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
856 a= AV_RN32(&src1[i*src_stride1+4]);\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
857 b= AV_RN32(&src2[i*src_stride2+4]);\ |
1264 | 858 OP(*((uint32_t*)&dst[i*dst_stride+4]), rnd_avg32(a, b));\ |
385 | 859 }\ |
860 }\ | |
861 \ | |
1168 | 862 static inline void OPNAME ## _pixels4_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ |
863 int src_stride1, int src_stride2, int h){\ | |
864 int i;\ | |
865 for(i=0; i<h; i++){\ | |
866 uint32_t a,b;\ | |
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
867 a= AV_RN32(&src1[i*src_stride1 ]);\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
868 b= AV_RN32(&src2[i*src_stride2 ]);\ |
1264 | 869 OP(*((uint32_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\ |
1168 | 870 }\ |
871 }\ | |
872 \ | |
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
873 static inline void OPNAME ## _pixels2_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
874 int src_stride1, int src_stride2, int h){\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
875 int i;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
876 for(i=0; i<h; i++){\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
877 uint32_t a,b;\ |
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
878 a= AV_RN16(&src1[i*src_stride1 ]);\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
879 b= AV_RN16(&src2[i*src_stride2 ]);\ |
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
880 OP(*((uint16_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
881 }\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
882 }\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
883 \ |
651 | 884 static inline void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ |
885 int src_stride1, int src_stride2, int h){\ | |
886 OPNAME ## _pixels8_l2(dst , src1 , src2 , dst_stride, src_stride1, src_stride2, h);\ | |
887 OPNAME ## _pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\ | |
888 }\ | |
889 \ | |
890 static inline void OPNAME ## _no_rnd_pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ | |
891 int src_stride1, int src_stride2, int h){\ | |
892 OPNAME ## _no_rnd_pixels8_l2(dst , src1 , src2 , dst_stride, src_stride1, src_stride2, h);\ | |
893 OPNAME ## _no_rnd_pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\ | |
894 }\ | |
895 \ | |
859 | 896 static inline void OPNAME ## _no_rnd_pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
651 | 897 OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\ |
898 }\ | |
899 \ | |
859 | 900 static inline void OPNAME ## _pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
651 | 901 OPNAME ## _pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\ |
902 }\ | |
903 \ | |
859 | 904 static inline void OPNAME ## _no_rnd_pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
651 | 905 OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\ |
906 }\ | |
907 \ | |
859 | 908 static inline void OPNAME ## _pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
651 | 909 OPNAME ## _pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\ |
385 | 910 }\ |
911 \ | |
11783 | 912 static inline void OPNAME ## _pixels8_l4(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\ |
651 | 913 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ |
914 int i;\ | |
915 for(i=0; i<h; i++){\ | |
916 uint32_t a, b, c, d, l0, l1, h0, h1;\ | |
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
917 a= AV_RN32(&src1[i*src_stride1]);\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
918 b= AV_RN32(&src2[i*src_stride2]);\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
919 c= AV_RN32(&src3[i*src_stride3]);\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
920 d= AV_RN32(&src4[i*src_stride4]);\ |
651 | 921 l0= (a&0x03030303UL)\ |
922 + (b&0x03030303UL)\ | |
923 + 0x02020202UL;\ | |
924 h0= ((a&0xFCFCFCFCUL)>>2)\ | |
925 + ((b&0xFCFCFCFCUL)>>2);\ | |
926 l1= (c&0x03030303UL)\ | |
927 + (d&0x03030303UL);\ | |
928 h1= ((c&0xFCFCFCFCUL)>>2)\ | |
929 + ((d&0xFCFCFCFCUL)>>2);\ | |
930 OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ | |
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
931 a= AV_RN32(&src1[i*src_stride1+4]);\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
932 b= AV_RN32(&src2[i*src_stride2+4]);\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
933 c= AV_RN32(&src3[i*src_stride3+4]);\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
934 d= AV_RN32(&src4[i*src_stride4+4]);\ |
651 | 935 l0= (a&0x03030303UL)\ |
936 + (b&0x03030303UL)\ | |
937 + 0x02020202UL;\ | |
938 h0= ((a&0xFCFCFCFCUL)>>2)\ | |
939 + ((b&0xFCFCFCFCUL)>>2);\ | |
940 l1= (c&0x03030303UL)\ | |
941 + (d&0x03030303UL);\ | |
942 h1= ((c&0xFCFCFCFCUL)>>2)\ | |
943 + ((d&0xFCFCFCFCUL)>>2);\ | |
944 OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ | |
945 }\ | |
946 }\ | |
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
947 \ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
948 static inline void OPNAME ## _pixels4_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
949 OPNAME ## _pixels4_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
950 }\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
951 \ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
952 static inline void OPNAME ## _pixels4_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
953 OPNAME ## _pixels4_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
954 }\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
955 \ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
956 static inline void OPNAME ## _pixels2_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
957 OPNAME ## _pixels2_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
958 }\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
959 \ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
960 static inline void OPNAME ## _pixels2_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
961 OPNAME ## _pixels2_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
962 }\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
963 \ |
11783 | 964 static inline void OPNAME ## _no_rnd_pixels8_l4(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\ |
651 | 965 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ |
385 | 966 int i;\ |
967 for(i=0; i<h; i++){\ | |
651 | 968 uint32_t a, b, c, d, l0, l1, h0, h1;\ |
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
969 a= AV_RN32(&src1[i*src_stride1]);\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
970 b= AV_RN32(&src2[i*src_stride2]);\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
971 c= AV_RN32(&src3[i*src_stride3]);\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
972 d= AV_RN32(&src4[i*src_stride4]);\ |
651 | 973 l0= (a&0x03030303UL)\ |
974 + (b&0x03030303UL)\ | |
975 + 0x01010101UL;\ | |
976 h0= ((a&0xFCFCFCFCUL)>>2)\ | |
977 + ((b&0xFCFCFCFCUL)>>2);\ | |
978 l1= (c&0x03030303UL)\ | |
979 + (d&0x03030303UL);\ | |
980 h1= ((c&0xFCFCFCFCUL)>>2)\ | |
981 + ((d&0xFCFCFCFCUL)>>2);\ | |
982 OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ | |
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
983 a= AV_RN32(&src1[i*src_stride1+4]);\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
984 b= AV_RN32(&src2[i*src_stride2+4]);\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
985 c= AV_RN32(&src3[i*src_stride3+4]);\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
986 d= AV_RN32(&src4[i*src_stride4+4]);\ |
651 | 987 l0= (a&0x03030303UL)\ |
988 + (b&0x03030303UL)\ | |
989 + 0x01010101UL;\ | |
990 h0= ((a&0xFCFCFCFCUL)>>2)\ | |
991 + ((b&0xFCFCFCFCUL)>>2);\ | |
992 l1= (c&0x03030303UL)\ | |
993 + (d&0x03030303UL);\ | |
994 h1= ((c&0xFCFCFCFCUL)>>2)\ | |
995 + ((d&0xFCFCFCFCUL)>>2);\ | |
996 OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ | |
385 | 997 }\ |
998 }\ | |
11783 | 999 static inline void OPNAME ## _pixels16_l4(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\ |
651 | 1000 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ |
1001 OPNAME ## _pixels8_l4(dst , src1 , src2 , src3 , src4 , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\ | |
1002 OPNAME ## _pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\ | |
1003 }\ | |
11783 | 1004 static inline void OPNAME ## _no_rnd_pixels16_l4(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\ |
651 | 1005 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ |
1006 OPNAME ## _no_rnd_pixels8_l4(dst , src1 , src2 , src3 , src4 , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\ | |
1007 OPNAME ## _no_rnd_pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\ | |
1008 }\ | |
385 | 1009 \ |
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1010 static inline void OPNAME ## _pixels2_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1011 {\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1012 int i, a0, b0, a1, b1;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1013 a0= pixels[0];\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1014 b0= pixels[1] + 2;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1015 a0 += b0;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1016 b0 += pixels[2];\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1017 \ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1018 pixels+=line_size;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1019 for(i=0; i<h; i+=2){\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1020 a1= pixels[0];\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1021 b1= pixels[1];\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1022 a1 += b1;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1023 b1 += pixels[2];\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1024 \ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1025 block[0]= (a1+a0)>>2; /* FIXME non put */\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1026 block[1]= (b1+b0)>>2;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1027 \ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1028 pixels+=line_size;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1029 block +=line_size;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1030 \ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1031 a0= pixels[0];\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1032 b0= pixels[1] + 2;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1033 a0 += b0;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1034 b0 += pixels[2];\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1035 \ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1036 block[0]= (a1+a0)>>2;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1037 block[1]= (b1+b0)>>2;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1038 pixels+=line_size;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1039 block +=line_size;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1040 }\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1041 }\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1042 \ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
/* Half-pel (x+1/2, y+1/2) average of four neighbouring pixels, 4 pixels     */\
/* per call, computed bit-parallel on one 32-bit word per row: l0/l1 hold    */\
/* the low 2 bits of each byte lane (plus rounding), h0/h1 the high 6 bits   */\
/* pre-shifted, so the 4-way sum never overflows a byte lane.                */\
/* NOTE(review): the i+=2 loop processes two rows per pass, so h is          */\
/* presumably always even here — confirm against callers.                    */\
static inline void OPNAME ## _pixels4_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
        int i;\
        const uint32_t a= AV_RN32(pixels  );\
        const uint32_t b= AV_RN32(pixels+1);\
        uint32_t l0=  (a&0x03030303UL)\
                    + (b&0x03030303UL)\
                    + 0x02020202UL; /* +2 per lane: round-to-nearest for the later >>2 */\
        uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
                   + ((b&0xFCFCFCFCUL)>>2);\
        uint32_t l1,h1; /* same partial sums for the row below */\
\
        pixels+=line_size;\
        for(i=0; i<h; i+=2){ /* two output rows per iteration, reusing l/h pairs */\
            uint32_t a= AV_RN32(pixels  );\
            uint32_t b= AV_RN32(pixels+1);\
            l1=  (a&0x03030303UL)\
               + (b&0x03030303UL);\
            h1= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
            a= AV_RN32(pixels  );\
            b= AV_RN32(pixels+1);\
            l0=  (a&0x03030303UL)\
               + (b&0x03030303UL)\
               + 0x02020202UL;\
            h0= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
        }\
}\
\
/* 8-pixel-wide variant of the half-pel (x+1/2, y+1/2) average: the j loop    */\
/* runs the 4-wide bit-parallel kernel over two 4-byte columns; the pointer   */\
/* fixups at the bottom (+=4-line_size*...) rewind to the top of the next     */\
/* column.  Arithmetic is identical to the pixels4 version above.             */\
static inline void OPNAME ## _pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int j;\
    for(j=0; j<2; j++){\
        int i;\
        const uint32_t a= AV_RN32(pixels  );\
        const uint32_t b= AV_RN32(pixels+1);\
        uint32_t l0=  (a&0x03030303UL)\
                    + (b&0x03030303UL)\
                    + 0x02020202UL; /* rounding: +2 per byte lane before >>2 */\
        uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
                   + ((b&0xFCFCFCFCUL)>>2);\
        uint32_t l1,h1;\
\
        pixels+=line_size;\
        for(i=0; i<h; i+=2){ /* two output rows per pass */\
            uint32_t a= AV_RN32(pixels  );\
            uint32_t b= AV_RN32(pixels+1);\
            l1=  (a&0x03030303UL)\
               + (b&0x03030303UL);\
            h1= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
            a= AV_RN32(pixels  );\
            b= AV_RN32(pixels+1);\
            l0=  (a&0x03030303UL)\
               + (b&0x03030303UL)\
               + 0x02020202UL;\
            h0= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
        }\
        pixels+=4-line_size*(h+1); /* back to top, right 4 bytes: second column */\
        block +=4-line_size*h;\
    }\
}\
\
/* Same as pixels8_xy2 but with the "no rounding" bias: 0x01010101 instead    */\
/* of 0x02020202 per row pair, i.e. the 4-way average truncates rather than   */\
/* rounds to nearest (needed by the no_rnd MC variants).                      */\
static inline void OPNAME ## _no_rnd_pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int j;\
    for(j=0; j<2; j++){ /* two 4-byte columns make the 8-wide block */\
        int i;\
        const uint32_t a= AV_RN32(pixels  );\
        const uint32_t b= AV_RN32(pixels+1);\
        uint32_t l0=  (a&0x03030303UL)\
                    + (b&0x03030303UL)\
                    + 0x01010101UL; /* +1 per lane: truncating variant */\
        uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
                   + ((b&0xFCFCFCFCUL)>>2);\
        uint32_t l1,h1;\
\
        pixels+=line_size;\
        for(i=0; i<h; i+=2){\
            uint32_t a= AV_RN32(pixels  );\
            uint32_t b= AV_RN32(pixels+1);\
            l1=  (a&0x03030303UL)\
               + (b&0x03030303UL);\
            h1= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
            a= AV_RN32(pixels  );\
            b= AV_RN32(pixels+1);\
            l0=  (a&0x03030303UL)\
               + (b&0x03030303UL)\
               + 0x01010101UL;\
            h0= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
        }\
        pixels+=4-line_size*(h+1);\
        block +=4-line_size*h;\
    }\
}\
\
/* Build the 16-wide variants by running the 8-wide kernels twice. */\
CALL_2X_PIXELS(OPNAME ## _pixels16_c  , OPNAME ## _pixels8_c  , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels8_x2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels8_y2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels8_xy2_c, 8)\
av_unused CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_c  , OPNAME ## _pixels8_c  , 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels8_x2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels8_y2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels8_xy2_c, 8)\

/* "avg" combines the new value with what is already in the destination
 * (rounded 32-bit SIMD-in-register average); "put" simply overwrites. */
#define op_avg(a, b) a = rnd_avg32(a, b)
#endif /* NOTE(review): closes a conditional opened before this chunk */
#define op_put(a, b) a = b

/* Instantiate the whole pixel-op family twice: avg_* and put_*. */
PIXOP2(avg, op_avg)
PIXOP2(put, op_put)
#undef op_avg
#undef op_put

/* For plain "put" the rounding bias makes no difference, so the
 * no-rounding names are simple aliases. */
#define put_no_rnd_pixels8_c  put_pixels8_c
#define put_no_rnd_pixels16_c put_pixels16_c

/* Scalar rounded averages used by the interpolation helpers below. */
#define avg2(a,b) ((a+b+1)>>1)
#define avg4(a,b,c,d) ((a+b+c+d+2)>>2)

/* Adapter to the common 5-argument MC signature: forwards to the
 * three-stride put_no_rnd_pixels16_l2 with all strides equal. */
static void put_no_rnd_pixels16_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h){
    put_no_rnd_pixels16_l2(dst, a, b, stride, stride, stride, h);
}
1188 | |
/* 8-wide counterpart of the adapter above: forwards to
 * put_no_rnd_pixels8_l2 using the same stride for dst and both sources. */
static void put_no_rnd_pixels8_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h){
    put_no_rnd_pixels8_l2(dst, a, b, stride, stride, stride, h);
}
753 | 1192 |
/**
 * One-vector GMC, C reference: bilinearly interpolates an 8-pixel-wide
 * block at 1/16-pel position (x16, y16).
 * The four weights A..D sum to 256, so >>8 renormalizes the result;
 * @param rounder  value added before the shift (caller picks the rounding).
 * Reads a 9x(h+1) source area starting at src.
 */
static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder)
{
    const int A=(16-x16)*(16-y16);
    const int B=(   x16)*(16-y16);
    const int C=(16-x16)*(   y16);
    const int D=(   x16)*(   y16);
    int y;

    for(y=0; y<h; y++){
        int x;
        /* same arithmetic as the historical unrolled version, one row at a time */
        for(x=0; x<8; x++)
            dst[x]= (A*src[x] + B*src[x+1] + C*src[stride+x] + D*src[stride+x+1] + rounder)>>8;
        dst+= stride;
        src+= stride;
    }
}
1215 | |
/**
 * Global motion compensation, C reference, one 8-pixel row segment per y.
 * The source position is fixed point: per output pixel it advances by
 * (dxx, dyx), per output row the row start (ox, oy) advances by (dxy, dyy).
 * The integer pixel coordinate is (v>>16)>>shift; the fractional part has
 * s = 1<<shift steps.  Accesses outside [0,width)x[0,height) are clamped
 * to the border.
 * @param r  rounding constant added before the >>(2*shift)
 */
void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
              int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
{
    int y, vx, vy;
    const int s= 1<<shift;

    width--;   /* from here on width/height are the last valid coordinates */
    height--;

    for(y=0; y<h; y++){
        int x;

        vx= ox;
        vy= oy;
        for(x=0; x<8; x++){ //XXX FIXME optimize
            int src_x, src_y, frac_x, frac_y, index;

            src_x= vx>>16;
            src_y= vy>>16;
            frac_x= src_x&(s-1); /* fraction taken below the `shift` bits */
            frac_y= src_y&(s-1);
            src_x>>=shift;
            src_y>>=shift;

            if((unsigned)src_x < width){ /* unsigned compare also rejects negatives */
                if((unsigned)src_y < height){
                    /* fully inside: bilinear interpolation of the 2x2 neighbourhood */
                    index= src_x + src_y*stride;
                    dst[y*stride + x]= ( ( src[index         ]*(s-frac_x)
                                         + src[index       +1]*   frac_x )*(s-frac_y)
                                       + ( src[index+stride  ]*(s-frac_x)
                                         + src[index+stride+1]*   frac_x )*   frac_y
                                       + r)>>(shift*2);
                }else{
                    /* y outside: clamp the row, interpolate horizontally only */
                    index= src_x + av_clip(src_y, 0, height)*stride;
                    dst[y*stride + x]= ( ( src[index         ]*(s-frac_x)
                                         + src[index       +1]*   frac_x )*s
                                       + r)>>(shift*2);
                }
            }else{
                if((unsigned)src_y < height){
                    /* x outside: clamp the column, interpolate vertically only */
                    index= av_clip(src_x, 0, width) + src_y*stride;
                    dst[y*stride + x]= ( ( src[index         ]*(s-frac_y)
                                         + src[index+stride  ]*   frac_y )*s
                                       + r)>>(shift*2);
                }else{
                    /* both outside: nearest border pixel, no interpolation */
                    index= av_clip(src_x, 0, width) + av_clip(src_y, 0, height)*stride;
                    dst[y*stride + x]=    src[index         ];
                }
            }

            vx+= dxx;
            vy+= dyx;
        }
        ox += dxy;
        oy += dyy;
    }
}
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1273 |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
/* Thirdpel MC (SVQ3), integer (0,0) position: a plain copy, dispatched on
 * block width to the shared put_pixelsN_c kernels.  Widths other than
 * 2/4/8/16 are silently ignored (no default case). */
static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    switch(width){
    case 2: put_pixels2_c (dst, src, stride, height); break;
    case 4: put_pixels4_c (dst, src, stride, height); break;
    case 8: put_pixels8_c (dst, src, stride, height); break;
    case 16:put_pixels16_c(dst, src, stride, height); break;
    }
}
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1282 |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
/* Thirdpel MC, horizontal 1/3 position: dst = round((2*a + b)/3) where b is
 * the right neighbour.  683/2048 approximates 1/3 (683*3 = 2049). */
static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        const uint8_t *s = src + row*stride;
        uint8_t       *d = dst + row*stride;
        for (col = 0; col < width; col++)
            d[col] = (683*(2*s[col] + s[col+1] + 1)) >> 11;
    }
}
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1293 |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
/* Thirdpel MC, horizontal 2/3 position: dst = round((a + 2*b)/3) where b is
 * the right neighbour (mirror of mc10). */
static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        const uint8_t *s = src + row*stride;
        uint8_t       *d = dst + row*stride;
        for (col = 0; col < width; col++)
            d[col] = (683*(s[col] + 2*s[col+1] + 1)) >> 11;
    }
}
2967 | 1304 |
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
/* Thirdpel MC, vertical 1/3 position: dst = round((2*a + c)/3) where c is
 * the pixel one row below; reads height+1 source rows. */
static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        const uint8_t *s = src + row*stride;
        uint8_t       *d = dst + row*stride;
        for (col = 0; col < width; col++)
            d[col] = (683*(2*s[col] + s[col+stride] + 1)) >> 11;
    }
}
2967 | 1315 |
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
/* Thirdpel MC, (1/3, 1/3) position: weighted 2x2 average with weights
 * 4,3,3,2 over {a, right, below, below-right}; 2731/32768 approximates
 * 1/12 (2731*12 = 32772). */
static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        const uint8_t *s = src + row*stride;
        uint8_t       *d = dst + row*stride;
        for (col = 0; col < width; col++)
            d[col] = (2731*(4*s[col] + 3*s[col+1] + 3*s[col+stride] + 2*s[col+stride+1] + 6)) >> 15;
    }
}
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1326 |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
/* Thirdpel MC, (1/3, 2/3) position: weighted 2x2 average with weights
 * 3,2,4,3 over {a, right, below, below-right}, normalized by ~1/12. */
static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        const uint8_t *s = src + row*stride;
        uint8_t       *d = dst + row*stride;
        for (col = 0; col < width; col++)
            d[col] = (2731*(3*s[col] + 2*s[col+1] + 4*s[col+stride] + 3*s[col+stride+1] + 6)) >> 15;
    }
}
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1337 |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
/* Thirdpel MC, vertical 2/3 position: dst = round((a + 2*c)/3) where c is
 * the pixel one row below (mirror of mc01); reads height+1 source rows. */
static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        const uint8_t *s = src + row*stride;
        uint8_t       *d = dst + row*stride;
        for (col = 0; col < width; col++)
            d[col] = (683*(s[col] + 2*s[col+stride] + 1)) >> 11;
    }
}
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1348 |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
/**
 * SVQ3 thirdpel interpolation at (1/3, 2/3) (put only).
 * Bilinear blend of the 2x2 neighbourhood with weights 3/4/2/3,
 * scaled by 2731/32768, a fixed-point approximation of 1/12.
 */
static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++) {
            const int tl = src[x];
            const int tr = src[x+1];
            const int bl = src[x+stride];
            const int br = src[x+stride+1];
            dst[x] = (2731*(3*tl + 4*tr + 2*bl + 3*br + 6)) >> 15;
        }
        src += stride;
        dst += stride;
    }
}
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1359 |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
/**
 * SVQ3 thirdpel interpolation at (2/3, 2/3) (put only).
 * Bilinear blend of the 2x2 neighbourhood with weights 2/3/3/4,
 * scaled by 2731/32768, a fixed-point approximation of 1/12.
 */
static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++) {
            const int tl = src[x];
            const int tr = src[x+1];
            const int bl = src[x+stride];
            const int br = src[x+stride+1];
            dst[x] = (2731*(2*tl + 3*tr + 3*bl + 4*br + 6)) >> 15;
        }
        src += stride;
        dst += stride;
    }
}
1319 | 1370 |
/**
 * Thirdpel (0,0) averaging case: a plain pixel average, dispatched to
 * the fixed-width avg_pixels*_c helper for widths 2/4/8/16.
 * Other widths are silently ignored, as in the switch-based original.
 */
static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    if (width == 2)
        avg_pixels2_c (dst, src, stride, height);
    else if (width == 4)
        avg_pixels4_c (dst, src, stride, height);
    else if (width == 8)
        avg_pixels8_c (dst, src, stride, height);
    else if (width == 16)
        avg_pixels16_c(dst, src, stride, height);
}
1379 | |
/**
 * SVQ3 thirdpel interpolation, horizontal 1/3 position, averaged into dst.
 * The 683/2048 factor approximates division by 3; the interpolated value
 * is then rounded-averaged with the existing destination pixel.
 */
static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++) {
            const int t = (683*(2*src[x] + src[x+1] + 1)) >> 11;
            dst[x] = (dst[x] + t + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
1390 | |
/**
 * SVQ3 thirdpel interpolation, horizontal 2/3 position, averaged into dst.
 */
static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++) {
            const int t = (683*(src[x] + 2*src[x+1] + 1)) >> 11;
            dst[x] = (dst[x] + t + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
2967 | 1401 |
/**
 * SVQ3 thirdpel interpolation, vertical 1/3 position, averaged into dst.
 */
static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++) {
            const int t = (683*(2*src[x] + src[x+stride] + 1)) >> 11;
            dst[x] = (dst[x] + t + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
2967 | 1412 |
/**
 * SVQ3 thirdpel interpolation at (1/3, 1/3), averaged into dst.
 * 2x2 bilinear blend with weights 4/3/3/2 and 2731/32768 (~1/12) scaling.
 */
static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++) {
            const int t = (2731*(4*src[x] + 3*src[x+1] + 3*src[x+stride] + 2*src[x+stride+1] + 6)) >> 15;
            dst[x] = (dst[x] + t + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
1423 | |
/**
 * SVQ3 thirdpel interpolation at (1/3, 2/3), averaged into dst.
 * 2x2 bilinear blend with weights 3/2/4/3 and 2731/32768 (~1/12) scaling.
 */
static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++) {
            const int t = (2731*(3*src[x] + 2*src[x+1] + 4*src[x+stride] + 3*src[x+stride+1] + 6)) >> 15;
            dst[x] = (dst[x] + t + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
1434 | |
/**
 * SVQ3 thirdpel interpolation, vertical 2/3 position, averaged into dst.
 */
static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++) {
            const int t = (683*(src[x] + 2*src[x+stride] + 1)) >> 11;
            dst[x] = (dst[x] + t + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
1445 | |
/**
 * SVQ3 thirdpel interpolation at (2/3, 1/3), averaged into dst.
 * 2x2 bilinear blend with weights 3/4/2/3 and 2731/32768 (~1/12) scaling.
 */
static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++) {
            const int t = (2731*(3*src[x] + 4*src[x+1] + 2*src[x+stride] + 3*src[x+stride+1] + 6)) >> 15;
            dst[x] = (dst[x] + t + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
1456 | |
/**
 * SVQ3 thirdpel interpolation at (2/3, 2/3), averaged into dst.
 * 2x2 bilinear blend with weights 2/3/3/4 and 2731/32768 (~1/12) scaling.
 */
static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++) {
            const int t = (2731*(2*src[x] + 3*src[x+1] + 3*src[x+stride] + 4*src[x+stride+1] + 6)) >> 15;
            dst[x] = (dst[x] + t + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
/* Disabled scaffolding (never compiled): a macro that would generate
 * fixed-width wrappers around the variable-width put_tpel_pixels_mc??_c()
 * helpers above.  Kept under #if 0 exactly as in the original source. */
#if 0
#define TPEL_WIDTH(width)\
static void put_tpel_pixels ## width ## _mc00_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    void put_tpel_pixels_mc00_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc10_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    void put_tpel_pixels_mc10_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc20_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    void put_tpel_pixels_mc20_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc01_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    void put_tpel_pixels_mc01_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc11_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    void put_tpel_pixels_mc11_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc21_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    void put_tpel_pixels_mc21_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc02_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    void put_tpel_pixels_mc02_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc12_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    void put_tpel_pixels_mc12_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc22_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    void put_tpel_pixels_mc22_c(dst, src, stride, width, height);}
#endif
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1488 |
/**
 * H264_CHROMA_MC(OPNAME, OP) expands to three chroma motion-compensation
 * functions, OPNAME##h264_chroma_mc{2,4,8}_c, processing rows of 2, 4 or
 * 8 pixels.  A..D are the bilinear weights (8-x)(8-y), x(8-y), (8-x)y
 * and xy for the fractional offset (x,y), 0 <= x,y < 8 (enforced by the
 * assert).  When D == 0 the 2-D filter degenerates to a 1-D one:
 * E = B+C is the single neighbour weight and 'step' picks the vertical
 * (stride) or horizontal (1) neighbour, avoiding reads past the needed
 * area.  OP(dest, sum) is the store operator supplied at instantiation
 * (e.g. put or avg with 6-bit rounding).
 */
#define H264_CHROMA_MC(OPNAME, OP)\
static void OPNAME ## h264_chroma_mc2_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
    const int A=(8-x)*(8-y);\
    const int B=( x)*(8-y);\
    const int C=(8-x)*( y);\
    const int D=( x)*( y);\
    int i;\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
    \
    if(D){\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
            dst+= stride;\
            src+= stride;\
        }\
    }else{\
        const int E= B+C;\
        const int step= C ? stride : 1;\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + E*src[step+0]));\
            OP(dst[1], (A*src[1] + E*src[step+1]));\
            dst+= stride;\
            src+= stride;\
        }\
    }\
}\
\
static void OPNAME ## h264_chroma_mc4_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
    const int A=(8-x)*(8-y);\
    const int B=( x)*(8-y);\
    const int C=(8-x)*( y);\
    const int D=( x)*( y);\
    int i;\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
    \
    if(D){\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
            OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
            OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
            dst+= stride;\
            src+= stride;\
        }\
    }else{\
        const int E= B+C;\
        const int step= C ? stride : 1;\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + E*src[step+0]));\
            OP(dst[1], (A*src[1] + E*src[step+1]));\
            OP(dst[2], (A*src[2] + E*src[step+2]));\
            OP(dst[3], (A*src[3] + E*src[step+3]));\
            dst+= stride;\
            src+= stride;\
        }\
    }\
}\
\
static void OPNAME ## h264_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
    const int A=(8-x)*(8-y);\
    const int B=( x)*(8-y);\
    const int C=(8-x)*( y);\
    const int D=( x)*( y);\
    int i;\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
    \
    if(D){\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
            OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
            OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
            OP(dst[4], (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5]));\
            OP(dst[5], (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6]));\
            OP(dst[6], (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7]));\
            OP(dst[7], (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8]));\
            dst+= stride;\
            src+= stride;\
        }\
    }else{\
        const int E= B+C;\
        const int step= C ? stride : 1;\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + E*src[step+0]));\
            OP(dst[1], (A*src[1] + E*src[step+1]));\
            OP(dst[2], (A*src[2] + E*src[step+2]));\
            OP(dst[3], (A*src[3] + E*src[step+3]));\
            OP(dst[4], (A*src[4] + E*src[step+4]));\
            OP(dst[5], (A*src[5] + E*src[step+5]));\
            OP(dst[6], (A*src[6] + E*src[step+6]));\
            OP(dst[7], (A*src[7] + E*src[step+7]));\
            dst+= stride;\
            src+= stride;\
        }\
    }\
}
1589 | |
/* Store operators for H264_CHROMA_MC: round the 6-bit fixed-point sum
 * ((b)+32)>>6 and either overwrite the pixel (put) or round-average it
 * with the existing one (avg).  Instantiate both variants, then drop
 * the helper macros. */
#define op_avg(a, b) a = (((a)+(((b) + 32)>>6)+1)>>1)
#define op_put(a, b) a = (((b) + 32)>>6)

H264_CHROMA_MC(put_       , op_put)
H264_CHROMA_MC(avg_       , op_avg)
#undef op_avg
#undef op_put
1597 | |
9439
ef3a7b711cc0
Rename put_no_rnd_h264_chroma* to reflect its usage in VC1 only
conrad
parents:
9437
diff
changeset
|
/**
 * VC-1 no-rounding 8-wide bilinear chroma MC (put).
 * Same bilinear weights A..D as the H.264 chroma MC, but with the
 * VC-1 "no rounding" bias of 32-4 = 28 before the >>6 normalization.
 */
static void put_no_rnd_vc1_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){
    const int A=(8-x)*(8-y);
    const int B=( x)*(8-y);
    const int C=(8-x)*( y);
    const int D=( x)*( y);
    int i, j;

    assert(x<8 && y<8 && x>=0 && y>=0);

    for (i = 0; i < h; i++) {
        /* identical to the unrolled original: dst[0..7] in order */
        for (j = 0; j < 8; j++)
            dst[j] = (A*src[j] + B*src[j+1] + C*src[stride+j] + D*src[stride+j+1] + 32 - 4) >> 6;
        dst += stride;
        src += stride;
    }
}
1621 | |
/**
 * VC-1 no-rounding 8-wide bilinear chroma MC (avg).
 * Same filter as the put variant, but the result is merged with the
 * existing destination pixel via avg2().
 */
static void avg_no_rnd_vc1_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){
    const int A=(8-x)*(8-y);
    const int B=( x)*(8-y);
    const int C=(8-x)*( y);
    const int D=( x)*( y);
    int i, j;

    assert(x<8 && y<8 && x>=0 && y>=0);

    for (i = 0; i < h; i++) {
        /* identical to the unrolled original: dst[0..7] in order */
        for (j = 0; j < 8; j++)
            dst[j] = avg2(dst[j], ((A*src[j] + B*src[j+1] + C*src[stride+j] + D*src[stride+j+1] + 32 - 4) >> 6));
        dst += stride;
        src += stride;
    }
}
1645 | |
651 | 1646 #define QPEL_MC(r, OPNAME, RND, OP) \ |
1064 | 1647 static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\ |
4176 | 1648 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ |
651 | 1649 int i;\ |
1650 for(i=0; i<h; i++)\ | |
1651 {\ | |
1652 OP(dst[0], (src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]));\ | |
1653 OP(dst[1], (src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]));\ | |
1654 OP(dst[2], (src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]));\ | |
1655 OP(dst[3], (src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]));\ | |
1656 OP(dst[4], (src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]));\ | |
1657 OP(dst[5], (src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]));\ | |
1658 OP(dst[6], (src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]));\ | |
1659 OP(dst[7], (src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]));\ | |
1660 dst+=dstStride;\ | |
1661 src+=srcStride;\ | |
1662 }\ | |
1663 }\ | |
1664 \ | |
1064 | 1665 static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ |
984 | 1666 const int w=8;\ |
4176 | 1667 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ |
651 | 1668 int i;\ |
1669 for(i=0; i<w; i++)\ | |
1670 {\ | |
1671 const int src0= src[0*srcStride];\ | |
1672 const int src1= src[1*srcStride];\ | |
1673 const int src2= src[2*srcStride];\ | |
1674 const int src3= src[3*srcStride];\ | |
1675 const int src4= src[4*srcStride];\ | |
1676 const int src5= src[5*srcStride];\ | |
1677 const int src6= src[6*srcStride];\ | |
1678 const int src7= src[7*srcStride];\ | |
1679 const int src8= src[8*srcStride];\ | |
1680 OP(dst[0*dstStride], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\ | |
1681 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\ | |
1682 OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\ | |
1683 OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\ | |
1684 OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\ | |
1685 OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\ | |
1686 OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\ | |
1687 OP(dst[7*dstStride], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\ | |
1688 dst++;\ | |
1689 src++;\ | |
1690 }\ | |
1691 }\ | |
1692 \ | |
1064 | 1693 static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\ |
4176 | 1694 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ |
651 | 1695 int i;\ |
954 | 1696 \ |
651 | 1697 for(i=0; i<h; i++)\ |
1698 {\ | |
1699 OP(dst[ 0], (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]));\ | |
1700 OP(dst[ 1], (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]));\ | |
1701 OP(dst[ 2], (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]));\ | |
1702 OP(dst[ 3], (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]));\ | |
1703 OP(dst[ 4], (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]));\ | |
1704 OP(dst[ 5], (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 9]));\ | |
1705 OP(dst[ 6], (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 9])*3 - (src[ 3]+src[10]));\ | |
1706 OP(dst[ 7], (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 9])*6 + (src[ 5]+src[10])*3 - (src[ 4]+src[11]));\ | |
1707 OP(dst[ 8], (src[ 8]+src[ 9])*20 - (src[ 7]+src[10])*6 + (src[ 6]+src[11])*3 - (src[ 5]+src[12]));\ | |
1708 OP(dst[ 9], (src[ 9]+src[10])*20 - (src[ 8]+src[11])*6 + (src[ 7]+src[12])*3 - (src[ 6]+src[13]));\ | |
1709 OP(dst[10], (src[10]+src[11])*20 - (src[ 9]+src[12])*6 + (src[ 8]+src[13])*3 - (src[ 7]+src[14]));\ | |
1710 OP(dst[11], (src[11]+src[12])*20 - (src[10]+src[13])*6 + (src[ 9]+src[14])*3 - (src[ 8]+src[15]));\ | |
1711 OP(dst[12], (src[12]+src[13])*20 - (src[11]+src[14])*6 + (src[10]+src[15])*3 - (src[ 9]+src[16]));\ | |
1712 OP(dst[13], (src[13]+src[14])*20 - (src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]));\ | |
1713 OP(dst[14], (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]));\ | |
1714 OP(dst[15], (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]));\ | |
1715 dst+=dstStride;\ | |
1716 src+=srcStride;\ | |
1717 }\ | |
255 | 1718 }\ |
1719 \ | |
1064 | 1720 static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ |
4176 | 1721 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ |
651 | 1722 int i;\ |
954 | 1723 const int w=16;\ |
651 | 1724 for(i=0; i<w; i++)\ |
1725 {\ | |
1726 const int src0= src[0*srcStride];\ | |
1727 const int src1= src[1*srcStride];\ | |
1728 const int src2= src[2*srcStride];\ | |
1729 const int src3= src[3*srcStride];\ | |
1730 const int src4= src[4*srcStride];\ | |
1731 const int src5= src[5*srcStride];\ | |
1732 const int src6= src[6*srcStride];\ | |
1733 const int src7= src[7*srcStride];\ | |
1734 const int src8= src[8*srcStride];\ | |
1735 const int src9= src[9*srcStride];\ | |
1736 const int src10= src[10*srcStride];\ | |
1737 const int src11= src[11*srcStride];\ | |
1738 const int src12= src[12*srcStride];\ | |
1739 const int src13= src[13*srcStride];\ | |
1740 const int src14= src[14*srcStride];\ | |
1741 const int src15= src[15*srcStride];\ | |
1742 const int src16= src[16*srcStride];\ | |
1743 OP(dst[ 0*dstStride], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\ | |
1744 OP(dst[ 1*dstStride], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\ | |
1745 OP(dst[ 2*dstStride], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\ | |
1746 OP(dst[ 3*dstStride], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\ | |
1747 OP(dst[ 4*dstStride], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\ | |
1748 OP(dst[ 5*dstStride], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\ | |
1749 OP(dst[ 6*dstStride], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\ | |
1750 OP(dst[ 7*dstStride], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\ | |
1751 OP(dst[ 8*dstStride], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\ | |
1752 OP(dst[ 9*dstStride], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\ | |
1753 OP(dst[10*dstStride], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\ | |
1754 OP(dst[11*dstStride], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\ | |
1755 OP(dst[12*dstStride], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\ | |
1756 OP(dst[13*dstStride], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\ | |
1757 OP(dst[14*dstStride], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\ | |
1758 OP(dst[15*dstStride], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\ | |
1759 dst++;\ | |
1760 src++;\ | |
1761 }\ | |
255 | 1762 }\ |
1763 \ | |
1064 | 1764 static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){\ |
1765 uint8_t half[64];\ | |
651 | 1766 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\ |
1767 OPNAME ## pixels8_l2(dst, src, half, stride, stride, 8, 8);\ | |
1768 }\ | |
1769 \ | |
1064 | 1770 static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){\ |
651 | 1771 OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\ |
255 | 1772 }\ |
1773 \ | |
1064 | 1774 static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){\ |
1775 uint8_t half[64];\ | |
651 | 1776 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\ |
1777 OPNAME ## pixels8_l2(dst, src+1, half, stride, stride, 8, 8);\ | |
1778 }\ | |
1779 \ | |
1064 | 1780 static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride){\ |
1781 uint8_t full[16*9];\ | |
1782 uint8_t half[64];\ | |
651 | 1783 copy_block9(full, src, 16, stride, 9);\ |
984 | 1784 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\ |
651 | 1785 OPNAME ## pixels8_l2(dst, full, half, stride, 16, 8, 8);\ |
1786 }\ | |
1787 \ | |
1064 | 1788 static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){\ |
1789 uint8_t full[16*9];\ | |
651 | 1790 copy_block9(full, src, 16, stride, 9);\ |
984 | 1791 OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\ |
255 | 1792 }\ |
1793 \ | |
1064 | 1794 static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, int stride){\ |
1795 uint8_t full[16*9];\ | |
1796 uint8_t half[64];\ | |
651 | 1797 copy_block9(full, src, 16, stride, 9);\ |
984 | 1798 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\ |
651 | 1799 OPNAME ## pixels8_l2(dst, full+16, half, stride, 16, 8, 8);\ |
1800 }\ | |
1064 | 1801 void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\ |
1802 uint8_t full[16*9];\ | |
1803 uint8_t halfH[72];\ | |
1804 uint8_t halfV[64];\ | |
1805 uint8_t halfHV[64];\ | |
651 | 1806 copy_block9(full, src, 16, stride, 9);\ |
1807 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ | |
984 | 1808 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\ |
1809 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ | |
651 | 1810 OPNAME ## pixels8_l4(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\ |
255 | 1811 }\ |
1064 | 1812 static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride){\ |
1813 uint8_t full[16*9];\ | |
1814 uint8_t halfH[72];\ | |
1815 uint8_t halfHV[64];\ | |
984 | 1816 copy_block9(full, src, 16, stride, 9);\ |
1817 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ | |
1818 put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\ | |
1819 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ | |
1820 OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\ | |
1821 }\ | |
1064 | 1822 void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\ |
1823 uint8_t full[16*9];\ | |
1824 uint8_t halfH[72];\ | |
1825 uint8_t halfV[64];\ | |
1826 uint8_t halfHV[64];\ | |
651 | 1827 copy_block9(full, src, 16, stride, 9);\ |
1828 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ | |
984 | 1829 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\ |
1830 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ | |
651 | 1831 OPNAME ## pixels8_l4(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\ |
255 | 1832 }\ |
1064 | 1833 static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride){\ |
1834 uint8_t full[16*9];\ | |
1835 uint8_t halfH[72];\ | |
1836 uint8_t halfHV[64];\ | |
984 | 1837 copy_block9(full, src, 16, stride, 9);\ |
1838 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ | |
1839 put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\ | |
1840 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ | |
1841 OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\ | |
1842 }\ | |
1064 | 1843 void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\ |
1844 uint8_t full[16*9];\ | |
1845 uint8_t halfH[72];\ | |
1846 uint8_t halfV[64];\ | |
1847 uint8_t halfHV[64];\ | |
651 | 1848 copy_block9(full, src, 16, stride, 9);\ |
1849 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ | |
984 | 1850 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\ |
1851 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ | |
651 | 1852 OPNAME ## pixels8_l4(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\ |
1853 }\ | |
1064 | 1854 static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride){\ |
1855 uint8_t full[16*9];\ | |
1856 uint8_t halfH[72];\ | |
1857 uint8_t halfHV[64];\ | |
984 | 1858 copy_block9(full, src, 16, stride, 9);\ |
1859 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ | |
1860 put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\ | |
1861 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ | |
1862 OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\ | |
1863 }\ | |
1064 | 1864 void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\ |
1865 uint8_t full[16*9];\ | |
1866 uint8_t halfH[72];\ | |
1867 uint8_t halfV[64];\ | |
1868 uint8_t halfHV[64];\ | |
651 | 1869 copy_block9(full, src, 16, stride, 9);\ |
1870 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full , 8, 16, 9);\ | |
984 | 1871 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\ |
1872 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ | |
651 | 1873 OPNAME ## pixels8_l4(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\ |
255 | 1874 }\ |
1064 | 1875 static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){\ |
1876 uint8_t full[16*9];\ | |
1877 uint8_t halfH[72];\ | |
1878 uint8_t halfHV[64];\ | |
984 | 1879 copy_block9(full, src, 16, stride, 9);\ |
1880 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ | |
1881 put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\ | |
1882 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ | |
1883 OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\ | |
1884 }\ | |
1064 | 1885 static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, int stride){\ |
1886 uint8_t halfH[72];\ | |
1887 uint8_t halfHV[64];\ | |
651 | 1888 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\ |
984 | 1889 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ |
651 | 1890 OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\ |
1891 }\ | |
1064 | 1892 static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, int stride){\ |
1893 uint8_t halfH[72];\ | |
1894 uint8_t halfHV[64];\ | |
651 | 1895 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\ |
984 | 1896 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ |
651 | 1897 OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\ |
1898 }\ | |
1064 | 1899 void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\ |
1900 uint8_t full[16*9];\ | |
1901 uint8_t halfH[72];\ | |
1902 uint8_t halfV[64];\ | |
1903 uint8_t halfHV[64];\ | |
651 | 1904 copy_block9(full, src, 16, stride, 9);\ |
1905 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ | |
984 | 1906 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\ |
1907 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ | |
651 | 1908 OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\ |
255 | 1909 }\ |
1064 | 1910 static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){\ |
1911 uint8_t full[16*9];\ | |
1912 uint8_t halfH[72];\ | |
984 | 1913 copy_block9(full, src, 16, stride, 9);\ |
1914 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ | |
1915 put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\ | |
1916 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\ | |
1917 }\ | |
1064 | 1918 void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\ |
1919 uint8_t full[16*9];\ | |
1920 uint8_t halfH[72];\ | |
1921 uint8_t halfV[64];\ | |
1922 uint8_t halfHV[64];\ | |
651 | 1923 copy_block9(full, src, 16, stride, 9);\ |
1924 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ | |
984 | 1925 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\ |
1926 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ | |
651 | 1927 OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\ |
1928 }\ | |
1064 | 1929 static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){\ |
1930 uint8_t full[16*9];\ | |
1931 uint8_t halfH[72];\ | |
984 | 1932 copy_block9(full, src, 16, stride, 9);\ |
1933 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ | |
1934 put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\ | |
1935 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\ | |
1936 }\ | |
1064 | 1937 static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){\ |
1938 uint8_t halfH[72];\ | |
651 | 1939 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\ |
984 | 1940 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\ |
651 | 1941 }\ |
1942 \ | |
1064 | 1943 static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, int stride){\ |
1944 uint8_t half[256];\ | |
651 | 1945 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\ |
1946 OPNAME ## pixels16_l2(dst, src, half, stride, stride, 16, 16);\ | |
1947 }\ | |
1948 \ | |
1064 | 1949 static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride){\ |
651 | 1950 OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\ |
1951 }\ | |
1952 \ | |
1064 | 1953 static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, int stride){\ |
1954 uint8_t half[256];\ | |
651 | 1955 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\ |
1956 OPNAME ## pixels16_l2(dst, src+1, half, stride, stride, 16, 16);\ | |
1957 }\ | |
1958 \ | |
1064 | 1959 static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride){\ |
1960 uint8_t full[24*17];\ | |
1961 uint8_t half[256];\ | |
651 | 1962 copy_block17(full, src, 24, stride, 17);\ |
954 | 1963 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\ |
651 | 1964 OPNAME ## pixels16_l2(dst, full, half, stride, 24, 16, 16);\ |
255 | 1965 }\ |
651 | 1966 \ |
1064 | 1967 static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, int stride){\ |
1968 uint8_t full[24*17];\ | |
651 | 1969 copy_block17(full, src, 24, stride, 17);\ |
954 | 1970 OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\ |
651 | 1971 }\ |
1972 \ | |
1064 | 1973 static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, int stride){\ |
1974 uint8_t full[24*17];\ | |
1975 uint8_t half[256];\ | |
651 | 1976 copy_block17(full, src, 24, stride, 17);\ |
954 | 1977 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\ |
651 | 1978 OPNAME ## pixels16_l2(dst, full+24, half, stride, 24, 16, 16);\ |
255 | 1979 }\ |
1064 | 1980 void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\ |
1981 uint8_t full[24*17];\ | |
1982 uint8_t halfH[272];\ | |
1983 uint8_t halfV[256];\ | |
1984 uint8_t halfHV[256];\ | |
651 | 1985 copy_block17(full, src, 24, stride, 17);\ |
1986 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ | |
954 | 1987 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\ |
1988 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ | |
651 | 1989 OPNAME ## pixels16_l4(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\ |
1990 }\ | |
1064 | 1991 static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride){\ |
1992 uint8_t full[24*17];\ | |
1993 uint8_t halfH[272];\ | |
1994 uint8_t halfHV[256];\ | |
984 | 1995 copy_block17(full, src, 24, stride, 17);\ |
1996 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ | |
1997 put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\ | |
1998 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ | |
1999 OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\ | |
2000 }\ | |
1064 | 2001 void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\ |
2002 uint8_t full[24*17];\ | |
2003 uint8_t halfH[272];\ | |
2004 uint8_t halfV[256];\ | |
2005 uint8_t halfHV[256];\ | |
651 | 2006 copy_block17(full, src, 24, stride, 17);\ |
2007 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ | |
954 | 2008 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\ |
2009 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ | |
651 | 2010 OPNAME ## pixels16_l4(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\ |
2011 }\ | |
1064 | 2012 static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride){\ |
2013 uint8_t full[24*17];\ | |
2014 uint8_t halfH[272];\ | |
2015 uint8_t halfHV[256];\ | |
984 | 2016 copy_block17(full, src, 24, stride, 17);\ |
2017 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ | |
2018 put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\ | |
2019 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ | |
2020 OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\ | |
2021 }\ | |
1064 | 2022 void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\ |
2023 uint8_t full[24*17];\ | |
2024 uint8_t halfH[272];\ | |
2025 uint8_t halfV[256];\ | |
2026 uint8_t halfHV[256];\ | |
651 | 2027 copy_block17(full, src, 24, stride, 17);\ |
2028 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ | |
954 | 2029 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\ |
2030 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ | |
651 | 2031 OPNAME ## pixels16_l4(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\ |
255 | 2032 }\ |
1064 | 2033 static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride){\ |
2034 uint8_t full[24*17];\ | |
2035 uint8_t halfH[272];\ | |
2036 uint8_t halfHV[256];\ | |
984 | 2037 copy_block17(full, src, 24, stride, 17);\ |
2038 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ | |
2039 put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\ | |
2040 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ | |
2041 OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\ | |
2042 }\ | |
1064 | 2043 void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\ |
2044 uint8_t full[24*17];\ | |
2045 uint8_t halfH[272];\ | |
2046 uint8_t halfV[256];\ | |
2047 uint8_t halfHV[256];\ | |
651 | 2048 copy_block17(full, src, 24, stride, 17);\ |
2049 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full , 16, 24, 17);\ | |
954 | 2050 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\ |
2051 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ | |
651 | 2052 OPNAME ## pixels16_l4(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\ |
2053 }\ | |
1064 | 2054 static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){\ |
2055 uint8_t full[24*17];\ | |
2056 uint8_t halfH[272];\ | |
2057 uint8_t halfHV[256];\ | |
984 | 2058 copy_block17(full, src, 24, stride, 17);\ |
2059 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ | |
2060 put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\ | |
2061 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ | |
2062 OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\ | |
2063 }\ | |
1064 | 2064 static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, int stride){\ |
2065 uint8_t halfH[272];\ | |
2066 uint8_t halfHV[256];\ | |
651 | 2067 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\ |
954 | 2068 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ |
651 | 2069 OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\ |
255 | 2070 }\ |
1064 | 2071 static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, int stride){\ |
2072 uint8_t halfH[272];\ | |
2073 uint8_t halfHV[256];\ | |
651 | 2074 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\ |
954 | 2075 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ |
651 | 2076 OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\ |
2077 }\ | |
1064 | 2078 void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\ |
2079 uint8_t full[24*17];\ | |
2080 uint8_t halfH[272];\ | |
2081 uint8_t halfV[256];\ | |
2082 uint8_t halfHV[256];\ | |
651 | 2083 copy_block17(full, src, 24, stride, 17);\ |
2084 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ | |
954 | 2085 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\ |
2086 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ | |
651 | 2087 OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\ |
255 | 2088 }\ |
1064 | 2089 static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, int stride){\ |
2090 uint8_t full[24*17];\ | |
2091 uint8_t halfH[272];\ | |
984 | 2092 copy_block17(full, src, 24, stride, 17);\ |
2093 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ | |
2094 put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\ | |
2095 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\ | |
2096 }\ | |
1064 | 2097 void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\ |
2098 uint8_t full[24*17];\ | |
2099 uint8_t halfH[272];\ | |
2100 uint8_t halfV[256];\ | |
2101 uint8_t halfHV[256];\ | |
651 | 2102 copy_block17(full, src, 24, stride, 17);\ |
2103 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ | |
954 | 2104 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\ |
2105 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ | |
651 | 2106 OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\ |
2107 }\ | |
1064 | 2108 static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, int stride){\ |
2109 uint8_t full[24*17];\ | |
2110 uint8_t halfH[272];\ | |
984 | 2111 copy_block17(full, src, 24, stride, 17);\ |
2112 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ | |
2113 put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\ | |
2114 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\ | |
2115 }\ | |
1064 | 2116 static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, int stride){\ |
2117 uint8_t halfH[272];\ | |
651 | 2118 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\ |
954 | 2119 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\ |
859 | 2120 } |
255 | 2121 |
/* Pixel-store operators plugged into QPEL_MC as its OP argument.
 * 'b' is the raw 6-tap filter output; the visible taps (20, -6, 3, -1,
 * applied symmetrically in the qpel lowpass bodies above) sum to 32,
 * so ">>5" normalizes it.  'cm' is assumed to be the clipping table
 * (ff_cropTbl + MAX_NEG_CROP, per the h264 bodies below) — indexing it
 * both rounds/shifts and clamps the result to 0..255.
 * _no_rnd variants add 15 instead of 16 (round-down) for the
 * no-rounding MC mode; op_avg additionally averages with the existing
 * destination pixel (+1 for rounded average). */
651 | 2122 #define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
2123 #define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1)
2124 #define op_put(a, b) a = cm[((b) + 16)>>5]
2125 #define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5]
2126
/* Instantiate the full set of qpel8/qpel16 subpel MC functions for each
 * store mode: put (rounded), put_no_rnd, avg.  The avg_no_rnd variant is
 * intentionally disabled (commented out) — it is not needed by any codec. */
2127 QPEL_MC(0, put_ , _ , op_put)
2128 QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd)
2129 QPEL_MC(0, avg_ , _ , op_avg)
2130 //QPEL_MC(1, avg_no_rnd , _ , op_avg)
/* The OP macros are local to the instantiations above; undef them so they
 * cannot leak into later macro expansions (H264_LOWPASS defines its own). */
2131 #undef op_avg
2132 #undef op_avg_no_rnd
2133 #undef op_put
2134 #undef op_put_no_rnd
255 | 2135
/* The (0,0) subpel position needs no filtering at all, so the mc00
 * entry points are plain pixel copies/averages; alias them to the shared
 * ff_*_pixels{8x8,16x16}_c helpers instead of generating dedicated code. */
12423 | 2136 #define put_qpel8_mc00_c ff_put_pixels8x8_c
2137 #define avg_qpel8_mc00_c ff_avg_pixels8x8_c
2138 #define put_qpel16_mc00_c ff_put_pixels16x16_c
2139 #define avg_qpel16_mc00_c ff_avg_pixels16x16_c
2140 #define put_no_rnd_qpel8_mc00_c ff_put_pixels8x8_c
2141 #define put_no_rnd_qpel16_mc00_c ff_put_pixels16x16_c
2142 | |
1168 | 2143 #if 1 |
2144 #define H264_LOWPASS(OPNAME, OP, OP2) \ | |
5151 | 2145 static av_unused void OPNAME ## h264_qpel2_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ |
3020
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2146 const int h=2;\ |
4176 | 2147 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ |
3020
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2148 int i;\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2149 for(i=0; i<h; i++)\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2150 {\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2151 OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2152 OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2153 dst+=dstStride;\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2154 src+=srcStride;\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2155 }\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2156 }\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2157 \ |
5151 | 2158 static av_unused void OPNAME ## h264_qpel2_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ |
3020
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2159 const int w=2;\ |
4176 | 2160 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ |
3020
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2161 int i;\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2162 for(i=0; i<w; i++)\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2163 {\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2164 const int srcB= src[-2*srcStride];\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2165 const int srcA= src[-1*srcStride];\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2166 const int src0= src[0 *srcStride];\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2167 const int src1= src[1 *srcStride];\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2168 const int src2= src[2 *srcStride];\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2169 const int src3= src[3 *srcStride];\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2170 const int src4= src[4 *srcStride];\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2171 OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2172 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2173 dst++;\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2174 src++;\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2175 }\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2176 }\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2177 \ |
5151 | 2178 static av_unused void OPNAME ## h264_qpel2_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\ |
3020
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2179 const int h=2;\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2180 const int w=2;\ |
4176 | 2181 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ |
3020
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2182 int i;\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2183 src -= 2*srcStride;\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2184 for(i=0; i<h+5; i++)\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2185 {\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2186 tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]);\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2187 tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]);\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2188 tmp+=tmpStride;\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2189 src+=srcStride;\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2190 }\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2191 tmp -= tmpStride*(h+5-2);\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2192 for(i=0; i<w; i++)\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2193 {\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2194 const int tmpB= tmp[-2*tmpStride];\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2195 const int tmpA= tmp[-1*tmpStride];\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2196 const int tmp0= tmp[0 *tmpStride];\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2197 const int tmp1= tmp[1 *tmpStride];\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2198 const int tmp2= tmp[2 *tmpStride];\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2199 const int tmp3= tmp[3 *tmpStride];\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2200 const int tmp4= tmp[4 *tmpStride];\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2201 OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2202 OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2203 dst++;\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2204 tmp++;\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2205 }\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2206 }\ |
1168 | 2207 static void OPNAME ## h264_qpel4_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ |
2208 const int h=4;\ | |
4176 | 2209 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ |
1168 | 2210 int i;\ |
2211 for(i=0; i<h; i++)\ | |
2212 {\ | |
2213 OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\ | |
2214 OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\ | |
2215 OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]));\ | |
2216 OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]));\ | |
2217 dst+=dstStride;\ | |
2218 src+=srcStride;\ | |
2219 }\ | |
2220 }\ | |
2221 \ | |
2222 static void OPNAME ## h264_qpel4_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ | |
2223 const int w=4;\ | |
4176 | 2224 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ |
1168 | 2225 int i;\ |
2226 for(i=0; i<w; i++)\ | |
2227 {\ | |
2228 const int srcB= src[-2*srcStride];\ | |
2229 const int srcA= src[-1*srcStride];\ | |
2230 const int src0= src[0 *srcStride];\ | |
2231 const int src1= src[1 *srcStride];\ | |
2232 const int src2= src[2 *srcStride];\ | |
2233 const int src3= src[3 *srcStride];\ | |
2234 const int src4= src[4 *srcStride];\ | |
2235 const int src5= src[5 *srcStride];\ | |
2236 const int src6= src[6 *srcStride];\ | |
2237 OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\ | |
2238 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\ | |
2239 OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\ | |
2240 OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\ | |
2241 dst++;\ | |
2242 src++;\ | |
2243 }\ | |
2244 }\ | |
2245 \ | |
2246 static void OPNAME ## h264_qpel4_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\ | |
2247 const int h=4;\ | |
2248 const int w=4;\ | |
4176 | 2249 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ |
1168 | 2250 int i;\ |
2251 src -= 2*srcStride;\ | |
2252 for(i=0; i<h+5; i++)\ | |
2253 {\ | |
2254 tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]);\ | |
2255 tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]);\ | |
2256 tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]);\ | |
2257 tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]);\ | |
2258 tmp+=tmpStride;\ | |
2259 src+=srcStride;\ | |
2260 }\ | |
2261 tmp -= tmpStride*(h+5-2);\ | |
2262 for(i=0; i<w; i++)\ | |
2263 {\ | |
2264 const int tmpB= tmp[-2*tmpStride];\ | |
2265 const int tmpA= tmp[-1*tmpStride];\ | |
2266 const int tmp0= tmp[0 *tmpStride];\ | |
2267 const int tmp1= tmp[1 *tmpStride];\ | |
2268 const int tmp2= tmp[2 *tmpStride];\ | |
2269 const int tmp3= tmp[3 *tmpStride];\ | |
2270 const int tmp4= tmp[4 *tmpStride];\ | |
2271 const int tmp5= tmp[5 *tmpStride];\ | |
2272 const int tmp6= tmp[6 *tmpStride];\ | |
2273 OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\ | |
2274 OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\ | |
2275 OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\ | |
2276 OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\ | |
2277 dst++;\ | |
2278 tmp++;\ | |
2279 }\ | |
2280 }\ | |
2281 \ | |
2282 static void OPNAME ## h264_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ | |
2283 const int h=8;\ | |
4176 | 2284 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ |
1168 | 2285 int i;\ |
2286 for(i=0; i<h; i++)\ | |
2287 {\ | |
2288 OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]));\ | |
2289 OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]));\ | |
2290 OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]));\ | |
2291 OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]));\ | |
2292 OP(dst[4], (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]));\ | |
2293 OP(dst[5], (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]));\ | |
2294 OP(dst[6], (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]));\ | |
2295 OP(dst[7], (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]));\ | |
2296 dst+=dstStride;\ | |
2297 src+=srcStride;\ | |
2298 }\ | |
2299 }\ | |
2300 \ | |
2301 static void OPNAME ## h264_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ | |
2302 const int w=8;\ | |
4176 | 2303 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ |
1168 | 2304 int i;\ |
2305 for(i=0; i<w; i++)\ | |
2306 {\ | |
2307 const int srcB= src[-2*srcStride];\ | |
2308 const int srcA= src[-1*srcStride];\ | |
2309 const int src0= src[0 *srcStride];\ | |
2310 const int src1= src[1 *srcStride];\ | |
2311 const int src2= src[2 *srcStride];\ | |
2312 const int src3= src[3 *srcStride];\ | |
2313 const int src4= src[4 *srcStride];\ | |
2314 const int src5= src[5 *srcStride];\ | |
2315 const int src6= src[6 *srcStride];\ | |
2316 const int src7= src[7 *srcStride];\ | |
2317 const int src8= src[8 *srcStride];\ | |
2318 const int src9= src[9 *srcStride];\ | |
2319 const int src10=src[10*srcStride];\ | |
2320 OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\ | |
2321 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\ | |
2322 OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\ | |
2323 OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\ | |
2324 OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*5 + (src2+src7));\ | |
2325 OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*5 + (src3+src8));\ | |
2326 OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*5 + (src4+src9));\ | |
2327 OP(dst[7*dstStride], (src7+src8)*20 - (src6+src9)*5 + (src5+src10));\ | |
2328 dst++;\ | |
2329 src++;\ | |
2330 }\ | |
2331 }\ | |
2332 \ | |
2333 static void OPNAME ## h264_qpel8_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\ | |
2334 const int h=8;\ | |
2335 const int w=8;\ | |
4176 | 2336 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ |
1168 | 2337 int i;\ |
2338 src -= 2*srcStride;\ | |
2339 for(i=0; i<h+5; i++)\ | |
2340 {\ | |
2341 tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]);\ | |
2342 tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]);\ | |
2343 tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]);\ | |
2344 tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]);\ | |
2345 tmp[4]= (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]);\ | |
2346 tmp[5]= (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]);\ | |
2347 tmp[6]= (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]);\ | |
2348 tmp[7]= (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]);\ | |
2349 tmp+=tmpStride;\ | |
2350 src+=srcStride;\ | |
2351 }\ | |
2352 tmp -= tmpStride*(h+5-2);\ | |
2353 for(i=0; i<w; i++)\ | |
2354 {\ | |
2355 const int tmpB= tmp[-2*tmpStride];\ | |
2356 const int tmpA= tmp[-1*tmpStride];\ | |
2357 const int tmp0= tmp[0 *tmpStride];\ | |
2358 const int tmp1= tmp[1 *tmpStride];\ | |
2359 const int tmp2= tmp[2 *tmpStride];\ | |
2360 const int tmp3= tmp[3 *tmpStride];\ | |
2361 const int tmp4= tmp[4 *tmpStride];\ | |
2362 const int tmp5= tmp[5 *tmpStride];\ | |
2363 const int tmp6= tmp[6 *tmpStride];\ | |
2364 const int tmp7= tmp[7 *tmpStride];\ | |
2365 const int tmp8= tmp[8 *tmpStride];\ | |
2366 const int tmp9= tmp[9 *tmpStride];\ | |
2367 const int tmp10=tmp[10*tmpStride];\ | |
2368 OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\ | |
2369 OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\ | |
2370 OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\ | |
2371 OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\ | |
2372 OP2(dst[4*dstStride], (tmp4+tmp5)*20 - (tmp3+tmp6)*5 + (tmp2+tmp7));\ | |
2373 OP2(dst[5*dstStride], (tmp5+tmp6)*20 - (tmp4+tmp7)*5 + (tmp3+tmp8));\ | |
2374 OP2(dst[6*dstStride], (tmp6+tmp7)*20 - (tmp5+tmp8)*5 + (tmp4+tmp9));\ | |
2375 OP2(dst[7*dstStride], (tmp7+tmp8)*20 - (tmp6+tmp9)*5 + (tmp5+tmp10));\ | |
2376 dst++;\ | |
2377 tmp++;\ | |
2378 }\ | |
2379 }\ | |
2380 \ | |
2381 static void OPNAME ## h264_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ | |
2382 OPNAME ## h264_qpel8_v_lowpass(dst , src , dstStride, srcStride);\ | |
2383 OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\ | |
2384 src += 8*srcStride;\ | |
2385 dst += 8*dstStride;\ | |
2386 OPNAME ## h264_qpel8_v_lowpass(dst , src , dstStride, srcStride);\ | |
2387 OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\ | |
2388 }\ | |
2389 \ | |
2390 static void OPNAME ## h264_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ | |
2391 OPNAME ## h264_qpel8_h_lowpass(dst , src , dstStride, srcStride);\ | |
2392 OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\ | |
2393 src += 8*srcStride;\ | |
2394 dst += 8*dstStride;\ | |
2395 OPNAME ## h264_qpel8_h_lowpass(dst , src , dstStride, srcStride);\ | |
2396 OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\ | |
2397 }\ | |
2398 \ | |
2399 static void OPNAME ## h264_qpel16_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\ | |
2400 OPNAME ## h264_qpel8_hv_lowpass(dst , tmp , src , dstStride, tmpStride, srcStride);\ | |
2401 OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\ | |
2402 src += 8*srcStride;\ | |
2403 dst += 8*dstStride;\ | |
2404 OPNAME ## h264_qpel8_hv_lowpass(dst , tmp , src , dstStride, tmpStride, srcStride);\ | |
2405 OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\ | |
2406 }\ | |
2407 | |
2408 #define H264_MC(OPNAME, SIZE) \ | |
12423 | 2409 static av_unused void OPNAME ## h264_qpel ## SIZE ## _mc00_c (uint8_t *dst, uint8_t *src, int stride){\ |
1168 | 2410 OPNAME ## pixels ## SIZE ## _c(dst, src, stride, SIZE);\ |
2411 }\ | |
2412 \ | |
2413 static void OPNAME ## h264_qpel ## SIZE ## _mc10_c(uint8_t *dst, uint8_t *src, int stride){\ | |
2414 uint8_t half[SIZE*SIZE];\ | |
2415 put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\ | |
2416 OPNAME ## pixels ## SIZE ## _l2(dst, src, half, stride, stride, SIZE, SIZE);\ | |
2417 }\ | |
2418 \ | |
2419 static void OPNAME ## h264_qpel ## SIZE ## _mc20_c(uint8_t *dst, uint8_t *src, int stride){\ | |
2420 OPNAME ## h264_qpel ## SIZE ## _h_lowpass(dst, src, stride, stride);\ | |
2421 }\ | |
2422 \ | |
2423 static void OPNAME ## h264_qpel ## SIZE ## _mc30_c(uint8_t *dst, uint8_t *src, int stride){\ | |
2424 uint8_t half[SIZE*SIZE];\ | |
2425 put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\ | |
2426 OPNAME ## pixels ## SIZE ## _l2(dst, src+1, half, stride, stride, SIZE, SIZE);\ | |
2427 }\ | |
2428 \ | |
2429 static void OPNAME ## h264_qpel ## SIZE ## _mc01_c(uint8_t *dst, uint8_t *src, int stride){\ | |
2430 uint8_t full[SIZE*(SIZE+5)];\ | |
2431 uint8_t * const full_mid= full + SIZE*2;\ | |
2432 uint8_t half[SIZE*SIZE];\ | |
2433 copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\ | |
2434 put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\ | |
2435 OPNAME ## pixels ## SIZE ## _l2(dst, full_mid, half, stride, SIZE, SIZE, SIZE);\ | |
2436 }\ | |
2437 \ | |
2438 static void OPNAME ## h264_qpel ## SIZE ## _mc02_c(uint8_t *dst, uint8_t *src, int stride){\ | |
2439 uint8_t full[SIZE*(SIZE+5)];\ | |
2440 uint8_t * const full_mid= full + SIZE*2;\ | |
2441 copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\ | |
2442 OPNAME ## h264_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE);\ | |
2443 }\ | |
2444 \ | |
2445 static void OPNAME ## h264_qpel ## SIZE ## _mc03_c(uint8_t *dst, uint8_t *src, int stride){\ | |
2446 uint8_t full[SIZE*(SIZE+5)];\ | |
2447 uint8_t * const full_mid= full + SIZE*2;\ | |
2448 uint8_t half[SIZE*SIZE];\ | |
2449 copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\ | |
2450 put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\ | |
2451 OPNAME ## pixels ## SIZE ## _l2(dst, full_mid+SIZE, half, stride, SIZE, SIZE, SIZE);\ | |
2452 }\ | |
2453 \ | |
2454 static void OPNAME ## h264_qpel ## SIZE ## _mc11_c(uint8_t *dst, uint8_t *src, int stride){\ | |
2455 uint8_t full[SIZE*(SIZE+5)];\ | |
2456 uint8_t * const full_mid= full + SIZE*2;\ | |
2457 uint8_t halfH[SIZE*SIZE];\ | |
2458 uint8_t halfV[SIZE*SIZE];\ | |
2459 put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\ | |
2460 copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\ | |
2461 put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\ | |
2462 OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\ | |
2463 }\ | |
2464 \ | |
2465 static void OPNAME ## h264_qpel ## SIZE ## _mc31_c(uint8_t *dst, uint8_t *src, int stride){\ | |
2466 uint8_t full[SIZE*(SIZE+5)];\ | |
2467 uint8_t * const full_mid= full + SIZE*2;\ | |
2468 uint8_t halfH[SIZE*SIZE];\ | |
2469 uint8_t halfV[SIZE*SIZE];\ | |
2470 put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\ | |
2471 copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\ | |
2472 put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\ | |
2473 OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\ | |
2474 }\ | |
2475 \ | |
2476 static void OPNAME ## h264_qpel ## SIZE ## _mc13_c(uint8_t *dst, uint8_t *src, int stride){\ | |
2477 uint8_t full[SIZE*(SIZE+5)];\ | |
2478 uint8_t * const full_mid= full + SIZE*2;\ | |
2479 uint8_t halfH[SIZE*SIZE];\ | |
2480 uint8_t halfV[SIZE*SIZE];\ | |
2481 put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\ | |
2482 copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\ | |
2483 put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\ | |
2484 OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\ | |
2485 }\ | |
2486 \ | |
2487 static void OPNAME ## h264_qpel ## SIZE ## _mc33_c(uint8_t *dst, uint8_t *src, int stride){\ | |
2488 uint8_t full[SIZE*(SIZE+5)];\ | |
2489 uint8_t * const full_mid= full + SIZE*2;\ | |
2490 uint8_t halfH[SIZE*SIZE];\ | |
2491 uint8_t halfV[SIZE*SIZE];\ | |
2492 put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\ | |
2493 copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\ | |
2494 put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\ | |
2495 OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\ | |
2496 }\ | |
2497 \ | |
2498 static void OPNAME ## h264_qpel ## SIZE ## _mc22_c(uint8_t *dst, uint8_t *src, int stride){\ | |
2499 int16_t tmp[SIZE*(SIZE+5)];\ | |
2500 OPNAME ## h264_qpel ## SIZE ## _hv_lowpass(dst, tmp, src, stride, SIZE, stride);\ | |
2501 }\ | |
2502 \ | |
2503 static void OPNAME ## h264_qpel ## SIZE ## _mc21_c(uint8_t *dst, uint8_t *src, int stride){\ | |
2504 int16_t tmp[SIZE*(SIZE+5)];\ | |
2505 uint8_t halfH[SIZE*SIZE];\ | |
2506 uint8_t halfHV[SIZE*SIZE];\ | |
2507 put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\ | |
2508 put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\ | |
2509 OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\ | |
2510 }\ | |
2511 \ | |
2512 static void OPNAME ## h264_qpel ## SIZE ## _mc23_c(uint8_t *dst, uint8_t *src, int stride){\ | |
2513 int16_t tmp[SIZE*(SIZE+5)];\ | |
2514 uint8_t halfH[SIZE*SIZE];\ | |
2515 uint8_t halfHV[SIZE*SIZE];\ | |
2516 put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\ | |
2517 put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\ | |
2518 OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\ | |
2519 }\ | |
2520 \ | |
2521 static void OPNAME ## h264_qpel ## SIZE ## _mc12_c(uint8_t *dst, uint8_t *src, int stride){\ | |
2522 uint8_t full[SIZE*(SIZE+5)];\ | |
2523 uint8_t * const full_mid= full + SIZE*2;\ | |
2524 int16_t tmp[SIZE*(SIZE+5)];\ | |
2525 uint8_t halfV[SIZE*SIZE];\ | |
2526 uint8_t halfHV[SIZE*SIZE];\ | |
2527 copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\ | |
2528 put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\ | |
2529 put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\ | |
2530 OPNAME ## pixels ## SIZE ## _l2(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\ | |
2531 }\ | |
2532 \ | |
2533 static void OPNAME ## h264_qpel ## SIZE ## _mc32_c(uint8_t *dst, uint8_t *src, int stride){\ | |
2534 uint8_t full[SIZE*(SIZE+5)];\ | |
2535 uint8_t * const full_mid= full + SIZE*2;\ | |
2536 int16_t tmp[SIZE*(SIZE+5)];\ | |
2537 uint8_t halfV[SIZE*SIZE];\ | |
2538 uint8_t halfHV[SIZE*SIZE];\ | |
2539 copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\ | |
2540 put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\ | |
2541 put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\ | |
2542 OPNAME ## pixels ## SIZE ## _l2(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\ | |
2543 }\ | |
2544 | |
2545 #define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1) | |
2546 //#define op_avg2(a, b) a = (((a)*w1+cm[((b) + 16)>>5]*w2 + o + 64)>>7) | |
2547 #define op_put(a, b) a = cm[((b) + 16)>>5] | |
2548 #define op2_avg(a, b) a = (((a)+cm[((b) + 512)>>10]+1)>>1) | |
2549 #define op2_put(a, b) a = cm[((b) + 512)>>10] | |
2550 | |
2551 H264_LOWPASS(put_ , op_put, op2_put) | |
2552 H264_LOWPASS(avg_ , op_avg, op2_avg) | |
3020
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2553 H264_MC(put_, 2) |
1168 | 2554 H264_MC(put_, 4) |
2555 H264_MC(put_, 8) | |
2556 H264_MC(put_, 16) | |
2557 H264_MC(avg_, 4) | |
2558 H264_MC(avg_, 8) | |
2559 H264_MC(avg_, 16) | |
2560 | |
2561 #undef op_avg | |
2562 #undef op_put | |
2563 #undef op2_avg | |
2564 #undef op2_put | |
2565 #endif | |
2566 | |
12423 | 2567 #define put_h264_qpel8_mc00_c ff_put_pixels8x8_c |
2568 #define avg_h264_qpel8_mc00_c ff_avg_pixels8x8_c | |
2569 #define put_h264_qpel16_mc00_c ff_put_pixels16x16_c | |
2570 #define avg_h264_qpel16_mc00_c ff_avg_pixels16x16_c | |
2571 | |
936 | 2572 static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){ |
4176 | 2573 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; |
936 | 2574 int i; |
2575 | |
2576 for(i=0; i<h; i++){ | |
2577 dst[0]= cm[(9*(src[0] + src[1]) - (src[-1] + src[2]) + 8)>>4]; | |
2578 dst[1]= cm[(9*(src[1] + src[2]) - (src[ 0] + src[3]) + 8)>>4]; | |
2579 dst[2]= cm[(9*(src[2] + src[3]) - (src[ 1] + src[4]) + 8)>>4]; | |
2580 dst[3]= cm[(9*(src[3] + src[4]) - (src[ 2] + src[5]) + 8)>>4]; | |
2581 dst[4]= cm[(9*(src[4] + src[5]) - (src[ 3] + src[6]) + 8)>>4]; | |
2582 dst[5]= cm[(9*(src[5] + src[6]) - (src[ 4] + src[7]) + 8)>>4]; | |
2583 dst[6]= cm[(9*(src[6] + src[7]) - (src[ 5] + src[8]) + 8)>>4]; | |
2584 dst[7]= cm[(9*(src[7] + src[8]) - (src[ 6] + src[9]) + 8)>>4]; | |
2585 dst+=dstStride; | |
2967 | 2586 src+=srcStride; |
936 | 2587 } |
2588 } | |
2589 | |
12423 | 2590 void ff_put_pixels8x8_c(uint8_t *dst, uint8_t *src, int stride) { |
3395
adccbf4a1040
CAVS decoder by (Stefan Gehrer stefan.gehrer gmx.de)
michael
parents:
3373
diff
changeset
|
2591 put_pixels8_c(dst, src, stride, 8); |
adccbf4a1040
CAVS decoder by (Stefan Gehrer stefan.gehrer gmx.de)
michael
parents:
3373
diff
changeset
|
2592 } |
12423 | 2593 void ff_avg_pixels8x8_c(uint8_t *dst, uint8_t *src, int stride) { |
3395
adccbf4a1040
CAVS decoder by (Stefan Gehrer stefan.gehrer gmx.de)
michael
parents:
3373
diff
changeset
|
2594 avg_pixels8_c(dst, src, stride, 8); |
adccbf4a1040
CAVS decoder by (Stefan Gehrer stefan.gehrer gmx.de)
michael
parents:
3373
diff
changeset
|
2595 } |
12423 | 2596 void ff_put_pixels16x16_c(uint8_t *dst, uint8_t *src, int stride) { |
3395
adccbf4a1040
CAVS decoder by (Stefan Gehrer stefan.gehrer gmx.de)
michael
parents:
3373
diff
changeset
|
2597 put_pixels16_c(dst, src, stride, 16); |
adccbf4a1040
CAVS decoder by (Stefan Gehrer stefan.gehrer gmx.de)
michael
parents:
3373
diff
changeset
|
2598 } |
12423 | 2599 void ff_avg_pixels16x16_c(uint8_t *dst, uint8_t *src, int stride) { |
3395
adccbf4a1040
CAVS decoder by (Stefan Gehrer stefan.gehrer gmx.de)
michael
parents:
3373
diff
changeset
|
2600 avg_pixels16_c(dst, src, stride, 16); |
adccbf4a1040
CAVS decoder by (Stefan Gehrer stefan.gehrer gmx.de)
michael
parents:
3373
diff
changeset
|
2601 } |
3526 | 2602 |
8590 | 2603 #if CONFIG_RV40_DECODER |
8232 | 2604 static void put_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){ |
2605 put_pixels16_xy2_c(dst, src, stride, 16); | |
2606 } | |
2607 static void avg_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){ | |
2608 avg_pixels16_xy2_c(dst, src, stride, 16); | |
2609 } | |
2610 static void put_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){ | |
2611 put_pixels8_xy2_c(dst, src, stride, 8); | |
2612 } | |
2613 static void avg_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){ | |
2614 avg_pixels8_xy2_c(dst, src, stride, 8); | |
2615 } | |
2616 #endif /* CONFIG_RV40_DECODER */ | |
2617 | |
936 | 2618 static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){ |
4176 | 2619 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; |
936 | 2620 int i; |
2621 | |
2622 for(i=0; i<w; i++){ | |
2623 const int src_1= src[ -srcStride]; | |
2624 const int src0 = src[0 ]; | |
2625 const int src1 = src[ srcStride]; | |
2626 const int src2 = src[2*srcStride]; | |
2627 const int src3 = src[3*srcStride]; | |
2628 const int src4 = src[4*srcStride]; | |
2629 const int src5 = src[5*srcStride]; | |
2630 const int src6 = src[6*srcStride]; | |
2631 const int src7 = src[7*srcStride]; | |
2632 const int src8 = src[8*srcStride]; | |
2633 const int src9 = src[9*srcStride]; | |
2634 dst[0*dstStride]= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4]; | |
2635 dst[1*dstStride]= cm[(9*(src1 + src2) - (src0 + src3) + 8)>>4]; | |
2636 dst[2*dstStride]= cm[(9*(src2 + src3) - (src1 + src4) + 8)>>4]; | |
2637 dst[3*dstStride]= cm[(9*(src3 + src4) - (src2 + src5) + 8)>>4]; | |
2638 dst[4*dstStride]= cm[(9*(src4 + src5) - (src3 + src6) + 8)>>4]; | |
2639 dst[5*dstStride]= cm[(9*(src5 + src6) - (src4 + src7) + 8)>>4]; | |
2640 dst[6*dstStride]= cm[(9*(src6 + src7) - (src5 + src8) + 8)>>4]; | |
2641 dst[7*dstStride]= cm[(9*(src7 + src8) - (src6 + src9) + 8)>>4]; | |
2642 src++; | |
2643 dst++; | |
2644 } | |
2645 } | |
2646 | |
2647 static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){ | |
2648 uint8_t half[64]; | |
2649 wmv2_mspel8_h_lowpass(half, src, 8, stride, 8); | |
2650 put_pixels8_l2(dst, src, half, stride, stride, 8, 8); | |
2651 } | |
2652 | |
2653 static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){ | |
2654 wmv2_mspel8_h_lowpass(dst, src, stride, stride, 8); | |
2655 } | |
2656 | |
2657 static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){ | |
2658 uint8_t half[64]; | |
2659 wmv2_mspel8_h_lowpass(half, src, 8, stride, 8); | |
2660 put_pixels8_l2(dst, src+1, half, stride, stride, 8, 8); | |
2661 } | |
2662 | |
2663 static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){ | |
2664 wmv2_mspel8_v_lowpass(dst, src, stride, stride, 8); | |
2665 } | |
2666 | |
2667 static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){ | |
2668 uint8_t halfH[88]; | |
2669 uint8_t halfV[64]; | |
2670 uint8_t halfHV[64]; | |
2671 wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11); | |
2672 wmv2_mspel8_v_lowpass(halfV, src, 8, stride, 8); | |
2673 wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8); | |
2674 put_pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8); | |
2675 } | |
2676 static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){ | |
2677 uint8_t halfH[88]; | |
2678 uint8_t halfV[64]; | |
2679 uint8_t halfHV[64]; | |
2680 wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11); | |
2681 wmv2_mspel8_v_lowpass(halfV, src+1, 8, stride, 8); | |
2682 wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8); | |
2683 put_pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8); | |
2684 } | |
2685 static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){ | |
2686 uint8_t halfH[88]; | |
2687 wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11); | |
2688 wmv2_mspel8_v_lowpass(dst, halfH+8, stride, 8, 8); | |
2689 } | |
2690 | |
1644 | 2691 static void h263_v_loop_filter_c(uint8_t *src, int stride, int qscale){ |
10749
5cca4b6c459d
Get rid of pointless CONFIG_ANY_H263 preprocessor definition.
diego
parents:
10748
diff
changeset
|
2692 if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) { |
1644 | 2693 int x; |
2694 const int strength= ff_h263_loop_filter_strength[qscale]; | |
2967 | 2695 |
1644 | 2696 for(x=0; x<8; x++){ |
2697 int d1, d2, ad1; | |
2698 int p0= src[x-2*stride]; | |
2699 int p1= src[x-1*stride]; | |
2700 int p2= src[x+0*stride]; | |
2701 int p3= src[x+1*stride]; | |
2702 int d = (p0 - p3 + 4*(p2 - p1)) / 8; | |
2703 | |
2704 if (d<-2*strength) d1= 0; | |
2705 else if(d<- strength) d1=-2*strength - d; | |
2706 else if(d< strength) d1= d; | |
2707 else if(d< 2*strength) d1= 2*strength - d; | |
2708 else d1= 0; | |
2967 | 2709 |
1644 | 2710 p1 += d1; |
2711 p2 -= d1; | |
2712 if(p1&256) p1= ~(p1>>31); | |
2713 if(p2&256) p2= ~(p2>>31); | |
2967 | 2714 |
1644 | 2715 src[x-1*stride] = p1; |
2716 src[x+0*stride] = p2; | |
2717 | |
4001 | 2718 ad1= FFABS(d1)>>1; |
2967 | 2719 |
4594 | 2720 d2= av_clip((p0-p3)/4, -ad1, ad1); |
2967 | 2721 |
1644 | 2722 src[x-2*stride] = p0 - d2; |
2723 src[x+ stride] = p3 + d2; | |
2724 } | |
5394
e9a6215f4e3a
help some gcc version to optimize out those functions
aurel
parents:
5291
diff
changeset
|
2725 } |
1644 | 2726 } |
2727 | |
2728 static void h263_h_loop_filter_c(uint8_t *src, int stride, int qscale){ | |
10749
5cca4b6c459d
Get rid of pointless CONFIG_ANY_H263 preprocessor definition.
diego
parents:
10748
diff
changeset
|
2729 if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) { |
1644 | 2730 int y; |
2731 const int strength= ff_h263_loop_filter_strength[qscale]; | |
2967 | 2732 |
1644 | 2733 for(y=0; y<8; y++){ |
2734 int d1, d2, ad1; | |
2735 int p0= src[y*stride-2]; | |
2736 int p1= src[y*stride-1]; | |
2737 int p2= src[y*stride+0]; | |
2738 int p3= src[y*stride+1]; | |
2739 int d = (p0 - p3 + 4*(p2 - p1)) / 8; | |
2740 | |
2741 if (d<-2*strength) d1= 0; | |
2742 else if(d<- strength) d1=-2*strength - d; | |
2743 else if(d< strength) d1= d; | |
2744 else if(d< 2*strength) d1= 2*strength - d; | |
2745 else d1= 0; | |
2967 | 2746 |
1644 | 2747 p1 += d1; |
2748 p2 -= d1; | |
2749 if(p1&256) p1= ~(p1>>31); | |
2750 if(p2&256) p2= ~(p2>>31); | |
2967 | 2751 |
1644 | 2752 src[y*stride-1] = p1; |
2753 src[y*stride+0] = p2; | |
2754 | |
4001 | 2755 ad1= FFABS(d1)>>1; |
2967 | 2756 |
4594 | 2757 d2= av_clip((p0-p3)/4, -ad1, ad1); |
2967 | 2758 |
1644 | 2759 src[y*stride-2] = p0 - d2; |
2760 src[y*stride+1] = p3 + d2; | |
2761 } | |
5394
e9a6215f4e3a
help some gcc version to optimize out those functions
aurel
parents:
5291
diff
changeset
|
2762 } |
1644 | 2763 } |
936 | 2764 |
2045 | 2765 static void h261_loop_filter_c(uint8_t *src, int stride){ |
2766 int x,y,xy,yz; | |
2767 int temp[64]; | |
2768 | |
2769 for(x=0; x<8; x++){ | |
2770 temp[x ] = 4*src[x ]; | |
2771 temp[x + 7*8] = 4*src[x + 7*stride]; | |
2772 } | |
2773 for(y=1; y<7; y++){ | |
2774 for(x=0; x<8; x++){ | |
2775 xy = y * stride + x; | |
2776 yz = y * 8 + x; | |
2777 temp[yz] = src[xy - stride] + 2*src[xy] + src[xy + stride]; | |
2044
b6f2add2511e
h261 decoder by (Maarten Daniels <maarten.daniels at student dot luc dot ac dot be>)
michael
parents:
1984
diff
changeset
|
2778 } |
b6f2add2511e
h261 decoder by (Maarten Daniels <maarten.daniels at student dot luc dot ac dot be>)
michael
parents:
1984
diff
changeset
|
2779 } |
2967 | 2780 |
2045 | 2781 for(y=0; y<8; y++){ |
2782 src[ y*stride] = (temp[ y*8] + 2)>>2; | |
2783 src[7+y*stride] = (temp[7+y*8] + 2)>>2; | |
2784 for(x=1; x<7; x++){ | |
2785 xy = y * stride + x; | |
2786 yz = y * 8 + x; | |
2787 src[xy] = (temp[yz-1] + 2*temp[yz] + temp[yz+1] + 8)>>4; | |
2044
b6f2add2511e
h261 decoder by (Maarten Daniels <maarten.daniels at student dot luc dot ac dot be>)
michael
parents:
1984
diff
changeset
|
2788 } |
b6f2add2511e
h261 decoder by (Maarten Daniels <maarten.daniels at student dot luc dot ac dot be>)
michael
parents:
1984
diff
changeset
|
2789 } |
b6f2add2511e
h261 decoder by (Maarten Daniels <maarten.daniels at student dot luc dot ac dot be>)
michael
parents:
1984
diff
changeset
|
2790 } |
b6f2add2511e
h261 decoder by (Maarten Daniels <maarten.daniels at student dot luc dot ac dot be>)
michael
parents:
1984
diff
changeset
|
2791 |
1708 | 2792 static inline int pix_abs16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) |
0 | 2793 { |
2794 int s, i; | |
2795 | |
2796 s = 0; | |
1708 | 2797 for(i=0;i<h;i++) { |
0 | 2798 s += abs(pix1[0] - pix2[0]); |
2799 s += abs(pix1[1] - pix2[1]); | |
2800 s += abs(pix1[2] - pix2[2]); | |
2801 s += abs(pix1[3] - pix2[3]); | |
2802 s += abs(pix1[4] - pix2[4]); | |
2803 s += abs(pix1[5] - pix2[5]); | |
2804 s += abs(pix1[6] - pix2[6]); | |
2805 s += abs(pix1[7] - pix2[7]); | |
2806 s += abs(pix1[8] - pix2[8]); | |
2807 s += abs(pix1[9] - pix2[9]); | |
2808 s += abs(pix1[10] - pix2[10]); | |
2809 s += abs(pix1[11] - pix2[11]); | |
2810 s += abs(pix1[12] - pix2[12]); | |
2811 s += abs(pix1[13] - pix2[13]); | |
2812 s += abs(pix1[14] - pix2[14]); | |
2813 s += abs(pix1[15] - pix2[15]); | |
2814 pix1 += line_size; | |
2815 pix2 += line_size; | |
2816 } | |
2817 return s; | |
2818 } | |
2819 | |
1708 | 2820 static int pix_abs16_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) |
0 | 2821 { |
2822 int s, i; | |
2823 | |
2824 s = 0; | |
1708 | 2825 for(i=0;i<h;i++) { |
0 | 2826 s += abs(pix1[0] - avg2(pix2[0], pix2[1])); |
2827 s += abs(pix1[1] - avg2(pix2[1], pix2[2])); | |
2828 s += abs(pix1[2] - avg2(pix2[2], pix2[3])); | |
2829 s += abs(pix1[3] - avg2(pix2[3], pix2[4])); | |
2830 s += abs(pix1[4] - avg2(pix2[4], pix2[5])); | |
2831 s += abs(pix1[5] - avg2(pix2[5], pix2[6])); | |
2832 s += abs(pix1[6] - avg2(pix2[6], pix2[7])); | |
2833 s += abs(pix1[7] - avg2(pix2[7], pix2[8])); | |
2834 s += abs(pix1[8] - avg2(pix2[8], pix2[9])); | |
2835 s += abs(pix1[9] - avg2(pix2[9], pix2[10])); | |
2836 s += abs(pix1[10] - avg2(pix2[10], pix2[11])); | |
2837 s += abs(pix1[11] - avg2(pix2[11], pix2[12])); | |
2838 s += abs(pix1[12] - avg2(pix2[12], pix2[13])); | |
2839 s += abs(pix1[13] - avg2(pix2[13], pix2[14])); | |
2840 s += abs(pix1[14] - avg2(pix2[14], pix2[15])); | |
2841 s += abs(pix1[15] - avg2(pix2[15], pix2[16])); | |
2842 pix1 += line_size; | |
2843 pix2 += line_size; | |
2844 } | |
2845 return s; | |
2846 } | |
2847 | |
/**
 * SAD of a 16xh block against a vertically half-pel interpolated reference
 * (avg2() of two vertically adjacent reference rows).
 */
static int pix_abs16_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *pix3 = pix2 + line_size; /* row below pix2 */
    int sum = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 16; col++)
            sum += abs(pix1[col] - avg2(pix2[col], pix3[col]));
        pix1 += line_size;
        pix2 += line_size;
        pix3 += line_size;
    }
    return sum;
}
2877 | |
/**
 * SAD of a 16xh block against a diagonally (x+0.5, y+0.5) half-pel
 * interpolated reference (avg4() of a 2x2 reference neighbourhood).
 */
static int pix_abs16_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *pix3 = pix2 + line_size; /* row below pix2 */
    int sum = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 16; col++)
            sum += abs(pix1[col] - avg4(pix2[col], pix2[col + 1],
                                        pix3[col], pix3[col + 1]));
        pix1 += line_size;
        pix2 += line_size;
        pix3 += line_size;
    }
    return sum;
}
2907 | |
/**
 * Plain SAD of an 8xh block (no sub-pel interpolation).
 * @return sum of absolute pixel differences over 8 columns and h rows
 */
static inline int pix_abs8_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sum = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++)
            sum += abs(pix1[col] - pix2[col]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return sum;
}
2927 | |
/** SAD of an 8xh block against a horizontally half-pel interpolated reference. */
static int pix_abs8_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sum = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++)
            sum += abs(pix1[col] - avg2(pix2[col], pix2[col + 1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return sum;
}
2947 | |
/** SAD of an 8xh block against a vertically half-pel interpolated reference. */
static int pix_abs8_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *pix3 = pix2 + line_size; /* row below pix2 */
    int sum = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++)
            sum += abs(pix1[col] - avg2(pix2[col], pix3[col]));
        pix1 += line_size;
        pix2 += line_size;
        pix3 += line_size;
    }
    return sum;
}
2969 | |
/** SAD of an 8xh block against a diagonally half-pel interpolated reference. */
static int pix_abs8_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *pix3 = pix2 + line_size; /* row below pix2 */
    int sum = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++)
            sum += abs(pix1[col] - avg4(pix2[col], pix2[col + 1],
                                        pix3[col], pix3[col + 1]));
        pix1 += line_size;
        pix2 += line_size;
        pix3 += line_size;
    }
    return sum;
}
2991 | |
2834 | 2992 static int nsse16_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){ |
2993 MpegEncContext *c = v; | |
2065
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
2994 int score1=0; |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
2995 int score2=0; |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
2996 int x,y; |
2066 | 2997 |
2065
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
2998 for(y=0; y<h; y++){ |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
2999 for(x=0; x<16; x++){ |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3000 score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]); |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3001 } |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3002 if(y+1<h){ |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3003 for(x=0; x<15; x++){ |
4001 | 3004 score2+= FFABS( s1[x ] - s1[x +stride] |
2065
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3005 - s1[x+1] + s1[x+1+stride]) |
4001 | 3006 -FFABS( s2[x ] - s2[x +stride] |
2065
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3007 - s2[x+1] + s2[x+1+stride]); |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3008 } |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3009 } |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3010 s1+= stride; |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3011 s2+= stride; |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3012 } |
2066 | 3013 |
4001 | 3014 if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight; |
3015 else return score1 + FFABS(score2)*8; | |
2065
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3016 } |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3017 |
2834 | 3018 static int nsse8_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){ |
3019 MpegEncContext *c = v; | |
2065
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3020 int score1=0; |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3021 int score2=0; |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3022 int x,y; |
2967 | 3023 |
2065
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3024 for(y=0; y<h; y++){ |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3025 for(x=0; x<8; x++){ |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3026 score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]); |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3027 } |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3028 if(y+1<h){ |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3029 for(x=0; x<7; x++){ |
4001 | 3030 score2+= FFABS( s1[x ] - s1[x +stride] |
2065
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3031 - s1[x+1] + s1[x+1+stride]) |
4001 | 3032 -FFABS( s2[x ] - s2[x +stride] |
2065
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3033 - s2[x+1] + s2[x+1+stride]); |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3034 } |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3035 } |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3036 s1+= stride; |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3037 s2+= stride; |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3038 } |
2967 | 3039 |
4001 | 3040 if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight; |
3041 else return score1 + FFABS(score2)*8; | |
2065
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3042 } |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3043 |
1784 | 3044 static int try_8x8basis_c(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale){ |
3045 int i; | |
3046 unsigned int sum=0; | |
3047 | |
3048 for(i=0; i<8*8; i++){ | |
3049 int b= rem[i] + ((basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT)); | |
3050 int w= weight[i]; | |
3051 b>>= RECON_SHIFT; | |
3052 assert(-512<b && b<512); | |
3053 | |
3054 sum += (w*b)*(w*b)>>4; | |
3055 } | |
3056 return sum>>2; | |
3057 } | |
3058 | |
3059 static void add_8x8basis_c(int16_t rem[64], int16_t basis[64], int scale){ | |
3060 int i; | |
3061 | |
3062 for(i=0; i<8*8; i++){ | |
3063 rem[i] += (basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT); | |
2967 | 3064 } |
1784 | 3065 } |
3066 | |
1100 | 3067 /** |
3068 * permutes an 8x8 block. | |
1101 | 3069 * @param block the block which will be permuted according to the given permutation vector |
1100 | 3070 * @param permutation the permutation vector |
3071 * @param last the last non zero coefficient in scantable order, used to speed the permutation up | |
2967 | 3072 * @param scantable the used scantable, this is only used to speed the permutation up, the block is not |
1101 | 3073 * (inverse) permutated to scantable order! |
1100 | 3074 */ |
1064 | 3075 void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last) |
174
ac5075a55488
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
88
diff
changeset
|
3076 { |
764 | 3077 int i; |
945 | 3078 DCTELEM temp[64]; |
2967 | 3079 |
764 | 3080 if(last<=0) return; |
5129 | 3081 //if(permutation[1]==1) return; //FIXME it is ok but not clean and might fail for some permutations |
174
ac5075a55488
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
88
diff
changeset
|
3082 |
764 | 3083 for(i=0; i<=last; i++){ |
3084 const int j= scantable[i]; | |
3085 temp[j]= block[j]; | |
3086 block[j]=0; | |
3087 } | |
2967 | 3088 |
764 | 3089 for(i=0; i<=last; i++){ |
3090 const int j= scantable[i]; | |
3091 const int perm_j= permutation[j]; | |
3092 block[perm_j]= temp[j]; | |
3093 } | |
174
ac5075a55488
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
88
diff
changeset
|
3094 } |
34 | 3095 |
/** Comparison function for FF_CMP_ZERO: ignores its inputs, always scores 0. */
static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h){
    return 0;
}
3099 | |
3100 void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type){ | |
3101 int i; | |
2967 | 3102 |
8976
e7d87561b42b
Making the arrays accomodate an extra intra 8x8 cmp function
romansh
parents:
8785
diff
changeset
|
3103 memset(cmp, 0, sizeof(void*)*6); |
e7d87561b42b
Making the arrays accomodate an extra intra 8x8 cmp function
romansh
parents:
8785
diff
changeset
|
3104 |
e7d87561b42b
Making the arrays accomodate an extra intra 8x8 cmp function
romansh
parents:
8785
diff
changeset
|
3105 for(i=0; i<6; i++){ |
1729 | 3106 switch(type&0xFF){ |
3107 case FF_CMP_SAD: | |
3108 cmp[i]= c->sad[i]; | |
3109 break; | |
3110 case FF_CMP_SATD: | |
3111 cmp[i]= c->hadamard8_diff[i]; | |
3112 break; | |
3113 case FF_CMP_SSE: | |
3114 cmp[i]= c->sse[i]; | |
3115 break; | |
3116 case FF_CMP_DCT: | |
3117 cmp[i]= c->dct_sad[i]; | |
3118 break; | |
3010
533c6386eca9
8x8 integer dct from x264 as cmp function (under CONFIG_GPL)
michael
parents:
2979
diff
changeset
|
3119 case FF_CMP_DCT264: |
533c6386eca9
8x8 integer dct from x264 as cmp function (under CONFIG_GPL)
michael
parents:
2979
diff
changeset
|
3120 cmp[i]= c->dct264_sad[i]; |
533c6386eca9
8x8 integer dct from x264 as cmp function (under CONFIG_GPL)
michael
parents:
2979
diff
changeset
|
3121 break; |
2382 | 3122 case FF_CMP_DCTMAX: |
3123 cmp[i]= c->dct_max[i]; | |
3124 break; | |
1729 | 3125 case FF_CMP_PSNR: |
3126 cmp[i]= c->quant_psnr[i]; | |
3127 break; | |
3128 case FF_CMP_BIT: | |
3129 cmp[i]= c->bit[i]; | |
3130 break; | |
3131 case FF_CMP_RD: | |
3132 cmp[i]= c->rd[i]; | |
3133 break; | |
3134 case FF_CMP_VSAD: | |
3135 cmp[i]= c->vsad[i]; | |
3136 break; | |
3137 case FF_CMP_VSSE: | |
3138 cmp[i]= c->vsse[i]; | |
3139 break; | |
3140 case FF_CMP_ZERO: | |
3141 cmp[i]= zero_cmp; | |
3142 break; | |
2065
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3143 case FF_CMP_NSSE: |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3144 cmp[i]= c->nsse[i]; |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3145 break; |
11485 | 3146 #if CONFIG_DWT |
2184 | 3147 case FF_CMP_W53: |
3148 cmp[i]= c->w53[i]; | |
3149 break; | |
3150 case FF_CMP_W97: | |
3151 cmp[i]= c->w97[i]; | |
3152 break; | |
3373
b8996cc5ccae
Disable w53 and w97 cmp methods when snow encoder is disabled
gpoirier
parents:
3323
diff
changeset
|
3153 #endif |
1729 | 3154 default: |
3155 av_log(NULL, AV_LOG_ERROR,"internal error in cmp function selection\n"); | |
3156 } | |
3157 } | |
3158 } | |
3159 | |
8288 | 3160 static void clear_block_c(DCTELEM *block) |
3161 { | |
3162 memset(block, 0, sizeof(DCTELEM)*64); | |
3163 } | |
3164 | |
1101 | 3165 /** |
3166 * memset(blocks, 0, sizeof(DCTELEM)*6*64) | |
3167 */ | |
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
764
diff
changeset
|
3168 static void clear_blocks_c(DCTELEM *blocks) |
296 | 3169 { |
3170 memset(blocks, 0, sizeof(DCTELEM)*6*64); | |
3171 } | |
3172 | |
866 | 3173 static void add_bytes_c(uint8_t *dst, uint8_t *src, int w){ |
6385 | 3174 long i; |
3175 for(i=0; i<=w-sizeof(long); i+=sizeof(long)){ | |
3176 long a = *(long*)(src+i); | |
3177 long b = *(long*)(dst+i); | |
3178 *(long*)(dst+i) = ((a&pb_7f) + (b&pb_7f)) ^ ((a^b)&pb_80); | |
866 | 3179 } |
3180 for(; i<w; i++) | |
3181 dst[i+0] += src[i+0]; | |
3182 } | |
3183 | |
6384 | 3184 static void add_bytes_l2_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){ |
6385 | 3185 long i; |
6384 | 3186 for(i=0; i<=w-sizeof(long); i+=sizeof(long)){ |
3187 long a = *(long*)(src1+i); | |
3188 long b = *(long*)(src2+i); | |
6385 | 3189 *(long*)(dst+i) = ((a&pb_7f) + (b&pb_7f)) ^ ((a^b)&pb_80); |
6384 | 3190 } |
3191 for(; i<w; i++) | |
3192 dst[i] = src1[i]+src2[i]; | |
3193 } | |
3194 | |
866 | 3195 static void diff_bytes_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){ |
6385 | 3196 long i; |
8590 | 3197 #if !HAVE_FAST_UNALIGNED |
6385 | 3198 if((long)src2 & (sizeof(long)-1)){ |
6386 | 3199 for(i=0; i+7<w; i+=8){ |
3200 dst[i+0] = src1[i+0]-src2[i+0]; | |
3201 dst[i+1] = src1[i+1]-src2[i+1]; | |
3202 dst[i+2] = src1[i+2]-src2[i+2]; | |
3203 dst[i+3] = src1[i+3]-src2[i+3]; | |
3204 dst[i+4] = src1[i+4]-src2[i+4]; | |
3205 dst[i+5] = src1[i+5]-src2[i+5]; | |
3206 dst[i+6] = src1[i+6]-src2[i+6]; | |
3207 dst[i+7] = src1[i+7]-src2[i+7]; | |
3208 } | |
6385 | 3209 }else |
3210 #endif | |
3211 for(i=0; i<=w-sizeof(long); i+=sizeof(long)){ | |
3212 long a = *(long*)(src1+i); | |
3213 long b = *(long*)(src2+i); | |
3214 *(long*)(dst+i) = ((a|pb_80) - (b&pb_7f)) ^ ((a^b^pb_80)&pb_80); | |
3215 } | |
866 | 3216 for(; i<w; i++) |
3217 dst[i+0] = src1[i+0]-src2[i+0]; | |
3218 } | |
3219 | |
/**
 * HuffYUV median-prediction decode: reconstruct dst from the decoded
 * residual 'diff' using the median of (left, top, left+top-topleft).
 * *left / *left_top carry the running left and top-left samples across calls.
 */
static void add_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *diff, int w, int *left, int *left_top){
    uint8_t left_val     = *left;
    uint8_t top_left_val = *left_top;
    int i;

    for (i = 0; i < w; i++) {
        /* src1[] holds the row above; prediction is clamped by mid_pred */
        left_val = mid_pred(left_val, src1[i],
                            (left_val + src1[i] - top_left_val) & 0xFF) + diff[i];
        top_left_val = src1[i];
        dst[i] = left_val;
    }

    *left     = left_val;
    *left_top = top_left_val;
}
3236 | |
/**
 * HuffYUV median-prediction encode: dst = src2 - median prediction, the
 * exact inverse of add_hfyu_median_prediction_c.
 * *left / *left_top carry the running left and top-left samples across calls.
 */
static void sub_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int w, int *left, int *left_top){
    uint8_t left_val     = *left;
    uint8_t top_left_val = *left_top;
    int i;

    for (i = 0; i < w; i++) {
        const int pred = mid_pred(left_val, src1[i],
                                  (left_val + src1[i] - top_left_val) & 0xFF);
        top_left_val = src1[i]; /* row above becomes next top-left */
        left_val     = src2[i]; /* current sample becomes next left */
        dst[i] = left_val - pred;
    }

    *left     = left_val;
    *left_top = top_left_val;
}
3254 | |
/**
 * HuffYUV left-prediction decode: dst[i] = running sum of src[0..i] + acc
 * (mod 256 in dst; the full accumulator is returned for the next call).
 * The original manually unrolled this by two; a plain loop is equivalent.
 */
static int add_hfyu_left_prediction_c(uint8_t *dst, const uint8_t *src, int w, int acc){
    int i;

    for (i = 0; i < w; i++) {
        acc += src[i];
        dst[i] = acc; /* truncates to 8 bits */
    }

    return acc;
}
3273 | |
/* Byte offsets of the B/G/R/A channels within a 32-bit BGRA pixel,
 * depending on host endianness. */
#if HAVE_BIGENDIAN
#define B 3
#define G 2
#define R 1
#define A 0
#else
#define B 0
#define G 1
#define R 2
#define A 3
#endif
/**
 * HuffYUV left-prediction decode for BGR32: each channel is the running sum
 * of its residuals. *red/*green/*blue/*alpha carry the accumulators across
 * calls (stored values truncate to 8 bits per channel in dst).
 */
static void add_hfyu_left_prediction_bgr32_c(uint8_t *dst, const uint8_t *src, int w, int *red, int *green, int *blue, int *alpha){
    int r = *red;
    int g = *green;
    int b = *blue;
    int a = *alpha;
    int i;

    for (i = 0; i < w; i++) {
        b += src[4*i+B];
        g += src[4*i+G];
        r += src[4*i+R];
        a += src[4*i+A];

        dst[4*i+B] = b;
        dst[4*i+G] = g;
        dst[4*i+R] = r;
        dst[4*i+A] = a;
    }

    *red   = r;
    *green = g;
    *blue  = b;
    *alpha = a;
}
#undef B
#undef G
#undef R
#undef A
10370 | 3314 |
/* Butterfly helpers for the Hadamard transforms below. */

/* Sum/difference of two inputs into two outputs. */
#define BUTTERFLY2(o1,o2,i1,i2) \
    o1= (i1)+(i2);\
    o2= (i1)-(i2);

/* In-place sum/difference of two lvalues. */
#define BUTTERFLY1(x,y) \
{\
    int a,b;\
    a= x;\
    b= y;\
    x= a+b;\
    y= a-b;\
}

/* |x+y| + |x-y|: absolute value of the final butterfly stage. */
#define BUTTERFLYA(x,y) (FFABS((x)+(y)) + FFABS((x)-(y)))
4001 | 3328 #define BUTTERFLYA(x,y) (FFABS((x)+(y)) + FFABS((x)-(y))) |
936 | 3329 |
/**
 * SATD: 8x8 Hadamard transform of the difference src-dst, summing the
 * absolute transform coefficients. Rows are transformed first (two butterfly
 * stages inside the row loop), then columns, with the last column stage
 * folded into the absolute-value accumulation via BUTTERFLYA.
 */
static int hadamard8_diff8x8_c(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){
    int temp[64];
    int sum = 0;
    int i;

    assert(h==8);

    /* horizontal pass: 8-point Hadamard of each difference row */
    for (i = 0; i < 8; i++) {
        //FIXME try pointer walks
        BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0]-dst[stride*i+0],src[stride*i+1]-dst[stride*i+1]);
        BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2]-dst[stride*i+2],src[stride*i+3]-dst[stride*i+3]);
        BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4]-dst[stride*i+4],src[stride*i+5]-dst[stride*i+5]);
        BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6]-dst[stride*i+6],src[stride*i+7]-dst[stride*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
        BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+5], temp[8*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
        BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
    }

    /* vertical pass: 8-point Hadamard of each column, last stage via BUTTERFLYA */
    for (i = 0; i < 8; i++) {
        BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
        BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
        BUTTERFLY1(temp[8*6+i], temp[8*7+i]);

        BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
        BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
        BUTTERFLY1(temp[8*5+i], temp[8*7+i]);

        sum += BUTTERFLYA(temp[8*0+i], temp[8*4+i])
              +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
              +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
              +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
    }
    return sum;
}
3381 | |
/**
 * Intra SATD: 8x8 Hadamard transform of the source block itself (no
 * reference), summing absolute coefficients and subtracting the DC term
 * (|temp[0]+temp[32]| after the column stages, i.e. the block mean) so flat
 * blocks score low.
 */
static int hadamard8_intra8x8_c(/*MpegEncContext*/ void *s, uint8_t *src, uint8_t *dummy, int stride, int h){
    int temp[64];
    int sum = 0;
    int i;

    assert(h==8);

    /* horizontal pass over the raw source rows */
    for (i = 0; i < 8; i++) {
        //FIXME try pointer walks
        BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0],src[stride*i+1]);
        BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2],src[stride*i+3]);
        BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4],src[stride*i+5]);
        BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6],src[stride*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
        BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+5], temp[8*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
        BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
    }

    /* vertical pass, last stage folded into BUTTERFLYA accumulation */
    for (i = 0; i < 8; i++) {
        BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
        BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
        BUTTERFLY1(temp[8*6+i], temp[8*7+i]);

        BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
        BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
        BUTTERFLY1(temp[8*5+i], temp[8*7+i]);

        sum += BUTTERFLYA(temp[8*0+i], temp[8*4+i])
              +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
              +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
              +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
    }

    sum -= FFABS(temp[8*0] + temp[8*4]); // -mean

    return sum;
}
3429 | |
1708 | 3430 static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){ |
936 | 3431 MpegEncContext * const s= (MpegEncContext *)c; |
11195 | 3432 LOCAL_ALIGNED_16(DCTELEM, temp, [64]); |
2967 | 3433 |
1708 | 3434 assert(h==8); |
936 | 3435 |
3436 s->dsp.diff_pixels(temp, src1, src2, stride); | |
1092 | 3437 s->dsp.fdct(temp); |
4988
689490842cf5
factor sum_abs_dctelem out of dct_sad, and simd it.
lorenm
parents:
4749
diff
changeset
|
3438 return s->dsp.sum_abs_dctelem(temp); |
936 | 3439 } |
3440 | |
#if CONFIG_GPL
/* One-dimensional 8-point H.264 high-profile transform, expressed through
 * SRC()/DST() accessor macros so the same code serves rows and columns. */
#define DCT8_1D {\
    const int s07 = SRC(0) + SRC(7);\
    const int s16 = SRC(1) + SRC(6);\
    const int s25 = SRC(2) + SRC(5);\
    const int s34 = SRC(3) + SRC(4);\
    const int a0 = s07 + s34;\
    const int a1 = s16 + s25;\
    const int a2 = s07 - s34;\
    const int a3 = s16 - s25;\
    const int d07 = SRC(0) - SRC(7);\
    const int d16 = SRC(1) - SRC(6);\
    const int d25 = SRC(2) - SRC(5);\
    const int d34 = SRC(3) - SRC(4);\
    const int a4 = d16 + d25 + (d07 + (d07>>1));\
    const int a5 = d07 - d34 - (d25 + (d25>>1));\
    const int a6 = d07 + d34 - (d16 + (d16>>1));\
    const int a7 = d16 - d25 + (d34 + (d34>>1));\
    DST(0, a0 + a1     ) ;\
    DST(1, a4 + (a7>>2)) ;\
    DST(2, a2 + (a3>>1)) ;\
    DST(3, a5 + (a6>>2)) ;\
    DST(4, a0 - a1     ) ;\
    DST(5, a6 - (a5>>2)) ;\
    DST(6, (a2>>1) - a3 ) ;\
    DST(7, (a4>>2) - a7 ) ;\
}

/**
 * H.264 8x8 transform based comparison: transform the pixel difference with
 * DCT8_1D (rows in place, then columns) and sum the absolute coefficients
 * during the column pass.
 */
static int dct264_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    DCTELEM dct[8][8];
    int i;
    int sum=0;

    s->dsp.diff_pixels(dct[0], src1, src2, stride);

    /* row transform, results written back in place */
#define SRC(x) dct[i][x]
#define DST(x,v) dct[i][x]= v
    for( i = 0; i < 8; i++ )
        DCT8_1D
#undef SRC
#undef DST

    /* column transform, coefficients accumulated as absolute values */
#define SRC(x) dct[x][i]
#define DST(x,v) sum += FFABS(v)
    for( i = 0; i < 8; i++ )
        DCT8_1D
#undef SRC
#undef DST
    return sum;
}
#endif
533c6386eca9
8x8 integer dct from x264 as cmp function (under CONFIG_GPL)
michael
parents:
2979
diff
changeset
|
3493 |
2382 | 3494 static int dct_max8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){ |
3495 MpegEncContext * const s= (MpegEncContext *)c; | |
11195 | 3496 LOCAL_ALIGNED_16(DCTELEM, temp, [64]); |
2382 | 3497 int sum=0, i; |
2967 | 3498 |
2382 | 3499 assert(h==8); |
3500 | |
3501 s->dsp.diff_pixels(temp, src1, src2, stride); | |
3502 s->dsp.fdct(temp); | |
3503 | |
3504 for(i=0; i<64; i++) | |
4001 | 3505 sum= FFMAX(sum, FFABS(temp[i])); |
2967 | 3506 |
2382 | 3507 return sum; |
3508 } | |
3509 | |
1708 | 3510 static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){ |
936 | 3511 MpegEncContext * const s= (MpegEncContext *)c; |
11195 | 3512 LOCAL_ALIGNED_16(DCTELEM, temp, [64*2]); |
11193 | 3513 DCTELEM * const bak = temp+64; |
936 | 3514 int sum=0, i; |
3515 | |
1708 | 3516 assert(h==8); |
936 | 3517 s->mb_intra=0; |
2967 | 3518 |
936 | 3519 s->dsp.diff_pixels(temp, src1, src2, stride); |
2967 | 3520 |
936 | 3521 memcpy(bak, temp, 64*sizeof(DCTELEM)); |
2967 | 3522 |
1013 | 3523 s->block_last_index[0/*FIXME*/]= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i); |
1689 | 3524 s->dct_unquantize_inter(s, temp, 0, s->qscale); |
6001 | 3525 ff_simple_idct(temp); //FIXME |
2967 | 3526 |
936 | 3527 for(i=0; i<64; i++) |
3528 sum+= (temp[i]-bak[i])*(temp[i]-bak[i]); | |
2967 | 3529 |
936 | 3530 return sum; |
3531 } | |
3532 | |
1708 | 3533 static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){ |
1007 | 3534 MpegEncContext * const s= (MpegEncContext *)c; |
1064 | 3535 const uint8_t *scantable= s->intra_scantable.permutated; |
11195 | 3536 LOCAL_ALIGNED_16(DCTELEM, temp, [64]); |
3537 LOCAL_ALIGNED_16(uint8_t, lsrc1, [64]); | |
3538 LOCAL_ALIGNED_16(uint8_t, lsrc2, [64]); | |
6719 | 3539 int i, last, run, bits, level, distortion, start_i; |
1007 | 3540 const int esc_length= s->ac_esc_length; |
3541 uint8_t * length; | |
3542 uint8_t * last_length; | |
2967 | 3543 |
1708 | 3544 assert(h==8); |
3545 | |
10068 | 3546 copy_block8(lsrc1, src1, 8, stride, 8); |
3547 copy_block8(lsrc2, src2, 8, stride, 8); | |
3548 | |
3549 s->dsp.diff_pixels(temp, lsrc1, lsrc2, 8); | |
1007 | 3550 |
1013 | 3551 s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i); |
3552 | |
3553 bits=0; | |
2967 | 3554 |
1013 | 3555 if (s->mb_intra) { |
2967 | 3556 start_i = 1; |
1013 | 3557 length = s->intra_ac_vlc_length; |
3558 last_length= s->intra_ac_vlc_last_length; | |
3559 bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma | |
3560 } else { | |
3561 start_i = 0; | |
3562 length = s->inter_ac_vlc_length; | |
3563 last_length= s->inter_ac_vlc_last_length; | |
3564 } | |
2967 | 3565 |
1013 | 3566 if(last>=start_i){ |
1007 | 3567 run=0; |
3568 for(i=start_i; i<last; i++){ | |
3569 int j= scantable[i]; | |
3570 level= temp[j]; | |
2967 | 3571 |
1007 | 3572 if(level){ |
3573 level+=64; | |
3574 if((level&(~127)) == 0){ | |
3575 bits+= length[UNI_AC_ENC_INDEX(run, level)]; | |
3576 }else | |
3577 bits+= esc_length; | |
3578 run=0; | |
3579 }else | |
3580 run++; | |
3581 } | |
3582 i= scantable[last]; | |
2967 | 3583 |
1011 | 3584 level= temp[i] + 64; |
3585 | |
3586 assert(level - 64); | |
2967 | 3587 |
1007 | 3588 if((level&(~127)) == 0){ |
3589 bits+= last_length[UNI_AC_ENC_INDEX(run, level)]; | |
3590 }else | |
3591 bits+= esc_length; | |
2967 | 3592 |
1013 | 3593 } |
3594 | |
3595 if(last>=0){ | |
1689 | 3596 if(s->mb_intra) |
3597 s->dct_unquantize_intra(s, temp, 0, s->qscale); | |
3598 else | |
3599 s->dct_unquantize_inter(s, temp, 0, s->qscale); | |
1007 | 3600 } |
2967 | 3601 |
10068 | 3602 s->dsp.idct_add(lsrc2, 8, temp); |
3603 | |
3604 distortion= s->dsp.sse[1](NULL, lsrc2, lsrc1, 8, 8); | |
6719 | 3605 |
3606 return distortion + ((bits*s->qscale*s->qscale*109 + 64)>>7); | |
1007 | 3607 } |
3608 | |
1708 | 3609 static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){ |
1007 | 3610 MpegEncContext * const s= (MpegEncContext *)c; |
1064 | 3611 const uint8_t *scantable= s->intra_scantable.permutated; |
11195 | 3612 LOCAL_ALIGNED_16(DCTELEM, temp, [64]); |
1007 | 3613 int i, last, run, bits, level, start_i; |
3614 const int esc_length= s->ac_esc_length; | |
3615 uint8_t * length; | |
3616 uint8_t * last_length; | |
1708 | 3617 |
3618 assert(h==8); | |
2967 | 3619 |
1013 | 3620 s->dsp.diff_pixels(temp, src1, src2, stride); |
1007 | 3621 |
1013 | 3622 s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i); |
3623 | |
3624 bits=0; | |
2967 | 3625 |
1007 | 3626 if (s->mb_intra) { |
2967 | 3627 start_i = 1; |
1007 | 3628 length = s->intra_ac_vlc_length; |
3629 last_length= s->intra_ac_vlc_last_length; | |
1013 | 3630 bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma |
1007 | 3631 } else { |
3632 start_i = 0; | |
3633 length = s->inter_ac_vlc_length; | |
3634 last_length= s->inter_ac_vlc_last_length; | |
3635 } | |
2967 | 3636 |
1013 | 3637 if(last>=start_i){ |
1007 | 3638 run=0; |
3639 for(i=start_i; i<last; i++){ | |
3640 int j= scantable[i]; | |
3641 level= temp[j]; | |
2967 | 3642 |
1007 | 3643 if(level){ |
3644 level+=64; | |
3645 if((level&(~127)) == 0){ | |
3646 bits+= length[UNI_AC_ENC_INDEX(run, level)]; | |
3647 }else | |
3648 bits+= esc_length; | |
3649 run=0; | |
3650 }else | |
3651 run++; | |
3652 } | |
3653 i= scantable[last]; | |
2967 | 3654 |
1013 | 3655 level= temp[i] + 64; |
2967 | 3656 |
1013 | 3657 assert(level - 64); |
2967 | 3658 |
1007 | 3659 if((level&(~127)) == 0){ |
3660 bits+= last_length[UNI_AC_ENC_INDEX(run, level)]; | |
3661 }else | |
3662 bits+= esc_length; | |
3663 } | |
3664 | |
3665 return bits; | |
3666 } | |
3667 | |
/* Intra vertical SAD: sum of absolute differences between each pixel and
 * the pixel directly below it (size is the block width, 8 or 16). */
#define VSAD_INTRA(size) \
static int vsad_intra##size##_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
    int score=0; \
    int x, y; \
 \
    for(y=1; y<h; y++){ \
        for(x=0; x<size; x++) \
            score += FFABS(s[x] - s[x+stride]); \
        s += stride; \
    } \
 \
    return score; \
}
VSAD_INTRA(8)
VSAD_INTRA(16)
1729 | 3685 |
static int vsad16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
    /* Inter vertical SAD: absolute vertical gradient of the residual
     * s1-s2, summed over a 16-wide block. */
    int sad=0;
    int x, y;

    for(y=1; y<h; y++){
        for(x=0; x<16; x++)
            sad += FFABS(s1[x] - s2[x] - s1[x+stride] + s2[x+stride]);
        s1 += stride;
        s2 += stride;
    }

    return sad;
}
3700 | |
#define SQ(a) ((a)*(a))

/* Intra vertical SSE: sum of squared differences between each pixel and
 * the pixel directly below it (size is the block width, 8 or 16). */
#define VSSE_INTRA(size) \
static int vsse_intra##size##_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
    int score=0; \
    int x, y; \
 \
    for(y=1; y<h; y++){ \
        for(x=0; x<size; x++) \
            score += SQ(s[x] - s[x+stride]); \
        s += stride; \
    } \
 \
    return score; \
}
VSSE_INTRA(8)
VSSE_INTRA(16)
1729 | 3719 |
static int vsse16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
    /* Inter vertical SSE: squared vertical gradient of the residual
     * s1-s2, summed over a 16-wide block. */
    int sum=0;
    int x, y;

    for(y=1; y<h; y++){
        for(x=0; x<16; x++){
            int d = s1[x] - s2[x] - s1[x+stride] + s2[x+stride];
            sum += SQ(d);
        }
        s1 += stride;
        s2 += stride;
    }

    return sum;
}
3734 | |
static int ssd_int8_vs_int16_c(const int8_t *pix1, const int16_t *pix2,
                               int size){
    /* Sum of squared differences between an int8 and an int16 vector. */
    int score = 0;
    int i;

    for (i = 0; i < size; i++) {
        int d = pix1[i] - pix2[i];
        score += d * d;
    }

    return score;
}
3743 | |
6056
558c1fd0ee72
Fix typo in macro name: WARPER8_16_SQ --> WRAPPER8_16_SQ.
diego
parents:
6054
diff
changeset
|
3744 WRAPPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c) |
558c1fd0ee72
Fix typo in macro name: WARPER8_16_SQ --> WRAPPER8_16_SQ.
diego
parents:
6054
diff
changeset
|
3745 WRAPPER8_16_SQ(hadamard8_intra8x8_c, hadamard8_intra16_c) |
558c1fd0ee72
Fix typo in macro name: WARPER8_16_SQ --> WRAPPER8_16_SQ.
diego
parents:
6054
diff
changeset
|
3746 WRAPPER8_16_SQ(dct_sad8x8_c, dct_sad16_c) |
8590 | 3747 #if CONFIG_GPL |
6056
558c1fd0ee72
Fix typo in macro name: WARPER8_16_SQ --> WRAPPER8_16_SQ.
diego
parents:
6054
diff
changeset
|
3748 WRAPPER8_16_SQ(dct264_sad8x8_c, dct264_sad16_c) |
3013 | 3749 #endif |
6056
558c1fd0ee72
Fix typo in macro name: WARPER8_16_SQ --> WRAPPER8_16_SQ.
diego
parents:
6054
diff
changeset
|
3750 WRAPPER8_16_SQ(dct_max8x8_c, dct_max16_c) |
558c1fd0ee72
Fix typo in macro name: WARPER8_16_SQ --> WRAPPER8_16_SQ.
diego
parents:
6054
diff
changeset
|
3751 WRAPPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c) |
558c1fd0ee72
Fix typo in macro name: WARPER8_16_SQ --> WRAPPER8_16_SQ.
diego
parents:
6054
diff
changeset
|
3752 WRAPPER8_16_SQ(rd8x8_c, rd16_c) |
558c1fd0ee72
Fix typo in macro name: WARPER8_16_SQ --> WRAPPER8_16_SQ.
diego
parents:
6054
diff
changeset
|
3753 WRAPPER8_16_SQ(bit8x8_c, bit16_c) |
936 | 3754 |
3568
945caa35ee9a
sse and 3dnow implementations of float->int conversion and mdct windowing.
lorenm
parents:
3536
diff
changeset
|
static void vector_fmul_c(float *dst, const float *src, int len){
    /* In-place element-wise multiply: dst[i] *= src[i]. */
    int n;
    for(n=0; n<len; n++)
        dst[n] = dst[n] * src[n];
}
945caa35ee9a
sse and 3dnow implementations of float->int conversion and mdct windowing.
lorenm
parents:
3536
diff
changeset
|
3760 |
945caa35ee9a
sse and 3dnow implementations of float->int conversion and mdct windowing.
lorenm
parents:
3536
diff
changeset
|
static void vector_fmul_reverse_c(float *dst, const float *src0, const float *src1, int len){
    /* dst[i] = src0[i] * src1[len-1-i]: src1 is read back to front. */
    int i;
    for(i=0; i<len; i++)
        dst[i] = src0[i] * src1[len-1-i];
}
945caa35ee9a
sse and 3dnow implementations of float->int conversion and mdct windowing.
lorenm
parents:
3536
diff
changeset
|
3767 |
10300
4d1b9ca628fc
Drop unused args from vector_fmul_add_add, simpify code, and rename
mru
parents:
10219
diff
changeset
|
static void vector_fmul_add_c(float *dst, const float *src0, const float *src1, const float *src2, int len){
    /* Fused multiply-add: dst[i] = src0[i]*src1[i] + src2[i]. */
    int n;
    for(n=0; n<len; n++)
        dst[n] = src0[n] * src1[n] + src2[n];
}
945caa35ee9a
sse and 3dnow implementations of float->int conversion and mdct windowing.
lorenm
parents:
3536
diff
changeset
|
3773 |
void ff_vector_fmul_window_c(float *dst, const float *src0, const float *src1, const float *win, float add_bias, int len){
    /* Overlap-add windowing (MDCT overlap): produces 2*len outputs from
     * len samples of src0 and src1 using a 2*len-tap window.
     * dst and win hold 2*len elements; src0 and src1 hold len each. */
    int k;
    for(k=0; k<len; k++){
        float s0 = src0[k];
        float s1 = src1[len-1-k];
        float wi = win[k];
        float wj = win[2*len-1-k];
        dst[k]         = s0*wj - s1*wi + add_bias;
        dst[2*len-1-k] = s0*wi + s1*wj + add_bias;
    }
}
3788 | |
static void vector_fmul_scalar_c(float *dst, const float *src, float mul,
                                 int len)
{
    /* Scale a vector by a scalar: dst[i] = src[i] * mul. */
    int n;
    for (n = 0; n < len; n++)
        dst[n] = src[n] * mul;
}
3796 | |
static void vector_fmul_sv_scalar_2_c(float *dst, const float *src,
                                      const float **sv, float mul, int len)
{
    /* Multiply src by a scalar and by per-pair sub-vectors of length 2:
     * sv advances one entry for every 2 output samples. */
    int i;
    for (i = 0; i < len; i += 2) {
        const float *v = *sv++;
        dst[i  ] = src[i  ] * v[0] * mul;
        dst[i+1] = src[i+1] * v[1] * mul;
    }
}
3806 | |
static void vector_fmul_sv_scalar_4_c(float *dst, const float *src,
                                      const float **sv, float mul, int len)
{
    /* Like vector_fmul_sv_scalar_2_c but with length-4 sub-vectors. */
    int i, k;
    for (i = 0; i < len; i += 4, sv++)
        for (k = 0; k < 4; k++)
            dst[i+k] = src[i+k] * sv[0][k] * mul;
}
3818 | |
static void sv_fmul_scalar_2_c(float *dst, const float **sv, float mul,
                               int len)
{
    /* Scale length-2 sub-vectors by a scalar into dst. */
    int i;
    for (i = 0; i < len; i += 2) {
        const float *v = *sv++;
        dst[i  ] = v[0] * mul;
        dst[i+1] = v[1] * mul;
    }
}
3828 | |
static void sv_fmul_scalar_4_c(float *dst, const float **sv, float mul,
                               int len)
{
    /* Scale length-4 sub-vectors by a scalar into dst. */
    int i, k;
    for (i = 0; i < len; i += 4, sv++)
        for (k = 0; k < 4; k++)
            dst[i+k] = sv[0][k] * mul;
}
3840 | |
static void butterflies_float_c(float *restrict v1, float *restrict v2,
                                int len)
{
    /* Butterfly: v1 <- v1+v2, v2 <- v1-v2 (using the old v1). */
    int i;
    for (i = 0; i < len; i++) {
        float sum  = v1[i] + v2[i];
        float diff = v1[i] - v2[i];
        v1[i] = sum;
        v2[i] = diff;
    }
}
3851 | |
static float scalarproduct_float_c(const float *v1, const float *v2, int len)
{
    /* Dot product of two float vectors. */
    float dot = 0.0;
    int i;

    for (i = 0; i < len; i++)
        dot += v1[i] * v2[i];

    return dot;
}
3862 | |
static void int32_to_float_fmul_scalar_c(float *dst, const int *src, float mul, int len){
    /* Convert int samples to float while scaling by mul. */
    int n;
    for(n=0; n<len; n++)
        dst[n] = src[n] * mul;
}
3868 | |
10104
0fa3d21b317e
SSE optimized vector_clipf(). 10% faster TwinVQ decoding.
vitor
parents:
10094
diff
changeset
|
static inline uint32_t clipf_c_one(uint32_t a, uint32_t mini,
                                   uint32_t maxi, uint32_t maxisign)
{
    /* Clip one float given as raw IEEE-754 bits, valid only when the
     * clip range straddles zero (min < 0 < max): negative floats compare
     * greater than all positive ones as unsigned ints, so a > mini
     * detects "below min", and flipping the sign bit orders positives
     * for the "above max" test. */
    if(a > mini) return mini;
    else if((a^(1<<31)) > maxisign) return maxi;
    else return a;
}

static void vector_clipf_c_opposite_sign(float *dst, const float *src, float *min, float *max, int len){
    /* Bit-twiddling clip path for min < 0 < max; len is assumed to be a
     * multiple of 8 (processed in groups of eight). */
    int i, k;
    uint32_t mini = *(uint32_t*)min;
    uint32_t maxi = *(uint32_t*)max;
    uint32_t maxisign = maxi ^ (1<<31);
    uint32_t *dsti = (uint32_t*)dst;
    const uint32_t *srci = (const uint32_t*)src;
    for(i=0; i<len; i+=8)
        for(k=0; k<8; k++)
            dsti[i+k] = clipf_c_one(srci[i+k], mini, maxi, maxisign);
}
static void vector_clipf_c(float *dst, const float *src, float min, float max, int len){
    /* Clamp each sample into [min, max]; len is assumed to be a multiple
     * of 8 (processed in groups of eight). */
    int i, k;
    if(min < 0 && max > 0) {
        /* range straddles zero: the integer-compare fast path applies */
        vector_clipf_c_opposite_sign(dst, src, &min, &max, len);
    } else {
        for(i=0; i<len; i+=8)
            for(k=0; k<8; k++)
                dst[i+k] = av_clipf(src[i+k], min, max);
    }
}
0fa3d21b317e
SSE optimized vector_clipf(). 10% faster TwinVQ decoding.
vitor
parents:
10094
diff
changeset
|
3913 |
7261 | 3914 static av_always_inline int float_to_int16_one(const float *src){ |
3915 int_fast32_t tmp = *(const int32_t*)src; | |
3916 if(tmp & 0xf0000){ | |
3917 tmp = (0x43c0ffff - tmp)>>31; | |
3918 // is this faster on some gcc/cpu combinations? | |
3919 // if(tmp > 0x43c0ffff) tmp = 0xFFFF; | |
3920 // else tmp = 0; | |
3921 } | |
3922 return tmp - 0x8000; | |
3923 } | |
3924 | |
void ff_float_to_int16_c(int16_t *dst, const float *src, long len){
    /* Convert a whole buffer of pre-scaled floats to int16 samples. */
    int i;
    for(i=0; i<len; i++)
        dst[i] = float_to_int16_one(src+i);
}
3930 | |
7286
e267f2519248
float_to_int16_interleave: change src to an array of pointers instead of assuming it's contiguous.
lorenm
parents:
7263
diff
changeset
|
void ff_float_to_int16_interleave_c(int16_t *dst, const float **src, long len, int channels){
    /* Convert per-channel float buffers to interleaved int16 samples;
     * src is an array of 'channels' pointers, each to 'len' floats. */
    int i, j, ch;
    if(channels==2){
        /* fast path for the common stereo case */
        for(i=0; i<len; i++){
            dst[2*i  ] = float_to_int16_one(src[0]+i);
            dst[2*i+1] = float_to_int16_one(src[1]+i);
        }
    }else{
        for(ch=0; ch<channels; ch++)
            for(i=0, j=ch; i<len; i++, j+=channels)
                dst[j] = float_to_int16_one(src[ch]+i);
    }
}
945caa35ee9a
sse and 3dnow implementations of float->int conversion and mdct windowing.
lorenm
parents:
3536
diff
changeset
|
3944 |
static int32_t scalarproduct_int16_c(const int16_t * v1, const int16_t * v2, int order, int shift)
{
    /* Dot product of two int16 vectors; each product is right-shifted
     * by 'shift' before accumulation. */
    int res = 0;
    int i;

    for (i = 0; i < order; i++)
        res += (v1[i] * v2[i]) >> shift;

    return res;
}
87b1dfb5a98d
Add several vector functions used by Monkey's Audio decoder to dsputil
kostya
parents:
6719
diff
changeset
|
3954 |
static int32_t scalarproduct_and_madd_int16_c(int16_t *v1, const int16_t *v2, const int16_t *v3, int order, int mul)
{
    /* Fused op: returns dot(v1, v2) computed on the OLD v1 values while
     * updating v1[i] += mul * v3[i] in the same pass. */
    int res = 0;
    int i;
    for (i = 0; i < order; i++) {
        res   += v1[i] * v2[i];
        v1[i] += mul * v3[i];
    }
    return res;
}
3964 | |
/* Fixed-point cosine weights: W_k = round(2048*sqrt(2)*cos(k*pi/16)),
 * except W0 = W4 = 2048. */
#define W0 2048
#define W1 2841 /* 2048*sqrt (2)*cos (1*pi/16) */
#define W2 2676 /* 2048*sqrt (2)*cos (2*pi/16) */
#define W3 2408 /* 2048*sqrt (2)*cos (3*pi/16) */
#define W4 2048 /* 2048*sqrt (2)*cos (4*pi/16) */
#define W5 1609 /* 2048*sqrt (2)*cos (5*pi/16) */
#define W6 1108 /* 2048*sqrt (2)*cos (6*pi/16) */
#define W7  565 /* 2048*sqrt (2)*cos (7*pi/16) */

static void wmv2_idct_row(short * b)
{
    /* 1D 8-point inverse DCT on one row, rounding result to 8 fewer bits. */
    int s1,s2;
    int a0,a1,a2,a3,a4,a5,a6,a7;
    /* step 1: odd/even butterfly products */
    a1 = W1*b[1]+W7*b[7];
    a7 = W7*b[1]-W1*b[7];
    a5 = W5*b[5]+W3*b[3];
    a3 = W3*b[5]-W5*b[3];
    a2 = W2*b[2]+W6*b[6];
    a6 = W6*b[2]-W2*b[6];
    a0 = W0*b[0]+W0*b[4];
    a4 = W0*b[0]-W0*b[4];
    /* step 2: 181/256 ~= sqrt(2)/2 rotation of the odd terms */
    s1 = (181*(a1-a5+a7-a3)+128)>>8;
    s2 = (181*(a1-a5-a7+a3)+128)>>8;
    /* step 3: final butterflies with rounding */
    b[0] = (a0+a2+a1+a5 + (1<<7))>>8;
    b[1] = (a4+a6 +s1   + (1<<7))>>8;
    b[2] = (a4-a6 +s2   + (1<<7))>>8;
    b[3] = (a0-a2+a7+a3 + (1<<7))>>8;
    b[4] = (a0-a2-a7-a3 + (1<<7))>>8;
    b[5] = (a4-a6 -s2   + (1<<7))>>8;
    b[6] = (a4+a6 -s1   + (1<<7))>>8;
    b[7] = (a0+a2-a1-a5 + (1<<7))>>8;
}
static void wmv2_idct_col(short * b)
{
    /* 1D 8-point inverse DCT down one column (stride 8), keeping 3 extra
     * bits of intermediate precision, final rounding shift of 14. */
    int s1,s2;
    int a0,a1,a2,a3,a4,a5,a6,a7;
    /* step 1, with extended precision */
    a1 = (W1*b[8*1]+W7*b[8*7] + 4)>>3;
    a7 = (W7*b[8*1]-W1*b[8*7] + 4)>>3;
    a5 = (W5*b[8*5]+W3*b[8*3] + 4)>>3;
    a3 = (W3*b[8*5]-W5*b[8*3] + 4)>>3;
    a2 = (W2*b[8*2]+W6*b[8*6] + 4)>>3;
    a6 = (W6*b[8*2]-W2*b[8*6] + 4)>>3;
    a0 = (W0*b[8*0]+W0*b[8*4]    )>>3;
    a4 = (W0*b[8*0]-W0*b[8*4]    )>>3;
    /* step 2 */
    s1 = (181*(a1-a5+a7-a3)+128)>>8;
    s2 = (181*(a1-a5-a7+a3)+128)>>8;
    /* step 3 */
    b[8*0] = (a0+a2+a1+a5 + (1<<13))>>14;
    b[8*1] = (a4+a6 +s1   + (1<<13))>>14;
    b[8*2] = (a4-a6 +s2   + (1<<13))>>14;
    b[8*3] = (a0-a2+a7+a3 + (1<<13))>>14;

    b[8*4] = (a0-a2-a7-a3 + (1<<13))>>14;
    b[8*5] = (a4-a6 -s2   + (1<<13))>>14;
    b[8*6] = (a4+a6 -s1   + (1<<13))>>14;
    b[8*7] = (a0+a2-a1-a5 + (1<<13))>>14;
}
void ff_wmv2_idct_c(short * block){
    /* Full separable 8x8 inverse DCT used by the WMV2 decoder:
     * rows first, then columns, in place. */
    int i;

    for(i=0; i<8; i++)
        wmv2_idct_row(block + 8*i);
    for(i=0; i<8; i++)
        wmv2_idct_col(block + i);
}
1092 | 4037 /* XXX: those functions should be suppressed ASAP when all IDCTs are |
4038 converted */ | |
5887 | 4039 static void ff_wmv2_idct_put_c(uint8_t *dest, int line_size, DCTELEM *block) |
4040 { | |
4041 ff_wmv2_idct_c(block); | |
4042 put_pixels_clamped_c(block, dest, line_size); | |
4043 } | |
4044 static void ff_wmv2_idct_add_c(uint8_t *dest, int line_size, DCTELEM *block) | |
4045 { | |
4046 ff_wmv2_idct_c(block); | |
4047 add_pixels_clamped_c(block, dest, line_size); | |
4048 } | |
1092 | 4049 static void ff_jref_idct_put(uint8_t *dest, int line_size, DCTELEM *block) |
4050 { | |
4051 j_rev_dct (block); | |
4052 put_pixels_clamped_c(block, dest, line_size); | |
4053 } | |
4054 static void ff_jref_idct_add(uint8_t *dest, int line_size, DCTELEM *block) | |
4055 { | |
4056 j_rev_dct (block); | |
4057 add_pixels_clamped_c(block, dest, line_size); | |
4058 } | |
4059 | |
2256 | 4060 static void ff_jref_idct4_put(uint8_t *dest, int line_size, DCTELEM *block) |
4061 { | |
4062 j_rev_dct4 (block); | |
4063 put_pixels_clamped4_c(block, dest, line_size); | |
4064 } | |
4065 static void ff_jref_idct4_add(uint8_t *dest, int line_size, DCTELEM *block) | |
4066 { | |
4067 j_rev_dct4 (block); | |
4068 add_pixels_clamped4_c(block, dest, line_size); | |
4069 } | |
4070 | |
2257 | 4071 static void ff_jref_idct2_put(uint8_t *dest, int line_size, DCTELEM *block) |
4072 { | |
4073 j_rev_dct2 (block); | |
4074 put_pixels_clamped2_c(block, dest, line_size); | |
4075 } | |
4076 static void ff_jref_idct2_add(uint8_t *dest, int line_size, DCTELEM *block) | |
4077 { | |
4078 j_rev_dct2 (block); | |
4079 add_pixels_clamped2_c(block, dest, line_size); | |
4080 } | |
4081 | |
2259 | 4082 static void ff_jref_idct1_put(uint8_t *dest, int line_size, DCTELEM *block) |
4083 { | |
4176 | 4084 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; |
2259 | 4085 |
4086 dest[0] = cm[(block[0] + 4)>>3]; | |
4087 } | |
4088 static void ff_jref_idct1_add(uint8_t *dest, int line_size, DCTELEM *block) | |
4089 { | |
4176 | 4090 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; |
2259 | 4091 |
4092 dest[0] = cm[dest[0] + ((block[0] + 4)>>3)]; | |
4093 } | |
4094 | |
5143 | 4095 static void just_return(void *mem av_unused, int stride av_unused, int h av_unused) { return; } |
3215
06f98047ff26
prefetch pixels for future motion compensation. 2-5% faster h264.
lorenm
parents:
3199
diff
changeset
|
4096 |
1201 | 4097 /* init static data */ |
10867 | 4098 av_cold void dsputil_static_init(void) |
0 | 4099 { |
751 | 4100 int i; |
0 | 4101 |
4176 | 4102 for(i=0;i<256;i++) ff_cropTbl[i + MAX_NEG_CROP] = i; |
1201 | 4103 for(i=0;i<MAX_NEG_CROP;i++) { |
4176 | 4104 ff_cropTbl[i] = 0; |
4105 ff_cropTbl[i + MAX_NEG_CROP + 256] = 255; | |
1201 | 4106 } |
2967 | 4107 |
1201 | 4108 for(i=0;i<512;i++) { |
4179 | 4109 ff_squareTbl[i] = (i - 256) * (i - 256); |
1201 | 4110 } |
2967 | 4111 |
4197 | 4112 for(i=0; i<64; i++) inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1; |
1201 | 4113 } |
0 | 4114 |
4281
de525a2b41db
ff_check_alignment to warn the user about a missaligned stack
michael
parents:
4240
diff
changeset
|
4115 int ff_check_alignment(void){ |
de525a2b41db
ff_check_alignment to warn the user about a missaligned stack
michael
parents:
4240
diff
changeset
|
4116 static int did_fail=0; |
11369 | 4117 DECLARE_ALIGNED(16, int, aligned); |
4281
de525a2b41db
ff_check_alignment to warn the user about a missaligned stack
michael
parents:
4240
diff
changeset
|
4118 |
9259 | 4119 if((intptr_t)&aligned & 15){ |
4281
de525a2b41db
ff_check_alignment to warn the user about a missaligned stack
michael
parents:
4240
diff
changeset
|
4120 if(!did_fail){ |
8590 | 4121 #if HAVE_MMX || HAVE_ALTIVEC |
4281
de525a2b41db
ff_check_alignment to warn the user about a missaligned stack
michael
parents:
4240
diff
changeset
|
4122 av_log(NULL, AV_LOG_ERROR, |
4292 | 4123 "Compiler did not align stack variables. Libavcodec has been miscompiled\n" |
4124 "and may be very slow or crash. This is not a bug in libavcodec,\n" | |
5542
b0a566346fb1
Add attribute that forces alignment of stack to functions that need it.
ramiro
parents:
5520
diff
changeset
|
4125 "but in the compiler. You may try recompiling using gcc >= 4.2.\n" |
b0a566346fb1
Add attribute that forces alignment of stack to functions that need it.
ramiro
parents:
5520
diff
changeset
|
4126 "Do not report crashes to FFmpeg developers.\n"); |
4281
de525a2b41db
ff_check_alignment to warn the user about a missaligned stack
michael
parents:
4240
diff
changeset
|
4127 #endif |
de525a2b41db
ff_check_alignment to warn the user about a missaligned stack
michael
parents:
4240
diff
changeset
|
4128 did_fail=1; |
de525a2b41db
ff_check_alignment to warn the user about a missaligned stack
michael
parents:
4240
diff
changeset
|
4129 } |
de525a2b41db
ff_check_alignment to warn the user about a missaligned stack
michael
parents:
4240
diff
changeset
|
4130 return -1; |
de525a2b41db
ff_check_alignment to warn the user about a missaligned stack
michael
parents:
4240
diff
changeset
|
4131 } |
de525a2b41db
ff_check_alignment to warn the user about a missaligned stack
michael
parents:
4240
diff
changeset
|
4132 return 0; |
de525a2b41db
ff_check_alignment to warn the user about a missaligned stack
michael
parents:
4240
diff
changeset
|
4133 } |
861 | 4134 |
/**
 * Initialize a DSPContext's function-pointer tables for the given codec
 * context.
 *
 * Fills every entry with the portable C implementation, selecting forward
 * DCT and inverse DCT variants from avctx->dct_algo / avctx->idct_algo and
 * avctx->lowres, then calls the platform-specific init functions (MMX, ARM,
 * PPC, ...) which may override individual entries with optimized versions.
 * Finally the IDCT coefficient permutation table is built to match the
 * chosen IDCT.
 */
av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
{
    int i;

    ff_check_alignment();

#if CONFIG_ENCODERS
    /* forward DCT selection (encoders only) */
    if(avctx->dct_algo==FF_DCT_FASTINT) {
        c->fdct = fdct_ifast;
        c->fdct248 = fdct_ifast248;
    }
    else if(avctx->dct_algo==FF_DCT_FAAN) {
        c->fdct = ff_faandct;
        c->fdct248 = ff_faandct248;
    }
    else {
        c->fdct = ff_jpeg_fdct_islow; //slow/accurate/default
        c->fdct248 = ff_fdct248_islow;
    }
#endif //CONFIG_ENCODERS

    /* IDCT selection: lowres picks reduced-size transforms, otherwise
     * idct_algo chooses among the full-resolution implementations */
    if(avctx->lowres==1){
        if(avctx->idct_algo==FF_IDCT_INT || avctx->idct_algo==FF_IDCT_AUTO || !CONFIG_H264_DECODER){
            c->idct_put= ff_jref_idct4_put;
            c->idct_add= ff_jref_idct4_add;
        }else{
            c->idct_put= ff_h264_lowres_idct_put_c;
            c->idct_add= ff_h264_lowres_idct_add_c;
        }
        c->idct = j_rev_dct4;
        c->idct_permutation_type= FF_NO_IDCT_PERM;
    }else if(avctx->lowres==2){
        c->idct_put= ff_jref_idct2_put;
        c->idct_add= ff_jref_idct2_add;
        c->idct = j_rev_dct2;
        c->idct_permutation_type= FF_NO_IDCT_PERM;
    }else if(avctx->lowres==3){
        c->idct_put= ff_jref_idct1_put;
        c->idct_add= ff_jref_idct1_add;
        c->idct = j_rev_dct1;
        c->idct_permutation_type= FF_NO_IDCT_PERM;
    }else{
        if(avctx->idct_algo==FF_IDCT_INT){
            c->idct_put= ff_jref_idct_put;
            c->idct_add= ff_jref_idct_add;
            c->idct = j_rev_dct;
            c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
        }else if((CONFIG_VP3_DECODER || CONFIG_VP5_DECODER || CONFIG_VP6_DECODER ) &&
                avctx->idct_algo==FF_IDCT_VP3){
            c->idct_put= ff_vp3_idct_put_c;
            c->idct_add= ff_vp3_idct_add_c;
            c->idct = ff_vp3_idct_c;
            c->idct_permutation_type= FF_NO_IDCT_PERM;
        }else if(avctx->idct_algo==FF_IDCT_WMV2){
            c->idct_put= ff_wmv2_idct_put_c;
            c->idct_add= ff_wmv2_idct_add_c;
            c->idct = ff_wmv2_idct_c;
            c->idct_permutation_type= FF_NO_IDCT_PERM;
        }else if(avctx->idct_algo==FF_IDCT_FAAN){
            c->idct_put= ff_faanidct_put;
            c->idct_add= ff_faanidct_add;
            c->idct = ff_faanidct;
            c->idct_permutation_type= FF_NO_IDCT_PERM;
        }else if(CONFIG_EATGQ_DECODER && avctx->idct_algo==FF_IDCT_EA) {
            c->idct_put= ff_ea_idct_put_c;
            c->idct_permutation_type= FF_NO_IDCT_PERM;
        }else if(CONFIG_BINK_DECODER && avctx->idct_algo==FF_IDCT_BINK) {
            c->idct     = ff_bink_idct_c;
            c->idct_add = ff_bink_idct_add_c;
            c->idct_put = ff_bink_idct_put_c;
            c->idct_permutation_type = FF_NO_IDCT_PERM;
        }else{ //accurate/default
            c->idct_put= ff_simple_idct_put;
            c->idct_add= ff_simple_idct_add;
            c->idct    = ff_simple_idct;
            c->idct_permutation_type= FF_NO_IDCT_PERM;
        }
    }

    /* pixel block transfer / statistics helpers */
    c->get_pixels = get_pixels_c;
    c->diff_pixels = diff_pixels_c;
    c->put_pixels_clamped = put_pixels_clamped_c;
    c->put_signed_pixels_clamped = put_signed_pixels_clamped_c;
    c->put_pixels_nonclamped = put_pixels_nonclamped_c;
    c->add_pixels_clamped = add_pixels_clamped_c;
    c->add_pixels8 = add_pixels8_c;
    c->add_pixels4 = add_pixels4_c;
    c->sum_abs_dctelem = sum_abs_dctelem_c;
    c->gmc1 = gmc1_c;
    c->gmc = ff_gmc_c;
    c->clear_block = clear_block_c;
    c->clear_blocks = clear_blocks_c;
    c->pix_sum = pix_sum_c;
    c->pix_norm1 = pix_norm1_c;

    c->fill_block_tab[0] = fill_block16_c;
    c->fill_block_tab[1] = fill_block8_c;
    c->scale_block = scale_block_c;

    /* SAD with half-pel interpolation variants */
    /* TODO [0] 16 [1] 8 */
    c->pix_abs[0][0] = pix_abs16_c;
    c->pix_abs[0][1] = pix_abs16_x2_c;
    c->pix_abs[0][2] = pix_abs16_y2_c;
    c->pix_abs[0][3] = pix_abs16_xy2_c;
    c->pix_abs[1][0] = pix_abs8_c;
    c->pix_abs[1][1] = pix_abs8_x2_c;
    c->pix_abs[1][2] = pix_abs8_y2_c;
    c->pix_abs[1][3] = pix_abs8_xy2_c;

    /* half-pel motion compensation tables: [idx] is block size,
     * [0..3] is the subpel position (full, x half, y half, xy half) */
#define dspfunc(PFX, IDX, NUM) \
    c->PFX ## _pixels_tab[IDX][0] = PFX ## _pixels ## NUM ## _c;  \
    c->PFX ## _pixels_tab[IDX][1] = PFX ## _pixels ## NUM ## _x2_c; \
    c->PFX ## _pixels_tab[IDX][2] = PFX ## _pixels ## NUM ## _y2_c; \
    c->PFX ## _pixels_tab[IDX][3] = PFX ## _pixels ## NUM ## _xy2_c

    dspfunc(put, 0, 16);
    dspfunc(put_no_rnd, 0, 16);
    dspfunc(put, 1, 8);
    dspfunc(put_no_rnd, 1, 8);
    dspfunc(put, 2, 4);
    dspfunc(put, 3, 2);

    dspfunc(avg, 0, 16);
    dspfunc(avg_no_rnd, 0, 16);
    dspfunc(avg, 1, 8);
    dspfunc(avg_no_rnd, 1, 8);
    dspfunc(avg, 2, 4);
    dspfunc(avg, 3, 2);
#undef dspfunc

    c->put_no_rnd_pixels_l2[0]= put_no_rnd_pixels16_l2_c;
    c->put_no_rnd_pixels_l2[1]= put_no_rnd_pixels8_l2_c;

    /* third-pel motion compensation (SVQ3); index encodes the 2D subpel
     * position, entries 3/7 are unused */
    c->put_tpel_pixels_tab[ 0] = put_tpel_pixels_mc00_c;
    c->put_tpel_pixels_tab[ 1] = put_tpel_pixels_mc10_c;
    c->put_tpel_pixels_tab[ 2] = put_tpel_pixels_mc20_c;
    c->put_tpel_pixels_tab[ 4] = put_tpel_pixels_mc01_c;
    c->put_tpel_pixels_tab[ 5] = put_tpel_pixels_mc11_c;
    c->put_tpel_pixels_tab[ 6] = put_tpel_pixels_mc21_c;
    c->put_tpel_pixels_tab[ 8] = put_tpel_pixels_mc02_c;
    c->put_tpel_pixels_tab[ 9] = put_tpel_pixels_mc12_c;
    c->put_tpel_pixels_tab[10] = put_tpel_pixels_mc22_c;

    c->avg_tpel_pixels_tab[ 0] = avg_tpel_pixels_mc00_c;
    c->avg_tpel_pixels_tab[ 1] = avg_tpel_pixels_mc10_c;
    c->avg_tpel_pixels_tab[ 2] = avg_tpel_pixels_mc20_c;
    c->avg_tpel_pixels_tab[ 4] = avg_tpel_pixels_mc01_c;
    c->avg_tpel_pixels_tab[ 5] = avg_tpel_pixels_mc11_c;
    c->avg_tpel_pixels_tab[ 6] = avg_tpel_pixels_mc21_c;
    c->avg_tpel_pixels_tab[ 8] = avg_tpel_pixels_mc02_c;
    c->avg_tpel_pixels_tab[ 9] = avg_tpel_pixels_mc12_c;
    c->avg_tpel_pixels_tab[10] = avg_tpel_pixels_mc22_c;

    /* quarter-pel motion compensation: all 16 subpel positions */
#define dspfunc(PFX, IDX, NUM) \
    c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_c; \
    c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_c; \
    c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_c; \
    c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_c; \
    c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_c; \
    c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_c; \
    c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_c; \
    c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_c; \
    c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_c; \
    c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_c; \
    c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_c; \
    c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_c; \
    c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_c; \
    c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_c; \
    c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_c; \
    c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_c

    dspfunc(put_qpel, 0, 16);
    dspfunc(put_no_rnd_qpel, 0, 16);

    dspfunc(avg_qpel, 0, 16);
    /* dspfunc(avg_no_rnd_qpel, 0, 16); */

    dspfunc(put_qpel, 1, 8);
    dspfunc(put_no_rnd_qpel, 1, 8);

    dspfunc(avg_qpel, 1, 8);
    /* dspfunc(avg_no_rnd_qpel, 1, 8); */

    dspfunc(put_h264_qpel, 0, 16);
    dspfunc(put_h264_qpel, 1, 8);
    dspfunc(put_h264_qpel, 2, 4);
    dspfunc(put_h264_qpel, 3, 2);
    dspfunc(avg_h264_qpel, 0, 16);
    dspfunc(avg_h264_qpel, 1, 8);
    dspfunc(avg_h264_qpel, 2, 4);

#undef dspfunc
    /* chroma motion compensation (H.264 bilinear, VC-1 no-rounding variant) */
    c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_c;
    c->put_h264_chroma_pixels_tab[1]= put_h264_chroma_mc4_c;
    c->put_h264_chroma_pixels_tab[2]= put_h264_chroma_mc2_c;
    c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_c;
    c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_c;
    c->avg_h264_chroma_pixels_tab[2]= avg_h264_chroma_mc2_c;
    c->put_no_rnd_vc1_chroma_pixels_tab[0]= put_no_rnd_vc1_chroma_mc8_c;
    c->avg_no_rnd_vc1_chroma_pixels_tab[0]= avg_no_rnd_vc1_chroma_mc8_c;

    c->draw_edges = draw_edges_c;

    /* codec-specific DSP sub-initializers, compiled in on demand */
#if CONFIG_MLP_DECODER || CONFIG_TRUEHD_DECODER
    ff_mlp_init(c, avctx);
#endif
#if CONFIG_VC1_DECODER
    ff_vc1dsp_init(c,avctx);
#endif
#if CONFIG_WMV2_DECODER || CONFIG_VC1_DECODER
    ff_intrax8dsp_init(c,avctx);
#endif
#if CONFIG_RV30_DECODER
    ff_rv30dsp_init(c,avctx);
#endif
#if CONFIG_RV40_DECODER
    ff_rv40dsp_init(c,avctx);
    c->put_rv40_qpel_pixels_tab[0][15] = put_rv40_qpel16_mc33_c;
    c->avg_rv40_qpel_pixels_tab[0][15] = avg_rv40_qpel16_mc33_c;
    c->put_rv40_qpel_pixels_tab[1][15] = put_rv40_qpel8_mc33_c;
    c->avg_rv40_qpel_pixels_tab[1][15] = avg_rv40_qpel8_mc33_c;
#endif

    /* WMV2 mspel motion compensation */
    c->put_mspel_pixels_tab[0]= ff_put_pixels8x8_c;
    c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c;
    c->put_mspel_pixels_tab[2]= put_mspel8_mc20_c;
    c->put_mspel_pixels_tab[3]= put_mspel8_mc30_c;
    c->put_mspel_pixels_tab[4]= put_mspel8_mc02_c;
    c->put_mspel_pixels_tab[5]= put_mspel8_mc12_c;
    c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c;
    c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c;

    /* comparison functions used by motion estimation / rate distortion:
     * [0] is the 16x16 variant, [1] the 8x8 variant */
#define SET_CMP_FUNC(name) \
    c->name[0]= name ## 16_c;\
    c->name[1]= name ## 8x8_c;

    SET_CMP_FUNC(hadamard8_diff)
    c->hadamard8_diff[4]= hadamard8_intra16_c;
    c->hadamard8_diff[5]= hadamard8_intra8x8_c;
    SET_CMP_FUNC(dct_sad)
    SET_CMP_FUNC(dct_max)
#if CONFIG_GPL
    SET_CMP_FUNC(dct264_sad)
#endif
    c->sad[0]= pix_abs16_c;
    c->sad[1]= pix_abs8_c;
    c->sse[0]= sse16_c;
    c->sse[1]= sse8_c;
    c->sse[2]= sse4_c;
    SET_CMP_FUNC(quant_psnr)
    SET_CMP_FUNC(rd)
    SET_CMP_FUNC(bit)
    c->vsad[0]= vsad16_c;
    c->vsad[4]= vsad_intra16_c;
    c->vsad[5]= vsad_intra8_c;
    c->vsse[0]= vsse16_c;
    c->vsse[4]= vsse_intra16_c;
    c->vsse[5]= vsse_intra8_c;
    c->nsse[0]= nsse16_c;
    c->nsse[1]= nsse8_c;
#if CONFIG_DWT
    ff_dsputil_init_dwt(c);
#endif

    c->ssd_int8_vs_int16 = ssd_int8_vs_int16_c;

    /* lossless codec helpers (HuffYUV, PNG, ...) */
    c->add_bytes= add_bytes_c;
    c->add_bytes_l2= add_bytes_l2_c;
    c->diff_bytes= diff_bytes_c;
    c->add_hfyu_median_prediction= add_hfyu_median_prediction_c;
    c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_c;
    c->add_hfyu_left_prediction  = add_hfyu_left_prediction_c;
    c->add_hfyu_left_prediction_bgr32 = add_hfyu_left_prediction_bgr32_c;
    c->bswap_buf= bswap_buf;
#if CONFIG_PNG_DECODER
    c->add_png_paeth_prediction= ff_add_png_paeth_prediction;
#endif

    /* loop filters */
    if (CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
        c->h263_h_loop_filter= h263_h_loop_filter_c;
        c->h263_v_loop_filter= h263_v_loop_filter_c;
    }

    if (CONFIG_VP3_DECODER) {
        c->vp3_h_loop_filter= ff_vp3_h_loop_filter_c;
        c->vp3_v_loop_filter= ff_vp3_v_loop_filter_c;
        c->vp3_idct_dc_add= ff_vp3_idct_dc_add_c;
    }

    c->h261_loop_filter= h261_loop_filter_c;

    c->try_8x8basis= try_8x8basis_c;
    c->add_8x8basis= add_8x8basis_c;

    /* audio / float vector helpers */
#if CONFIG_VORBIS_DECODER
    c->vorbis_inverse_coupling = vorbis_inverse_coupling;
#endif
#if CONFIG_AC3_DECODER
    c->ac3_downmix = ff_ac3_downmix_c;
#endif
#if CONFIG_LPC
    c->lpc_compute_autocorr = ff_lpc_compute_autocorr;
#endif
    c->vector_fmul = vector_fmul_c;
    c->vector_fmul_reverse = vector_fmul_reverse_c;
    c->vector_fmul_add = vector_fmul_add_c;
    c->vector_fmul_window = ff_vector_fmul_window_c;
    c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_c;
    c->vector_clipf = vector_clipf_c;
    c->float_to_int16 = ff_float_to_int16_c;
    c->float_to_int16_interleave = ff_float_to_int16_interleave_c;
    c->scalarproduct_int16 = scalarproduct_int16_c;
    c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_c;
    c->scalarproduct_float = scalarproduct_float_c;
    c->butterflies_float = butterflies_float_c;
    c->vector_fmul_scalar = vector_fmul_scalar_c;

    c->vector_fmul_sv_scalar[0] = vector_fmul_sv_scalar_2_c;
    c->vector_fmul_sv_scalar[1] = vector_fmul_sv_scalar_4_c;

    c->sv_fmul_scalar[0] = sv_fmul_scalar_2_c;
    c->sv_fmul_scalar[1] = sv_fmul_scalar_4_c;

    /* image shrinking: shrink[n] halves each dimension n times;
     * shrink[0] is a plain copy */
    c->shrink[0]= av_image_copy_plane;
    c->shrink[1]= ff_shrink22;
    c->shrink[2]= ff_shrink44;
    c->shrink[3]= ff_shrink88;

    c->prefetch= just_return;

    /* zero the 2tap tables so unset entries can be backfilled below after
     * the arch-specific code had a chance to install optimized versions */
    memset(c->put_2tap_qpel_pixels_tab, 0, sizeof(c->put_2tap_qpel_pixels_tab));
    memset(c->avg_2tap_qpel_pixels_tab, 0, sizeof(c->avg_2tap_qpel_pixels_tab));

    /* architecture-specific overrides; each may replace any subset of the
     * C defaults installed above */
    if (HAVE_MMX)        dsputil_init_mmx   (c, avctx);
    if (ARCH_ARM)        dsputil_init_arm   (c, avctx);
    if (CONFIG_MLIB)     dsputil_init_mlib  (c, avctx);
    if (HAVE_VIS)        dsputil_init_vis   (c, avctx);
    if (ARCH_ALPHA)      dsputil_init_alpha (c, avctx);
    if (ARCH_PPC)        dsputil_init_ppc   (c, avctx);
    if (HAVE_MMI)        dsputil_init_mmi   (c, avctx);
    if (ARCH_SH4)        dsputil_init_sh4   (c, avctx);
    if (ARCH_BFIN)       dsputil_init_bfin  (c, avctx);

    /* any 2tap entry still unset falls back to the h264 qpel function */
    for(i=0; i<64; i++){
        if(!c->put_2tap_qpel_pixels_tab[0][i])
            c->put_2tap_qpel_pixels_tab[0][i]= c->put_h264_qpel_pixels_tab[0][i];
        if(!c->avg_2tap_qpel_pixels_tab[0][i])
            c->avg_2tap_qpel_pixels_tab[0][i]= c->avg_h264_qpel_pixels_tab[0][i];
    }

    /* rv30/rv40 (0,0) subpel positions reuse the (possibly optimized)
     * h264 qpel functions */
    c->put_rv30_tpel_pixels_tab[0][0] = c->put_h264_qpel_pixels_tab[0][0];
    c->put_rv30_tpel_pixels_tab[1][0] = c->put_h264_qpel_pixels_tab[1][0];
    c->avg_rv30_tpel_pixels_tab[0][0] = c->avg_h264_qpel_pixels_tab[0][0];
    c->avg_rv30_tpel_pixels_tab[1][0] = c->avg_h264_qpel_pixels_tab[1][0];

    c->put_rv40_qpel_pixels_tab[0][0] = c->put_h264_qpel_pixels_tab[0][0];
    c->put_rv40_qpel_pixels_tab[1][0] = c->put_h264_qpel_pixels_tab[1][0];
    c->avg_rv40_qpel_pixels_tab[0][0] = c->avg_h264_qpel_pixels_tab[0][0];
    c->avg_rv40_qpel_pixels_tab[1][0] = c->avg_h264_qpel_pixels_tab[1][0];

    /* build the coefficient permutation matching the selected IDCT */
    switch(c->idct_permutation_type){
    case FF_NO_IDCT_PERM:
        for(i=0; i<64; i++)
            c->idct_permutation[i]= i;
        break;
    case FF_LIBMPEG2_IDCT_PERM:
        for(i=0; i<64; i++)
            c->idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
        break;
    case FF_SIMPLE_IDCT_PERM:
        for(i=0; i<64; i++)
            c->idct_permutation[i]= simple_mmx_permutation[i];
        break;
    case FF_TRANSPOSE_IDCT_PERM:
        for(i=0; i<64; i++)
            c->idct_permutation[i]= ((i&7)<<3) | (i>>3);
        break;
    case FF_PARTTRANS_IDCT_PERM:
        for(i=0; i<64; i++)
            c->idct_permutation[i]= (i&0x24) | ((i&3)<<3) | ((i>>3)&3);
        break;
    case FF_SSE2_IDCT_PERM:
        for(i=0; i<64; i++)
            c->idct_permutation[i]= (i&0x38) | idct_sse2_row_perm[i&7];
        break;
    default:
        av_log(avctx, AV_LOG_ERROR, "Internal error, IDCT permutation not set\n");
    }
}
252
ddb1a0e94cf4
- Added PSNR feature to libavcodec and ffmpeg. By now just Y PSNR until I'm
pulento
parents:
220
diff
changeset
|
4524 |