Mercurial > libavcodec.hg
annotate dsputil.c @ 5542:b0a566346fb1 libavcodec
Add attribute that forces alignment of stack to functions that need it.
Necessary for systems that don't align by default to 16 bytes, required by some
SSE instructions.
Requires GCC >= 4.2.
Based on patch by Gaël Chardon.
author | ramiro |
---|---|
date | Mon, 13 Aug 2007 15:28:29 +0000 |
parents | c16a59ef6a86 |
children | ec04964a1d1a |
rev | line source |
---|---|
0 | 1 /* |
2 * DSP utils | |
429 | 3 * Copyright (c) 2000, 2001 Fabrice Bellard. |
1739
07a484280a82
copyright year update of the files i touched and remembered, things look annoyingly unmaintained otherwise
michael
parents:
1729
diff
changeset
|
4 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> |
0 | 5 * |
5214 | 6 * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at> |
7 * | |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3807
diff
changeset
|
8 * This file is part of FFmpeg. |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3807
diff
changeset
|
9 * |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3807
diff
changeset
|
10 * FFmpeg is free software; you can redistribute it and/or |
429 | 11 * modify it under the terms of the GNU Lesser General Public |
12 * License as published by the Free Software Foundation; either | |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3807
diff
changeset
|
13 * version 2.1 of the License, or (at your option) any later version. |
0 | 14 * |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3807
diff
changeset
|
15 * FFmpeg is distributed in the hope that it will be useful, |
0 | 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
429 | 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
18 * Lesser General Public License for more details. | |
0 | 19 * |
429 | 20 * You should have received a copy of the GNU Lesser General Public |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3807
diff
changeset
|
21 * License along with FFmpeg; if not, write to the Free Software |
3036
0b546eab515d
Update licensing information: The FSF changed postal address.
diego
parents:
3029
diff
changeset
|
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
0 | 23 */ |
2967 | 24 |
1106 | 25 /** |
26 * @file dsputil.c | |
27 * DSP utils | |
28 */ | |
2967 | 29 |
0 | 30 #include "avcodec.h" |
31 #include "dsputil.h" | |
936 | 32 #include "mpegvideo.h" |
1092 | 33 #include "simple_idct.h" |
1557 | 34 #include "faandct.h" |
5277
7b3fcb7c61ce
Avoid linking with h263.c functions when the relevant codecs
aurel
parents:
5256
diff
changeset
|
35 #include "h263.h" |
3198
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3105
diff
changeset
|
36 #include "snow.h" |
676 | 37 |
2522
e25782262d7d
kill warnings patch by (Måns Rullgård <mru inprovide com>)
michael
parents:
2448
diff
changeset
|
38 /* snow.c */ |
e25782262d7d
kill warnings patch by (Måns Rullgård <mru inprovide com>)
michael
parents:
2448
diff
changeset
|
39 void ff_spatial_dwt(int *buffer, int width, int height, int stride, int type, int decomposition_count); |
e25782262d7d
kill warnings patch by (Måns Rullgård <mru inprovide com>)
michael
parents:
2448
diff
changeset
|
40 |
3536
545a15c19c91
sse & sse2 implementations of vorbis channel coupling.
lorenm
parents:
3526
diff
changeset
|
41 /* vorbis.c */ |
545a15c19c91
sse & sse2 implementations of vorbis channel coupling.
lorenm
parents:
3526
diff
changeset
|
42 void vorbis_inverse_coupling(float *mag, float *ang, int blocksize); |
545a15c19c91
sse & sse2 implementations of vorbis channel coupling.
lorenm
parents:
3526
diff
changeset
|
43 |
4176 | 44 uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0, }; |
4179 | 45 uint32_t ff_squareTbl[512] = {0, }; |
0 | 46 |
1064 | 47 const uint8_t ff_zigzag_direct[64] = { |
706
e65798d228ea
idct permutation cleanup, idct can be selected per context now
michaelni
parents:
689
diff
changeset
|
48 0, 1, 8, 16, 9, 2, 3, 10, |
e65798d228ea
idct permutation cleanup, idct can be selected per context now
michaelni
parents:
689
diff
changeset
|
49 17, 24, 32, 25, 18, 11, 4, 5, |
34 | 50 12, 19, 26, 33, 40, 48, 41, 34, |
706
e65798d228ea
idct permutation cleanup, idct can be selected per context now
michaelni
parents:
689
diff
changeset
|
51 27, 20, 13, 6, 7, 14, 21, 28, |
34 | 52 35, 42, 49, 56, 57, 50, 43, 36, |
53 29, 22, 15, 23, 30, 37, 44, 51, | |
54 58, 59, 52, 45, 38, 31, 39, 46, | |
55 53, 60, 61, 54, 47, 55, 62, 63 | |
56 }; | |
57 | |
1567 | 58 /* Specific zigzag scan for 248 idct. NOTE that unlike the |
59 specification, we interleave the fields */ | |
60 const uint8_t ff_zigzag248_direct[64] = { | |
61 0, 8, 1, 9, 16, 24, 2, 10, | |
62 17, 25, 32, 40, 48, 56, 33, 41, | |
63 18, 26, 3, 11, 4, 12, 19, 27, | |
64 34, 42, 49, 57, 50, 58, 35, 43, | |
65 20, 28, 5, 13, 6, 14, 21, 29, | |
66 36, 44, 51, 59, 52, 60, 37, 45, | |
67 22, 30, 7, 15, 23, 31, 38, 46, | |
68 53, 61, 54, 62, 39, 47, 55, 63, | |
69 }; | |
70 | |
220 | 71 /* not permutated inverse zigzag_direct + 1 for MMX quantizer */ |
4197 | 72 DECLARE_ALIGNED_8(uint16_t, inv_zigzag_direct16[64]) = {0, }; |
220 | 73 |
1064 | 74 const uint8_t ff_alternate_horizontal_scan[64] = { |
2967 | 75 0, 1, 2, 3, 8, 9, 16, 17, |
34 | 76 10, 11, 4, 5, 6, 7, 15, 14, |
2967 | 77 13, 12, 19, 18, 24, 25, 32, 33, |
34 | 78 26, 27, 20, 21, 22, 23, 28, 29, |
2967 | 79 30, 31, 34, 35, 40, 41, 48, 49, |
34 | 80 42, 43, 36, 37, 38, 39, 44, 45, |
2967 | 81 46, 47, 50, 51, 56, 57, 58, 59, |
34 | 82 52, 53, 54, 55, 60, 61, 62, 63, |
83 }; | |
84 | |
1064 | 85 const uint8_t ff_alternate_vertical_scan[64] = { |
2967 | 86 0, 8, 16, 24, 1, 9, 2, 10, |
34 | 87 17, 25, 32, 40, 48, 56, 57, 49, |
2967 | 88 41, 33, 26, 18, 3, 11, 4, 12, |
34 | 89 19, 27, 34, 42, 50, 58, 35, 43, |
2967 | 90 51, 59, 20, 28, 5, 13, 6, 14, |
34 | 91 21, 29, 36, 44, 52, 60, 37, 45, |
2967 | 92 53, 61, 22, 30, 7, 15, 23, 31, |
34 | 93 38, 46, 54, 62, 39, 47, 55, 63, |
94 }; | |
95 | |
220 | 96 /* a*inverse[b]>>32 == a/b for all 0<=a<=65536 && 2<=b<=255 */ |
4174 | 97 const uint32_t ff_inverse[256]={ |
2967 | 98 0, 4294967295U,2147483648U,1431655766, 1073741824, 858993460, 715827883, 613566757, |
99 536870912, 477218589, 429496730, 390451573, 357913942, 330382100, 306783379, 286331154, | |
100 268435456, 252645136, 238609295, 226050911, 214748365, 204522253, 195225787, 186737709, | |
101 178956971, 171798692, 165191050, 159072863, 153391690, 148102321, 143165577, 138547333, | |
102 134217728, 130150525, 126322568, 122713352, 119304648, 116080198, 113025456, 110127367, | |
103 107374183, 104755300, 102261127, 99882961, 97612894, 95443718, 93368855, 91382283, | |
104 89478486, 87652394, 85899346, 84215046, 82595525, 81037119, 79536432, 78090315, | |
105 76695845, 75350304, 74051161, 72796056, 71582789, 70409300, 69273667, 68174085, | |
106 67108864, 66076420, 65075263, 64103990, 63161284, 62245903, 61356676, 60492498, | |
107 59652324, 58835169, 58040099, 57266231, 56512728, 55778797, 55063684, 54366675, | |
108 53687092, 53024288, 52377650, 51746594, 51130564, 50529028, 49941481, 49367441, | |
109 48806447, 48258060, 47721859, 47197443, 46684428, 46182445, 45691142, 45210183, | |
110 44739243, 44278014, 43826197, 43383509, 42949673, 42524429, 42107523, 41698712, | |
111 41297763, 40904451, 40518560, 40139882, 39768216, 39403370, 39045158, 38693400, | |
112 38347923, 38008561, 37675152, 37347542, 37025581, 36709123, 36398028, 36092163, | |
113 35791395, 35495598, 35204650, 34918434, 34636834, 34359739, 34087043, 33818641, | |
114 33554432, 33294321, 33038210, 32786010, 32537632, 32292988, 32051995, 31814573, | |
115 31580642, 31350127, 31122952, 30899046, 30678338, 30460761, 30246249, 30034737, | |
116 29826162, 29620465, 29417585, 29217465, 29020050, 28825284, 28633116, 28443493, | |
117 28256364, 28071682, 27889399, 27709467, 27531842, 27356480, 27183338, 27012373, | |
118 26843546, 26676816, 26512144, 26349493, 26188825, 26030105, 25873297, 25718368, | |
119 25565282, 25414008, 25264514, 25116768, 24970741, 24826401, 24683721, 24542671, | |
120 24403224, 24265352, 24129030, 23994231, 23860930, 23729102, 23598722, 23469767, | |
121 23342214, 23216040, 23091223, 22967740, 22845571, 22724695, 22605092, 22486740, | |
122 22369622, 22253717, 22139007, 22025474, 21913099, 21801865, 21691755, 21582751, | |
123 21474837, 21367997, 21262215, 21157475, 21053762, 20951060, 20849356, 20748635, | |
124 20648882, 20550083, 20452226, 20355296, 20259280, 20164166, 20069941, 19976593, | |
125 19884108, 19792477, 19701685, 19611723, 19522579, 19434242, 19346700, 19259944, | |
126 19173962, 19088744, 19004281, 18920561, 18837576, 18755316, 18673771, 18592933, | |
127 18512791, 18433337, 18354562, 18276457, 18199014, 18122225, 18046082, 17970575, | |
128 17895698, 17821442, 17747799, 17674763, 17602325, 17530479, 17459217, 17388532, | |
220 | 129 17318417, 17248865, 17179870, 17111424, 17043522, 16976156, 16909321, 16843010, |
130 }; | |
131 | |
1092 | 132 /* Input permutation for the simple_idct_mmx */ |
133 static const uint8_t simple_mmx_permutation[64]={ | |
2979 | 134 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D, |
135 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D, | |
136 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D, | |
137 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F, | |
138 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F, | |
139 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D, | |
140 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F, | |
141 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F, | |
1092 | 142 }; |
143 | |
/**
 * Sum of all 256 pixel values of a 16x16 block.
 * line_size is the byte distance between the starts of consecutive rows.
 */
static int pix_sum_c(uint8_t * pix, int line_size)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        /* two 8-pixel groups per row */
        for (col = 0; col < 16; col += 8) {
            total += pix[0] + pix[1] + pix[2] + pix[3]
                   + pix[4] + pix[5] + pix[6] + pix[7];
            pix += 8;
        }
        pix += line_size - 16;   /* advance to the next row */
    }
    return total;
}
165 | |
1064 | 166 static int pix_norm1_c(uint8_t * pix, int line_size) |
612 | 167 { |
168 int s, i, j; | |
4179 | 169 uint32_t *sq = ff_squareTbl + 256; |
612 | 170 |
171 s = 0; | |
172 for (i = 0; i < 16; i++) { | |
2979 | 173 for (j = 0; j < 16; j += 8) { |
997
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
174 #if 0 |
2979 | 175 s += sq[pix[0]]; |
176 s += sq[pix[1]]; | |
177 s += sq[pix[2]]; | |
178 s += sq[pix[3]]; | |
179 s += sq[pix[4]]; | |
180 s += sq[pix[5]]; | |
181 s += sq[pix[6]]; | |
182 s += sq[pix[7]]; | |
997
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
183 #else |
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
184 #if LONG_MAX > 2147483647 |
2979 | 185 register uint64_t x=*(uint64_t*)pix; |
186 s += sq[x&0xff]; | |
187 s += sq[(x>>8)&0xff]; | |
188 s += sq[(x>>16)&0xff]; | |
189 s += sq[(x>>24)&0xff]; | |
997
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
190 s += sq[(x>>32)&0xff]; |
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
191 s += sq[(x>>40)&0xff]; |
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
192 s += sq[(x>>48)&0xff]; |
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
193 s += sq[(x>>56)&0xff]; |
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
194 #else |
2979 | 195 register uint32_t x=*(uint32_t*)pix; |
196 s += sq[x&0xff]; | |
197 s += sq[(x>>8)&0xff]; | |
198 s += sq[(x>>16)&0xff]; | |
199 s += sq[(x>>24)&0xff]; | |
997
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
200 x=*(uint32_t*)(pix+4); |
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
201 s += sq[x&0xff]; |
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
202 s += sq[(x>>8)&0xff]; |
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
203 s += sq[(x>>16)&0xff]; |
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
204 s += sq[(x>>24)&0xff]; |
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
205 #endif |
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
206 #endif |
2979 | 207 pix += 8; |
208 } | |
209 pix += line_size - 16; | |
612 | 210 } |
211 return s; | |
212 } | |
213 | |
/**
 * Byte-swap w 32-bit words from src into dst (in-place use is fine).
 * Unrolled eight words at a time; a tail loop handles the rest.
 */
static void bswap_buf(uint32_t *dst, uint32_t *src, int w){
    int n = 0;

    while (n + 8 <= w) {
        dst[n  ] = bswap_32(src[n  ]);
        dst[n+1] = bswap_32(src[n+1]);
        dst[n+2] = bswap_32(src[n+2]);
        dst[n+3] = bswap_32(src[n+3]);
        dst[n+4] = bswap_32(src[n+4]);
        dst[n+5] = bswap_32(src[n+5]);
        dst[n+6] = bswap_32(src[n+6]);
        dst[n+7] = bswap_32(src[n+7]);
        n += 8;
    }
    /* remaining 0..7 words */
    for (; n < w; n++)
        dst[n] = bswap_32(src[n]);
}
612 | 231 |
2184 | 232 static int sse4_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h) |
233 { | |
234 int s, i; | |
4179 | 235 uint32_t *sq = ff_squareTbl + 256; |
2184 | 236 |
237 s = 0; | |
238 for (i = 0; i < h; i++) { | |
239 s += sq[pix1[0] - pix2[0]]; | |
240 s += sq[pix1[1] - pix2[1]]; | |
241 s += sq[pix1[2] - pix2[2]]; | |
242 s += sq[pix1[3] - pix2[3]]; | |
243 pix1 += line_size; | |
244 pix2 += line_size; | |
245 } | |
246 return s; | |
247 } | |
248 | |
1708 | 249 static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h) |
936 | 250 { |
251 int s, i; | |
4179 | 252 uint32_t *sq = ff_squareTbl + 256; |
936 | 253 |
254 s = 0; | |
1708 | 255 for (i = 0; i < h; i++) { |
936 | 256 s += sq[pix1[0] - pix2[0]]; |
257 s += sq[pix1[1] - pix2[1]]; | |
258 s += sq[pix1[2] - pix2[2]]; | |
259 s += sq[pix1[3] - pix2[3]]; | |
260 s += sq[pix1[4] - pix2[4]]; | |
261 s += sq[pix1[5] - pix2[5]]; | |
262 s += sq[pix1[6] - pix2[6]]; | |
263 s += sq[pix1[7] - pix2[7]]; | |
264 pix1 += line_size; | |
265 pix2 += line_size; | |
266 } | |
267 return s; | |
268 } | |
269 | |
1708 | 270 static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) |
884 | 271 { |
1012
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
272 int s, i; |
4179 | 273 uint32_t *sq = ff_squareTbl + 256; |
884 | 274 |
275 s = 0; | |
1708 | 276 for (i = 0; i < h; i++) { |
1012
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
277 s += sq[pix1[ 0] - pix2[ 0]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
278 s += sq[pix1[ 1] - pix2[ 1]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
279 s += sq[pix1[ 2] - pix2[ 2]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
280 s += sq[pix1[ 3] - pix2[ 3]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
281 s += sq[pix1[ 4] - pix2[ 4]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
282 s += sq[pix1[ 5] - pix2[ 5]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
283 s += sq[pix1[ 6] - pix2[ 6]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
284 s += sq[pix1[ 7] - pix2[ 7]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
285 s += sq[pix1[ 8] - pix2[ 8]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
286 s += sq[pix1[ 9] - pix2[ 9]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
287 s += sq[pix1[10] - pix2[10]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
288 s += sq[pix1[11] - pix2[11]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
289 s += sq[pix1[12] - pix2[12]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
290 s += sq[pix1[13] - pix2[13]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
291 s += sq[pix1[14] - pix2[14]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
292 s += sq[pix1[15] - pix2[15]]; |
997
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
293 |
1012
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
294 pix1 += line_size; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
295 pix2 += line_size; |
884 | 296 } |
297 return s; | |
298 } | |
299 | |
2184 | 300 |
3373
b8996cc5ccae
Disable w53 and w97 cmp methods when snow encoder is disabled
gpoirier
parents:
3323
diff
changeset
|
301 #ifdef CONFIG_SNOW_ENCODER //dwt is in snow.c |
/**
 * Wavelet-domain comparison score: the scaled difference of the two
 * blocks is transformed with ff_spatial_dwt() and the absolute subband
 * coefficients are summed with per-subband weights.
 * w is the block width (8, 16 or 32; asserted equal to h);
 * type selects the wavelet: 0 = 9/7, 1 = 5/3.
 */
static inline int w_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int w, int h, int type){
    int s, i, j;
    const int dec_count= w==8 ? 3 : 4;
    int tmp[32*32];
    int level, ori;
    /* weights indexed [wavelet type][dec_count-3][level][orientation] */
    static const int scale[2][2][4][4]={
      {
        {
            // 9/7 8x8 dec=3
            {268, 239, 239, 213},
            {  0, 224, 224, 152},
            {  0, 135, 135, 110},
        },{
            // 9/7 16x16 or 32x32 dec=4
            {344, 310, 310, 280},
            {  0, 320, 320, 228},
            {  0, 175, 175, 136},
            {  0, 129, 129, 102},
        }
      },{
        {
            // 5/3 8x8 dec=3
            {275, 245, 245, 218},
            {  0, 230, 230, 156},
            {  0, 138, 138, 113},
        },{
            // 5/3 16x16 or 32x32 dec=4
            {352, 317, 317, 286},
            {  0, 328, 328, 233},
            {  0, 180, 180, 140},
            {  0, 132, 132, 105},
        }
      }
    };

    /* build the difference block, scaled by 16, with a fixed stride of 32 */
    for (i = 0; i < h; i++) {
        for (j = 0; j < w; j+=4) {
            tmp[32*i+j+0] = (pix1[j+0] - pix2[j+0])<<4;
            tmp[32*i+j+1] = (pix1[j+1] - pix2[j+1])<<4;
            tmp[32*i+j+2] = (pix1[j+2] - pix2[j+2])<<4;
            tmp[32*i+j+3] = (pix1[j+3] - pix2[j+3])<<4;
        }
        pix1 += line_size;
        pix2 += line_size;
    }

    ff_spatial_dwt(tmp, w, h, 32, type, dec_count);

    s=0;
    assert(w==h);
    /* accumulate |coefficient| * weight over every subband */
    for(level=0; level<dec_count; level++){
        for(ori= level ? 1 : 0; ori<4; ori++){
            int size= w>>(dec_count-level);
            int sx= (ori&1) ? size : 0;          /* horizontal subband offset */
            int stride= 32<<(dec_count-level);
            int sy= (ori&2) ? stride>>1 : 0;     /* vertical subband offset */

            for(i=0; i<size; i++){
                for(j=0; j<size; j++){
                    int coeff= tmp[sx + sy + i*stride + j] * scale[type][dec_count-3][level][ori];
                    s += FFABS(coeff);
                }
            }
        }
    }
    assert(s>=0);
    return s>>9;
}
370 | |
/* 5/3 wavelet score on an 8-wide block (wrapper around w_c). */
static int w53_8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
    return w_c(v, pix1, pix2, line_size, 8, h, 1);
}
374 | |
/* 9/7 wavelet score on an 8-wide block (wrapper around w_c). */
static int w97_8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
    return w_c(v, pix1, pix2, line_size, 8, h, 0);
}
378 | |
/* 5/3 wavelet score on a 16-wide block (wrapper around w_c). */
static int w53_16_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
    return w_c(v, pix1, pix2, line_size, 16, h, 1);
}
382 | |
/* 9/7 wavelet score on a 16-wide block (wrapper around w_c). */
static int w97_16_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
    return w_c(v, pix1, pix2, line_size, 16, h, 0);
}
386 | |
/* 5/3 wavelet score on a 32-wide block; non-static, used externally. */
int w53_32_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
    return w_c(v, pix1, pix2, line_size, 32, h, 1);
}
87c54a3f8d19
Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation.
lorenm
parents:
3248
diff
changeset
|
390 |
/* 9/7 wavelet score on a 32-wide block; non-static, used externally. */
int w97_32_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
    return w_c(v, pix1, pix2, line_size, 32, h, 0);
}
3373
b8996cc5ccae
Disable w53 and w97 cmp methods when snow encoder is disabled
gpoirier
parents:
3323
diff
changeset
|
394 #endif |
3323
87c54a3f8d19
Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation.
lorenm
parents:
3248
diff
changeset
|
395 |
1064 | 396 static void get_pixels_c(DCTELEM *restrict block, const uint8_t *pixels, int line_size) |
0 | 397 { |
398 int i; | |
399 | |
400 /* read the pixels */ | |
401 for(i=0;i<8;i++) { | |
516 | 402 block[0] = pixels[0]; |
403 block[1] = pixels[1]; | |
404 block[2] = pixels[2]; | |
405 block[3] = pixels[3]; | |
406 block[4] = pixels[4]; | |
407 block[5] = pixels[5]; | |
408 block[6] = pixels[6]; | |
409 block[7] = pixels[7]; | |
410 pixels += line_size; | |
411 block += 8; | |
0 | 412 } |
413 } | |
414 | |
1064 | 415 static void diff_pixels_c(DCTELEM *restrict block, const uint8_t *s1, |
2979 | 416 const uint8_t *s2, int stride){ |
324 | 417 int i; |
418 | |
419 /* read the pixels */ | |
420 for(i=0;i<8;i++) { | |
516 | 421 block[0] = s1[0] - s2[0]; |
422 block[1] = s1[1] - s2[1]; | |
423 block[2] = s1[2] - s2[2]; | |
424 block[3] = s1[3] - s2[3]; | |
425 block[4] = s1[4] - s2[4]; | |
426 block[5] = s1[5] - s2[5]; | |
427 block[6] = s1[6] - s2[6]; | |
428 block[7] = s1[7] - s2[7]; | |
324 | 429 s1 += stride; |
430 s2 += stride; | |
516 | 431 block += 8; |
324 | 432 } |
433 } | |
434 | |
435 | |
1064 | 436 static void put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels, |
2979 | 437 int line_size) |
0 | 438 { |
439 int i; | |
4176 | 440 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; |
2967 | 441 |
0 | 442 /* read the pixels */ |
443 for(i=0;i<8;i++) { | |
516 | 444 pixels[0] = cm[block[0]]; |
445 pixels[1] = cm[block[1]]; | |
446 pixels[2] = cm[block[2]]; | |
447 pixels[3] = cm[block[3]]; | |
448 pixels[4] = cm[block[4]]; | |
449 pixels[5] = cm[block[5]]; | |
450 pixels[6] = cm[block[6]]; | |
451 pixels[7] = cm[block[7]]; | |
452 | |
453 pixels += line_size; | |
454 block += 8; | |
0 | 455 } |
456 } | |
457 | |
2256 | 458 static void put_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels, |
2979 | 459 int line_size) |
2256 | 460 { |
461 int i; | |
4176 | 462 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; |
2967 | 463 |
2256 | 464 /* read the pixels */ |
465 for(i=0;i<4;i++) { | |
466 pixels[0] = cm[block[0]]; | |
467 pixels[1] = cm[block[1]]; | |
468 pixels[2] = cm[block[2]]; | |
469 pixels[3] = cm[block[3]]; | |
470 | |
471 pixels += line_size; | |
472 block += 8; | |
473 } | |
474 } | |
475 | |
2257 | 476 static void put_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels, |
2979 | 477 int line_size) |
2257 | 478 { |
479 int i; | |
4176 | 480 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; |
2967 | 481 |
2257 | 482 /* read the pixels */ |
483 for(i=0;i<2;i++) { | |
484 pixels[0] = cm[block[0]]; | |
485 pixels[1] = cm[block[1]]; | |
486 | |
487 pixels += line_size; | |
488 block += 8; | |
489 } | |
490 } | |
491 | |
2967 | 492 static void put_signed_pixels_clamped_c(const DCTELEM *block, |
1984
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
493 uint8_t *restrict pixels, |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
494 int line_size) |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
495 { |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
496 int i, j; |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
497 |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
498 for (i = 0; i < 8; i++) { |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
499 for (j = 0; j < 8; j++) { |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
500 if (*block < -128) |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
501 *pixels = 0; |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
502 else if (*block > 127) |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
503 *pixels = 255; |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
504 else |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
505 *pixels = (uint8_t)(*block + 128); |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
506 block++; |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
507 pixels++; |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
508 } |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
509 pixels += (line_size - 8); |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
510 } |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
511 } |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
512 |
1064 | 513 static void add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels, |
516 | 514 int line_size) |
0 | 515 { |
516 int i; | |
4176 | 517 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; |
2967 | 518 |
0 | 519 /* read the pixels */ |
520 for(i=0;i<8;i++) { | |
516 | 521 pixels[0] = cm[pixels[0] + block[0]]; |
522 pixels[1] = cm[pixels[1] + block[1]]; | |
523 pixels[2] = cm[pixels[2] + block[2]]; | |
524 pixels[3] = cm[pixels[3] + block[3]]; | |
525 pixels[4] = cm[pixels[4] + block[4]]; | |
526 pixels[5] = cm[pixels[5] + block[5]]; | |
527 pixels[6] = cm[pixels[6] + block[6]]; | |
528 pixels[7] = cm[pixels[7] + block[7]]; | |
529 pixels += line_size; | |
530 block += 8; | |
0 | 531 } |
532 } | |
2256 | 533 |
534 static void add_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels, | |
535 int line_size) | |
536 { | |
537 int i; | |
4176 | 538 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; |
2967 | 539 |
2256 | 540 /* read the pixels */ |
541 for(i=0;i<4;i++) { | |
542 pixels[0] = cm[pixels[0] + block[0]]; | |
543 pixels[1] = cm[pixels[1] + block[1]]; | |
544 pixels[2] = cm[pixels[2] + block[2]]; | |
545 pixels[3] = cm[pixels[3] + block[3]]; | |
546 pixels += line_size; | |
547 block += 8; | |
548 } | |
549 } | |
2257 | 550 |
551 static void add_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels, | |
552 int line_size) | |
553 { | |
554 int i; | |
4176 | 555 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; |
2967 | 556 |
2257 | 557 /* read the pixels */ |
558 for(i=0;i<2;i++) { | |
559 pixels[0] = cm[pixels[0] + block[0]]; | |
560 pixels[1] = cm[pixels[1] + block[1]]; | |
561 pixels += line_size; | |
562 block += 8; | |
563 } | |
564 } | |
2763 | 565 |
566 static void add_pixels8_c(uint8_t *restrict pixels, DCTELEM *block, int line_size) | |
567 { | |
568 int i; | |
569 for(i=0;i<8;i++) { | |
570 pixels[0] += block[0]; | |
571 pixels[1] += block[1]; | |
572 pixels[2] += block[2]; | |
573 pixels[3] += block[3]; | |
574 pixels[4] += block[4]; | |
575 pixels[5] += block[5]; | |
576 pixels[6] += block[6]; | |
577 pixels[7] += block[7]; | |
578 pixels += line_size; | |
579 block += 8; | |
580 } | |
581 } | |
582 | |
583 static void add_pixels4_c(uint8_t *restrict pixels, DCTELEM *block, int line_size) | |
584 { | |
585 int i; | |
586 for(i=0;i<4;i++) { | |
587 pixels[0] += block[0]; | |
588 pixels[1] += block[1]; | |
589 pixels[2] += block[2]; | |
590 pixels[3] += block[3]; | |
591 pixels += line_size; | |
592 block += 4; | |
593 } | |
594 } | |
595 | |
4988
689490842cf5
factor sum_abs_dctelem out of dct_sad, and simd it.
lorenm
parents:
4749
diff
changeset
|
596 static int sum_abs_dctelem_c(DCTELEM *block) |
689490842cf5
factor sum_abs_dctelem out of dct_sad, and simd it.
lorenm
parents:
4749
diff
changeset
|
597 { |
689490842cf5
factor sum_abs_dctelem out of dct_sad, and simd it.
lorenm
parents:
4749
diff
changeset
|
598 int sum=0, i; |
689490842cf5
factor sum_abs_dctelem out of dct_sad, and simd it.
lorenm
parents:
4749
diff
changeset
|
599 for(i=0; i<64; i++) |
689490842cf5
factor sum_abs_dctelem out of dct_sad, and simd it.
lorenm
parents:
4749
diff
changeset
|
600 sum+= FFABS(block[i]); |
689490842cf5
factor sum_abs_dctelem out of dct_sad, and simd it.
lorenm
parents:
4749
diff
changeset
|
601 return sum; |
689490842cf5
factor sum_abs_dctelem out of dct_sad, and simd it.
lorenm
parents:
4749
diff
changeset
|
602 } |
689490842cf5
factor sum_abs_dctelem out of dct_sad, and simd it.
lorenm
parents:
4749
diff
changeset
|
603 |
#if 0 /* disabled: 64-bit scalar variant of the halfpel pixel ops; the
       * 32-bit variant in the #else branch below is the one compiled. */

/* PIXOP2 instantiates a family of put/avg motion-compensation primitives
 * for 8-pixel-wide rows, processing each row as a single uint64_t read
 * via AV_RN64 (unaligned load):
 *   - _pixels:            plain copy/average of the source rows
 *   - _x2 / _y2:          average of two horizontally / vertically
 *                         adjacent predictions, in rounding and
 *                         no-rounding ("no_rnd") flavours
 *   - _xy2:               average of a 2x2 neighbourhood of predictions
 * The byte-parallel averaging tricks:
 *   (a&b) + (((a^b)&0xFE..)>>1)  averages rounding DOWN per byte lane,
 *   (a|b) - (((a^b)&0xFE..)>>1)  averages rounding UP per byte lane,
 * with the 0xFE mask stopping the shifted carry from crossing lanes.
 * For _xy2, each byte is split into its low 2 bits (0x03 mask, summed in
 * l0/l1) and high 6 bits pre-shifted by 2 (0xFC mask, summed in h0/h1);
 * 0x0202.. adds the +2 rounding term (0x0101.. for the no_rnd variant)
 * and the final 0x0F mask discards inter-lane carry after the >>2.
 * The _xy2 loops process two output rows per iteration, reusing the
 * l/h sums of the shared middle input row. */
#define PIXOP2(OPNAME, OP) \
static void OPNAME ## _pixels(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    for(i=0; i<h; i++){\
        OP(*((uint64_t*)block), AV_RN64(pixels));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static void OPNAME ## _no_rnd_pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    for(i=0; i<h; i++){\
        const uint64_t a= AV_RN64(pixels  );\
        const uint64_t b= AV_RN64(pixels+1);\
        OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static void OPNAME ## _pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    for(i=0; i<h; i++){\
        const uint64_t a= AV_RN64(pixels  );\
        const uint64_t b= AV_RN64(pixels+1);\
        OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static void OPNAME ## _no_rnd_pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    for(i=0; i<h; i++){\
        const uint64_t a= AV_RN64(pixels          );\
        const uint64_t b= AV_RN64(pixels+line_size);\
        OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static void OPNAME ## _pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    for(i=0; i<h; i++){\
        const uint64_t a= AV_RN64(pixels          );\
        const uint64_t b= AV_RN64(pixels+line_size);\
        OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static void OPNAME ## _pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
        int i;\
        const uint64_t a= AV_RN64(pixels  );\
        const uint64_t b= AV_RN64(pixels+1);\
        uint64_t l0=  (a&0x0303030303030303ULL)\
                    + (b&0x0303030303030303ULL)\
                    + 0x0202020202020202ULL;\
        uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
                   + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
        uint64_t l1,h1;\
\
        pixels+=line_size;\
        for(i=0; i<h; i+=2){\
            uint64_t a= AV_RN64(pixels  );\
            uint64_t b= AV_RN64(pixels+1);\
            l1=  (a&0x0303030303030303ULL)\
               + (b&0x0303030303030303ULL);\
            h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
              + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
            OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
            pixels+=line_size;\
            block +=line_size;\
            a= AV_RN64(pixels  );\
            b= AV_RN64(pixels+1);\
            l0=  (a&0x0303030303030303ULL)\
               + (b&0x0303030303030303ULL)\
               + 0x0202020202020202ULL;\
            h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
              + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
            OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
            pixels+=line_size;\
            block +=line_size;\
        }\
}\
\
static void OPNAME ## _no_rnd_pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
        int i;\
        const uint64_t a= AV_RN64(pixels  );\
        const uint64_t b= AV_RN64(pixels+1);\
        uint64_t l0=  (a&0x0303030303030303ULL)\
                    + (b&0x0303030303030303ULL)\
                    + 0x0101010101010101ULL;\
        uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
                   + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
        uint64_t l1,h1;\
\
        pixels+=line_size;\
        for(i=0; i<h; i+=2){\
            uint64_t a= AV_RN64(pixels  );\
            uint64_t b= AV_RN64(pixels+1);\
            l1=  (a&0x0303030303030303ULL)\
               + (b&0x0303030303030303ULL);\
            h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
              + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
            OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
            pixels+=line_size;\
            block +=line_size;\
            a= AV_RN64(pixels  );\
            b= AV_RN64(pixels+1);\
            l0=  (a&0x0303030303030303ULL)\
               + (b&0x0303030303030303ULL)\
               + 0x0101010101010101ULL;\
            h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
              + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
            OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
            pixels+=line_size;\
            block +=line_size;\
        }\
}\
\
CALL_2X_PIXELS(OPNAME ## _pixels16_c    , OPNAME ## _pixels_c    , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels_x2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels_y2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels_xy2_c, 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels_x2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels_y2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels_xy2_c, 8)

/* op_avg: average with rounding up, byte-parallel across the uint64_t. */
#define op_avg(a, b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEFEFEFEFEULL)>>1) )
#else // 64 bit variant
747 | |
748 #define PIXOP2(OPNAME, OP) \ | |
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
749 static void OPNAME ## _pixels2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
750 int i;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
751 for(i=0; i<h; i++){\ |
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
752 OP(*((uint16_t*)(block )), AV_RN16(pixels ));\ |
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
753 pixels+=line_size;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
754 block +=line_size;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
755 }\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
756 }\ |
1168 | 757 static void OPNAME ## _pixels4_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
758 int i;\ | |
759 for(i=0; i<h; i++){\ | |
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
760 OP(*((uint32_t*)(block )), AV_RN32(pixels ));\ |
1168 | 761 pixels+=line_size;\ |
762 block +=line_size;\ | |
763 }\ | |
764 }\ | |
859 | 765 static void OPNAME ## _pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
385 | 766 int i;\ |
767 for(i=0; i<h; i++){\ | |
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
768 OP(*((uint32_t*)(block )), AV_RN32(pixels ));\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
769 OP(*((uint32_t*)(block+4)), AV_RN32(pixels+4));\ |
385 | 770 pixels+=line_size;\ |
771 block +=line_size;\ | |
772 }\ | |
773 }\ | |
859 | 774 static inline void OPNAME ## _no_rnd_pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
775 OPNAME ## _pixels8_c(block, pixels, line_size, h);\ | |
651 | 776 }\ |
385 | 777 \ |
651 | 778 static inline void OPNAME ## _no_rnd_pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ |
779 int src_stride1, int src_stride2, int h){\ | |
385 | 780 int i;\ |
781 for(i=0; i<h; i++){\ | |
651 | 782 uint32_t a,b;\ |
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
783 a= AV_RN32(&src1[i*src_stride1 ]);\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
784 b= AV_RN32(&src2[i*src_stride2 ]);\ |
1264 | 785 OP(*((uint32_t*)&dst[i*dst_stride ]), no_rnd_avg32(a, b));\ |
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
786 a= AV_RN32(&src1[i*src_stride1+4]);\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
787 b= AV_RN32(&src2[i*src_stride2+4]);\ |
1264 | 788 OP(*((uint32_t*)&dst[i*dst_stride+4]), no_rnd_avg32(a, b));\ |
385 | 789 }\ |
790 }\ | |
791 \ | |
651 | 792 static inline void OPNAME ## _pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ |
793 int src_stride1, int src_stride2, int h){\ | |
385 | 794 int i;\ |
795 for(i=0; i<h; i++){\ | |
651 | 796 uint32_t a,b;\ |
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
797 a= AV_RN32(&src1[i*src_stride1 ]);\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
798 b= AV_RN32(&src2[i*src_stride2 ]);\ |
1264 | 799 OP(*((uint32_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\ |
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
800 a= AV_RN32(&src1[i*src_stride1+4]);\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
801 b= AV_RN32(&src2[i*src_stride2+4]);\ |
1264 | 802 OP(*((uint32_t*)&dst[i*dst_stride+4]), rnd_avg32(a, b));\ |
385 | 803 }\ |
804 }\ | |
805 \ | |
1168 | 806 static inline void OPNAME ## _pixels4_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ |
807 int src_stride1, int src_stride2, int h){\ | |
808 int i;\ | |
809 for(i=0; i<h; i++){\ | |
810 uint32_t a,b;\ | |
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
811 a= AV_RN32(&src1[i*src_stride1 ]);\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
812 b= AV_RN32(&src2[i*src_stride2 ]);\ |
1264 | 813 OP(*((uint32_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\ |
1168 | 814 }\ |
815 }\ | |
816 \ | |
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
817 static inline void OPNAME ## _pixels2_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
818 int src_stride1, int src_stride2, int h){\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
819 int i;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
820 for(i=0; i<h; i++){\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
821 uint32_t a,b;\ |
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
822 a= AV_RN16(&src1[i*src_stride1 ]);\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
823 b= AV_RN16(&src2[i*src_stride2 ]);\ |
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
824 OP(*((uint16_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
825 }\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
826 }\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
827 \ |
651 | 828 static inline void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ |
829 int src_stride1, int src_stride2, int h){\ | |
830 OPNAME ## _pixels8_l2(dst , src1 , src2 , dst_stride, src_stride1, src_stride2, h);\ | |
831 OPNAME ## _pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\ | |
832 }\ | |
833 \ | |
834 static inline void OPNAME ## _no_rnd_pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ | |
835 int src_stride1, int src_stride2, int h){\ | |
836 OPNAME ## _no_rnd_pixels8_l2(dst , src1 , src2 , dst_stride, src_stride1, src_stride2, h);\ | |
837 OPNAME ## _no_rnd_pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\ | |
838 }\ | |
839 \ | |
859 | 840 static inline void OPNAME ## _no_rnd_pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
651 | 841 OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\ |
842 }\ | |
843 \ | |
859 | 844 static inline void OPNAME ## _pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
651 | 845 OPNAME ## _pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\ |
846 }\ | |
847 \ | |
859 | 848 static inline void OPNAME ## _no_rnd_pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
651 | 849 OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\ |
850 }\ | |
851 \ | |
859 | 852 static inline void OPNAME ## _pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
651 | 853 OPNAME ## _pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\ |
385 | 854 }\ |
855 \ | |
651 | 856 static inline void OPNAME ## _pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\ |
857 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ | |
858 int i;\ | |
859 for(i=0; i<h; i++){\ | |
860 uint32_t a, b, c, d, l0, l1, h0, h1;\ | |
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
861 a= AV_RN32(&src1[i*src_stride1]);\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
862 b= AV_RN32(&src2[i*src_stride2]);\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
863 c= AV_RN32(&src3[i*src_stride3]);\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
864 d= AV_RN32(&src4[i*src_stride4]);\ |
651 | 865 l0= (a&0x03030303UL)\ |
866 + (b&0x03030303UL)\ | |
867 + 0x02020202UL;\ | |
868 h0= ((a&0xFCFCFCFCUL)>>2)\ | |
869 + ((b&0xFCFCFCFCUL)>>2);\ | |
870 l1= (c&0x03030303UL)\ | |
871 + (d&0x03030303UL);\ | |
872 h1= ((c&0xFCFCFCFCUL)>>2)\ | |
873 + ((d&0xFCFCFCFCUL)>>2);\ | |
874 OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ | |
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
875 a= AV_RN32(&src1[i*src_stride1+4]);\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
876 b= AV_RN32(&src2[i*src_stride2+4]);\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
877 c= AV_RN32(&src3[i*src_stride3+4]);\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
878 d= AV_RN32(&src4[i*src_stride4+4]);\ |
651 | 879 l0= (a&0x03030303UL)\ |
880 + (b&0x03030303UL)\ | |
881 + 0x02020202UL;\ | |
882 h0= ((a&0xFCFCFCFCUL)>>2)\ | |
883 + ((b&0xFCFCFCFCUL)>>2);\ | |
884 l1= (c&0x03030303UL)\ | |
885 + (d&0x03030303UL);\ | |
886 h1= ((c&0xFCFCFCFCUL)>>2)\ | |
887 + ((d&0xFCFCFCFCUL)>>2);\ | |
888 OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ | |
889 }\ | |
890 }\ | |
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
891 \ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
892 static inline void OPNAME ## _pixels4_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
893 OPNAME ## _pixels4_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
894 }\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
895 \ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
896 static inline void OPNAME ## _pixels4_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
897 OPNAME ## _pixels4_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
898 }\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
899 \ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
900 static inline void OPNAME ## _pixels2_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
901 OPNAME ## _pixels2_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
902 }\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
903 \ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
904 static inline void OPNAME ## _pixels2_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
905 OPNAME ## _pixels2_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
906 }\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
907 \ |
651 | 908 static inline void OPNAME ## _no_rnd_pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\ |
909 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ | |
385 | 910 int i;\ |
911 for(i=0; i<h; i++){\ | |
651 | 912 uint32_t a, b, c, d, l0, l1, h0, h1;\ |
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
913 a= AV_RN32(&src1[i*src_stride1]);\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
914 b= AV_RN32(&src2[i*src_stride2]);\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
915 c= AV_RN32(&src3[i*src_stride3]);\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
916 d= AV_RN32(&src4[i*src_stride4]);\ |
651 | 917 l0= (a&0x03030303UL)\ |
918 + (b&0x03030303UL)\ | |
919 + 0x01010101UL;\ | |
920 h0= ((a&0xFCFCFCFCUL)>>2)\ | |
921 + ((b&0xFCFCFCFCUL)>>2);\ | |
922 l1= (c&0x03030303UL)\ | |
923 + (d&0x03030303UL);\ | |
924 h1= ((c&0xFCFCFCFCUL)>>2)\ | |
925 + ((d&0xFCFCFCFCUL)>>2);\ | |
926 OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ | |
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
927 a= AV_RN32(&src1[i*src_stride1+4]);\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
928 b= AV_RN32(&src2[i*src_stride2+4]);\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
929 c= AV_RN32(&src3[i*src_stride3+4]);\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
930 d= AV_RN32(&src4[i*src_stride4+4]);\ |
651 | 931 l0= (a&0x03030303UL)\ |
932 + (b&0x03030303UL)\ | |
933 + 0x01010101UL;\ | |
934 h0= ((a&0xFCFCFCFCUL)>>2)\ | |
935 + ((b&0xFCFCFCFCUL)>>2);\ | |
936 l1= (c&0x03030303UL)\ | |
937 + (d&0x03030303UL);\ | |
938 h1= ((c&0xFCFCFCFCUL)>>2)\ | |
939 + ((d&0xFCFCFCFCUL)>>2);\ | |
940 OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ | |
385 | 941 }\ |
942 }\ | |
651 | 943 static inline void OPNAME ## _pixels16_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\ |
944 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ | |
945 OPNAME ## _pixels8_l4(dst , src1 , src2 , src3 , src4 , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\ | |
946 OPNAME ## _pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\ | |
947 }\ | |
948 static inline void OPNAME ## _no_rnd_pixels16_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\ | |
949 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ | |
950 OPNAME ## _no_rnd_pixels8_l4(dst , src1 , src2 , src3 , src4 , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\ | |
951 OPNAME ## _no_rnd_pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\ | |
952 }\ | |
385 | 953 \ |
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
954 static inline void OPNAME ## _pixels2_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
955 {\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
956 int i, a0, b0, a1, b1;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
957 a0= pixels[0];\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
958 b0= pixels[1] + 2;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
959 a0 += b0;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
960 b0 += pixels[2];\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
961 \ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
962 pixels+=line_size;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
963 for(i=0; i<h; i+=2){\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
964 a1= pixels[0];\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
965 b1= pixels[1];\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
966 a1 += b1;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
967 b1 += pixels[2];\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
968 \ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
969 block[0]= (a1+a0)>>2; /* FIXME non put */\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
970 block[1]= (b1+b0)>>2;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
971 \ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
972 pixels+=line_size;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
973 block +=line_size;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
974 \ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
975 a0= pixels[0];\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
976 b0= pixels[1] + 2;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
977 a0 += b0;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
978 b0 += pixels[2];\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
979 \ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
980 block[0]= (a1+a0)>>2;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
981 block[1]= (b1+b0)>>2;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
982 pixels+=line_size;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
983 block +=line_size;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
984 }\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
985 }\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
986 \ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
987 static inline void OPNAME ## _pixels4_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
988 {\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
989 int i;\ |
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
990 const uint32_t a= AV_RN32(pixels );\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
991 const uint32_t b= AV_RN32(pixels+1);\ |
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
992 uint32_t l0= (a&0x03030303UL)\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
993 + (b&0x03030303UL)\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
994 + 0x02020202UL;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
995 uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
996 + ((b&0xFCFCFCFCUL)>>2);\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
997 uint32_t l1,h1;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
998 \ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
999 pixels+=line_size;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1000 for(i=0; i<h; i+=2){\ |
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
1001 uint32_t a= AV_RN32(pixels );\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
1002 uint32_t b= AV_RN32(pixels+1);\ |
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1003 l1= (a&0x03030303UL)\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1004 + (b&0x03030303UL);\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1005 h1= ((a&0xFCFCFCFCUL)>>2)\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1006 + ((b&0xFCFCFCFCUL)>>2);\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1007 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1008 pixels+=line_size;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1009 block +=line_size;\ |
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
1010 a= AV_RN32(pixels );\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
1011 b= AV_RN32(pixels+1);\ |
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1012 l0= (a&0x03030303UL)\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1013 + (b&0x03030303UL)\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1014 + 0x02020202UL;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1015 h0= ((a&0xFCFCFCFCUL)>>2)\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1016 + ((b&0xFCFCFCFCUL)>>2);\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1017 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1018 pixels+=line_size;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1019 block +=line_size;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1020 }\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1021 }\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1022 \ |
859 | 1023 static inline void OPNAME ## _pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ |
385 | 1024 {\ |
1025 int j;\ | |
1026 for(j=0; j<2; j++){\ | |
1027 int i;\ | |
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
1028 const uint32_t a= AV_RN32(pixels );\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
1029 const uint32_t b= AV_RN32(pixels+1);\ |
385 | 1030 uint32_t l0= (a&0x03030303UL)\ |
1031 + (b&0x03030303UL)\ | |
1032 + 0x02020202UL;\ | |
1033 uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\ | |
1034 + ((b&0xFCFCFCFCUL)>>2);\ | |
1035 uint32_t l1,h1;\ | |
1036 \ | |
1037 pixels+=line_size;\ | |
1038 for(i=0; i<h; i+=2){\ | |
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
1039 uint32_t a= AV_RN32(pixels );\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
1040 uint32_t b= AV_RN32(pixels+1);\ |
385 | 1041 l1= (a&0x03030303UL)\ |
1042 + (b&0x03030303UL);\ | |
1043 h1= ((a&0xFCFCFCFCUL)>>2)\ | |
1044 + ((b&0xFCFCFCFCUL)>>2);\ | |
1045 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ | |
1046 pixels+=line_size;\ | |
1047 block +=line_size;\ | |
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
1048 a= AV_RN32(pixels );\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
1049 b= AV_RN32(pixels+1);\ |
385 | 1050 l0= (a&0x03030303UL)\ |
1051 + (b&0x03030303UL)\ | |
1052 + 0x02020202UL;\ | |
1053 h0= ((a&0xFCFCFCFCUL)>>2)\ | |
1054 + ((b&0xFCFCFCFCUL)>>2);\ | |
1055 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ | |
1056 pixels+=line_size;\ | |
1057 block +=line_size;\ | |
1058 }\ | |
1059 pixels+=4-line_size*(h+1);\ | |
1060 block +=4-line_size*h;\ | |
1061 }\ | |
1062 }\ | |
1063 \ | |
859 | 1064 static inline void OPNAME ## _no_rnd_pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ |
385 | 1065 {\ |
1066 int j;\ | |
1067 for(j=0; j<2; j++){\ | |
1068 int i;\ | |
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
1069 const uint32_t a= AV_RN32(pixels );\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
1070 const uint32_t b= AV_RN32(pixels+1);\ |
385 | 1071 uint32_t l0= (a&0x03030303UL)\ |
1072 + (b&0x03030303UL)\ | |
1073 + 0x01010101UL;\ | |
1074 uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\ | |
1075 + ((b&0xFCFCFCFCUL)>>2);\ | |
1076 uint32_t l1,h1;\ | |
1077 \ | |
1078 pixels+=line_size;\ | |
1079 for(i=0; i<h; i+=2){\ | |
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
1080 uint32_t a= AV_RN32(pixels );\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
1081 uint32_t b= AV_RN32(pixels+1);\ |
385 | 1082 l1= (a&0x03030303UL)\ |
1083 + (b&0x03030303UL);\ | |
1084 h1= ((a&0xFCFCFCFCUL)>>2)\ | |
1085 + ((b&0xFCFCFCFCUL)>>2);\ | |
1086 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ | |
1087 pixels+=line_size;\ | |
1088 block +=line_size;\ | |
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
1089 a= AV_RN32(pixels );\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
1090 b= AV_RN32(pixels+1);\ |
385 | 1091 l0= (a&0x03030303UL)\ |
1092 + (b&0x03030303UL)\ | |
1093 + 0x01010101UL;\ | |
1094 h0= ((a&0xFCFCFCFCUL)>>2)\ | |
1095 + ((b&0xFCFCFCFCUL)>>2);\ | |
1096 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ | |
1097 pixels+=line_size;\ | |
1098 block +=line_size;\ | |
1099 }\ | |
1100 pixels+=4-line_size*(h+1);\ | |
1101 block +=4-line_size*h;\ | |
1102 }\ | |
1103 }\ | |
1104 \ | |
859 | 1105 CALL_2X_PIXELS(OPNAME ## _pixels16_c , OPNAME ## _pixels8_c , 8)\ |
1106 CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels8_x2_c , 8)\ | |
1107 CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels8_y2_c , 8)\ | |
1108 CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels8_xy2_c, 8)\ | |
1109 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_c , OPNAME ## _pixels8_c , 8)\ | |
1110 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels8_x2_c , 8)\ | |
1111 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels8_y2_c , 8)\ | |
1112 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels8_xy2_c, 8)\ | |
651 | 1113 |
1264 | 1114 #define op_avg(a, b) a = rnd_avg32(a, b) |
385 | 1115 #endif |
1116 #define op_put(a, b) a = b | |
1117 | |
1118 PIXOP2(avg, op_avg) | |
1119 PIXOP2(put, op_put) | |
1120 #undef op_avg | |
1121 #undef op_put | |
1122 | |
/* Rounding averages of 2 / 4 sample values.
 * Arguments are fully parenthesized so that expression arguments
 * (e.g. avg2(x & m, y)) evaluate with the intended precedence;
 * the unparenthesized form silently mis-associates with operators
 * that bind more loosely than '+'. */
#define avg2(a,b) (((a)+(b)+1)>>1)
#define avg4(a,b,c,d) (((a)+(b)+(c)+(d)+2)>>2)
1125 | |
static void put_no_rnd_pixels16_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h)
{
    /* Convenience wrapper: all three line strides are the same,
     * forward to the generic no-rounding 16-wide two-source averager. */
    put_no_rnd_pixels16_l2(dst, a, b, stride, stride, stride, h);
}

static void put_no_rnd_pixels8_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h)
{
    /* Same as above for 8-pixel-wide blocks. */
    put_no_rnd_pixels8_l2(dst, a, b, stride, stride, stride, h);
}
753 | 1133 |
static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder)
{
    /* Bilinear interpolation over an 8-pixel-wide strip for 1-warp-point GMC.
     * x16/y16 are sub-pel fractions in 1/16ths; the four corner weights
     * always sum to 256, so the >>8 normalizes the weighted sum. */
    const int A = (16 - x16) * (16 - y16);
    const int B = (     x16) * (16 - y16);
    const int C = (16 - x16) * (     y16);
    const int D = (     x16) * (     y16);
    int i, j;

    for (i = 0; i < h; i++) {
        for (j = 0; j < 8; j++)
            dst[j] = (A * src[j]          + B * src[j + 1]
                    + C * src[stride + j] + D * src[stride + j + 1]
                    + rounder) >> 8;
        dst += stride;
        src += stride;
    }
}
1156 | |
3248
7aa9f80e7954
mmx implementation of 3-point GMC. (5x faster than C)
lorenm
parents:
3245
diff
changeset
|
1157 void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy, |
753 | 1158 int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height) |
1159 { | |
1160 int y, vx, vy; | |
1161 const int s= 1<<shift; | |
2967 | 1162 |
753 | 1163 width--; |
1164 height--; | |
1165 | |
1166 for(y=0; y<h; y++){ | |
1167 int x; | |
1168 | |
1169 vx= ox; | |
1170 vy= oy; | |
1171 for(x=0; x<8; x++){ //XXX FIXME optimize | |
1172 int src_x, src_y, frac_x, frac_y, index; | |
1173 | |
1174 src_x= vx>>16; | |
1175 src_y= vy>>16; | |
1176 frac_x= src_x&(s-1); | |
1177 frac_y= src_y&(s-1); | |
1178 src_x>>=shift; | |
1179 src_y>>=shift; | |
2967 | 1180 |
753 | 1181 if((unsigned)src_x < width){ |
1182 if((unsigned)src_y < height){ | |
1183 index= src_x + src_y*stride; | |
1184 dst[y*stride + x]= ( ( src[index ]*(s-frac_x) | |
1185 + src[index +1]* frac_x )*(s-frac_y) | |
1186 + ( src[index+stride ]*(s-frac_x) | |
1187 + src[index+stride+1]* frac_x )* frac_y | |
1188 + r)>>(shift*2); | |
1189 }else{ | |
4594 | 1190 index= src_x + av_clip(src_y, 0, height)*stride; |
2967 | 1191 dst[y*stride + x]= ( ( src[index ]*(s-frac_x) |
753 | 1192 + src[index +1]* frac_x )*s |
1193 + r)>>(shift*2); | |
1194 } | |
1195 }else{ | |
1196 if((unsigned)src_y < height){ | |
4594 | 1197 index= av_clip(src_x, 0, width) + src_y*stride; |
2967 | 1198 dst[y*stride + x]= ( ( src[index ]*(s-frac_y) |
753 | 1199 + src[index+stride ]* frac_y )*s |
1200 + r)>>(shift*2); | |
1201 }else{ | |
4594 | 1202 index= av_clip(src_x, 0, width) + av_clip(src_y, 0, height)*stride; |
753 | 1203 dst[y*stride + x]= src[index ]; |
1204 } | |
1205 } | |
2967 | 1206 |
753 | 1207 vx+= dxx; |
1208 vy+= dyx; | |
1209 } | |
1210 ox += dxy; | |
1211 oy += dyy; | |
1212 } | |
1213 } | |
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1214 |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1215 static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1216 switch(width){ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1217 case 2: put_pixels2_c (dst, src, stride, height); break; |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1218 case 4: put_pixels4_c (dst, src, stride, height); break; |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1219 case 8: put_pixels8_c (dst, src, stride, height); break; |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1220 case 16:put_pixels16_c(dst, src, stride, height); break; |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1221 } |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1222 } |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1223 |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1224 static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1225 int i,j; |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1226 for (i=0; i < height; i++) { |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1227 for (j=0; j < width; j++) { |
2979 | 1228 dst[j] = (683*(2*src[j] + src[j+1] + 1)) >> 11; |
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1229 } |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1230 src += stride; |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1231 dst += stride; |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1232 } |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1233 } |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1234 |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1235 static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1236 int i,j; |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1237 for (i=0; i < height; i++) { |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1238 for (j=0; j < width; j++) { |
2979 | 1239 dst[j] = (683*(src[j] + 2*src[j+1] + 1)) >> 11; |
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1240 } |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1241 src += stride; |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1242 dst += stride; |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1243 } |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1244 } |
2967 | 1245 |
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    /* Vertical 1/3-pel interpolation: dst ~= (2*top + bottom) / 3,
     * computed as (683*(2a+b+1)) >> 11 (683/2048 ~= 1/3). */
    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++)
            dst[x] = (683*(2*src[x] + src[x+stride] + 1)) >> 11;
        src += stride;
        dst += stride;
    }
}
2967 | 1256 |
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    /* Diagonal (1/3,1/3)-pel interpolation over the 2x2 neighborhood with
     * weights 4/3/3/2 (sum 12); 2731/32768 approximates 1/12. */
    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++)
            dst[x] = (2731*(4*src[x] + 3*src[x+1] + 3*src[x+stride] + 2*src[x+stride+1] + 6)) >> 15;
        src += stride;
        dst += stride;
    }
}
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1267 |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    /* Diagonal (1/3,2/3)-pel interpolation over the 2x2 neighborhood with
     * weights 3/2/4/3 (sum 12); 2731/32768 approximates 1/12. */
    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++)
            dst[x] = (2731*(3*src[x] + 2*src[x+1] + 4*src[x+stride] + 3*src[x+stride+1] + 6)) >> 15;
        src += stride;
        dst += stride;
    }
}
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1278 |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    /* Thirdpel (SVQ3) motion compensation at fractional position (0, 2/3):
     * vertical 1:2 blend of the two rows, scaled by 683/2^11 ~= 1/3 with
     * rounding bias +1. */
    int y;
    for (y = 0; y < height; y++) {
        int x;
        for (x = 0; x < width; x++) {
            const int sum = src[x] + 2*src[x+stride] + 1;
            dst[x] = (683 * sum) >> 11;
        }
        src += stride;
        dst += stride;
    }
}
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1289 |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    /* Thirdpel (SVQ3) motion compensation at fractional position (2/3, 1/3):
     * blend of the four neighbours with weights 3:4:2:3 (sum 12), using the
     * fixed-point reciprocal 2731/2^15 ~= 1/12 with rounding bias +6. */
    int y;
    for (y = 0; y < height; y++) {
        int x;
        for (x = 0; x < width; x++) {
            const int sum = 3*src[x] + 4*src[x+1] + 2*src[x+stride] + 3*src[x+stride+1] + 6;
            dst[x] = (2731 * sum) >> 15;
        }
        src += stride;
        dst += stride;
    }
}
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1300 |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    /* Thirdpel (SVQ3) motion compensation at fractional position (2/3, 2/3):
     * blend of the four neighbours with weights 2:3:3:4 (sum 12), using the
     * fixed-point reciprocal 2731/2^15 ~= 1/12 with rounding bias +6. */
    int y;
    for (y = 0; y < height; y++) {
        int x;
        for (x = 0; x < width; x++) {
            const int sum = 2*src[x] + 3*src[x+1] + 3*src[x+stride] + 4*src[x+stride+1] + 6;
            dst[x] = (2731 * sum) >> 15;
        }
        src += stride;
        dst += stride;
    }
}
1319 | 1311 |
static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    /* Integer-pel position: dispatch to the plain block-averaging helpers
     * defined elsewhere in this file; widths other than 2/4/8/16 are no-ops. */
    if (width == 2)
        avg_pixels2_c (dst, src, stride, height);
    else if (width == 4)
        avg_pixels4_c (dst, src, stride, height);
    else if (width == 8)
        avg_pixels8_c (dst, src, stride, height);
    else if (width == 16)
        avg_pixels16_c(dst, src, stride, height);
}
1320 | |
static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    /* Thirdpel MC at (1/3, 0), averaged with the existing destination:
     * horizontal 2:1 blend scaled by 683/2^11 ~= 1/3, then rounded average. */
    int y;
    for (y = 0; y < height; y++) {
        int x;
        for (x = 0; x < width; x++) {
            const int pred = (683 * (2*src[x] + src[x+1] + 1)) >> 11;
            dst[x] = (dst[x] + pred + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
1331 | |
static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    /* Thirdpel MC at (2/3, 0), averaged with the existing destination:
     * horizontal 1:2 blend scaled by 683/2^11 ~= 1/3, then rounded average. */
    int y;
    for (y = 0; y < height; y++) {
        int x;
        for (x = 0; x < width; x++) {
            const int pred = (683 * (src[x] + 2*src[x+1] + 1)) >> 11;
            dst[x] = (dst[x] + pred + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
2967 | 1342 |
static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    /* Thirdpel MC at (0, 1/3), averaged with the existing destination:
     * vertical 2:1 blend scaled by 683/2^11 ~= 1/3, then rounded average. */
    int y;
    for (y = 0; y < height; y++) {
        int x;
        for (x = 0; x < width; x++) {
            const int pred = (683 * (2*src[x] + src[x+stride] + 1)) >> 11;
            dst[x] = (dst[x] + pred + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
2967 | 1353 |
static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    /* Thirdpel MC at (1/3, 1/3), averaged with the existing destination:
     * 4:3:3:2 blend of the four neighbours (sum 12), scaled by
     * 2731/2^15 ~= 1/12, then rounded average. */
    int y;
    for (y = 0; y < height; y++) {
        int x;
        for (x = 0; x < width; x++) {
            const int pred =
                (2731 * (4*src[x] + 3*src[x+1] + 3*src[x+stride] + 2*src[x+stride+1] + 6)) >> 15;
            dst[x] = (dst[x] + pred + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
1364 | |
static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    /* Thirdpel MC at (1/3, 2/3), averaged with the existing destination:
     * 3:2:4:3 blend of the four neighbours (sum 12), scaled by
     * 2731/2^15 ~= 1/12, then rounded average. */
    int y;
    for (y = 0; y < height; y++) {
        int x;
        for (x = 0; x < width; x++) {
            const int pred =
                (2731 * (3*src[x] + 2*src[x+1] + 4*src[x+stride] + 3*src[x+stride+1] + 6)) >> 15;
            dst[x] = (dst[x] + pred + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
1375 | |
static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    /* Thirdpel MC at (0, 2/3), averaged with the existing destination:
     * vertical 1:2 blend scaled by 683/2^11 ~= 1/3, then rounded average. */
    int y;
    for (y = 0; y < height; y++) {
        int x;
        for (x = 0; x < width; x++) {
            const int pred = (683 * (src[x] + 2*src[x+stride] + 1)) >> 11;
            dst[x] = (dst[x] + pred + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
1386 | |
static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    /* Thirdpel MC at (2/3, 1/3), averaged with the existing destination:
     * 3:4:2:3 blend of the four neighbours (sum 12), scaled by
     * 2731/2^15 ~= 1/12, then rounded average. */
    int y;
    for (y = 0; y < height; y++) {
        int x;
        for (x = 0; x < width; x++) {
            const int pred =
                (2731 * (3*src[x] + 4*src[x+1] + 2*src[x+stride] + 3*src[x+stride+1] + 6)) >> 15;
            dst[x] = (dst[x] + pred + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
1397 | |
static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    /* Thirdpel MC at (2/3, 2/3), averaged with the existing destination:
     * 2:3:3:4 blend of the four neighbours (sum 12), scaled by
     * 2731/2^15 ~= 1/12, then rounded average. */
    int y;
    for (y = 0; y < height; y++) {
        int x;
        for (x = 0; x < width; x++) {
            const int pred =
                (2731 * (2*src[x] + 3*src[x+1] + 3*src[x+stride] + 4*src[x+stride+1] + 6)) >> 15;
            dst[x] = (dst[x] + pred + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1408 #if 0 |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1409 #define TPEL_WIDTH(width)\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1410 static void put_tpel_pixels ## width ## _mc00_c(uint8_t *dst, const uint8_t *src, int stride, int height){\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1411 void put_tpel_pixels_mc00_c(dst, src, stride, width, height);}\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1412 static void put_tpel_pixels ## width ## _mc10_c(uint8_t *dst, const uint8_t *src, int stride, int height){\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1413 void put_tpel_pixels_mc10_c(dst, src, stride, width, height);}\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1414 static void put_tpel_pixels ## width ## _mc20_c(uint8_t *dst, const uint8_t *src, int stride, int height){\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1415 void put_tpel_pixels_mc20_c(dst, src, stride, width, height);}\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1416 static void put_tpel_pixels ## width ## _mc01_c(uint8_t *dst, const uint8_t *src, int stride, int height){\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1417 void put_tpel_pixels_mc01_c(dst, src, stride, width, height);}\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1418 static void put_tpel_pixels ## width ## _mc11_c(uint8_t *dst, const uint8_t *src, int stride, int height){\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1419 void put_tpel_pixels_mc11_c(dst, src, stride, width, height);}\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1420 static void put_tpel_pixels ## width ## _mc21_c(uint8_t *dst, const uint8_t *src, int stride, int height){\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1421 void put_tpel_pixels_mc21_c(dst, src, stride, width, height);}\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1422 static void put_tpel_pixels ## width ## _mc02_c(uint8_t *dst, const uint8_t *src, int stride, int height){\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1423 void put_tpel_pixels_mc02_c(dst, src, stride, width, height);}\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1424 static void put_tpel_pixels ## width ## _mc12_c(uint8_t *dst, const uint8_t *src, int stride, int height){\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1425 void put_tpel_pixels_mc12_c(dst, src, stride, width, height);}\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1426 static void put_tpel_pixels ## width ## _mc22_c(uint8_t *dst, const uint8_t *src, int stride, int height){\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1427 void put_tpel_pixels_mc22_c(dst, src, stride, width, height);} |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1428 #endif |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1429 |
/* Generates the H.264 bilinear chroma MC functions for block widths 2, 4 and 8.
 * A..D are the standard eighth-pel bilinear weights ((8-x)(8-y), x(8-y),
 * (8-x)y, xy), which always sum to 64; OP combines the 6.6 fixed-point
 * prediction into dst (put or rounded avg). */
#define H264_CHROMA_MC(OPNAME, OP)\
static void OPNAME ## h264_chroma_mc2_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
    const int A=(8-x)*(8-y);\
    const int B=(  x)*(8-y);\
    const int C=(8-x)*(  y);\
    const int D=(  x)*(  y);\
    int i, j;\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
    \
    for(i=0; i<h; i++)\
    {\
        for(j=0; j<2; j++)\
            OP(dst[j], (A*src[j] + B*src[j+1] + C*src[stride+j] + D*src[stride+j+1]));\
        dst+= stride;\
        src+= stride;\
    }\
}\
\
static void OPNAME ## h264_chroma_mc4_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
    const int A=(8-x)*(8-y);\
    const int B=(  x)*(8-y);\
    const int C=(8-x)*(  y);\
    const int D=(  x)*(  y);\
    int i, j;\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
    \
    for(i=0; i<h; i++)\
    {\
        for(j=0; j<4; j++)\
            OP(dst[j], (A*src[j] + B*src[j+1] + C*src[stride+j] + D*src[stride+j+1]));\
        dst+= stride;\
        src+= stride;\
    }\
}\
\
static void OPNAME ## h264_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
    const int A=(8-x)*(8-y);\
    const int B=(  x)*(8-y);\
    const int C=(8-x)*(  y);\
    const int D=(  x)*(  y);\
    int i, j;\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
    \
    for(i=0; i<h; i++)\
    {\
        for(j=0; j<8; j++)\
            OP(dst[j], (A*src[j] + B*src[j+1] + C*src[stride+j] + D*src[stride+j+1]));\
        dst+= stride;\
        src+= stride;\
    }\
}

/* put: round to nearest (6.6 fixed point); avg: rounded average with dst. */
#define op_avg(a, b) a = (((a)+(((b) + 32)>>6)+1)>>1)
#define op_put(a, b) a = (((b) + 32)>>6)

H264_CHROMA_MC(put_       , op_put)
H264_CHROMA_MC(avg_       , op_avg)
#undef op_avg
#undef op_put
1500 | |
static void put_no_rnd_h264_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){
    /* 8-wide H.264 bilinear chroma MC with a reduced rounding constant
     * (32-4 instead of 32), i.e. the "no_rnd" variant of the generated
     * put_h264_chroma_mc8_c above. Weights A..D sum to 64. */
    const int A = (8-x)*(8-y);
    const int B = (  x)*(8-y);
    const int C = (8-x)*(  y);
    const int D = (  x)*(  y);
    int i;

    assert(x<8 && y<8 && x>=0 && y>=0);

    for (i = 0; i < h; i++) {
        int j;
        for (j = 0; j < 8; j++)
            dst[j] = (A*src[j] + B*src[j+1] + C*src[stride+j] + D*src[stride+j+1] + 32 - 4) >> 6;
        dst += stride;
        src += stride;
    }
}
1524 | |
651 | 1525 #define QPEL_MC(r, OPNAME, RND, OP) \ |
1064 | 1526 static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\ |
4176 | 1527 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ |
651 | 1528 int i;\ |
1529 for(i=0; i<h; i++)\ | |
1530 {\ | |
1531 OP(dst[0], (src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]));\ | |
1532 OP(dst[1], (src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]));\ | |
1533 OP(dst[2], (src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]));\ | |
1534 OP(dst[3], (src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]));\ | |
1535 OP(dst[4], (src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]));\ | |
1536 OP(dst[5], (src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]));\ | |
1537 OP(dst[6], (src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]));\ | |
1538 OP(dst[7], (src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]));\ | |
1539 dst+=dstStride;\ | |
1540 src+=srcStride;\ | |
1541 }\ | |
1542 }\ | |
1543 \ | |
1064 | 1544 static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ |
984 | 1545 const int w=8;\ |
4176 | 1546 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ |
651 | 1547 int i;\ |
1548 for(i=0; i<w; i++)\ | |
1549 {\ | |
1550 const int src0= src[0*srcStride];\ | |
1551 const int src1= src[1*srcStride];\ | |
1552 const int src2= src[2*srcStride];\ | |
1553 const int src3= src[3*srcStride];\ | |
1554 const int src4= src[4*srcStride];\ | |
1555 const int src5= src[5*srcStride];\ | |
1556 const int src6= src[6*srcStride];\ | |
1557 const int src7= src[7*srcStride];\ | |
1558 const int src8= src[8*srcStride];\ | |
1559 OP(dst[0*dstStride], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\ | |
1560 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\ | |
1561 OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\ | |
1562 OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\ | |
1563 OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\ | |
1564 OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\ | |
1565 OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\ | |
1566 OP(dst[7*dstStride], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\ | |
1567 dst++;\ | |
1568 src++;\ | |
1569 }\ | |
1570 }\ | |
1571 \ | |
1064 | 1572 static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\ |
4176 | 1573 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ |
651 | 1574 int i;\ |
954 | 1575 \ |
651 | 1576 for(i=0; i<h; i++)\ |
1577 {\ | |
1578 OP(dst[ 0], (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]));\ | |
1579 OP(dst[ 1], (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]));\ | |
1580 OP(dst[ 2], (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]));\ | |
1581 OP(dst[ 3], (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]));\ | |
1582 OP(dst[ 4], (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]));\ | |
1583 OP(dst[ 5], (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 9]));\ | |
1584 OP(dst[ 6], (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 9])*3 - (src[ 3]+src[10]));\ | |
1585 OP(dst[ 7], (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 9])*6 + (src[ 5]+src[10])*3 - (src[ 4]+src[11]));\ | |
1586 OP(dst[ 8], (src[ 8]+src[ 9])*20 - (src[ 7]+src[10])*6 + (src[ 6]+src[11])*3 - (src[ 5]+src[12]));\ | |
1587 OP(dst[ 9], (src[ 9]+src[10])*20 - (src[ 8]+src[11])*6 + (src[ 7]+src[12])*3 - (src[ 6]+src[13]));\ | |
1588 OP(dst[10], (src[10]+src[11])*20 - (src[ 9]+src[12])*6 + (src[ 8]+src[13])*3 - (src[ 7]+src[14]));\ | |
1589 OP(dst[11], (src[11]+src[12])*20 - (src[10]+src[13])*6 + (src[ 9]+src[14])*3 - (src[ 8]+src[15]));\ | |
1590 OP(dst[12], (src[12]+src[13])*20 - (src[11]+src[14])*6 + (src[10]+src[15])*3 - (src[ 9]+src[16]));\ | |
1591 OP(dst[13], (src[13]+src[14])*20 - (src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]));\ | |
1592 OP(dst[14], (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]));\ | |
1593 OP(dst[15], (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]));\ | |
1594 dst+=dstStride;\ | |
1595 src+=srcStride;\ | |
1596 }\ | |
255 | 1597 }\ |
1598 \ | |
1064 | 1599 static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ |
4176 | 1600 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ |
651 | 1601 int i;\ |
954 | 1602 const int w=16;\ |
651 | 1603 for(i=0; i<w; i++)\ |
1604 {\ | |
1605 const int src0= src[0*srcStride];\ | |
1606 const int src1= src[1*srcStride];\ | |
1607 const int src2= src[2*srcStride];\ | |
1608 const int src3= src[3*srcStride];\ | |
1609 const int src4= src[4*srcStride];\ | |
1610 const int src5= src[5*srcStride];\ | |
1611 const int src6= src[6*srcStride];\ | |
1612 const int src7= src[7*srcStride];\ | |
1613 const int src8= src[8*srcStride];\ | |
1614 const int src9= src[9*srcStride];\ | |
1615 const int src10= src[10*srcStride];\ | |
1616 const int src11= src[11*srcStride];\ | |
1617 const int src12= src[12*srcStride];\ | |
1618 const int src13= src[13*srcStride];\ | |
1619 const int src14= src[14*srcStride];\ | |
1620 const int src15= src[15*srcStride];\ | |
1621 const int src16= src[16*srcStride];\ | |
1622 OP(dst[ 0*dstStride], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\ | |
1623 OP(dst[ 1*dstStride], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\ | |
1624 OP(dst[ 2*dstStride], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\ | |
1625 OP(dst[ 3*dstStride], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\ | |
1626 OP(dst[ 4*dstStride], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\ | |
1627 OP(dst[ 5*dstStride], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\ | |
1628 OP(dst[ 6*dstStride], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\ | |
1629 OP(dst[ 7*dstStride], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\ | |
1630 OP(dst[ 8*dstStride], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\ | |
1631 OP(dst[ 9*dstStride], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\ | |
1632 OP(dst[10*dstStride], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\ | |
1633 OP(dst[11*dstStride], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\ | |
1634 OP(dst[12*dstStride], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\ | |
1635 OP(dst[13*dstStride], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\ | |
1636 OP(dst[14*dstStride], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\ | |
1637 OP(dst[15*dstStride], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\ | |
1638 dst++;\ | |
1639 src++;\ | |
1640 }\ | |
255 | 1641 }\ |
1642 \ | |
1064 | 1643 static void OPNAME ## qpel8_mc00_c (uint8_t *dst, uint8_t *src, int stride){\ |
859 | 1644 OPNAME ## pixels8_c(dst, src, stride, 8);\ |
255 | 1645 }\ |
1646 \ | |
1064 | 1647 static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){\ |
1648 uint8_t half[64];\ | |
651 | 1649 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\ |
1650 OPNAME ## pixels8_l2(dst, src, half, stride, stride, 8, 8);\ | |
1651 }\ | |
1652 \ | |
1064 | 1653 static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){\ |
651 | 1654 OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\ |
255 | 1655 }\ |
1656 \ | |
1064 | 1657 static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){\ |
1658 uint8_t half[64];\ | |
651 | 1659 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\ |
1660 OPNAME ## pixels8_l2(dst, src+1, half, stride, stride, 8, 8);\ | |
1661 }\ | |
1662 \ | |
1064 | 1663 static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride){\ |
1664 uint8_t full[16*9];\ | |
1665 uint8_t half[64];\ | |
651 | 1666 copy_block9(full, src, 16, stride, 9);\ |
984 | 1667 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\ |
651 | 1668 OPNAME ## pixels8_l2(dst, full, half, stride, 16, 8, 8);\ |
1669 }\ | |
1670 \ | |
1064 | 1671 static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){\ |
1672 uint8_t full[16*9];\ | |
651 | 1673 copy_block9(full, src, 16, stride, 9);\ |
984 | 1674 OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\ |
255 | 1675 }\ |
1676 \ | |
1064 | 1677 static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, int stride){\ |
1678 uint8_t full[16*9];\ | |
1679 uint8_t half[64];\ | |
651 | 1680 copy_block9(full, src, 16, stride, 9);\ |
984 | 1681 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\ |
651 | 1682 OPNAME ## pixels8_l2(dst, full+16, half, stride, 16, 8, 8);\ |
1683 }\ | |
1064 | 1684 void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\ |
1685 uint8_t full[16*9];\ | |
1686 uint8_t halfH[72];\ | |
1687 uint8_t halfV[64];\ | |
1688 uint8_t halfHV[64];\ | |
651 | 1689 copy_block9(full, src, 16, stride, 9);\ |
1690 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ | |
984 | 1691 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\ |
1692 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ | |
651 | 1693 OPNAME ## pixels8_l4(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\ |
255 | 1694 }\ |
1064 | 1695 static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride){\ |
1696 uint8_t full[16*9];\ | |
1697 uint8_t halfH[72];\ | |
1698 uint8_t halfHV[64];\ | |
984 | 1699 copy_block9(full, src, 16, stride, 9);\ |
1700 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ | |
1701 put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\ | |
1702 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ | |
1703 OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\ | |
1704 }\ | |
1064 | 1705 void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\ |
1706 uint8_t full[16*9];\ | |
1707 uint8_t halfH[72];\ | |
1708 uint8_t halfV[64];\ | |
1709 uint8_t halfHV[64];\ | |
651 | 1710 copy_block9(full, src, 16, stride, 9);\ |
1711 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ | |
984 | 1712 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\ |
1713 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ | |
651 | 1714 OPNAME ## pixels8_l4(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\ |
255 | 1715 }\ |
1064 | 1716 static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride){\ |
1717 uint8_t full[16*9];\ | |
1718 uint8_t halfH[72];\ | |
1719 uint8_t halfHV[64];\ | |
984 | 1720 copy_block9(full, src, 16, stride, 9);\ |
1721 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ | |
1722 put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\ | |
1723 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ | |
1724 OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\ | |
1725 }\ | |
1064 | 1726 void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\ |
1727 uint8_t full[16*9];\ | |
1728 uint8_t halfH[72];\ | |
1729 uint8_t halfV[64];\ | |
1730 uint8_t halfHV[64];\ | |
651 | 1731 copy_block9(full, src, 16, stride, 9);\ |
1732 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ | |
984 | 1733 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\ |
1734 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ | |
651 | 1735 OPNAME ## pixels8_l4(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\ |
1736 }\ | |
1064 | 1737 static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride){\ |
1738 uint8_t full[16*9];\ | |
1739 uint8_t halfH[72];\ | |
1740 uint8_t halfHV[64];\ | |
984 | 1741 copy_block9(full, src, 16, stride, 9);\ |
1742 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ | |
1743 put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\ | |
1744 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ | |
1745 OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\ | |
1746 }\ | |
1064 | 1747 void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\ |
1748 uint8_t full[16*9];\ | |
1749 uint8_t halfH[72];\ | |
1750 uint8_t halfV[64];\ | |
1751 uint8_t halfHV[64];\ | |
651 | 1752 copy_block9(full, src, 16, stride, 9);\ |
1753 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full , 8, 16, 9);\ | |
984 | 1754 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\ |
1755 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ | |
651 | 1756 OPNAME ## pixels8_l4(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\ |
255 | 1757 }\ |
1064 | 1758 static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){\ |
1759 uint8_t full[16*9];\ | |
1760 uint8_t halfH[72];\ | |
1761 uint8_t halfHV[64];\ | |
984 | 1762 copy_block9(full, src, 16, stride, 9);\ |
1763 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ | |
1764 put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\ | |
1765 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ | |
1766 OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\ | |
1767 }\ | |
1064 | 1768 static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, int stride){\ |
1769 uint8_t halfH[72];\ | |
1770 uint8_t halfHV[64];\ | |
651 | 1771 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\ |
984 | 1772 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ |
651 | 1773 OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\ |
1774 }\ | |
1064 | 1775 static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, int stride){\ |
1776 uint8_t halfH[72];\ | |
1777 uint8_t halfHV[64];\ | |
651 | 1778 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\ |
984 | 1779 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ |
651 | 1780 OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\ |
1781 }\ | |
1064 | 1782 void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\ |
1783 uint8_t full[16*9];\ | |
1784 uint8_t halfH[72];\ | |
1785 uint8_t halfV[64];\ | |
1786 uint8_t halfHV[64];\ | |
651 | 1787 copy_block9(full, src, 16, stride, 9);\ |
1788 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ | |
984 | 1789 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\ |
1790 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ | |
651 | 1791 OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\ |
255 | 1792 }\ |
1064 | 1793 static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){\ |
1794 uint8_t full[16*9];\ | |
1795 uint8_t halfH[72];\ | |
984 | 1796 copy_block9(full, src, 16, stride, 9);\ |
1797 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ | |
1798 put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\ | |
1799 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\ | |
1800 }\ | |
1064 | 1801 void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\ |
1802 uint8_t full[16*9];\ | |
1803 uint8_t halfH[72];\ | |
1804 uint8_t halfV[64];\ | |
1805 uint8_t halfHV[64];\ | |
651 | 1806 copy_block9(full, src, 16, stride, 9);\ |
1807 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ | |
984 | 1808 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\ |
1809 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ | |
651 | 1810 OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\ |
1811 }\ | |
1064 | 1812 static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){\ |
1813 uint8_t full[16*9];\ | |
1814 uint8_t halfH[72];\ | |
984 | 1815 copy_block9(full, src, 16, stride, 9);\ |
1816 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ | |
1817 put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\ | |
1818 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\ | |
1819 }\ | |
1064 | 1820 static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){\ |
1821 uint8_t halfH[72];\ | |
651 | 1822 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\ |
984 | 1823 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\ |
651 | 1824 }\ |
1064 | 1825 static void OPNAME ## qpel16_mc00_c (uint8_t *dst, uint8_t *src, int stride){\ |
859 | 1826 OPNAME ## pixels16_c(dst, src, stride, 16);\ |
255 | 1827 }\ |
651 | 1828 \ |
1064 | 1829 static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, int stride){\ |
1830 uint8_t half[256];\ | |
651 | 1831 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\ |
1832 OPNAME ## pixels16_l2(dst, src, half, stride, stride, 16, 16);\ | |
1833 }\ | |
1834 \ | |
1064 | 1835 static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride){\ |
651 | 1836 OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\ |
1837 }\ | |
1838 \ | |
1064 | 1839 static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, int stride){\ |
1840 uint8_t half[256];\ | |
651 | 1841 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\ |
1842 OPNAME ## pixels16_l2(dst, src+1, half, stride, stride, 16, 16);\ | |
1843 }\ | |
1844 \ | |
1064 | 1845 static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride){\ |
1846 uint8_t full[24*17];\ | |
1847 uint8_t half[256];\ | |
651 | 1848 copy_block17(full, src, 24, stride, 17);\ |
954 | 1849 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\ |
651 | 1850 OPNAME ## pixels16_l2(dst, full, half, stride, 24, 16, 16);\ |
255 | 1851 }\ |
651 | 1852 \ |
1064 | 1853 static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, int stride){\ |
1854 uint8_t full[24*17];\ | |
651 | 1855 copy_block17(full, src, 24, stride, 17);\ |
954 | 1856 OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\ |
651 | 1857 }\ |
1858 \ | |
1064 | 1859 static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, int stride){\ |
1860 uint8_t full[24*17];\ | |
1861 uint8_t half[256];\ | |
651 | 1862 copy_block17(full, src, 24, stride, 17);\ |
954 | 1863 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\ |
651 | 1864 OPNAME ## pixels16_l2(dst, full+24, half, stride, 24, 16, 16);\ |
255 | 1865 }\ |
1064 | 1866 void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\ |
1867 uint8_t full[24*17];\ | |
1868 uint8_t halfH[272];\ | |
1869 uint8_t halfV[256];\ | |
1870 uint8_t halfHV[256];\ | |
651 | 1871 copy_block17(full, src, 24, stride, 17);\ |
1872 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ | |
954 | 1873 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\ |
1874 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ | |
651 | 1875 OPNAME ## pixels16_l4(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\ |
1876 }\ | |
1064 | 1877 static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride){\ |
1878 uint8_t full[24*17];\ | |
1879 uint8_t halfH[272];\ | |
1880 uint8_t halfHV[256];\ | |
984 | 1881 copy_block17(full, src, 24, stride, 17);\ |
1882 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ | |
1883 put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\ | |
1884 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ | |
1885 OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\ | |
1886 }\ | |
1064 | 1887 void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\ |
1888 uint8_t full[24*17];\ | |
1889 uint8_t halfH[272];\ | |
1890 uint8_t halfV[256];\ | |
1891 uint8_t halfHV[256];\ | |
651 | 1892 copy_block17(full, src, 24, stride, 17);\ |
1893 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ | |
954 | 1894 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\ |
1895 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ | |
651 | 1896 OPNAME ## pixels16_l4(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\ |
1897 }\ | |
1064 | 1898 static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride){\ |
1899 uint8_t full[24*17];\ | |
1900 uint8_t halfH[272];\ | |
1901 uint8_t halfHV[256];\ | |
984 | 1902 copy_block17(full, src, 24, stride, 17);\ |
1903 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ | |
1904 put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\ | |
1905 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ | |
1906 OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\ | |
1907 }\ | |
1064 | 1908 void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\ |
1909 uint8_t full[24*17];\ | |
1910 uint8_t halfH[272];\ | |
1911 uint8_t halfV[256];\ | |
1912 uint8_t halfHV[256];\ | |
651 | 1913 copy_block17(full, src, 24, stride, 17);\ |
1914 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ | |
954 | 1915 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\ |
1916 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ | |
651 | 1917 OPNAME ## pixels16_l4(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\ |
255 | 1918 }\ |
1064 | 1919 static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride){\ |
1920 uint8_t full[24*17];\ | |
1921 uint8_t halfH[272];\ | |
1922 uint8_t halfHV[256];\ | |
984 | 1923 copy_block17(full, src, 24, stride, 17);\ |
1924 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ | |
1925 put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\ | |
1926 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ | |
1927 OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\ | |
1928 }\ | |
1064 | 1929 void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\ |
1930 uint8_t full[24*17];\ | |
1931 uint8_t halfH[272];\ | |
1932 uint8_t halfV[256];\ | |
1933 uint8_t halfHV[256];\ | |
651 | 1934 copy_block17(full, src, 24, stride, 17);\ |
1935 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full , 16, 24, 17);\ | |
954 | 1936 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\ |
1937 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ | |
651 | 1938 OPNAME ## pixels16_l4(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\ |
1939 }\ | |
1064 | 1940 static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){\ |
1941 uint8_t full[24*17];\ | |
1942 uint8_t halfH[272];\ | |
1943 uint8_t halfHV[256];\ | |
984 | 1944 copy_block17(full, src, 24, stride, 17);\ |
1945 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ | |
1946 put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\ | |
1947 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ | |
1948 OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\ | |
1949 }\ | |
1064 | 1950 static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, int stride){\ |
1951 uint8_t halfH[272];\ | |
1952 uint8_t halfHV[256];\ | |
651 | 1953 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\ |
954 | 1954 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ |
651 | 1955 OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\ |
255 | 1956 }\ |
1064 | 1957 static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, int stride){\ |
1958 uint8_t halfH[272];\ | |
1959 uint8_t halfHV[256];\ | |
651 | 1960 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\ |
954 | 1961 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ |
651 | 1962 OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\ |
1963 }\ | |
1064 | 1964 void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\ |
1965 uint8_t full[24*17];\ | |
1966 uint8_t halfH[272];\ | |
1967 uint8_t halfV[256];\ | |
1968 uint8_t halfHV[256];\ | |
651 | 1969 copy_block17(full, src, 24, stride, 17);\ |
1970 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ | |
954 | 1971 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\ |
1972 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ | |
651 | 1973 OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\ |
255 | 1974 }\ |
1064 | 1975 static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, int stride){\ |
1976 uint8_t full[24*17];\ | |
1977 uint8_t halfH[272];\ | |
984 | 1978 copy_block17(full, src, 24, stride, 17);\ |
1979 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ | |
1980 put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\ | |
1981 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\ | |
1982 }\ | |
1064 | 1983 void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\ |
1984 uint8_t full[24*17];\ | |
1985 uint8_t halfH[272];\ | |
1986 uint8_t halfV[256];\ | |
1987 uint8_t halfHV[256];\ | |
651 | 1988 copy_block17(full, src, 24, stride, 17);\ |
1989 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ | |
954 | 1990 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\ |
1991 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ | |
651 | 1992 OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\ |
1993 }\ | |
1064 | 1994 static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, int stride){\ |
1995 uint8_t full[24*17];\ | |
1996 uint8_t halfH[272];\ | |
984 | 1997 copy_block17(full, src, 24, stride, 17);\ |
1998 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ | |
1999 put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\ | |
2000 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\ | |
2001 }\ | |
1064 | 2002 static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, int stride){\ |
2003 uint8_t halfH[272];\ | |
651 | 2004 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\ |
954 | 2005 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\ |
859 | 2006 } |
255 | 2007 |
/* Pixel-store operators plugged into QPEL_MC as its OP argument.
 *
 * The lowpass filters hand each operator a filter accumulator 'b' that is
 * scaled by 32, so every operator does a rounded (+16) or truncating (+15)
 * shift by 5 and clips the result through the crop table 'cm'
 * (cm = ff_cropTbl + MAX_NEG_CROP in the generated functions).
 *
 *   op_put        - store with normal rounding:     dst = clip((b+16)>>5)
 *   op_put_no_rnd - store with no-rounding mode:    dst = clip((b+15)>>5)
 *                   (MPEG-4 rounding_control=1 biases the division down)
 *   op_avg        - average into dst with rounding: dst = (dst + put + 1)>>1
 *   op_avg_no_rnd - average into dst, round down:   dst = (dst + put_no_rnd)>>1
 */
#define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
#define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1)
#define op_put(a, b) a = cm[((b) + 16)>>5]
#define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5]

/* Instantiate the full qpel8/qpel16 motion-compensation function sets:
 * put (rounded), put_no_rnd, and avg.  The second/third arguments become
 * the name affixes of the generated functions (e.g. put_no_rnd_qpel8_mc12_c).
 * NOTE(review): avg_no_rnd is intentionally left disabled — presumably no
 * codec path needs it; confirm before enabling. */
QPEL_MC(0, put_ , _ , op_put)
QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd)
QPEL_MC(0, avg_ , _ , op_avg)
//QPEL_MC(1, avg_no_rnd , _ , op_avg)
/* Undefine the operators so later macro blocks (e.g. H264_LOWPASS) can
 * reuse the same names with different scaling/rounding. */
#undef op_avg
#undef op_avg_no_rnd
#undef op_put
#undef op_put_no_rnd
255 | 2021 |
1168 | 2022 #if 1 |
2023 #define H264_LOWPASS(OPNAME, OP, OP2) \ | |
5151 | 2024 static av_unused void OPNAME ## h264_qpel2_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ |
3020
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2025 const int h=2;\ |
4176 | 2026 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ |
3020
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2027 int i;\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2028 for(i=0; i<h; i++)\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2029 {\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2030 OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2031 OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2032 dst+=dstStride;\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2033 src+=srcStride;\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2034 }\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2035 }\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2036 \ |
5151 | 2037 static av_unused void OPNAME ## h264_qpel2_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ |
3020
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2038 const int w=2;\ |
4176 | 2039 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ |
3020
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2040 int i;\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2041 for(i=0; i<w; i++)\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2042 {\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2043 const int srcB= src[-2*srcStride];\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2044 const int srcA= src[-1*srcStride];\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2045 const int src0= src[0 *srcStride];\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2046 const int src1= src[1 *srcStride];\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2047 const int src2= src[2 *srcStride];\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2048 const int src3= src[3 *srcStride];\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2049 const int src4= src[4 *srcStride];\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2050 OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2051 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2052 dst++;\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2053 src++;\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2054 }\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2055 }\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2056 \ |
5151 | 2057 static av_unused void OPNAME ## h264_qpel2_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\ |
3020
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2058 const int h=2;\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2059 const int w=2;\ |
4176 | 2060 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ |
3020
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2061 int i;\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2062 src -= 2*srcStride;\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2063 for(i=0; i<h+5; i++)\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2064 {\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2065 tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]);\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2066 tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]);\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2067 tmp+=tmpStride;\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2068 src+=srcStride;\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2069 }\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2070 tmp -= tmpStride*(h+5-2);\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2071 for(i=0; i<w; i++)\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2072 {\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2073 const int tmpB= tmp[-2*tmpStride];\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2074 const int tmpA= tmp[-1*tmpStride];\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2075 const int tmp0= tmp[0 *tmpStride];\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2076 const int tmp1= tmp[1 *tmpStride];\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2077 const int tmp2= tmp[2 *tmpStride];\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2078 const int tmp3= tmp[3 *tmpStride];\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2079 const int tmp4= tmp[4 *tmpStride];\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2080 OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2081 OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2082 dst++;\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2083 tmp++;\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2084 }\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2085 }\ |
1168 | 2086 static void OPNAME ## h264_qpel4_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ |
2087 const int h=4;\ | |
4176 | 2088 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ |
1168 | 2089 int i;\ |
2090 for(i=0; i<h; i++)\ | |
2091 {\ | |
2092 OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\ | |
2093 OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\ | |
2094 OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]));\ | |
2095 OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]));\ | |
2096 dst+=dstStride;\ | |
2097 src+=srcStride;\ | |
2098 }\ | |
2099 }\ | |
2100 \ | |
2101 static void OPNAME ## h264_qpel4_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ | |
2102 const int w=4;\ | |
4176 | 2103 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ |
1168 | 2104 int i;\ |
2105 for(i=0; i<w; i++)\ | |
2106 {\ | |
2107 const int srcB= src[-2*srcStride];\ | |
2108 const int srcA= src[-1*srcStride];\ | |
2109 const int src0= src[0 *srcStride];\ | |
2110 const int src1= src[1 *srcStride];\ | |
2111 const int src2= src[2 *srcStride];\ | |
2112 const int src3= src[3 *srcStride];\ | |
2113 const int src4= src[4 *srcStride];\ | |
2114 const int src5= src[5 *srcStride];\ | |
2115 const int src6= src[6 *srcStride];\ | |
2116 OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\ | |
2117 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\ | |
2118 OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\ | |
2119 OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\ | |
2120 dst++;\ | |
2121 src++;\ | |
2122 }\ | |
2123 }\ | |
2124 \ | |
2125 static void OPNAME ## h264_qpel4_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\ | |
2126 const int h=4;\ | |
2127 const int w=4;\ | |
4176 | 2128 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ |
1168 | 2129 int i;\ |
2130 src -= 2*srcStride;\ | |
2131 for(i=0; i<h+5; i++)\ | |
2132 {\ | |
2133 tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]);\ | |
2134 tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]);\ | |
2135 tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]);\ | |
2136 tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]);\ | |
2137 tmp+=tmpStride;\ | |
2138 src+=srcStride;\ | |
2139 }\ | |
2140 tmp -= tmpStride*(h+5-2);\ | |
2141 for(i=0; i<w; i++)\ | |
2142 {\ | |
2143 const int tmpB= tmp[-2*tmpStride];\ | |
2144 const int tmpA= tmp[-1*tmpStride];\ | |
2145 const int tmp0= tmp[0 *tmpStride];\ | |
2146 const int tmp1= tmp[1 *tmpStride];\ | |
2147 const int tmp2= tmp[2 *tmpStride];\ | |
2148 const int tmp3= tmp[3 *tmpStride];\ | |
2149 const int tmp4= tmp[4 *tmpStride];\ | |
2150 const int tmp5= tmp[5 *tmpStride];\ | |
2151 const int tmp6= tmp[6 *tmpStride];\ | |
2152 OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\ | |
2153 OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\ | |
2154 OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\ | |
2155 OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\ | |
2156 dst++;\ | |
2157 tmp++;\ | |
2158 }\ | |
2159 }\ | |
2160 \ | |
2161 static void OPNAME ## h264_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ | |
2162 const int h=8;\ | |
4176 | 2163 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ |
1168 | 2164 int i;\ |
2165 for(i=0; i<h; i++)\ | |
2166 {\ | |
2167 OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]));\ | |
2168 OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]));\ | |
2169 OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]));\ | |
2170 OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]));\ | |
2171 OP(dst[4], (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]));\ | |
2172 OP(dst[5], (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]));\ | |
2173 OP(dst[6], (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]));\ | |
2174 OP(dst[7], (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]));\ | |
2175 dst+=dstStride;\ | |
2176 src+=srcStride;\ | |
2177 }\ | |
2178 }\ | |
2179 \ | |
2180 static void OPNAME ## h264_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ | |
2181 const int w=8;\ | |
4176 | 2182 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ |
1168 | 2183 int i;\ |
2184 for(i=0; i<w; i++)\ | |
2185 {\ | |
2186 const int srcB= src[-2*srcStride];\ | |
2187 const int srcA= src[-1*srcStride];\ | |
2188 const int src0= src[0 *srcStride];\ | |
2189 const int src1= src[1 *srcStride];\ | |
2190 const int src2= src[2 *srcStride];\ | |
2191 const int src3= src[3 *srcStride];\ | |
2192 const int src4= src[4 *srcStride];\ | |
2193 const int src5= src[5 *srcStride];\ | |
2194 const int src6= src[6 *srcStride];\ | |
2195 const int src7= src[7 *srcStride];\ | |
2196 const int src8= src[8 *srcStride];\ | |
2197 const int src9= src[9 *srcStride];\ | |
2198 const int src10=src[10*srcStride];\ | |
2199 OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\ | |
2200 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\ | |
2201 OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\ | |
2202 OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\ | |
2203 OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*5 + (src2+src7));\ | |
2204 OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*5 + (src3+src8));\ | |
2205 OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*5 + (src4+src9));\ | |
2206 OP(dst[7*dstStride], (src7+src8)*20 - (src6+src9)*5 + (src5+src10));\ | |
2207 dst++;\ | |
2208 src++;\ | |
2209 }\ | |
2210 }\ | |
2211 \ | |
2212 static void OPNAME ## h264_qpel8_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\ | |
2213 const int h=8;\ | |
2214 const int w=8;\ | |
4176 | 2215 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ |
1168 | 2216 int i;\ |
2217 src -= 2*srcStride;\ | |
2218 for(i=0; i<h+5; i++)\ | |
2219 {\ | |
2220 tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]);\ | |
2221 tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]);\ | |
2222 tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]);\ | |
2223 tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]);\ | |
2224 tmp[4]= (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]);\ | |
2225 tmp[5]= (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]);\ | |
2226 tmp[6]= (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]);\ | |
2227 tmp[7]= (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]);\ | |
2228 tmp+=tmpStride;\ | |
2229 src+=srcStride;\ | |
2230 }\ | |
2231 tmp -= tmpStride*(h+5-2);\ | |
2232 for(i=0; i<w; i++)\ | |
2233 {\ | |
2234 const int tmpB= tmp[-2*tmpStride];\ | |
2235 const int tmpA= tmp[-1*tmpStride];\ | |
2236 const int tmp0= tmp[0 *tmpStride];\ | |
2237 const int tmp1= tmp[1 *tmpStride];\ | |
2238 const int tmp2= tmp[2 *tmpStride];\ | |
2239 const int tmp3= tmp[3 *tmpStride];\ | |
2240 const int tmp4= tmp[4 *tmpStride];\ | |
2241 const int tmp5= tmp[5 *tmpStride];\ | |
2242 const int tmp6= tmp[6 *tmpStride];\ | |
2243 const int tmp7= tmp[7 *tmpStride];\ | |
2244 const int tmp8= tmp[8 *tmpStride];\ | |
2245 const int tmp9= tmp[9 *tmpStride];\ | |
2246 const int tmp10=tmp[10*tmpStride];\ | |
2247 OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\ | |
2248 OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\ | |
2249 OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\ | |
2250 OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\ | |
2251 OP2(dst[4*dstStride], (tmp4+tmp5)*20 - (tmp3+tmp6)*5 + (tmp2+tmp7));\ | |
2252 OP2(dst[5*dstStride], (tmp5+tmp6)*20 - (tmp4+tmp7)*5 + (tmp3+tmp8));\ | |
2253 OP2(dst[6*dstStride], (tmp6+tmp7)*20 - (tmp5+tmp8)*5 + (tmp4+tmp9));\ | |
2254 OP2(dst[7*dstStride], (tmp7+tmp8)*20 - (tmp6+tmp9)*5 + (tmp5+tmp10));\ | |
2255 dst++;\ | |
2256 tmp++;\ | |
2257 }\ | |
2258 }\ | |
2259 \ | |
2260 static void OPNAME ## h264_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ | |
2261 OPNAME ## h264_qpel8_v_lowpass(dst , src , dstStride, srcStride);\ | |
2262 OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\ | |
2263 src += 8*srcStride;\ | |
2264 dst += 8*dstStride;\ | |
2265 OPNAME ## h264_qpel8_v_lowpass(dst , src , dstStride, srcStride);\ | |
2266 OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\ | |
2267 }\ | |
2268 \ | |
2269 static void OPNAME ## h264_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ | |
2270 OPNAME ## h264_qpel8_h_lowpass(dst , src , dstStride, srcStride);\ | |
2271 OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\ | |
2272 src += 8*srcStride;\ | |
2273 dst += 8*dstStride;\ | |
2274 OPNAME ## h264_qpel8_h_lowpass(dst , src , dstStride, srcStride);\ | |
2275 OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\ | |
2276 }\ | |
2277 \ | |
2278 static void OPNAME ## h264_qpel16_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\ | |
2279 OPNAME ## h264_qpel8_hv_lowpass(dst , tmp , src , dstStride, tmpStride, srcStride);\ | |
2280 OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\ | |
2281 src += 8*srcStride;\ | |
2282 dst += 8*dstStride;\ | |
2283 OPNAME ## h264_qpel8_hv_lowpass(dst , tmp , src , dstStride, tmpStride, srcStride);\ | |
2284 OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\ | |
2285 }\ | |
2286 | |
2287 #define H264_MC(OPNAME, SIZE) \ | |
2288 static void OPNAME ## h264_qpel ## SIZE ## _mc00_c (uint8_t *dst, uint8_t *src, int stride){\ | |
2289 OPNAME ## pixels ## SIZE ## _c(dst, src, stride, SIZE);\ | |
2290 }\ | |
2291 \ | |
2292 static void OPNAME ## h264_qpel ## SIZE ## _mc10_c(uint8_t *dst, uint8_t *src, int stride){\ | |
2293 uint8_t half[SIZE*SIZE];\ | |
2294 put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\ | |
2295 OPNAME ## pixels ## SIZE ## _l2(dst, src, half, stride, stride, SIZE, SIZE);\ | |
2296 }\ | |
2297 \ | |
2298 static void OPNAME ## h264_qpel ## SIZE ## _mc20_c(uint8_t *dst, uint8_t *src, int stride){\ | |
2299 OPNAME ## h264_qpel ## SIZE ## _h_lowpass(dst, src, stride, stride);\ | |
2300 }\ | |
2301 \ | |
2302 static void OPNAME ## h264_qpel ## SIZE ## _mc30_c(uint8_t *dst, uint8_t *src, int stride){\ | |
2303 uint8_t half[SIZE*SIZE];\ | |
2304 put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\ | |
2305 OPNAME ## pixels ## SIZE ## _l2(dst, src+1, half, stride, stride, SIZE, SIZE);\ | |
2306 }\ | |
2307 \ | |
2308 static void OPNAME ## h264_qpel ## SIZE ## _mc01_c(uint8_t *dst, uint8_t *src, int stride){\ | |
2309 uint8_t full[SIZE*(SIZE+5)];\ | |
2310 uint8_t * const full_mid= full + SIZE*2;\ | |
2311 uint8_t half[SIZE*SIZE];\ | |
2312 copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\ | |
2313 put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\ | |
2314 OPNAME ## pixels ## SIZE ## _l2(dst, full_mid, half, stride, SIZE, SIZE, SIZE);\ | |
2315 }\ | |
2316 \ | |
2317 static void OPNAME ## h264_qpel ## SIZE ## _mc02_c(uint8_t *dst, uint8_t *src, int stride){\ | |
2318 uint8_t full[SIZE*(SIZE+5)];\ | |
2319 uint8_t * const full_mid= full + SIZE*2;\ | |
2320 copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\ | |
2321 OPNAME ## h264_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE);\ | |
2322 }\ | |
2323 \ | |
2324 static void OPNAME ## h264_qpel ## SIZE ## _mc03_c(uint8_t *dst, uint8_t *src, int stride){\ | |
2325 uint8_t full[SIZE*(SIZE+5)];\ | |
2326 uint8_t * const full_mid= full + SIZE*2;\ | |
2327 uint8_t half[SIZE*SIZE];\ | |
2328 copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\ | |
2329 put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\ | |
2330 OPNAME ## pixels ## SIZE ## _l2(dst, full_mid+SIZE, half, stride, SIZE, SIZE, SIZE);\ | |
2331 }\ | |
2332 \ | |
2333 static void OPNAME ## h264_qpel ## SIZE ## _mc11_c(uint8_t *dst, uint8_t *src, int stride){\ | |
2334 uint8_t full[SIZE*(SIZE+5)];\ | |
2335 uint8_t * const full_mid= full + SIZE*2;\ | |
2336 uint8_t halfH[SIZE*SIZE];\ | |
2337 uint8_t halfV[SIZE*SIZE];\ | |
2338 put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\ | |
2339 copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\ | |
2340 put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\ | |
2341 OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\ | |
2342 }\ | |
2343 \ | |
2344 static void OPNAME ## h264_qpel ## SIZE ## _mc31_c(uint8_t *dst, uint8_t *src, int stride){\ | |
2345 uint8_t full[SIZE*(SIZE+5)];\ | |
2346 uint8_t * const full_mid= full + SIZE*2;\ | |
2347 uint8_t halfH[SIZE*SIZE];\ | |
2348 uint8_t halfV[SIZE*SIZE];\ | |
2349 put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\ | |
2350 copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\ | |
2351 put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\ | |
2352 OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\ | |
2353 }\ | |
2354 \ | |
2355 static void OPNAME ## h264_qpel ## SIZE ## _mc13_c(uint8_t *dst, uint8_t *src, int stride){\ | |
2356 uint8_t full[SIZE*(SIZE+5)];\ | |
2357 uint8_t * const full_mid= full + SIZE*2;\ | |
2358 uint8_t halfH[SIZE*SIZE];\ | |
2359 uint8_t halfV[SIZE*SIZE];\ | |
2360 put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\ | |
2361 copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\ | |
2362 put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\ | |
2363 OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\ | |
2364 }\ | |
2365 \ | |
2366 static void OPNAME ## h264_qpel ## SIZE ## _mc33_c(uint8_t *dst, uint8_t *src, int stride){\ | |
2367 uint8_t full[SIZE*(SIZE+5)];\ | |
2368 uint8_t * const full_mid= full + SIZE*2;\ | |
2369 uint8_t halfH[SIZE*SIZE];\ | |
2370 uint8_t halfV[SIZE*SIZE];\ | |
2371 put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\ | |
2372 copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\ | |
2373 put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\ | |
2374 OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\ | |
2375 }\ | |
2376 \ | |
2377 static void OPNAME ## h264_qpel ## SIZE ## _mc22_c(uint8_t *dst, uint8_t *src, int stride){\ | |
2378 int16_t tmp[SIZE*(SIZE+5)];\ | |
2379 OPNAME ## h264_qpel ## SIZE ## _hv_lowpass(dst, tmp, src, stride, SIZE, stride);\ | |
2380 }\ | |
2381 \ | |
2382 static void OPNAME ## h264_qpel ## SIZE ## _mc21_c(uint8_t *dst, uint8_t *src, int stride){\ | |
2383 int16_t tmp[SIZE*(SIZE+5)];\ | |
2384 uint8_t halfH[SIZE*SIZE];\ | |
2385 uint8_t halfHV[SIZE*SIZE];\ | |
2386 put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\ | |
2387 put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\ | |
2388 OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\ | |
2389 }\ | |
2390 \ | |
2391 static void OPNAME ## h264_qpel ## SIZE ## _mc23_c(uint8_t *dst, uint8_t *src, int stride){\ | |
2392 int16_t tmp[SIZE*(SIZE+5)];\ | |
2393 uint8_t halfH[SIZE*SIZE];\ | |
2394 uint8_t halfHV[SIZE*SIZE];\ | |
2395 put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\ | |
2396 put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\ | |
2397 OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\ | |
2398 }\ | |
2399 \ | |
2400 static void OPNAME ## h264_qpel ## SIZE ## _mc12_c(uint8_t *dst, uint8_t *src, int stride){\ | |
2401 uint8_t full[SIZE*(SIZE+5)];\ | |
2402 uint8_t * const full_mid= full + SIZE*2;\ | |
2403 int16_t tmp[SIZE*(SIZE+5)];\ | |
2404 uint8_t halfV[SIZE*SIZE];\ | |
2405 uint8_t halfHV[SIZE*SIZE];\ | |
2406 copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\ | |
2407 put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\ | |
2408 put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\ | |
2409 OPNAME ## pixels ## SIZE ## _l2(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\ | |
2410 }\ | |
2411 \ | |
2412 static void OPNAME ## h264_qpel ## SIZE ## _mc32_c(uint8_t *dst, uint8_t *src, int stride){\ | |
2413 uint8_t full[SIZE*(SIZE+5)];\ | |
2414 uint8_t * const full_mid= full + SIZE*2;\ | |
2415 int16_t tmp[SIZE*(SIZE+5)];\ | |
2416 uint8_t halfV[SIZE*SIZE];\ | |
2417 uint8_t halfHV[SIZE*SIZE];\ | |
2418 copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\ | |
2419 put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\ | |
2420 put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\ | |
2421 OPNAME ## pixels ## SIZE ## _l2(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\ | |
2422 }\ | |
2423 | |
2424 #define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1) | |
2425 //#define op_avg2(a, b) a = (((a)*w1+cm[((b) + 16)>>5]*w2 + o + 64)>>7) | |
2426 #define op_put(a, b) a = cm[((b) + 16)>>5] | |
2427 #define op2_avg(a, b) a = (((a)+cm[((b) + 512)>>10]+1)>>1) | |
2428 #define op2_put(a, b) a = cm[((b) + 512)>>10] | |
2429 | |
2430 H264_LOWPASS(put_ , op_put, op2_put) | |
2431 H264_LOWPASS(avg_ , op_avg, op2_avg) | |
3020
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2432 H264_MC(put_, 2) |
1168 | 2433 H264_MC(put_, 4) |
2434 H264_MC(put_, 8) | |
2435 H264_MC(put_, 16) | |
2436 H264_MC(avg_, 4) | |
2437 H264_MC(avg_, 8) | |
2438 H264_MC(avg_, 16) | |
2439 | |
2440 #undef op_avg | |
2441 #undef op_put | |
2442 #undef op2_avg | |
2443 #undef op2_put | |
2444 #endif | |
2445 | |
4594 | 2446 #define op_scale1(x) block[x] = av_clip_uint8( (block[x]*weight + offset) >> log2_denom ) |
2447 #define op_scale2(x) dst[x] = av_clip_uint8( (src[x]*weights + dst[x]*weightd + offset) >> (log2_denom+1)) | |
2415 | 2448 #define H264_WEIGHT(W,H) \ |
2449 static void weight_h264_pixels ## W ## x ## H ## _c(uint8_t *block, int stride, int log2_denom, int weight, int offset){ \ | |
3029 | 2450 int y; \ |
2415 | 2451 offset <<= log2_denom; \ |
2452 if(log2_denom) offset += 1<<(log2_denom-1); \ | |
2453 for(y=0; y<H; y++, block += stride){ \ | |
2454 op_scale1(0); \ | |
2455 op_scale1(1); \ | |
2456 if(W==2) continue; \ | |
2457 op_scale1(2); \ | |
2458 op_scale1(3); \ | |
2459 if(W==4) continue; \ | |
2460 op_scale1(4); \ | |
2461 op_scale1(5); \ | |
2462 op_scale1(6); \ | |
2463 op_scale1(7); \ | |
2464 if(W==8) continue; \ | |
2465 op_scale1(8); \ | |
2466 op_scale1(9); \ | |
2467 op_scale1(10); \ | |
2468 op_scale1(11); \ | |
2469 op_scale1(12); \ | |
2470 op_scale1(13); \ | |
2471 op_scale1(14); \ | |
2472 op_scale1(15); \ | |
2473 } \ | |
2474 } \ | |
3029 | 2475 static void biweight_h264_pixels ## W ## x ## H ## _c(uint8_t *dst, uint8_t *src, int stride, int log2_denom, int weightd, int weights, int offset){ \ |
2476 int y; \ | |
2477 offset = ((offset + 1) | 1) << log2_denom; \ | |
2415 | 2478 for(y=0; y<H; y++, dst += stride, src += stride){ \ |
2479 op_scale2(0); \ | |
2480 op_scale2(1); \ | |
2481 if(W==2) continue; \ | |
2482 op_scale2(2); \ | |
2483 op_scale2(3); \ | |
2484 if(W==4) continue; \ | |
2485 op_scale2(4); \ | |
2486 op_scale2(5); \ | |
2487 op_scale2(6); \ | |
2488 op_scale2(7); \ | |
2489 if(W==8) continue; \ | |
2490 op_scale2(8); \ | |
2491 op_scale2(9); \ | |
2492 op_scale2(10); \ | |
2493 op_scale2(11); \ | |
2494 op_scale2(12); \ | |
2495 op_scale2(13); \ | |
2496 op_scale2(14); \ | |
2497 op_scale2(15); \ | |
2498 } \ | |
2499 } | |
2500 | |
2501 H264_WEIGHT(16,16) | |
2502 H264_WEIGHT(16,8) | |
2503 H264_WEIGHT(8,16) | |
2504 H264_WEIGHT(8,8) | |
2505 H264_WEIGHT(8,4) | |
2506 H264_WEIGHT(4,8) | |
2507 H264_WEIGHT(4,4) | |
2508 H264_WEIGHT(4,2) | |
2509 H264_WEIGHT(2,4) | |
2510 H264_WEIGHT(2,2) | |
2511 | |
2512 #undef op_scale1 | |
2513 #undef op_scale2 | |
2514 #undef H264_WEIGHT | |
2515 | |
936 | 2516 static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){ |
4176 | 2517 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; |
936 | 2518 int i; |
2519 | |
2520 for(i=0; i<h; i++){ | |
2521 dst[0]= cm[(9*(src[0] + src[1]) - (src[-1] + src[2]) + 8)>>4]; | |
2522 dst[1]= cm[(9*(src[1] + src[2]) - (src[ 0] + src[3]) + 8)>>4]; | |
2523 dst[2]= cm[(9*(src[2] + src[3]) - (src[ 1] + src[4]) + 8)>>4]; | |
2524 dst[3]= cm[(9*(src[3] + src[4]) - (src[ 2] + src[5]) + 8)>>4]; | |
2525 dst[4]= cm[(9*(src[4] + src[5]) - (src[ 3] + src[6]) + 8)>>4]; | |
2526 dst[5]= cm[(9*(src[5] + src[6]) - (src[ 4] + src[7]) + 8)>>4]; | |
2527 dst[6]= cm[(9*(src[6] + src[7]) - (src[ 5] + src[8]) + 8)>>4]; | |
2528 dst[7]= cm[(9*(src[7] + src[8]) - (src[ 6] + src[9]) + 8)>>4]; | |
2529 dst+=dstStride; | |
2967 | 2530 src+=srcStride; |
936 | 2531 } |
2532 } | |
2533 | |
3432 | 2534 #ifdef CONFIG_CAVS_DECODER |
3395
adccbf4a1040
CAVS decoder by (Stefan Gehrer stefan.gehrer gmx.de)
michael
parents:
3373
diff
changeset
|
2535 /* AVS specific */ |
adccbf4a1040
CAVS decoder by (Stefan Gehrer stefan.gehrer gmx.de)
michael
parents:
3373
diff
changeset
|
2536 void ff_cavsdsp_init(DSPContext* c, AVCodecContext *avctx); |
adccbf4a1040
CAVS decoder by (Stefan Gehrer stefan.gehrer gmx.de)
michael
parents:
3373
diff
changeset
|
2537 |
adccbf4a1040
CAVS decoder by (Stefan Gehrer stefan.gehrer gmx.de)
michael
parents:
3373
diff
changeset
|
2538 void ff_put_cavs_qpel8_mc00_c(uint8_t *dst, uint8_t *src, int stride) { |
adccbf4a1040
CAVS decoder by (Stefan Gehrer stefan.gehrer gmx.de)
michael
parents:
3373
diff
changeset
|
2539 put_pixels8_c(dst, src, stride, 8); |
adccbf4a1040
CAVS decoder by (Stefan Gehrer stefan.gehrer gmx.de)
michael
parents:
3373
diff
changeset
|
2540 } |
adccbf4a1040
CAVS decoder by (Stefan Gehrer stefan.gehrer gmx.de)
michael
parents:
3373
diff
changeset
|
2541 void ff_avg_cavs_qpel8_mc00_c(uint8_t *dst, uint8_t *src, int stride) { |
adccbf4a1040
CAVS decoder by (Stefan Gehrer stefan.gehrer gmx.de)
michael
parents:
3373
diff
changeset
|
2542 avg_pixels8_c(dst, src, stride, 8); |
adccbf4a1040
CAVS decoder by (Stefan Gehrer stefan.gehrer gmx.de)
michael
parents:
3373
diff
changeset
|
2543 } |
adccbf4a1040
CAVS decoder by (Stefan Gehrer stefan.gehrer gmx.de)
michael
parents:
3373
diff
changeset
|
2544 void ff_put_cavs_qpel16_mc00_c(uint8_t *dst, uint8_t *src, int stride) { |
adccbf4a1040
CAVS decoder by (Stefan Gehrer stefan.gehrer gmx.de)
michael
parents:
3373
diff
changeset
|
2545 put_pixels16_c(dst, src, stride, 16); |
adccbf4a1040
CAVS decoder by (Stefan Gehrer stefan.gehrer gmx.de)
michael
parents:
3373
diff
changeset
|
2546 } |
adccbf4a1040
CAVS decoder by (Stefan Gehrer stefan.gehrer gmx.de)
michael
parents:
3373
diff
changeset
|
2547 void ff_avg_cavs_qpel16_mc00_c(uint8_t *dst, uint8_t *src, int stride) { |
adccbf4a1040
CAVS decoder by (Stefan Gehrer stefan.gehrer gmx.de)
michael
parents:
3373
diff
changeset
|
2548 avg_pixels16_c(dst, src, stride, 16); |
adccbf4a1040
CAVS decoder by (Stefan Gehrer stefan.gehrer gmx.de)
michael
parents:
3373
diff
changeset
|
2549 } |
3432 | 2550 #endif /* CONFIG_CAVS_DECODER */ |
3395
adccbf4a1040
CAVS decoder by (Stefan Gehrer stefan.gehrer gmx.de)
michael
parents:
3373
diff
changeset
|
2551 |
3526 | 2552 #if defined(CONFIG_VC1_DECODER) || defined(CONFIG_WMV3_DECODER) |
2553 /* VC-1 specific */ | |
2554 void ff_vc1dsp_init(DSPContext* c, AVCodecContext *avctx); | |
2555 | |
2556 void ff_put_vc1_mspel_mc00_c(uint8_t *dst, uint8_t *src, int stride, int rnd) { | |
2557 put_pixels8_c(dst, src, stride, 8); | |
2558 } | |
2559 #endif /* CONFIG_VC1_DECODER||CONFIG_WMV3_DECODER */ | |
2560 | |
4296 | 2561 #if defined(CONFIG_H264_ENCODER) |
2562 /* H264 specific */ | |
5411
362aec4ef932
Take care of some renames (Doxygen and function name) after the previous pure rename patch.
takis
parents:
5394
diff
changeset
|
2563 void ff_h264dspenc_init(DSPContext* c, AVCodecContext *avctx); |
4296 | 2564 #endif /* CONFIG_H264_ENCODER */ |
2565 | |
936 | 2566 static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){ |
4176 | 2567 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; |
936 | 2568 int i; |
2569 | |
2570 for(i=0; i<w; i++){ | |
2571 const int src_1= src[ -srcStride]; | |
2572 const int src0 = src[0 ]; | |
2573 const int src1 = src[ srcStride]; | |
2574 const int src2 = src[2*srcStride]; | |
2575 const int src3 = src[3*srcStride]; | |
2576 const int src4 = src[4*srcStride]; | |
2577 const int src5 = src[5*srcStride]; | |
2578 const int src6 = src[6*srcStride]; | |
2579 const int src7 = src[7*srcStride]; | |
2580 const int src8 = src[8*srcStride]; | |
2581 const int src9 = src[9*srcStride]; | |
2582 dst[0*dstStride]= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4]; | |
2583 dst[1*dstStride]= cm[(9*(src1 + src2) - (src0 + src3) + 8)>>4]; | |
2584 dst[2*dstStride]= cm[(9*(src2 + src3) - (src1 + src4) + 8)>>4]; | |
2585 dst[3*dstStride]= cm[(9*(src3 + src4) - (src2 + src5) + 8)>>4]; | |
2586 dst[4*dstStride]= cm[(9*(src4 + src5) - (src3 + src6) + 8)>>4]; | |
2587 dst[5*dstStride]= cm[(9*(src5 + src6) - (src4 + src7) + 8)>>4]; | |
2588 dst[6*dstStride]= cm[(9*(src6 + src7) - (src5 + src8) + 8)>>4]; | |
2589 dst[7*dstStride]= cm[(9*(src7 + src8) - (src6 + src9) + 8)>>4]; | |
2590 src++; | |
2591 dst++; | |
2592 } | |
2593 } | |
2594 | |
2595 static void put_mspel8_mc00_c (uint8_t *dst, uint8_t *src, int stride){ | |
2596 put_pixels8_c(dst, src, stride, 8); | |
2597 } | |
2598 | |
2599 static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){ | |
2600 uint8_t half[64]; | |
2601 wmv2_mspel8_h_lowpass(half, src, 8, stride, 8); | |
2602 put_pixels8_l2(dst, src, half, stride, stride, 8, 8); | |
2603 } | |
2604 | |
2605 static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){ | |
2606 wmv2_mspel8_h_lowpass(dst, src, stride, stride, 8); | |
2607 } | |
2608 | |
2609 static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){ | |
2610 uint8_t half[64]; | |
2611 wmv2_mspel8_h_lowpass(half, src, 8, stride, 8); | |
2612 put_pixels8_l2(dst, src+1, half, stride, stride, 8, 8); | |
2613 } | |
2614 | |
2615 static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){ | |
2616 wmv2_mspel8_v_lowpass(dst, src, stride, stride, 8); | |
2617 } | |
2618 | |
2619 static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){ | |
2620 uint8_t halfH[88]; | |
2621 uint8_t halfV[64]; | |
2622 uint8_t halfHV[64]; | |
2623 wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11); | |
2624 wmv2_mspel8_v_lowpass(halfV, src, 8, stride, 8); | |
2625 wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8); | |
2626 put_pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8); | |
2627 } | |
2628 static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){ | |
2629 uint8_t halfH[88]; | |
2630 uint8_t halfV[64]; | |
2631 uint8_t halfHV[64]; | |
2632 wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11); | |
2633 wmv2_mspel8_v_lowpass(halfV, src+1, 8, stride, 8); | |
2634 wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8); | |
2635 put_pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8); | |
2636 } | |
2637 static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){ | |
2638 uint8_t halfH[88]; | |
2639 wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11); | |
2640 wmv2_mspel8_v_lowpass(dst, halfH+8, stride, 8, 8); | |
2641 } | |
2642 | |
1644 | 2643 static void h263_v_loop_filter_c(uint8_t *src, int stride, int qscale){ |
5394
e9a6215f4e3a
help some gcc version to optimize out those functions
aurel
parents:
5291
diff
changeset
|
2644 if(ENABLE_ANY_H263) { |
1644 | 2645 int x; |
2646 const int strength= ff_h263_loop_filter_strength[qscale]; | |
2967 | 2647 |
1644 | 2648 for(x=0; x<8; x++){ |
2649 int d1, d2, ad1; | |
2650 int p0= src[x-2*stride]; | |
2651 int p1= src[x-1*stride]; | |
2652 int p2= src[x+0*stride]; | |
2653 int p3= src[x+1*stride]; | |
2654 int d = (p0 - p3 + 4*(p2 - p1)) / 8; | |
2655 | |
2656 if (d<-2*strength) d1= 0; | |
2657 else if(d<- strength) d1=-2*strength - d; | |
2658 else if(d< strength) d1= d; | |
2659 else if(d< 2*strength) d1= 2*strength - d; | |
2660 else d1= 0; | |
2967 | 2661 |
1644 | 2662 p1 += d1; |
2663 p2 -= d1; | |
2664 if(p1&256) p1= ~(p1>>31); | |
2665 if(p2&256) p2= ~(p2>>31); | |
2967 | 2666 |
1644 | 2667 src[x-1*stride] = p1; |
2668 src[x+0*stride] = p2; | |
2669 | |
4001 | 2670 ad1= FFABS(d1)>>1; |
2967 | 2671 |
4594 | 2672 d2= av_clip((p0-p3)/4, -ad1, ad1); |
2967 | 2673 |
1644 | 2674 src[x-2*stride] = p0 - d2; |
2675 src[x+ stride] = p3 + d2; | |
2676 } | |
5394
e9a6215f4e3a
help some gcc version to optimize out those functions
aurel
parents:
5291
diff
changeset
|
2677 } |
1644 | 2678 } |
2679 | |
2680 static void h263_h_loop_filter_c(uint8_t *src, int stride, int qscale){ | |
5394
e9a6215f4e3a
help some gcc version to optimize out those functions
aurel
parents:
5291
diff
changeset
|
2681 if(ENABLE_ANY_H263) { |
1644 | 2682 int y; |
2683 const int strength= ff_h263_loop_filter_strength[qscale]; | |
2967 | 2684 |
1644 | 2685 for(y=0; y<8; y++){ |
2686 int d1, d2, ad1; | |
2687 int p0= src[y*stride-2]; | |
2688 int p1= src[y*stride-1]; | |
2689 int p2= src[y*stride+0]; | |
2690 int p3= src[y*stride+1]; | |
2691 int d = (p0 - p3 + 4*(p2 - p1)) / 8; | |
2692 | |
2693 if (d<-2*strength) d1= 0; | |
2694 else if(d<- strength) d1=-2*strength - d; | |
2695 else if(d< strength) d1= d; | |
2696 else if(d< 2*strength) d1= 2*strength - d; | |
2697 else d1= 0; | |
2967 | 2698 |
1644 | 2699 p1 += d1; |
2700 p2 -= d1; | |
2701 if(p1&256) p1= ~(p1>>31); | |
2702 if(p2&256) p2= ~(p2>>31); | |
2967 | 2703 |
1644 | 2704 src[y*stride-1] = p1; |
2705 src[y*stride+0] = p2; | |
2706 | |
4001 | 2707 ad1= FFABS(d1)>>1; |
2967 | 2708 |
4594 | 2709 d2= av_clip((p0-p3)/4, -ad1, ad1); |
2967 | 2710 |
1644 | 2711 src[y*stride-2] = p0 - d2; |
2712 src[y*stride+1] = p3 + d2; | |
2713 } | |
5394
e9a6215f4e3a
help some gcc version to optimize out those functions
aurel
parents:
5291
diff
changeset
|
2714 } |
1644 | 2715 } |
936 | 2716 |
static void h261_loop_filter_c(uint8_t *src, int stride){
    /* H.261 in-loop deblocking: separable [1 2 1]/4 low-pass over an 8x8
     * block. Border rows/columns are passed through unchanged (copied at
     * 4x scale so one final rounding shift handles every sample). */
    int col, row, pos, idx;
    int acc[64];

    /* vertical pass into acc[], kept at 4x scale */
    for(col=0; col<8; col++){
        acc[col      ] = 4*src[col           ];
        acc[col + 7*8] = 4*src[col + 7*stride];
    }
    for(row=1; row<7; row++){
        for(col=0; col<8; col++){
            pos = row * stride + col;
            idx = row * 8 + col;
            acc[idx] = src[pos - stride] + 2*src[pos] + src[pos + stride];
        }
    }

    /* horizontal pass, rounding back down to pixel scale */
    for(row=0; row<8; row++){
        src[  row*stride] = (acc[  row*8] + 2)>>2;
        src[7+row*stride] = (acc[7+row*8] + 2)>>2;
        for(col=1; col<7; col++){
            pos = row * stride + col;
            idx = row * 8 + col;
            src[pos] = (acc[idx-1] + 2*acc[idx] + acc[idx+1] + 8)>>4;
        }
    }
}
b6f2add2511e
h261 decoder by (Maarten Daniels <maarten.daniels at student dot luc dot ac dot be>)
michael
parents:
1984
diff
changeset
|
2743 |
2707
360024d31dab
H.264 deblocking optimizations (mmx for chroma_bS4 case, convert existing cases to 8-bit math)
lorenm
parents:
2696
diff
changeset
|
/**
 * H.264 "normal" (bS < 4) luma deblocking of one 16-sample edge,
 * processed as 4 groups of 4 samples, one tc0[] entry per group.
 * xstride steps across the edge, ystride steps along it.
 * tc0[i] < 0 means "skip this group entirely".
 */
static inline void h264_loop_filter_luma_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0)
{
    int i, d;
    for( i = 0; i < 4; i++ ) {
        if( tc0[i] < 0 ) {
            pix += 4*ystride;
            continue;
        }
        for( d = 0; d < 4; d++ ) {
            /* samples across the edge: p2 p1 p0 | q0 q1 q2 */
            const int p0 = pix[-1*xstride];
            const int p1 = pix[-2*xstride];
            const int p2 = pix[-3*xstride];
            const int q0 = pix[0];
            const int q1 = pix[1*xstride];
            const int q2 = pix[2*xstride];

            /* alpha/beta gradient thresholds decide whether to filter */
            if( FFABS( p0 - q0 ) < alpha &&
                FFABS( p1 - p0 ) < beta &&
                FFABS( q1 - q0 ) < beta ) {

                int tc = tc0[i];
                int i_delta;

                /* p1/q1 are additionally filtered when their side is smooth;
                 * each filtered side widens the clip range (tc) for p0/q0.
                 * Note: p1/q1 reads above were cached, so the writes here
                 * do not feed back into this sample's computation. */
                if( FFABS( p2 - p0 ) < beta ) {
                    pix[-2*xstride] = p1 + av_clip( (( p2 + ( ( p0 + q0 + 1 ) >> 1 ) ) >> 1) - p1, -tc0[i], tc0[i] );
                    tc++;
                }
                if( FFABS( q2 - q0 ) < beta ) {
                    pix[ xstride] = q1 + av_clip( (( q2 + ( ( p0 + q0 + 1 ) >> 1 ) ) >> 1) - q1, -tc0[i], tc0[i] );
                    tc++;
                }

                i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
                pix[-xstride] = av_clip_uint8( p0 + i_delta );    /* p0' */
                pix[0]        = av_clip_uint8( q0 - i_delta );    /* q0' */
            }
            pix += ystride;
        }
    }
}
2707
360024d31dab
H.264 deblocking optimizations (mmx for chroma_bS4 case, convert existing cases to 8-bit math)
lorenm
parents:
2696
diff
changeset
|
/* Vertical filtering of a horizontal luma edge: step across the edge by
 * `stride`, along it by 1. */
static void h264_v_loop_filter_luma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
{
    h264_loop_filter_luma_c(pix, stride, 1, alpha, beta, tc0);
}
2707
360024d31dab
H.264 deblocking optimizations (mmx for chroma_bS4 case, convert existing cases to 8-bit math)
lorenm
parents:
2696
diff
changeset
|
/* Horizontal filtering of a vertical luma edge: step across the edge by 1,
 * along it by `stride`. */
static void h264_h_loop_filter_luma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
{
    h264_loop_filter_luma_c(pix, 1, stride, alpha, beta, tc0);
}
2792 | |
2707
360024d31dab
H.264 deblocking optimizations (mmx for chroma_bS4 case, convert existing cases to 8-bit math)
lorenm
parents:
2696
diff
changeset
|
/**
 * H.264 "normal" (bS < 4) chroma deblocking of one 8-sample edge,
 * processed as 4 groups of 2 samples, one tc0[] entry per group.
 * Unlike luma, only p0/q0 are ever modified for chroma.
 */
static inline void h264_loop_filter_chroma_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0)
{
    int i, d;
    for( i = 0; i < 4; i++ ) {
        const int tc = tc0[i];
        if( tc <= 0 ) {
            pix += 2*ystride;
            continue;
        }
        for( d = 0; d < 2; d++ ) {
            /* samples across the edge: p1 p0 | q0 q1 */
            const int p0 = pix[-1*xstride];
            const int p1 = pix[-2*xstride];
            const int q0 = pix[0];
            const int q1 = pix[1*xstride];

            if( FFABS( p0 - q0 ) < alpha &&
                FFABS( p1 - p0 ) < beta &&
                FFABS( q1 - q0 ) < beta ) {

                /* delta clipped to [-tc, tc]; chroma uses tc0[i]+1 upstream,
                 * here it is received ready-made in tc0[] */
                int delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );

                pix[-xstride] = av_clip_uint8( p0 + delta );    /* p0' */
                pix[0] = av_clip_uint8( q0 - delta );    /* q0' */
            }
            pix += ystride;
        }
    }
}
2707
360024d31dab
H.264 deblocking optimizations (mmx for chroma_bS4 case, convert existing cases to 8-bit math)
lorenm
parents:
2696
diff
changeset
|
/* Vertical filtering of a horizontal chroma edge. */
static void h264_v_loop_filter_chroma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
{
    h264_loop_filter_chroma_c(pix, stride, 1, alpha, beta, tc0);
}
2707
360024d31dab
H.264 deblocking optimizations (mmx for chroma_bS4 case, convert existing cases to 8-bit math)
lorenm
parents:
2696
diff
changeset
|
/* Horizontal filtering of a vertical chroma edge. */
static void h264_h_loop_filter_chroma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
{
    h264_loop_filter_chroma_c(pix, 1, stride, alpha, beta, tc0);
}
2829 | |
2707
360024d31dab
H.264 deblocking optimizations (mmx for chroma_bS4 case, convert existing cases to 8-bit math)
lorenm
parents:
2696
diff
changeset
|
static inline void h264_loop_filter_chroma_intra_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta)
{
    /* H.264 strong (bS = 4, intra) chroma deblocking along an 8-sample
     * edge: p0/q0 are replaced by fixed 3-tap averages whenever the
     * alpha/beta gradient tests pass. */
    int pos;
    for( pos = 0; pos < 8; pos++, pix += ystride ) {
        const int p1 = pix[-2*xstride];
        const int p0 = pix[-1*xstride];
        const int q0 = pix[ 0        ];
        const int q1 = pix[ 1*xstride];

        /* guard clauses: leave the samples untouched unless the edge
         * gradient is small enough to be a blocking artifact */
        if( FFABS( p0 - q0 ) >= alpha ) continue;
        if( FFABS( p1 - p0 ) >= beta  ) continue;
        if( FFABS( q1 - q0 ) >= beta  ) continue;

        pix[-xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2;   /* p0' */
        pix[0]        = ( 2*q1 + q0 + p1 + 2 ) >> 2;   /* q0' */
    }
}
360024d31dab
H.264 deblocking optimizations (mmx for chroma_bS4 case, convert existing cases to 8-bit math)
lorenm
parents:
2696
diff
changeset
|
/* Vertical intra (bS = 4) filtering of a horizontal chroma edge. */
static void h264_v_loop_filter_chroma_intra_c(uint8_t *pix, int stride, int alpha, int beta)
{
    h264_loop_filter_chroma_intra_c(pix, stride, 1, alpha, beta);
}
360024d31dab
H.264 deblocking optimizations (mmx for chroma_bS4 case, convert existing cases to 8-bit math)
lorenm
parents:
2696
diff
changeset
|
/* Horizontal intra (bS = 4) filtering of a vertical chroma edge. */
static void h264_h_loop_filter_chroma_intra_c(uint8_t *pix, int stride, int alpha, int beta)
{
    h264_loop_filter_chroma_intra_c(pix, 1, stride, alpha, beta);
}
360024d31dab
H.264 deblocking optimizations (mmx for chroma_bS4 case, convert existing cases to 8-bit math)
lorenm
parents:
2696
diff
changeset
|
2857 |
static inline int pix_abs16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    /* Sum of absolute differences over a 16-pixel-wide block of height h.
     * The context pointer v is unused in the C reference version. */
    int sum = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 16; col++)
            sum += abs(pix1[col] - pix2[col]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return sum;
}
2885 | |
static int pix_abs16_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    /* SAD of a 16-wide block against the reference interpolated to
     * horizontal half-pel position (avg2 of each sample and its right
     * neighbour). */
    int sum = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 16; col++)
            sum += abs(pix1[col] - avg2(pix2[col], pix2[col + 1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return sum;
}
2913 | |
static int pix_abs16_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    /* SAD of a 16-wide block against the reference interpolated to
     * vertical half-pel position (avg2 of each sample and the one on the
     * next line, tracked via pix3). */
    uint8_t *pix3 = pix2 + line_size;
    int sum = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 16; col++)
            sum += abs(pix1[col] - avg2(pix2[col], pix3[col]));
        pix1 += line_size;
        pix2 += line_size;
        pix3 += line_size;
    }
    return sum;
}
2943 | |
static int pix_abs16_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    /* SAD of a 16-wide block against the reference interpolated to the
     * diagonal half-pel position (avg4 of the 2x2 neighbourhood). */
    uint8_t *pix3 = pix2 + line_size;
    int sum = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 16; col++)
            sum += abs(pix1[col] - avg4(pix2[col], pix2[col + 1], pix3[col], pix3[col + 1]));
        pix1 += line_size;
        pix2 += line_size;
        pix3 += line_size;
    }
    return sum;
}
2973 | |
static inline int pix_abs8_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    /* Sum of absolute differences over an 8-pixel-wide block of height h.
     * The context pointer v is unused in the C reference version. */
    int sum = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++)
            sum += abs(pix1[col] - pix2[col]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return sum;
}
2993 | |
static int pix_abs8_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    /* SAD of an 8-wide block against the horizontally half-pel
     * interpolated reference (avg2 of horizontal pairs). */
    int sum = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++)
            sum += abs(pix1[col] - avg2(pix2[col], pix2[col + 1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return sum;
}
3013 | |
static int pix_abs8_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    /* SAD of an 8-wide block against the vertically half-pel
     * interpolated reference (avg2 of vertical pairs via pix3). */
    uint8_t *pix3 = pix2 + line_size;
    int sum = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++)
            sum += abs(pix1[col] - avg2(pix2[col], pix3[col]));
        pix1 += line_size;
        pix2 += line_size;
        pix3 += line_size;
    }
    return sum;
}
3035 | |
static int pix_abs8_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    /* SAD of an 8-wide block against the diagonally half-pel
     * interpolated reference (avg4 of each 2x2 neighbourhood). */
    uint8_t *pix3 = pix2 + line_size;
    int sum = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++)
            sum += abs(pix1[col] - avg4(pix2[col], pix2[col + 1], pix3[col], pix3[col + 1]));
        pix1 += line_size;
        pix2 += line_size;
        pix3 += line_size;
    }
    return sum;
}
3057 | |
/**
 * Noise-preserving sum of squared errors, 16-wide variant: plain SSE
 * (score1) plus a weighted penalty for differences in local 2x2 gradient
 * structure (score2), so noise-like texture is not over-penalized.
 * v may be NULL; then a fixed weight of 8 is used instead of
 * avctx->nsse_weight.
 */
static int nsse16_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
    MpegEncContext *c = v;
    int score1=0;
    int score2=0;
    int x,y;

    for(y=0; y<h; y++){
        for(x=0; x<16; x++){
            score1+= (s1[x  ] - s2[x  ])*(s1[x  ] - s2[x  ]);
        }
        if(y+1<h){
            /* difference of 2x2 cross-gradients between the two images */
            for(x=0; x<15; x++){
                score2+= FFABS(  s1[x  ] - s1[x  +stride]
                               - s1[x+1] + s1[x+1+stride])
                        -FFABS(  s2[x  ] - s2[x  +stride]
                               - s2[x+1] + s2[x+1+stride]);
            }
        }
        s1+= stride;
        s2+= stride;
    }

    if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
    else  return score1 + FFABS(score2)*8;
}
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3083 |
/**
 * Noise-preserving sum of squared errors, 8-wide variant; see nsse16_c.
 */
static int nsse8_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
    MpegEncContext *c = v;
    int score1=0;
    int score2=0;
    int x,y;

    for(y=0; y<h; y++){
        for(x=0; x<8; x++){
            score1+= (s1[x  ] - s2[x  ])*(s1[x  ] - s2[x  ]);
        }
        if(y+1<h){
            /* difference of 2x2 cross-gradients between the two images */
            for(x=0; x<7; x++){
                score2+= FFABS(  s1[x  ] - s1[x  +stride]
                               - s1[x+1] + s1[x+1+stride])
                        -FFABS(  s2[x  ] - s2[x  +stride]
                               - s2[x+1] + s2[x+1+stride]);
            }
        }
        s1+= stride;
        s2+= stride;
    }

    if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
    else  return score1 + FFABS(score2)*8;
}
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3109 |
1784 | 3110 static int try_8x8basis_c(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale){ |
3111 int i; | |
3112 unsigned int sum=0; | |
3113 | |
3114 for(i=0; i<8*8; i++){ | |
3115 int b= rem[i] + ((basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT)); | |
3116 int w= weight[i]; | |
3117 b>>= RECON_SHIFT; | |
3118 assert(-512<b && b<512); | |
3119 | |
3120 sum += (w*b)*(w*b)>>4; | |
3121 } | |
3122 return sum>>2; | |
3123 } | |
3124 | |
3125 static void add_8x8basis_c(int16_t rem[64], int16_t basis[64], int scale){ | |
3126 int i; | |
3127 | |
3128 for(i=0; i<8*8; i++){ | |
3129 rem[i] += (basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT); | |
2967 | 3130 } |
1784 | 3131 } |
3132 | |
1100 | 3133 /** |
3134 * permutes an 8x8 block. | |
1101 | 3135 * @param block the block which will be permuted according to the given permutation vector |
1100 | 3136 * @param permutation the permutation vector |
3137 * @param last the last non zero coefficient in scantable order, used to speed the permutation up | |
2967 | 3138 * @param scantable the used scantable, this is only used to speed the permutation up, the block is not |
1101 | 3139 * (inverse) permutated to scantable order! |
1100 | 3140 */ |
1064 | 3141 void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last) |
174
ac5075a55488
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
88
diff
changeset
|
3142 { |
764 | 3143 int i; |
945 | 3144 DCTELEM temp[64]; |
2967 | 3145 |
764 | 3146 if(last<=0) return; |
5129 | 3147 //if(permutation[1]==1) return; //FIXME it is ok but not clean and might fail for some permutations |
174
ac5075a55488
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
88
diff
changeset
|
3148 |
764 | 3149 for(i=0; i<=last; i++){ |
3150 const int j= scantable[i]; | |
3151 temp[j]= block[j]; | |
3152 block[j]=0; | |
3153 } | |
2967 | 3154 |
764 | 3155 for(i=0; i<=last; i++){ |
3156 const int j= scantable[i]; | |
3157 const int perm_j= permutation[j]; | |
3158 block[perm_j]= temp[j]; | |
3159 } | |
174
ac5075a55488
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
88
diff
changeset
|
3160 } |
34 | 3161 |
/* Comparison function for FF_CMP_ZERO: every candidate scores 0. */
static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h){
    return 0;
}
3165 | |
/**
 * Fills cmp[0..4] (one entry per block-size level) with the comparison
 * functions selected by `type` from the DSPContext; only the low byte of
 * `type` chooses the metric.
 */
void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type){
    int i;

    memset(cmp, 0, sizeof(void*)*5);

    for(i=0; i<5; i++){
        switch(type&0xFF){
        case FF_CMP_SAD:
            cmp[i]= c->sad[i];
            break;
        case FF_CMP_SATD:
            cmp[i]= c->hadamard8_diff[i];
            break;
        case FF_CMP_SSE:
            cmp[i]= c->sse[i];
            break;
        case FF_CMP_DCT:
            cmp[i]= c->dct_sad[i];
            break;
        case FF_CMP_DCT264:
            cmp[i]= c->dct264_sad[i];
            break;
        case FF_CMP_DCTMAX:
            cmp[i]= c->dct_max[i];
            break;
        case FF_CMP_PSNR:
            cmp[i]= c->quant_psnr[i];
            break;
        case FF_CMP_BIT:
            cmp[i]= c->bit[i];
            break;
        case FF_CMP_RD:
            cmp[i]= c->rd[i];
            break;
        case FF_CMP_VSAD:
            cmp[i]= c->vsad[i];
            break;
        case FF_CMP_VSSE:
            cmp[i]= c->vsse[i];
            break;
        case FF_CMP_ZERO:
            cmp[i]= zero_cmp;
            break;
        case FF_CMP_NSSE:
            cmp[i]= c->nsse[i];
            break;
#ifdef CONFIG_SNOW_ENCODER
        /* wavelet metrics only exist when the snow encoder is built */
        case FF_CMP_W53:
            cmp[i]= c->w53[i];
            break;
        case FF_CMP_W97:
            cmp[i]= c->w97[i];
            break;
#endif
        default:
            av_log(NULL, AV_LOG_ERROR,"internal error in cmp function selection\n");
        }
    }
}
3225 | |
/**
 * memset(blocks, 0, sizeof(DCTELEM)*6*64)
 */
static void clear_blocks_c(DCTELEM *blocks)
{
    /* zero the six 8x8 coefficient blocks of one macroblock */
    memset(blocks, 0, sizeof(DCTELEM)*6*64);
}
3233 | |
static void add_bytes_c(uint8_t *dst, uint8_t *src, int w){
    /* dst[i] += src[i] (mod 256) for i in [0, w); the main loop is
     * unrolled eight-wide, with a scalar tail for the leftover bytes. */
    int n = 0;

    while (n + 7 < w) {
        dst[n  ] += src[n  ];
        dst[n+1] += src[n+1];
        dst[n+2] += src[n+2];
        dst[n+3] += src[n+3];
        dst[n+4] += src[n+4];
        dst[n+5] += src[n+5];
        dst[n+6] += src[n+6];
        dst[n+7] += src[n+7];
        n += 8;
    }
    while (n < w) {
        dst[n] += src[n];
        n++;
    }
}
3249 | |
static void diff_bytes_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
    /* dst[i] = src1[i] - src2[i] (mod 256) for i in [0, w); unrolled
     * eight-wide with a scalar tail, mirroring add_bytes_c. */
    int n = 0;

    while (n + 7 < w) {
        dst[n  ] = src1[n  ]-src2[n  ];
        dst[n+1] = src1[n+1]-src2[n+1];
        dst[n+2] = src1[n+2]-src2[n+2];
        dst[n+3] = src1[n+3]-src2[n+3];
        dst[n+4] = src1[n+4]-src2[n+4];
        dst[n+5] = src1[n+5]-src2[n+5];
        dst[n+6] = src1[n+6]-src2[n+6];
        dst[n+7] = src1[n+7]-src2[n+7];
        n += 8;
    }
    while (n < w) {
        dst[n] = src1[n]-src2[n];
        n++;
    }
}
3265 | |
static void sub_hfyu_median_prediction_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w, int *left, int *left_top){
    /* HuffYUV median-prediction residual: predict each src2 sample from
     * its left neighbour, the sample above (src1) and the one above-left,
     * and store src2 - prediction in dst.  *left/*left_top carry the
     * running left and above-left values across calls. */
    uint8_t prev = *left;
    uint8_t diag = *left_top;
    int i;

    for(i=0; i<w; i++){
        const int pred = mid_pred(prev, src1[i], (prev + src1[i] - diag)&0xFF);
        diag = src1[i];
        prev = src2[i];
        dst[i] = prev - pred;
    }

    *left     = prev;
    *left_top = diag;
}
3283 | |
/* In-place butterfly helpers for the 8x8 Hadamard transforms below:
 * BUTTERFLY2 writes sum/difference of two inputs into two outputs,
 * BUTTERFLY1 transforms two lvalues in place. */
#define BUTTERFLY2(o1,o2,i1,i2) \
    o1= (i1)+(i2);\
    o2= (i1)-(i2);

#define BUTTERFLY1(x,y) \
{\
    int a,b;\
    a= x;\
    b= y;\
    x= a+b;\
    y= a-b;\
}

/* |x+y| + |x-y|: the final butterfly stage fused with the
 * absolute-value accumulation. */
#define BUTTERFLYA(x,y) (FFABS((x)+(y)) + FFABS((x)-(y)))
936 | 3298 |
/**
 * SATD: sum of absolute 8x8 Hadamard-transform coefficients of the
 * difference src - dst.  Only h == 8 is supported (asserted).
 */
static int hadamard8_diff8x8_c(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){
    int i;
    int temp[64];
    int sum=0;

    assert(h==8);

    for(i=0; i<8; i++){
        //FIXME try pointer walks
        /* horizontal 8-point Hadamard transform of row i of (src - dst) */
        BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0]-dst[stride*i+0],src[stride*i+1]-dst[stride*i+1]);
        BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2]-dst[stride*i+2],src[stride*i+3]-dst[stride*i+3]);
        BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4]-dst[stride*i+4],src[stride*i+5]-dst[stride*i+5]);
        BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6]-dst[stride*i+6],src[stride*i+7]-dst[stride*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
        BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+5], temp[8*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
        BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
    }

    for(i=0; i<8; i++){
        /* vertical transform of column i; the last butterfly stage is
         * folded into the absolute-value accumulation (BUTTERFLYA) */
        BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
        BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
        BUTTERFLY1(temp[8*6+i], temp[8*7+i]);

        BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
        BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
        BUTTERFLY1(temp[8*5+i], temp[8*7+i]);

        sum +=
             BUTTERFLYA(temp[8*0+i], temp[8*4+i])
            +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
            +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
            +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
    }
#if 0
    static int maxi=0;
    if(sum>maxi){
        maxi=sum;
        printf("MAX:%d\n", maxi);
    }
#endif
    return sum;
}
3350 | |
/**
 * Intra SATD: sum of absolute 8x8 Hadamard-transform coefficients of the
 * source block itself (no reference), with the DC contribution removed so
 * a flat block scores 0.  Only h == 8 is supported (asserted); dummy is
 * ignored.
 */
static int hadamard8_intra8x8_c(/*MpegEncContext*/ void *s, uint8_t *src, uint8_t *dummy, int stride, int h){
    int i;
    int temp[64];
    int sum=0;

    assert(h==8);

    for(i=0; i<8; i++){
        //FIXME try pointer walks
        /* horizontal 8-point Hadamard transform of row i of src */
        BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0],src[stride*i+1]);
        BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2],src[stride*i+3]);
        BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4],src[stride*i+5]);
        BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6],src[stride*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
        BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+5], temp[8*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
        BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
    }

    for(i=0; i<8; i++){
        /* vertical transform of column i; the last butterfly stage is
         * folded into the absolute-value accumulation (BUTTERFLYA) */
        BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
        BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
        BUTTERFLY1(temp[8*6+i], temp[8*7+i]);

        BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
        BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
        BUTTERFLY1(temp[8*5+i], temp[8*7+i]);

        sum +=
             BUTTERFLYA(temp[8*0+i], temp[8*4+i])
            +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
            +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
            +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
    }

    /* temp[8*0] + temp[8*4] holds the DC term at this point; drop it */
    sum -= FFABS(temp[8*0] + temp[8*4]); // -mean

    return sum;
}
3398 | |
/**
 * DCT-domain SAD: forward-transform the difference between two 8x8 blocks
 * and return the sum of the absolute DCT coefficients.
 */
static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    /* SSE fdct implementations require 16-byte alignment */
    DECLARE_ALIGNED_16(uint64_t, aligned_temp[sizeof(DCTELEM)*64/8]);
    DCTELEM * const temp= (DCTELEM*)aligned_temp;

    assert(h==8);

    s->dsp.diff_pixels(temp, src1, src2, stride);
    s->dsp.fdct(temp);
    return s->dsp.sum_abs_dctelem(temp);
}
3410 | |
#ifdef CONFIG_GPL
/* One-dimensional 8-point integer transform as used by H.264 high profile
   (taken from x264).  SRC(x)/DST(x,v) are #defined by the caller to select
   row or column access, so the same macro serves both passes.  Pure
   add/sub/shift arithmetic, so it is exact in integers. */
#define DCT8_1D {\
    const int s07 = SRC(0) + SRC(7);\
    const int s16 = SRC(1) + SRC(6);\
    const int s25 = SRC(2) + SRC(5);\
    const int s34 = SRC(3) + SRC(4);\
    const int a0 = s07 + s34;\
    const int a1 = s16 + s25;\
    const int a2 = s07 - s34;\
    const int a3 = s16 - s25;\
    const int d07 = SRC(0) - SRC(7);\
    const int d16 = SRC(1) - SRC(6);\
    const int d25 = SRC(2) - SRC(5);\
    const int d34 = SRC(3) - SRC(4);\
    const int a4 = d16 + d25 + (d07 + (d07>>1));\
    const int a5 = d07 - d34 - (d25 + (d25>>1));\
    const int a6 = d07 + d34 - (d16 + (d16>>1));\
    const int a7 = d16 - d25 + (d34 + (d34>>1));\
    DST(0,  a0 + a1     ) ;\
    DST(1,  a4 + (a7>>2)) ;\
    DST(2,  a2 + (a3>>1)) ;\
    DST(3,  a5 + (a6>>2)) ;\
    DST(4,  a0 - a1     ) ;\
    DST(5,  a6 - (a5>>2)) ;\
    DST(6, (a2>>1) - a3 ) ;\
    DST(7, (a4>>2) - a7 ) ;\
}

/**
 * SAD of the H.264 8x8 transform coefficients of the difference between
 * two 8x8 blocks: rows are transformed in place, then columns, with the
 * column pass accumulating absolute values directly via DST().
 */
static int dct264_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    DCTELEM dct[8][8];
    int i;
    int sum=0;

    s->dsp.diff_pixels(dct[0], src1, src2, stride);

    /* horizontal pass: transform each row in place */
#define SRC(x) dct[i][x]
#define DST(x,v) dct[i][x]= v
    for( i = 0; i < 8; i++ )
        DCT8_1D
#undef SRC
#undef DST

    /* vertical pass: transform each column, summing |coefficient| */
#define SRC(x) dct[x][i]
#define DST(x,v) sum += FFABS(v)
    for( i = 0; i < 8; i++ )
        DCT8_1D
#undef SRC
#undef DST
    return sum;
}
#endif
3463 |
2382 | 3464 static int dct_max8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){ |
3465 MpegEncContext * const s= (MpegEncContext *)c; | |
3089 | 3466 DECLARE_ALIGNED_8(uint64_t, aligned_temp[sizeof(DCTELEM)*64/8]); |
2382 | 3467 DCTELEM * const temp= (DCTELEM*)aligned_temp; |
3468 int sum=0, i; | |
2967 | 3469 |
2382 | 3470 assert(h==8); |
3471 | |
3472 s->dsp.diff_pixels(temp, src1, src2, stride); | |
3473 s->dsp.fdct(temp); | |
3474 | |
3475 for(i=0; i<64; i++) | |
4001 | 3476 sum= FFMAX(sum, FFABS(temp[i])); |
2967 | 3477 |
2382 | 3478 return sum; |
3479 } | |
3480 | |
/**
 * Quantization-error comparison: run the residual of two 8x8 blocks through
 * quantize -> dequantize -> IDCT and return the squared error this round
 * trip introduced relative to the untouched residual.
 */
static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    /* one aligned buffer split in two: [0..63] working copy, [64..127] backup */
    DECLARE_ALIGNED_8 (uint64_t, aligned_temp[sizeof(DCTELEM)*64*2/8]);
    DCTELEM * const temp= (DCTELEM*)aligned_temp;
    DCTELEM * const bak = ((DCTELEM*)aligned_temp)+64;
    int sum=0, i;

    assert(h==8);
    s->mb_intra=0; /* force inter quantization path */

    s->dsp.diff_pixels(temp, src1, src2, stride);

    memcpy(bak, temp, 64*sizeof(DCTELEM));

    /* NOTE(review): no explicit fdct here, so fast_dct_quantize presumably
       includes the forward transform -- confirm before reusing this pattern. */
    s->block_last_index[0/*FIXME*/]= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
    s->dct_unquantize_inter(s, temp, 0, s->qscale);
    simple_idct(temp); //FIXME

    for(i=0; i<64; i++)
        sum+= (temp[i]-bak[i])*(temp[i]-bak[i]);

    return sum;
}
3504 | |
/**
 * Rate-distortion comparison for one 8x8 block: quantizes the residual,
 * counts the VLC bits needed to code it, reconstructs the block
 * (dequantize + IDCT onto a copy of src2) and returns
 * distortion + lambda-weighted bits, with lambda ~ qscale^2 * 109/128.
 */
static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    const uint8_t *scantable= s->intra_scantable.permutated;
    DECLARE_ALIGNED_8 (uint64_t, aligned_temp[sizeof(DCTELEM)*64/8]);
    DECLARE_ALIGNED_8 (uint64_t, aligned_bak[stride]);
    DCTELEM * const temp= (DCTELEM*)aligned_temp;
    uint8_t * const bak= (uint8_t*)aligned_bak;
    int i, last, run, bits, level, distoration, start_i;
    const int esc_length= s->ac_esc_length;
    uint8_t * length;
    uint8_t * last_length;

    assert(h==8);

    /* copy the 8x8 prediction (src2) so we can reconstruct on top of it;
       two 32-bit stores cover the 8 bytes of each row */
    for(i=0; i<8; i++){
        ((uint32_t*)(bak + i*stride))[0]= ((uint32_t*)(src2 + i*stride))[0];
        ((uint32_t*)(bak + i*stride))[1]= ((uint32_t*)(src2 + i*stride))[1];
    }

    s->dsp.diff_pixels(temp, src1, src2, stride);

    s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);

    bits=0;

    if (s->mb_intra) {
        start_i = 1; /* DC is coded separately for intra */
        length     = s->intra_ac_vlc_length;
        last_length= s->intra_ac_vlc_last_length;
        bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
    } else {
        start_i = 0;
        length     = s->inter_ac_vlc_length;
        last_length= s->inter_ac_vlc_last_length;
    }

    if(last>=start_i){
        /* walk the zigzag scan accumulating run/level code lengths */
        run=0;
        for(i=start_i; i<last; i++){
            int j= scantable[i];
            level= temp[j];

            if(level){
                level+=64; /* bias so level indexes the 0..127 table range */
                if((level&(~127)) == 0){
                    bits+= length[UNI_AC_ENC_INDEX(run, level)];
                }else
                    bits+= esc_length;
                run=0;
            }else
                run++;
        }
        i= scantable[last];

        level= temp[i] + 64;

        assert(level - 64); /* the last coded coefficient must be nonzero */

        if((level&(~127)) == 0){
            bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
        }else
            bits+= esc_length;

    }

    /* reconstruct: dequantize and add the IDCT output onto the prediction */
    if(last>=0){
        if(s->mb_intra)
            s->dct_unquantize_intra(s, temp, 0, s->qscale);
        else
            s->dct_unquantize_inter(s, temp, 0, s->qscale);
    }

    s->dsp.idct_add(bak, stride, temp);

    distoration= s->dsp.sse[1](NULL, bak, src1, stride, 8);

    return distoration + ((bits*s->qscale*s->qscale*109 + 64)>>7);
}
3583 | |
/**
 * Bit-cost comparison for one 8x8 block: quantize the residual between
 * src1 and src2 and return the number of VLC bits needed to code it
 * (AC run/level lengths, plus the luma DC length for intra blocks).
 */
static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    const uint8_t *scantable= s->intra_scantable.permutated;
    DECLARE_ALIGNED_8 (uint64_t, aligned_temp[sizeof(DCTELEM)*64/8]);
    DCTELEM * const temp= (DCTELEM*)aligned_temp;
    int i, last, run, bits, level, start_i;
    const int esc_length= s->ac_esc_length;
    uint8_t * length;
    uint8_t * last_length;

    assert(h==8);

    s->dsp.diff_pixels(temp, src1, src2, stride);

    s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);

    bits=0;

    if (s->mb_intra) {
        start_i = 1; /* DC is coded separately for intra */
        length     = s->intra_ac_vlc_length;
        last_length= s->intra_ac_vlc_last_length;
        bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
    } else {
        start_i = 0;
        length     = s->inter_ac_vlc_length;
        last_length= s->inter_ac_vlc_last_length;
    }

    if(last>=start_i){
        /* walk the zigzag scan accumulating run/level code lengths */
        run=0;
        for(i=start_i; i<last; i++){
            int j= scantable[i];
            level= temp[j];

            if(level){
                level+=64; /* bias so level indexes the 0..127 table range */
                if((level&(~127)) == 0){
                    bits+= length[UNI_AC_ENC_INDEX(run, level)];
                }else
                    bits+= esc_length;
                run=0;
            }else
                run++;
        }
        i= scantable[last];

        level= temp[i] + 64;

        assert(level - 64); /* the last coded coefficient must be nonzero */

        if((level&(~127)) == 0){
            bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
        }else
            bits+= esc_length;
    }

    return bits;
}
3643 | |
/**
 * Vertical SAD within a single 16-pixel-wide block: sums |row - row above|
 * over all columns.  No reference block; the dummy argument is ignored.
 */
static int vsad_intra16_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){
    int sum = 0;
    int x, y;

    for (y = 1; y < h; y++) {
        for (x = 0; x < 16; x++) {
            const int d = s[x] - s[x + stride];
            sum += d >= 0 ? d : -d;
        }
        s += stride;
    }

    return sum;
}
3658 | |
/**
 * Vertical SAD of the residual (s1 - s2) for a 16-pixel-wide block:
 * measures how much the prediction error changes from one row to the next.
 */
static int vsad16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
    int sum = 0;
    int x, y;

    for (y = 1; y < h; y++) {
        for (x = 0; x < 16; x++) {
            const int d = (s1[x] - s2[x]) - (s1[x + stride] - s2[x + stride]);
            sum += d >= 0 ? d : -d;
        }
        s1 += stride;
        s2 += stride;
    }

    return sum;
}
3673 | |
#define SQ(a) ((a)*(a))
/**
 * Vertical SSE within a single 16-pixel-wide block: sums the squared
 * difference between each row and the row above it.  No reference block;
 * the dummy argument is ignored.
 */
static int vsse_intra16_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){
    int sum = 0;
    int x, y;

    for (y = 1; y < h; y++) {
        for (x = 0; x < 16; x++) {
            const int d = s[x] - s[x + stride];
            sum += d * d;
        }
        s += stride;
    }

    return sum;
}
3689 | |
/**
 * Vertical SSE of the residual (s1 - s2) for a 16-pixel-wide block:
 * squared change of the prediction error between adjacent rows.
 */
static int vsse16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
    int sum = 0;
    int x, y;

    for (y = 1; y < h; y++) {
        for (x = 0; x < 16; x++) {
            const int d = (s1[x] - s2[x]) - (s1[x + stride] - s2[x + stride]);
            sum += d * d;
        }
        s1 += stride;
        s2 += stride;
    }

    return sum;
}
3704 | |
/**
 * Sum of squared differences between an int8 array and an int16 array of
 * the same length.
 */
static int ssd_int8_vs_int16_c(const int8_t *pix1, const int16_t *pix2,
                               int size){
    int sum = 0;
    int i;
    for (i = 0; i < size; i++) {
        const int d = pix1[i] - pix2[i];
        sum += d * d;
    }
    return sum;
}
3713 | |
/* Expand 16x16 versions of the 8x8 comparison functions above.
   WARPER8_16_SQ is defined earlier in this file; presumably it applies the
   8x8 function to the four quadrants and combines the results -- see the
   macro definition for the exact combination. */
WARPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c)
WARPER8_16_SQ(hadamard8_intra8x8_c, hadamard8_intra16_c)
WARPER8_16_SQ(dct_sad8x8_c, dct_sad16_c)
#ifdef CONFIG_GPL
WARPER8_16_SQ(dct264_sad8x8_c, dct264_sad16_c)
#endif
WARPER8_16_SQ(dct_max8x8_c, dct_max16_c)
WARPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c)
WARPER8_16_SQ(rd8x8_c, rd16_c)
WARPER8_16_SQ(bit8x8_c, bit16_c)
3723 WARPER8_16_SQ(bit8x8_c, bit16_c) | |
936 | 3724 |
3568
945caa35ee9a
sse and 3dnow implementations of float->int conversion and mdct windowing.
lorenm
parents:
3536
diff
changeset
|
/**
 * In-place element-wise multiply: dst[i] *= src[i] for i in [0, len).
 */
static void vector_fmul_c(float *dst, const float *src, int len){
    int i;
    for (i = 0; i < len; i++)
        dst[i] = dst[i] * src[i];
}
945caa35ee9a
sse and 3dnow implementations of float->int conversion and mdct windowing.
lorenm
parents:
3536
diff
changeset
|
3730 |
945caa35ee9a
sse and 3dnow implementations of float->int conversion and mdct windowing.
lorenm
parents:
3536
diff
changeset
|
/**
 * Element-wise multiply with the second operand read back to front:
 * dst[i] = src0[i] * src1[len-1-i].
 */
static void vector_fmul_reverse_c(float *dst, const float *src0, const float *src1, int len){
    int i;
    for (i = 0; i < len; i++)
        dst[i] = src0[i] * src1[len - 1 - i];
}
945caa35ee9a
sse and 3dnow implementations of float->int conversion and mdct windowing.
lorenm
parents:
3536
diff
changeset
|
3737 |
945caa35ee9a
sse and 3dnow implementations of float->int conversion and mdct windowing.
lorenm
parents:
3536
diff
changeset
|
/**
 * Multiply-accumulate with a scalar bias and strided output:
 * dst[i*step] = src0[i]*src1[i] + src2[i] + src3.
 */
void ff_vector_fmul_add_add_c(float *dst, const float *src0, const float *src1, const float *src2, int src3, int len, int step){
    int i;
    for (i = 0; i < len; i++) {
        const float acc = src0[i] * src1[i] + src2[i];
        dst[i * step] = acc + src3;
    }
}
945caa35ee9a
sse and 3dnow implementations of float->int conversion and mdct windowing.
lorenm
parents:
3536
diff
changeset
|
3743 |
945caa35ee9a
sse and 3dnow implementations of float->int conversion and mdct windowing.
lorenm
parents:
3536
diff
changeset
|
/**
 * Convert "magic biased" floats to signed 16-bit samples via bit tricks.
 * NOTE(review): this assumes the caller has pre-scaled/offset the floats so
 * their IEEE-754 bit pattern sits near 0x43c08000 (i.e. value ~384.0 plus
 * the sample scaled into the mantissa) -- confirm against callers before
 * reusing.  Out-of-range inputs appear to saturate via the sign-of-compare
 * trick below.
 */
void ff_float_to_int16_c(int16_t *dst, const float *src, int len){
    int i;
    for (i = 0; i < len; i++) {
        /* reinterpret the float bits as an integer (type pun) */
        int_fast32_t tmp = ((int32_t*)src)[i];
        if (tmp & 0xf0000) {
            /* outside the expected exponent window: clamp to all-zeros or
               all-ones using the sign bit of the comparison */
            tmp = (0x43c0ffff - tmp) >> 31;
            // is this faster on some gcc/cpu combinations?
            // if(tmp > 0x43c0ffff) tmp = 0xFFFF;
            // else                 tmp = 0;
        }
        dst[i] = tmp - 0x8000; /* recenter the biased value to signed 16 bit */
    }
}
945caa35ee9a
sse and 3dnow implementations of float->int conversion and mdct windowing.
lorenm
parents:
3536
diff
changeset
|
3757 |
/* XXX: those functions should be suppressed ASAP when all IDCTs are
   converted */
/* Reference (jpeg) IDCT wrappers: inverse-transform the block, then store
   (put) or accumulate (add) the clamped result into the picture. */
static void ff_jref_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct (block);
    put_pixels_clamped_c(block, dest, line_size);
}
static void ff_jref_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct (block);
    add_pixels_clamped_c(block, dest, line_size);
}
3770 | |
/* 4x4 reduced-resolution IDCT wrappers, used for lowres==1 decoding. */
static void ff_jref_idct4_put(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct4 (block);
    put_pixels_clamped4_c(block, dest, line_size);
}
static void ff_jref_idct4_add(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct4 (block);
    add_pixels_clamped4_c(block, dest, line_size);
}
3781 | |
/* 2x2 reduced-resolution IDCT wrappers, used for lowres==2 decoding. */
static void ff_jref_idct2_put(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct2 (block);
    put_pixels_clamped2_c(block, dest, line_size);
}
static void ff_jref_idct2_add(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct2 (block);
    add_pixels_clamped2_c(block, dest, line_size);
}
3792 | |
/* 1x1 "IDCT" for lowres==3: only the DC coefficient survives, so scale it
   (rounded >>3) and clamp through the crop table. */
static void ff_jref_idct1_put(uint8_t *dest, int line_size, DCTELEM *block)
{
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;

    dest[0] = cm[(block[0] + 4)>>3];
}
static void ff_jref_idct1_add(uint8_t *dest, int line_size, DCTELEM *block)
{
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;

    dest[0] = cm[dest[0] + ((block[0] + 4)>>3)];
}
3805 | |
/* Do-nothing function, used e.g. as the prefetch hook on CPUs that have no
   prefetch implementation. */
static void just_return(void *mem av_unused, int stride av_unused, int h av_unused) { return; }
/* init static data */
void dsputil_static_init(void)
{
    int i;

    /* ff_cropTbl: identity in the middle, clamped to 0 below and 255 above,
       so out-of-range pixel values can be fixed with a single lookup */
    for(i=0;i<256;i++) ff_cropTbl[i + MAX_NEG_CROP] = i;
    for(i=0;i<MAX_NEG_CROP;i++) {
        ff_cropTbl[i] = 0;
        ff_cropTbl[i + MAX_NEG_CROP + 256] = 255;
    }

    /* squares of signed differences, indexed by (diff + 256) */
    for(i=0;i<512;i++) {
        ff_squareTbl[i] = (i - 256) * (i - 256);
    }

    /* inverse zigzag scan, stored +1 so that 0 can mean "not present" */
    for(i=0; i<64; i++) inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1;
}
0 | 3825 |
4281
de525a2b41db
ff_check_alignment to warn the user about a missaligned stack
michael
parents:
4240
diff
changeset
|
/**
 * Verify that the compiler actually 16-byte aligns stack variables.
 * @return 0 if the stack is properly aligned, -1 otherwise (logging a
 *         warning once on SIMD-enabled builds, where SSE/AltiVec code
 *         would crash or be very slow on a misaligned stack).
 */
int ff_check_alignment(void){
    static int did_fail=0; /* warn only once per process */
    DECLARE_ALIGNED_16(int, aligned);

    if((long)&aligned & 15){
        if(!did_fail){
#if defined(HAVE_MMX) || defined(HAVE_ALTIVEC)
            av_log(NULL, AV_LOG_ERROR,
                "Compiler did not align stack variables. Libavcodec has been miscompiled\n"
                "and may be very slow or crash. This is not a bug in libavcodec,\n"
                "but in the compiler. You may try recompiling using gcc >= 4.2.\n"
                "Do not report crashes to FFmpeg developers.\n");
#endif
            did_fail=1;
        }
        return -1;
    }
    return 0;
}
861 | 3845 |
1201 | 3846 void dsputil_init(DSPContext* c, AVCodecContext *avctx) |
3847 { | |
3848 int i; | |
0 | 3849 |
4281
de525a2b41db
ff_check_alignment to warn the user about a missaligned stack
michael
parents:
4240
diff
changeset
|
3850 ff_check_alignment(); |
de525a2b41db
ff_check_alignment to warn the user about a missaligned stack
michael
parents:
4240
diff
changeset
|
3851 |
1092 | 3852 #ifdef CONFIG_ENCODERS |
1567 | 3853 if(avctx->dct_algo==FF_DCT_FASTINT) { |
1092 | 3854 c->fdct = fdct_ifast; |
2979 | 3855 c->fdct248 = fdct_ifast248; |
2967 | 3856 } |
1567 | 3857 else if(avctx->dct_algo==FF_DCT_FAAN) { |
1557 | 3858 c->fdct = ff_faandct; |
2979 | 3859 c->fdct248 = ff_faandct248; |
2967 | 3860 } |
1567 | 3861 else { |
1092 | 3862 c->fdct = ff_jpeg_fdct_islow; //slow/accurate/default |
2979 | 3863 c->fdct248 = ff_fdct248_islow; |
1567 | 3864 } |
1092 | 3865 #endif //CONFIG_ENCODERS |
3866 | |
2256 | 3867 if(avctx->lowres==1){ |
5064 | 3868 if(avctx->idct_algo==FF_IDCT_INT || avctx->idct_algo==FF_IDCT_AUTO || !ENABLE_H264_DECODER){ |
2272
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
2259
diff
changeset
|
3869 c->idct_put= ff_jref_idct4_put; |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
2259
diff
changeset
|
3870 c->idct_add= ff_jref_idct4_add; |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
2259
diff
changeset
|
3871 }else{ |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
2259
diff
changeset
|
3872 c->idct_put= ff_h264_lowres_idct_put_c; |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
2259
diff
changeset
|
3873 c->idct_add= ff_h264_lowres_idct_add_c; |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
2259
diff
changeset
|
3874 } |
2256 | 3875 c->idct = j_rev_dct4; |
1092 | 3876 c->idct_permutation_type= FF_NO_IDCT_PERM; |
2257 | 3877 }else if(avctx->lowres==2){ |
3878 c->idct_put= ff_jref_idct2_put; | |
3879 c->idct_add= ff_jref_idct2_add; | |
3880 c->idct = j_rev_dct2; | |
3881 c->idct_permutation_type= FF_NO_IDCT_PERM; | |
2259 | 3882 }else if(avctx->lowres==3){ |
3883 c->idct_put= ff_jref_idct1_put; | |
3884 c->idct_add= ff_jref_idct1_add; | |
3885 c->idct = j_rev_dct1; | |
3886 c->idct_permutation_type= FF_NO_IDCT_PERM; | |
2256 | 3887 }else{ |
3888 if(avctx->idct_algo==FF_IDCT_INT){ | |
3889 c->idct_put= ff_jref_idct_put; | |
3890 c->idct_add= ff_jref_idct_add; | |
3891 c->idct = j_rev_dct; | |
3892 c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM; | |
5007 | 3893 }else if((ENABLE_VP3_DECODER || ENABLE_VP5_DECODER || ENABLE_VP6_DECODER || ENABLE_THEORA_DECODER ) && |
3894 avctx->idct_algo==FF_IDCT_VP3){ | |
2693 | 3895 c->idct_put= ff_vp3_idct_put_c; |
3896 c->idct_add= ff_vp3_idct_add_c; | |
3897 c->idct = ff_vp3_idct_c; | |
3898 c->idct_permutation_type= FF_NO_IDCT_PERM; | |
2256 | 3899 }else{ //accurate/default |
3900 c->idct_put= simple_idct_put; | |
3901 c->idct_add= simple_idct_add; | |
3902 c->idct = simple_idct; | |
3903 c->idct_permutation_type= FF_NO_IDCT_PERM; | |
3904 } | |
1092 | 3905 } |
3906 | |
5064 | 3907 if (ENABLE_H264_DECODER) { |
5065 | 3908 c->h264_idct_add= ff_h264_idct_add_c; |
3909 c->h264_idct8_add= ff_h264_idct8_add_c; | |
3910 c->h264_idct_dc_add= ff_h264_idct_dc_add_c; | |
3911 c->h264_idct8_dc_add= ff_h264_idct8_dc_add_c; | |
5064 | 3912 } |
2272
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
2259
diff
changeset
|
3913 |
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
764
diff
changeset
|
3914 c->get_pixels = get_pixels_c; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
764
diff
changeset
|
3915 c->diff_pixels = diff_pixels_c; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
764
diff
changeset
|
3916 c->put_pixels_clamped = put_pixels_clamped_c; |
1984
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
3917 c->put_signed_pixels_clamped = put_signed_pixels_clamped_c; |
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
764
diff
changeset
|
3918 c->add_pixels_clamped = add_pixels_clamped_c; |
2763 | 3919 c->add_pixels8 = add_pixels8_c; |
3920 c->add_pixels4 = add_pixels4_c; | |
4988
689490842cf5
factor sum_abs_dctelem out of dct_sad, and simd it.
lorenm
parents:
4749
diff
changeset
|
3921 c->sum_abs_dctelem = sum_abs_dctelem_c; |
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
764
diff
changeset
|
3922 c->gmc1 = gmc1_c; |
3248
7aa9f80e7954
mmx implementation of 3-point GMC. (5x faster than C)
lorenm
parents:
3245
diff
changeset
|
3923 c->gmc = ff_gmc_c; |
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
764
diff
changeset
|
3924 c->clear_blocks = clear_blocks_c; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
764
diff
changeset
|
3925 c->pix_sum = pix_sum_c; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
764
diff
changeset
|
3926 c->pix_norm1 = pix_norm1_c; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
764
diff
changeset
|
3927 |
859 | 3928 /* TODO [0] 16 [1] 8 */ |
1708 | 3929 c->pix_abs[0][0] = pix_abs16_c; |
3930 c->pix_abs[0][1] = pix_abs16_x2_c; | |
3931 c->pix_abs[0][2] = pix_abs16_y2_c; | |
3932 c->pix_abs[0][3] = pix_abs16_xy2_c; | |
3933 c->pix_abs[1][0] = pix_abs8_c; | |
3934 c->pix_abs[1][1] = pix_abs8_x2_c; | |
3935 c->pix_abs[1][2] = pix_abs8_y2_c; | |
3936 c->pix_abs[1][3] = pix_abs8_xy2_c; | |
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
764
diff
changeset
|
3937 |
859 | 3938 #define dspfunc(PFX, IDX, NUM) \ |
3939 c->PFX ## _pixels_tab[IDX][0] = PFX ## _pixels ## NUM ## _c; \ | |
3940 c->PFX ## _pixels_tab[IDX][1] = PFX ## _pixels ## NUM ## _x2_c; \ | |
3941 c->PFX ## _pixels_tab[IDX][2] = PFX ## _pixels ## NUM ## _y2_c; \ | |
3942 c->PFX ## _pixels_tab[IDX][3] = PFX ## _pixels ## NUM ## _xy2_c | |
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
764
diff
changeset
|
3943 |
859 | 3944 dspfunc(put, 0, 16); |
3945 dspfunc(put_no_rnd, 0, 16); | |
3946 dspfunc(put, 1, 8); | |
3947 dspfunc(put_no_rnd, 1, 8); | |
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
3948 dspfunc(put, 2, 4); |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
3949 dspfunc(put, 3, 2); |
0 | 3950 |
859 | 3951 dspfunc(avg, 0, 16); |
3952 dspfunc(avg_no_rnd, 0, 16); | |
3953 dspfunc(avg, 1, 8); | |
3954 dspfunc(avg_no_rnd, 1, 8); | |
1319 | 3955 dspfunc(avg, 2, 4); |
3956 dspfunc(avg, 3, 2); | |
859 | 3957 #undef dspfunc |
857 | 3958 |
1864 | 3959 c->put_no_rnd_pixels_l2[0]= put_no_rnd_pixels16_l2_c; |
3960 c->put_no_rnd_pixels_l2[1]= put_no_rnd_pixels8_l2_c; | |
3961 | |
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
3962 c->put_tpel_pixels_tab[ 0] = put_tpel_pixels_mc00_c; |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
3963 c->put_tpel_pixels_tab[ 1] = put_tpel_pixels_mc10_c; |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
3964 c->put_tpel_pixels_tab[ 2] = put_tpel_pixels_mc20_c; |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
3965 c->put_tpel_pixels_tab[ 4] = put_tpel_pixels_mc01_c; |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
3966 c->put_tpel_pixels_tab[ 5] = put_tpel_pixels_mc11_c; |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
3967 c->put_tpel_pixels_tab[ 6] = put_tpel_pixels_mc21_c; |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
3968 c->put_tpel_pixels_tab[ 8] = put_tpel_pixels_mc02_c; |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
3969 c->put_tpel_pixels_tab[ 9] = put_tpel_pixels_mc12_c; |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
3970 c->put_tpel_pixels_tab[10] = put_tpel_pixels_mc22_c; |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
3971 |
1319 | 3972 c->avg_tpel_pixels_tab[ 0] = avg_tpel_pixels_mc00_c; |
3973 c->avg_tpel_pixels_tab[ 1] = avg_tpel_pixels_mc10_c; | |
3974 c->avg_tpel_pixels_tab[ 2] = avg_tpel_pixels_mc20_c; | |
3975 c->avg_tpel_pixels_tab[ 4] = avg_tpel_pixels_mc01_c; | |
3976 c->avg_tpel_pixels_tab[ 5] = avg_tpel_pixels_mc11_c; | |
3977 c->avg_tpel_pixels_tab[ 6] = avg_tpel_pixels_mc21_c; | |
3978 c->avg_tpel_pixels_tab[ 8] = avg_tpel_pixels_mc02_c; | |
3979 c->avg_tpel_pixels_tab[ 9] = avg_tpel_pixels_mc12_c; | |
3980 c->avg_tpel_pixels_tab[10] = avg_tpel_pixels_mc22_c; | |
3981 | |
859 | 3982 #define dspfunc(PFX, IDX, NUM) \ |
3983 c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_c; \ | |
3984 c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_c; \ | |
3985 c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_c; \ | |
3986 c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_c; \ | |
3987 c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_c; \ | |
3988 c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_c; \ | |
3989 c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_c; \ | |
3990 c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_c; \ | |
3991 c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_c; \ | |
3992 c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_c; \ | |
3993 c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_c; \ | |
3994 c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_c; \ | |
3995 c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_c; \ | |
3996 c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_c; \ | |
3997 c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_c; \ | |
3998 c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_c | |
857 | 3999 |
859 | 4000 dspfunc(put_qpel, 0, 16); |
4001 dspfunc(put_no_rnd_qpel, 0, 16); | |
4002 | |
4003 dspfunc(avg_qpel, 0, 16); | |
4004 /* dspfunc(avg_no_rnd_qpel, 0, 16); */ | |
857 | 4005 |
859 | 4006 dspfunc(put_qpel, 1, 8); |
4007 dspfunc(put_no_rnd_qpel, 1, 8); | |
4008 | |
4009 dspfunc(avg_qpel, 1, 8); | |
4010 /* dspfunc(avg_no_rnd_qpel, 1, 8); */ | |
1168 | 4011 |
4012 dspfunc(put_h264_qpel, 0, 16); | |
4013 dspfunc(put_h264_qpel, 1, 8); | |
4014 dspfunc(put_h264_qpel, 2, 4); | |
3020
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
4015 dspfunc(put_h264_qpel, 3, 2); |
1168 | 4016 dspfunc(avg_h264_qpel, 0, 16); |
4017 dspfunc(avg_h264_qpel, 1, 8); | |
4018 dspfunc(avg_h264_qpel, 2, 4); | |
4019 | |
859 | 4020 #undef dspfunc |
1168 | 4021 c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_c; |
4022 c->put_h264_chroma_pixels_tab[1]= put_h264_chroma_mc4_c; | |
4023 c->put_h264_chroma_pixels_tab[2]= put_h264_chroma_mc2_c; | |
4024 c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_c; | |
4025 c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_c; | |
4026 c->avg_h264_chroma_pixels_tab[2]= avg_h264_chroma_mc2_c; | |
3663 | 4027 c->put_no_rnd_h264_chroma_pixels_tab[0]= put_no_rnd_h264_chroma_mc8_c; |
857 | 4028 |
2415 | 4029 c->weight_h264_pixels_tab[0]= weight_h264_pixels16x16_c; |
4030 c->weight_h264_pixels_tab[1]= weight_h264_pixels16x8_c; | |
4031 c->weight_h264_pixels_tab[2]= weight_h264_pixels8x16_c; | |
4032 c->weight_h264_pixels_tab[3]= weight_h264_pixels8x8_c; | |
4033 c->weight_h264_pixels_tab[4]= weight_h264_pixels8x4_c; | |
4034 c->weight_h264_pixels_tab[5]= weight_h264_pixels4x8_c; | |
4035 c->weight_h264_pixels_tab[6]= weight_h264_pixels4x4_c; | |
4036 c->weight_h264_pixels_tab[7]= weight_h264_pixels4x2_c; | |
4037 c->weight_h264_pixels_tab[8]= weight_h264_pixels2x4_c; | |
4038 c->weight_h264_pixels_tab[9]= weight_h264_pixels2x2_c; | |
4039 c->biweight_h264_pixels_tab[0]= biweight_h264_pixels16x16_c; | |
4040 c->biweight_h264_pixels_tab[1]= biweight_h264_pixels16x8_c; | |
4041 c->biweight_h264_pixels_tab[2]= biweight_h264_pixels8x16_c; | |
4042 c->biweight_h264_pixels_tab[3]= biweight_h264_pixels8x8_c; | |
4043 c->biweight_h264_pixels_tab[4]= biweight_h264_pixels8x4_c; | |
4044 c->biweight_h264_pixels_tab[5]= biweight_h264_pixels4x8_c; | |
4045 c->biweight_h264_pixels_tab[6]= biweight_h264_pixels4x4_c; | |
4046 c->biweight_h264_pixels_tab[7]= biweight_h264_pixels4x2_c; | |
4047 c->biweight_h264_pixels_tab[8]= biweight_h264_pixels2x4_c; | |
4048 c->biweight_h264_pixels_tab[9]= biweight_h264_pixels2x2_c; | |
4049 | |
3432 | 4050 #ifdef CONFIG_CAVS_DECODER |
3395
adccbf4a1040
CAVS decoder by (Stefan Gehrer stefan.gehrer gmx.de)
michael
parents:
3373
diff
changeset
|
4051 ff_cavsdsp_init(c,avctx); |
3432 | 4052 #endif |
3526 | 4053 #if defined(CONFIG_VC1_DECODER) || defined(CONFIG_WMV3_DECODER) |
4054 ff_vc1dsp_init(c,avctx); | |
4055 #endif | |
4296 | 4056 #if defined(CONFIG_H264_ENCODER) |
5411
362aec4ef932
Take care of some renames (Doxygen and function name) after the previous pure rename patch.
takis
parents:
5394
diff
changeset
|
4057 ff_h264dspenc_init(c,avctx); |
4296 | 4058 #endif |
3395
adccbf4a1040
CAVS decoder by (Stefan Gehrer stefan.gehrer gmx.de)
michael
parents:
3373
diff
changeset
|
4059 |
936 | 4060 c->put_mspel_pixels_tab[0]= put_mspel8_mc00_c; |
4061 c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c; | |
4062 c->put_mspel_pixels_tab[2]= put_mspel8_mc20_c; | |
4063 c->put_mspel_pixels_tab[3]= put_mspel8_mc30_c; | |
4064 c->put_mspel_pixels_tab[4]= put_mspel8_mc02_c; | |
4065 c->put_mspel_pixels_tab[5]= put_mspel8_mc12_c; | |
4066 c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c; | |
4067 c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c; | |
2967 | 4068 |
1708 | 4069 #define SET_CMP_FUNC(name) \ |
4070 c->name[0]= name ## 16_c;\ | |
4071 c->name[1]= name ## 8x8_c; | |
2967 | 4072 |
1708 | 4073 SET_CMP_FUNC(hadamard8_diff) |
1729 | 4074 c->hadamard8_diff[4]= hadamard8_intra16_c; |
1708 | 4075 SET_CMP_FUNC(dct_sad) |
2382 | 4076 SET_CMP_FUNC(dct_max) |
3013 | 4077 #ifdef CONFIG_GPL |
3010
533c6386eca9
8x8 integer dct from x264 as cmp function (under CONFIG_GPL)
michael
parents:
2979
diff
changeset
|
4078 SET_CMP_FUNC(dct264_sad) |
3013 | 4079 #endif |
1708 | 4080 c->sad[0]= pix_abs16_c; |
4081 c->sad[1]= pix_abs8_c; | |
4082 c->sse[0]= sse16_c; | |
4083 c->sse[1]= sse8_c; | |
2184 | 4084 c->sse[2]= sse4_c; |
1708 | 4085 SET_CMP_FUNC(quant_psnr) |
4086 SET_CMP_FUNC(rd) | |
4087 SET_CMP_FUNC(bit) | |
1729 | 4088 c->vsad[0]= vsad16_c; |
4089 c->vsad[4]= vsad_intra16_c; | |
4090 c->vsse[0]= vsse16_c; | |
4091 c->vsse[4]= vsse_intra16_c; | |
2065
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
4092 c->nsse[0]= nsse16_c; |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
4093 c->nsse[1]= nsse8_c; |
3373
b8996cc5ccae
Disable w53 and w97 cmp methods when snow encoder is disabled
gpoirier
parents:
3323
diff
changeset
|
4094 #ifdef CONFIG_SNOW_ENCODER |
2184 | 4095 c->w53[0]= w53_16_c; |
4096 c->w53[1]= w53_8_c; | |
4097 c->w97[0]= w97_16_c; | |
4098 c->w97[1]= w97_8_c; | |
3373
b8996cc5ccae
Disable w53 and w97 cmp methods when snow encoder is disabled
gpoirier
parents:
3323
diff
changeset
|
4099 #endif |
2184 | 4100 |
4749 | 4101 c->ssd_int8_vs_int16 = ssd_int8_vs_int16_c; |
4102 | |
866 | 4103 c->add_bytes= add_bytes_c; |
4104 c->diff_bytes= diff_bytes_c; | |
1527 | 4105 c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_c; |
1273 | 4106 c->bswap_buf= bswap_buf; |
2633 | 4107 |
4108 c->h264_v_loop_filter_luma= h264_v_loop_filter_luma_c; | |
4109 c->h264_h_loop_filter_luma= h264_h_loop_filter_luma_c; | |
4110 c->h264_v_loop_filter_chroma= h264_v_loop_filter_chroma_c; | |
4111 c->h264_h_loop_filter_chroma= h264_h_loop_filter_chroma_c; | |
2707
360024d31dab
H.264 deblocking optimizations (mmx for chroma_bS4 case, convert existing cases to 8-bit math)
lorenm
parents:
2696
diff
changeset
|
4112 c->h264_v_loop_filter_chroma_intra= h264_v_loop_filter_chroma_intra_c; |
360024d31dab
H.264 deblocking optimizations (mmx for chroma_bS4 case, convert existing cases to 8-bit math)
lorenm
parents:
2696
diff
changeset
|
4113 c->h264_h_loop_filter_chroma_intra= h264_h_loop_filter_chroma_intra_c; |
3645
47821be55b6c
mmx implementation of deblocking strength decision.
lorenm
parents:
3568
diff
changeset
|
4114 c->h264_loop_filter_strength= NULL; |
2967 | 4115 |
5277
7b3fcb7c61ce
Avoid linking with h263.c functions when the relevant codecs
aurel
parents:
5256
diff
changeset
|
4116 if (ENABLE_ANY_H263) { |
5278 | 4117 c->h263_h_loop_filter= h263_h_loop_filter_c; |
4118 c->h263_v_loop_filter= h263_v_loop_filter_c; | |
5277
7b3fcb7c61ce
Avoid linking with h263.c functions when the relevant codecs
aurel
parents:
5256
diff
changeset
|
4119 } |
2967 | 4120 |
2045 | 4121 c->h261_loop_filter= h261_loop_filter_c; |
2967 | 4122 |
1784 | 4123 c->try_8x8basis= try_8x8basis_c; |
4124 c->add_8x8basis= add_8x8basis_c; | |
866 | 4125 |
4598
5111fceeb971
The Snow DEcoding routines should be under CONFIG_SNOW_DECODER.
diego
parents:
4594
diff
changeset
|
4126 #ifdef CONFIG_SNOW_DECODER |
3198
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3105
diff
changeset
|
4127 c->vertical_compose97i = ff_snow_vertical_compose97i; |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3105
diff
changeset
|
4128 c->horizontal_compose97i = ff_snow_horizontal_compose97i; |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3105
diff
changeset
|
4129 c->inner_add_yblock = ff_snow_inner_add_yblock; |
3199
1651e69b9f7a
10l: Only set *compose97i *add_yblock to dsputils context if we are building with Snow enabled
gpoirier
parents:
3198
diff
changeset
|
4130 #endif |
3198
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3105
diff
changeset
|
4131 |
3536
545a15c19c91
sse & sse2 implementations of vorbis channel coupling.
lorenm
parents:
3526
diff
changeset
|
4132 #ifdef CONFIG_VORBIS_DECODER |
545a15c19c91
sse & sse2 implementations of vorbis channel coupling.
lorenm
parents:
3526
diff
changeset
|
4133 c->vorbis_inverse_coupling = vorbis_inverse_coupling; |
545a15c19c91
sse & sse2 implementations of vorbis channel coupling.
lorenm
parents:
3526
diff
changeset
|
4134 #endif |
3568
945caa35ee9a
sse and 3dnow implementations of float->int conversion and mdct windowing.
lorenm
parents:
3536
diff
changeset
|
4135 c->vector_fmul = vector_fmul_c; |
945caa35ee9a
sse and 3dnow implementations of float->int conversion and mdct windowing.
lorenm
parents:
3536
diff
changeset
|
4136 c->vector_fmul_reverse = vector_fmul_reverse_c; |
945caa35ee9a
sse and 3dnow implementations of float->int conversion and mdct windowing.
lorenm
parents:
3536
diff
changeset
|
4137 c->vector_fmul_add_add = ff_vector_fmul_add_add_c; |
945caa35ee9a
sse and 3dnow implementations of float->int conversion and mdct windowing.
lorenm
parents:
3536
diff
changeset
|
4138 c->float_to_int16 = ff_float_to_int16_c; |
3536
545a15c19c91
sse & sse2 implementations of vorbis channel coupling.
lorenm
parents:
3526
diff
changeset
|
4139 |
3245 | 4140 c->shrink[0]= ff_img_copy_plane; |
4141 c->shrink[1]= ff_shrink22; | |
4142 c->shrink[2]= ff_shrink44; | |
4143 c->shrink[3]= ff_shrink88; | |
4144 | |
3215
06f98047ff26
prefetch pixels for future motion compensation. 2-5% faster h264.
lorenm
parents:
3199
diff
changeset
|
4145 c->prefetch= just_return; |
06f98047ff26
prefetch pixels for future motion compensation. 2-5% faster h264.
lorenm
parents:
3199
diff
changeset
|
4146 |
3807
6a40092eb9e6
approximate qpel functions: sacrifice some quality for some decoding speed. enabled on B-frames with -lavdopts fast.
lorenm
parents:
3728
diff
changeset
|
4147 memset(c->put_2tap_qpel_pixels_tab, 0, sizeof(c->put_2tap_qpel_pixels_tab)); |
6a40092eb9e6
approximate qpel functions: sacrifice some quality for some decoding speed. enabled on B-frames with -lavdopts fast.
lorenm
parents:
3728
diff
changeset
|
4148 memset(c->avg_2tap_qpel_pixels_tab, 0, sizeof(c->avg_2tap_qpel_pixels_tab)); |
6a40092eb9e6
approximate qpel functions: sacrifice some quality for some decoding speed. enabled on B-frames with -lavdopts fast.
lorenm
parents:
3728
diff
changeset
|
4149 |
5146
5db8d038eb1f
use if() instead of #ifdef and ensure all possible optimisations are enabled
aurel
parents:
5143
diff
changeset
|
4150 if (ENABLE_MMX) dsputil_init_mmx (c, avctx); |
5db8d038eb1f
use if() instead of #ifdef and ensure all possible optimisations are enabled
aurel
parents:
5143
diff
changeset
|
4151 if (ENABLE_ARMV4L) dsputil_init_armv4l(c, avctx); |
5db8d038eb1f
use if() instead of #ifdef and ensure all possible optimisations are enabled
aurel
parents:
5143
diff
changeset
|
4152 if (ENABLE_MLIB) dsputil_init_mlib (c, avctx); |
5db8d038eb1f
use if() instead of #ifdef and ensure all possible optimisations are enabled
aurel
parents:
5143
diff
changeset
|
4153 if (ENABLE_SPARC) dsputil_init_vis (c, avctx); |
5db8d038eb1f
use if() instead of #ifdef and ensure all possible optimisations are enabled
aurel
parents:
5143
diff
changeset
|
4154 if (ENABLE_ALPHA) dsputil_init_alpha (c, avctx); |
5db8d038eb1f
use if() instead of #ifdef and ensure all possible optimisations are enabled
aurel
parents:
5143
diff
changeset
|
4155 if (ENABLE_POWERPC) dsputil_init_ppc (c, avctx); |
5db8d038eb1f
use if() instead of #ifdef and ensure all possible optimisations are enabled
aurel
parents:
5143
diff
changeset
|
4156 if (ENABLE_MMI) dsputil_init_mmi (c, avctx); |
5db8d038eb1f
use if() instead of #ifdef and ensure all possible optimisations are enabled
aurel
parents:
5143
diff
changeset
|
4157 if (ENABLE_SH4) dsputil_init_sh4 (c, avctx); |
5db8d038eb1f
use if() instead of #ifdef and ensure all possible optimisations are enabled
aurel
parents:
5143
diff
changeset
|
4158 if (ENABLE_BFIN) dsputil_init_bfin (c, avctx); |
1092 | 4159 |
3807
6a40092eb9e6
approximate qpel functions: sacrifice some quality for some decoding speed. enabled on B-frames with -lavdopts fast.
lorenm
parents:
3728
diff
changeset
|
4160 for(i=0; i<64; i++){ |
6a40092eb9e6
approximate qpel functions: sacrifice some quality for some decoding speed. enabled on B-frames with -lavdopts fast.
lorenm
parents:
3728
diff
changeset
|
4161 if(!c->put_2tap_qpel_pixels_tab[0][i]) |
6a40092eb9e6
approximate qpel functions: sacrifice some quality for some decoding speed. enabled on B-frames with -lavdopts fast.
lorenm
parents:
3728
diff
changeset
|
4162 c->put_2tap_qpel_pixels_tab[0][i]= c->put_h264_qpel_pixels_tab[0][i]; |
6a40092eb9e6
approximate qpel functions: sacrifice some quality for some decoding speed. enabled on B-frames with -lavdopts fast.
lorenm
parents:
3728
diff
changeset
|
4163 if(!c->avg_2tap_qpel_pixels_tab[0][i]) |
6a40092eb9e6
approximate qpel functions: sacrifice some quality for some decoding speed. enabled on B-frames with -lavdopts fast.
lorenm
parents:
3728
diff
changeset
|
4164 c->avg_2tap_qpel_pixels_tab[0][i]= c->avg_h264_qpel_pixels_tab[0][i]; |
6a40092eb9e6
approximate qpel functions: sacrifice some quality for some decoding speed. enabled on B-frames with -lavdopts fast.
lorenm
parents:
3728
diff
changeset
|
4165 } |
6a40092eb9e6
approximate qpel functions: sacrifice some quality for some decoding speed. enabled on B-frames with -lavdopts fast.
lorenm
parents:
3728
diff
changeset
|
4166 |
1092 | 4167 switch(c->idct_permutation_type){ |
4168 case FF_NO_IDCT_PERM: | |
4169 for(i=0; i<64; i++) | |
4170 c->idct_permutation[i]= i; | |
4171 break; | |
4172 case FF_LIBMPEG2_IDCT_PERM: | |
4173 for(i=0; i<64; i++) | |
4174 c->idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2); | |
4175 break; | |
4176 case FF_SIMPLE_IDCT_PERM: | |
4177 for(i=0; i<64; i++) | |
4178 c->idct_permutation[i]= simple_mmx_permutation[i]; | |
4179 break; | |
4180 case FF_TRANSPOSE_IDCT_PERM: | |
4181 for(i=0; i<64; i++) | |
4182 c->idct_permutation[i]= ((i&7)<<3) | (i>>3); | |
4183 break; | |
2696
9699d325049d
porting the mmx&sse2 (sse2 untested) vp3 idcts to the lavc idct API
michael
parents:
2693
diff
changeset
|
4184 case FF_PARTTRANS_IDCT_PERM: |
9699d325049d
porting the mmx&sse2 (sse2 untested) vp3 idcts to the lavc idct API
michael
parents:
2693
diff
changeset
|
4185 for(i=0; i<64; i++) |
9699d325049d
porting the mmx&sse2 (sse2 untested) vp3 idcts to the lavc idct API
michael
parents:
2693
diff
changeset
|
4186 c->idct_permutation[i]= (i&0x24) | ((i&3)<<3) | ((i>>3)&3); |
9699d325049d
porting the mmx&sse2 (sse2 untested) vp3 idcts to the lavc idct API
michael
parents:
2693
diff
changeset
|
4187 break; |
1092 | 4188 default: |
1598
932d306bf1dc
av_log() patch by (Michel Bardiaux <mbardiaux at peaktime dot be>)
michael
parents:
1571
diff
changeset
|
4189 av_log(avctx, AV_LOG_ERROR, "Internal error, IDCT permutation not set\n"); |
1092 | 4190 } |
0 | 4191 } |
252
ddb1a0e94cf4
- Added PSNR feature to libavcodec and ffmpeg. By now just Y PSNR until I'm
pulento
parents:
220
diff
changeset
|
4192 |