Mercurial > libavcodec.hg
annotate dsputil.c @ 8991:ca768cb2bfb6 libavcodec
Use last decoded SPS as current SPS in order to parse picture timing SEI
correctly. This works around an apparent H.264 standard deficiency.
Patch by Ivan Schreter, schreter gmx net
author | cehoyos |
---|---|
date | Fri, 20 Feb 2009 16:20:01 +0000 |
parents | a49197cd37ce |
children | a031926f7d6b |
rev | line source |
---|---|
0 | 1 /* |
2 * DSP utils | |
8629
04423b2f6e0b
cosmetics: Remove pointless period after copyright statement non-sentences.
diego
parents:
8627
diff
changeset
|
3 * Copyright (c) 2000, 2001 Fabrice Bellard |
1739
07a484280a82
copyright year update of the files i touched and remembered, things look annoyingly unmaintained otherwise
michael
parents:
1729
diff
changeset
|
4 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> |
0 | 5 * |
5214 | 6 * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at> |
7 * | |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3807
diff
changeset
|
8 * This file is part of FFmpeg. |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3807
diff
changeset
|
9 * |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3807
diff
changeset
|
10 * FFmpeg is free software; you can redistribute it and/or |
429 | 11 * modify it under the terms of the GNU Lesser General Public |
12 * License as published by the Free Software Foundation; either | |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3807
diff
changeset
|
13 * version 2.1 of the License, or (at your option) any later version. |
0 | 14 * |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3807
diff
changeset
|
15 * FFmpeg is distributed in the hope that it will be useful, |
0 | 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
429 | 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
18 * Lesser General Public License for more details. | |
0 | 19 * |
429 | 20 * You should have received a copy of the GNU Lesser General Public |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3807
diff
changeset
|
21 * License along with FFmpeg; if not, write to the Free Software |
3036
0b546eab515d
Update licensing information: The FSF changed postal address.
diego
parents:
3029
diff
changeset
|
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
0 | 23 */ |
2967 | 24 |
1106 | 25 /** |
8718
e9d9d946f213
Use full internal pathname in doxygen @file directives.
diego
parents:
8629
diff
changeset
|
26 * @file libavcodec/dsputil.c |
1106 | 27 * DSP utils |
28 */ | |
2967 | 29 |
0 | 30 #include "avcodec.h" |
31 #include "dsputil.h" | |
1092 | 32 #include "simple_idct.h" |
1557 | 33 #include "faandct.h" |
6407 | 34 #include "faanidct.h" |
8627
d6bab465b82c
moves mid_pred() into mathops.h (with arch specific code split by directory)
aurel
parents:
8596
diff
changeset
|
35 #include "mathops.h" |
5277
7b3fcb7c61ce
Avoid linking with h263.c functions when the relevant codecs
aurel
parents:
5256
diff
changeset
|
36 #include "h263.h" |
3198
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3105
diff
changeset
|
37 #include "snow.h" |
676 | 38 |
2522
e25782262d7d
kill warnings patch by (Mns Rullgrd <mru inprovide com>)
michael
parents:
2448
diff
changeset
|
39 /* snow.c */ |
e25782262d7d
kill warnings patch by (Mns Rullgrd <mru inprovide com>)
michael
parents:
2448
diff
changeset
|
40 void ff_spatial_dwt(int *buffer, int width, int height, int stride, int type, int decomposition_count); |
e25782262d7d
kill warnings patch by (Mns Rullgrd <mru inprovide com>)
michael
parents:
2448
diff
changeset
|
41 |
3536
545a15c19c91
sse & sse2 implementations of vorbis channel coupling.
lorenm
parents:
3526
diff
changeset
|
42 /* vorbis.c */ |
545a15c19c91
sse & sse2 implementations of vorbis channel coupling.
lorenm
parents:
3526
diff
changeset
|
43 void vorbis_inverse_coupling(float *mag, float *ang, int blocksize); |
545a15c19c91
sse & sse2 implementations of vorbis channel coupling.
lorenm
parents:
3526
diff
changeset
|
44 |
7563 | 45 /* ac3dec.c */ |
46 void ff_ac3_downmix_c(float (*samples)[256], float (*matrix)[2], int out_ch, int in_ch, int len); | |
47 | |
5737 | 48 /* flacenc.c */ |
49 void ff_flac_compute_autocorr(const int32_t *data, int len, int lag, double *autoc); | |
50 | |
6384 | 51 /* pngdec.c */ |
52 void ff_add_png_paeth_prediction(uint8_t *dst, uint8_t *src, uint8_t *top, int w, int bpp); | |
53 | |
8120 | 54 /* eaidct.c */ |
55 void ff_ea_idct_put_c(uint8_t *dest, int linesize, DCTELEM *block); | |
56 | |
4176 | 57 uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0, }; |
4179 | 58 uint32_t ff_squareTbl[512] = {0, }; |
0 | 59 |
6387 | 60 // 0x7f7f7f7f or 0x7f7f7f7f7f7f7f7f or whatever, depending on the cpu's native arithmetic size |
61 #define pb_7f (~0UL/255 * 0x7f) | |
62 #define pb_80 (~0UL/255 * 0x80) | |
6385 | 63 |
1064 | 64 const uint8_t ff_zigzag_direct[64] = { |
706
e65798d228ea
idct permutation cleanup, idct can be selected per context now
michaelni
parents:
689
diff
changeset
|
65 0, 1, 8, 16, 9, 2, 3, 10, |
e65798d228ea
idct permutation cleanup, idct can be selected per context now
michaelni
parents:
689
diff
changeset
|
66 17, 24, 32, 25, 18, 11, 4, 5, |
34 | 67 12, 19, 26, 33, 40, 48, 41, 34, |
706
e65798d228ea
idct permutation cleanup, idct can be selected per context now
michaelni
parents:
689
diff
changeset
|
68 27, 20, 13, 6, 7, 14, 21, 28, |
34 | 69 35, 42, 49, 56, 57, 50, 43, 36, |
70 29, 22, 15, 23, 30, 37, 44, 51, | |
71 58, 59, 52, 45, 38, 31, 39, 46, | |
72 53, 60, 61, 54, 47, 55, 62, 63 | |
73 }; | |
74 | |
1567 | 75 /* Specific zigzag scan for 248 idct. NOTE that unlike the |
76 specification, we interleave the fields */ | |
77 const uint8_t ff_zigzag248_direct[64] = { | |
78 0, 8, 1, 9, 16, 24, 2, 10, | |
79 17, 25, 32, 40, 48, 56, 33, 41, | |
80 18, 26, 3, 11, 4, 12, 19, 27, | |
81 34, 42, 49, 57, 50, 58, 35, 43, | |
82 20, 28, 5, 13, 6, 14, 21, 29, | |
83 36, 44, 51, 59, 52, 60, 37, 45, | |
84 22, 30, 7, 15, 23, 31, 38, 46, | |
85 53, 61, 54, 62, 39, 47, 55, 63, | |
86 }; | |
87 | |
220 | 88 /* not permutated inverse zigzag_direct + 1 for MMX quantizer */ |
4197 | 89 DECLARE_ALIGNED_8(uint16_t, inv_zigzag_direct16[64]) = {0, }; |
220 | 90 |
1064 | 91 const uint8_t ff_alternate_horizontal_scan[64] = { |
2967 | 92 0, 1, 2, 3, 8, 9, 16, 17, |
34 | 93 10, 11, 4, 5, 6, 7, 15, 14, |
2967 | 94 13, 12, 19, 18, 24, 25, 32, 33, |
34 | 95 26, 27, 20, 21, 22, 23, 28, 29, |
2967 | 96 30, 31, 34, 35, 40, 41, 48, 49, |
34 | 97 42, 43, 36, 37, 38, 39, 44, 45, |
2967 | 98 46, 47, 50, 51, 56, 57, 58, 59, |
34 | 99 52, 53, 54, 55, 60, 61, 62, 63, |
100 }; | |
101 | |
1064 | 102 const uint8_t ff_alternate_vertical_scan[64] = { |
2967 | 103 0, 8, 16, 24, 1, 9, 2, 10, |
34 | 104 17, 25, 32, 40, 48, 56, 57, 49, |
2967 | 105 41, 33, 26, 18, 3, 11, 4, 12, |
34 | 106 19, 27, 34, 42, 50, 58, 35, 43, |
2967 | 107 51, 59, 20, 28, 5, 13, 6, 14, |
34 | 108 21, 29, 36, 44, 52, 60, 37, 45, |
2967 | 109 53, 61, 22, 30, 7, 15, 23, 31, |
34 | 110 38, 46, 54, 62, 39, 47, 55, 63, |
111 }; | |
112 | |
220 | 113 /* a*inverse[b]>>32 == a/b for all 0<=a<=65536 && 2<=b<=255 */ |
4174 | 114 const uint32_t ff_inverse[256]={ |
2967 | 115 0, 4294967295U,2147483648U,1431655766, 1073741824, 858993460, 715827883, 613566757, |
116 536870912, 477218589, 429496730, 390451573, 357913942, 330382100, 306783379, 286331154, | |
117 268435456, 252645136, 238609295, 226050911, 214748365, 204522253, 195225787, 186737709, | |
118 178956971, 171798692, 165191050, 159072863, 153391690, 148102321, 143165577, 138547333, | |
119 134217728, 130150525, 126322568, 122713352, 119304648, 116080198, 113025456, 110127367, | |
120 107374183, 104755300, 102261127, 99882961, 97612894, 95443718, 93368855, 91382283, | |
121 89478486, 87652394, 85899346, 84215046, 82595525, 81037119, 79536432, 78090315, | |
122 76695845, 75350304, 74051161, 72796056, 71582789, 70409300, 69273667, 68174085, | |
123 67108864, 66076420, 65075263, 64103990, 63161284, 62245903, 61356676, 60492498, | |
124 59652324, 58835169, 58040099, 57266231, 56512728, 55778797, 55063684, 54366675, | |
125 53687092, 53024288, 52377650, 51746594, 51130564, 50529028, 49941481, 49367441, | |
126 48806447, 48258060, 47721859, 47197443, 46684428, 46182445, 45691142, 45210183, | |
127 44739243, 44278014, 43826197, 43383509, 42949673, 42524429, 42107523, 41698712, | |
128 41297763, 40904451, 40518560, 40139882, 39768216, 39403370, 39045158, 38693400, | |
129 38347923, 38008561, 37675152, 37347542, 37025581, 36709123, 36398028, 36092163, | |
130 35791395, 35495598, 35204650, 34918434, 34636834, 34359739, 34087043, 33818641, | |
131 33554432, 33294321, 33038210, 32786010, 32537632, 32292988, 32051995, 31814573, | |
132 31580642, 31350127, 31122952, 30899046, 30678338, 30460761, 30246249, 30034737, | |
133 29826162, 29620465, 29417585, 29217465, 29020050, 28825284, 28633116, 28443493, | |
134 28256364, 28071682, 27889399, 27709467, 27531842, 27356480, 27183338, 27012373, | |
135 26843546, 26676816, 26512144, 26349493, 26188825, 26030105, 25873297, 25718368, | |
136 25565282, 25414008, 25264514, 25116768, 24970741, 24826401, 24683721, 24542671, | |
137 24403224, 24265352, 24129030, 23994231, 23860930, 23729102, 23598722, 23469767, | |
138 23342214, 23216040, 23091223, 22967740, 22845571, 22724695, 22605092, 22486740, | |
139 22369622, 22253717, 22139007, 22025474, 21913099, 21801865, 21691755, 21582751, | |
140 21474837, 21367997, 21262215, 21157475, 21053762, 20951060, 20849356, 20748635, | |
141 20648882, 20550083, 20452226, 20355296, 20259280, 20164166, 20069941, 19976593, | |
142 19884108, 19792477, 19701685, 19611723, 19522579, 19434242, 19346700, 19259944, | |
143 19173962, 19088744, 19004281, 18920561, 18837576, 18755316, 18673771, 18592933, | |
144 18512791, 18433337, 18354562, 18276457, 18199014, 18122225, 18046082, 17970575, | |
145 17895698, 17821442, 17747799, 17674763, 17602325, 17530479, 17459217, 17388532, | |
220 | 146 17318417, 17248865, 17179870, 17111424, 17043522, 16976156, 16909321, 16843010, |
147 }; | |
148 | |
1092 | 149 /* Input permutation for the simple_idct_mmx */ |
150 static const uint8_t simple_mmx_permutation[64]={ | |
2979 | 151 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D, |
152 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D, | |
153 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D, | |
154 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F, | |
155 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F, | |
156 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D, | |
157 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F, | |
158 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F, | |
1092 | 159 }; |
160 | |
6600
c3213c91124c
Add a new IDCT permutation, used in xvid_sse2 and possibly future similar IDCTs.
astrange
parents:
6450
diff
changeset
|
161 static const uint8_t idct_sse2_row_perm[8] = {0, 4, 1, 5, 2, 6, 3, 7}; |
c3213c91124c
Add a new IDCT permutation, used in xvid_sse2 and possibly future similar IDCTs.
astrange
parents:
6450
diff
changeset
|
162 |
6438 | 163 void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){ |
164 int i; | |
165 int end; | |
166 | |
167 st->scantable= src_scantable; | |
168 | |
169 for(i=0; i<64; i++){ | |
170 int j; | |
171 j = src_scantable[i]; | |
172 st->permutated[i] = permutation[j]; | |
8590 | 173 #if ARCH_PPC |
6438 | 174 st->inverse[j] = i; |
175 #endif | |
176 } | |
177 | |
178 end=-1; | |
179 for(i=0; i<64; i++){ | |
180 int j; | |
181 j = st->permutated[i]; | |
182 if(j>end) end=j; | |
183 st->raster_end[i]= end; | |
184 } | |
185 } | |
186 | |
/**
 * Sums all samples of a 16x16 block.
 *
 * @param pix       pointer to the top-left sample of the block
 * @param line_size distance in bytes between the starts of two consecutive rows
 * @return sum of the 256 sample values
 */
static int pix_sum_c(uint8_t * pix, int line_size)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        for (col = 0; col < 16; col++)
            total += pix[col];
        pix += line_size;
    }
    return total;
}
208 | |
1064 | 209 static int pix_norm1_c(uint8_t * pix, int line_size) |
612 | 210 { |
211 int s, i, j; | |
4179 | 212 uint32_t *sq = ff_squareTbl + 256; |
612 | 213 |
214 s = 0; | |
215 for (i = 0; i < 16; i++) { | |
2979 | 216 for (j = 0; j < 16; j += 8) { |
997
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
217 #if 0 |
2979 | 218 s += sq[pix[0]]; |
219 s += sq[pix[1]]; | |
220 s += sq[pix[2]]; | |
221 s += sq[pix[3]]; | |
222 s += sq[pix[4]]; | |
223 s += sq[pix[5]]; | |
224 s += sq[pix[6]]; | |
225 s += sq[pix[7]]; | |
997
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
226 #else |
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
227 #if LONG_MAX > 2147483647 |
2979 | 228 register uint64_t x=*(uint64_t*)pix; |
229 s += sq[x&0xff]; | |
230 s += sq[(x>>8)&0xff]; | |
231 s += sq[(x>>16)&0xff]; | |
232 s += sq[(x>>24)&0xff]; | |
997
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
233 s += sq[(x>>32)&0xff]; |
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
234 s += sq[(x>>40)&0xff]; |
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
235 s += sq[(x>>48)&0xff]; |
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
236 s += sq[(x>>56)&0xff]; |
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
237 #else |
2979 | 238 register uint32_t x=*(uint32_t*)pix; |
239 s += sq[x&0xff]; | |
240 s += sq[(x>>8)&0xff]; | |
241 s += sq[(x>>16)&0xff]; | |
242 s += sq[(x>>24)&0xff]; | |
997
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
243 x=*(uint32_t*)(pix+4); |
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
244 s += sq[x&0xff]; |
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
245 s += sq[(x>>8)&0xff]; |
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
246 s += sq[(x>>16)&0xff]; |
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
247 s += sq[(x>>24)&0xff]; |
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
248 #endif |
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
249 #endif |
2979 | 250 pix += 8; |
251 } | |
252 pix += line_size - 16; | |
612 | 253 } |
254 return s; | |
255 } | |
256 | |
/**
 * Applies bswap_32() to each of the first w 32-bit words of src and stores
 * the results in dst, in ascending index order.
 *
 * @param dst destination buffer, at least w words
 * @param src source buffer, at least w words
 * @param w   number of 32-bit words to process
 */
static void bswap_buf(uint32_t *dst, const uint32_t *src, int w)
{
    int n;

    for (n = 0; n < w; n++)
        dst[n] = bswap_32(src[n]);
}
612 | 274 |
2184 | 275 static int sse4_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h) |
276 { | |
277 int s, i; | |
4179 | 278 uint32_t *sq = ff_squareTbl + 256; |
2184 | 279 |
280 s = 0; | |
281 for (i = 0; i < h; i++) { | |
282 s += sq[pix1[0] - pix2[0]]; | |
283 s += sq[pix1[1] - pix2[1]]; | |
284 s += sq[pix1[2] - pix2[2]]; | |
285 s += sq[pix1[3] - pix2[3]]; | |
286 pix1 += line_size; | |
287 pix2 += line_size; | |
288 } | |
289 return s; | |
290 } | |
291 | |
1708 | 292 static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h) |
936 | 293 { |
294 int s, i; | |
4179 | 295 uint32_t *sq = ff_squareTbl + 256; |
936 | 296 |
297 s = 0; | |
1708 | 298 for (i = 0; i < h; i++) { |
936 | 299 s += sq[pix1[0] - pix2[0]]; |
300 s += sq[pix1[1] - pix2[1]]; | |
301 s += sq[pix1[2] - pix2[2]]; | |
302 s += sq[pix1[3] - pix2[3]]; | |
303 s += sq[pix1[4] - pix2[4]]; | |
304 s += sq[pix1[5] - pix2[5]]; | |
305 s += sq[pix1[6] - pix2[6]]; | |
306 s += sq[pix1[7] - pix2[7]]; | |
307 pix1 += line_size; | |
308 pix2 += line_size; | |
309 } | |
310 return s; | |
311 } | |
312 | |
1708 | 313 static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) |
884 | 314 { |
1012
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
315 int s, i; |
4179 | 316 uint32_t *sq = ff_squareTbl + 256; |
884 | 317 |
318 s = 0; | |
1708 | 319 for (i = 0; i < h; i++) { |
1012
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
320 s += sq[pix1[ 0] - pix2[ 0]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
321 s += sq[pix1[ 1] - pix2[ 1]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
322 s += sq[pix1[ 2] - pix2[ 2]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
323 s += sq[pix1[ 3] - pix2[ 3]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
324 s += sq[pix1[ 4] - pix2[ 4]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
325 s += sq[pix1[ 5] - pix2[ 5]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
326 s += sq[pix1[ 6] - pix2[ 6]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
327 s += sq[pix1[ 7] - pix2[ 7]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
328 s += sq[pix1[ 8] - pix2[ 8]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
329 s += sq[pix1[ 9] - pix2[ 9]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
330 s += sq[pix1[10] - pix2[10]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
331 s += sq[pix1[11] - pix2[11]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
332 s += sq[pix1[12] - pix2[12]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
333 s += sq[pix1[13] - pix2[13]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
334 s += sq[pix1[14] - pix2[14]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
335 s += sq[pix1[15] - pix2[15]]; |
997
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
336 |
1012
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
337 pix1 += line_size; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
338 pix2 += line_size; |
884 | 339 } |
340 return s; | |
341 } | |
342 | |
2184 | 343 |
8590 | 344 #if CONFIG_SNOW_ENCODER //dwt is in snow.c |
/**
 * Wavelet-domain comparison of two blocks.
 *
 * The sample differences, scaled by 16, are written to a 32-wide scratch
 * buffer, transformed in place by ff_spatial_dwt(), and the absolute values
 * of the resulting coefficients are summed with a per-subband weight.
 *
 * @param v         unused
 * @param pix1      first block
 * @param pix2      second block
 * @param line_size row stride in bytes, shared by both blocks
 * @param w         block width; the callers in this file pass 8, 16 or 32
 * @param h         block height; must equal w (asserted) and fit the
 *                  32x32 scratch buffer
 * @param type      first index into the weight table and forwarded to
 *                  ff_spatial_dwt(): 0 selects the 9/7 weights, 1 the 5/3 ones
 * @return weighted sum of absolute coefficients, shifted right by 9
 */
static inline int w_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int w, int h, int type){
    int s, i, j;
    const int dec_count= w==8 ? 3 : 4;
    int tmp[32*32];
    int level, ori;
    static const int scale[2][2][4][4]={
      {
        {
            // 9/7 8x8 dec=3
            {268, 239, 239, 213},
            {  0, 224, 224, 152},
            {  0, 135, 135, 110},
        },{
            // 9/7 16x16 or 32x32 dec=4
            {344, 310, 310, 280},
            {  0, 320, 320, 228},
            {  0, 175, 175, 136},
            {  0, 129, 129, 102},
        }
      },{
        {
            // 5/3 8x8 dec=3
            {275, 245, 245, 218},
            {  0, 230, 230, 156},
            {  0, 138, 138, 113},
        },{
            // 5/3 16x16 or 32x32 dec=4
            {352, 317, 317, 286},
            {  0, 328, 328, 233},
            {  0, 180, 180, 140},
            {  0, 132, 132, 105},
        }
      }
    };

    for (i = 0; i < h; i++) {
        for (j = 0; j < w; j++) {
            /* the difference may be negative: multiply instead of shifting,
             * since left-shifting a negative int is undefined behaviour */
            tmp[32*i+j] = (pix1[j] - pix2[j]) * 16;
        }
        pix1 += line_size;
        pix2 += line_size;
    }

    ff_spatial_dwt(tmp, w, h, 32, type, dec_count);

    s=0;
    assert(w==h);
    for(level=0; level<dec_count; level++){
        /* orientation 0 is only visited at level 0 */
        for(ori= level ? 1 : 0; ori<4; ori++){
            int size= w>>(dec_count-level);
            int sx= (ori&1) ? size : 0;
            int stride= 32<<(dec_count-level);
            int sy= (ori&2) ? stride>>1 : 0;

            for(i=0; i<size; i++){
                for(j=0; j<size; j++){
                    /* named so that it no longer shadows the parameter v */
                    int weighted= tmp[sx + sy + i*stride + j] * scale[type][dec_count-3][level][ori];
                    s += FFABS(weighted);
                }
            }
        }
    }
    assert(s>=0);
    return s>>9;
}
413 | |
/** Compares two 8-wide blocks via w_c() with type 1 (5/3 weights). */
static int w53_8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
{
    const int width = 8;
    const int type  = 1;

    return w_c(v, pix1, pix2, line_size, width, h, type);
}
417 | |
/** Compares two 8-wide blocks via w_c() with type 0 (9/7 weights). */
static int w97_8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
{
    const int width = 8;
    const int type  = 0;

    return w_c(v, pix1, pix2, line_size, width, h, type);
}
421 | |
/** Compares two 16-wide blocks via w_c() with type 1 (5/3 weights). */
static int w53_16_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
{
    const int width = 16;
    const int type  = 1;

    return w_c(v, pix1, pix2, line_size, width, h, type);
}
425 | |
/** Compares two 16-wide blocks via w_c() with type 0 (9/7 weights). */
static int w97_16_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
{
    const int width = 16;
    const int type  = 0;

    return w_c(v, pix1, pix2, line_size, width, h, type);
}
429 | |
/** Compares two 32-wide blocks via w_c() with type 1 (5/3 weights). */
int w53_32_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
{
    const int width = 32;
    const int type  = 1;

    return w_c(v, pix1, pix2, line_size, width, h, type);
}
87c54a3f8d19
Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation.
lorenm
parents:
3248
diff
changeset
|
433 |
/** Compares two 32-wide blocks via w_c() with type 0 (9/7 weights). */
int w97_32_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
{
    const int width = 32;
    const int type  = 0;

    return w_c(v, pix1, pix2, line_size, width, h, type);
}
3373
b8996cc5ccae
Disable w53 and w97 cmp methods when snow encoder is disabled
gpoirier
parents:
3323
diff
changeset
|
437 #endif |
3323
87c54a3f8d19
Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation.
lorenm
parents:
3248
diff
changeset
|
438 |
/**
 * Replicates the outermost samples of an image into a margin of w samples
 * on every side. The margin must be writable memory around the image.
 *
 * @param buf    pointer to the top-left sample of the image
 * @param wrap   distance in bytes between the starts of two consecutive rows
 * @param width  image width in samples
 * @param height image height in rows
 * @param w      margin size in samples/rows
 */
//FIXME check that this is ok for mpeg4 interlaced
static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w)
{
    uint8_t *const bottom = buf + (height - 1) * wrap;
    uint8_t *row;
    int n;

    /* copy the first row upwards and the last row downwards */
    for (n = 1; n <= w; n++) {
        memcpy(buf    - n * wrap, buf,    width);
        memcpy(bottom + n * wrap, bottom, width);
    }

    /* extend every row to the left and to the right */
    row = buf;
    for (n = 0; n < height; n++) {
        memset(row - w,     row[0],         w);
        memset(row + width, row[width - 1], w);
        row += wrap;
    }

    /* fill the four w x w corner areas with the nearest corner sample */
    for (n = 1; n <= w; n++) {
        memset(buf    - n * wrap - w,     buf[0],            w); /* top left */
        memset(buf    - n * wrap + width, buf[width - 1],    w); /* top right */
        memset(bottom + n * wrap - w,     bottom[0],         w); /* bottom left */
        memset(bottom + n * wrap + width, bottom[width - 1], w); /* bottom right */
    }
}
467 | |
6445 | 468 /** |
469 * Copies a rectangular area of samples to a temporary buffer and replicates the border samples. | |
470 * @param buf destination buffer | |
471 * @param src source buffer | |
472 * @param linesize number of bytes between 2 vertically adjacent samples in both the source and destination buffers | |
473 * @param block_w width of block | |
474 * @param block_h height of block | |
475 * @param src_x x coordinate of the top left sample of the block in the source buffer | |
476 * @param src_y y coordinate of the top left sample of the block in the source buffer | |
477 * @param w width of the source buffer | |
478 * @param h height of the source buffer | |
479 */ | |
void ff_emulated_edge_mc(uint8_t *buf, uint8_t *src, int linesize, int block_w, int block_h,
                         int src_x, int src_y, int w, int h)
{
    int row, col;
    int first_row, first_col, stop_row, stop_col;

    /* move a block lying entirely above/below the source so that exactly
     * one of its rows overlaps the source */
    if (src_y >= h) {
        src  += (h - 1 - src_y) * linesize;
        src_y = h - 1;
    } else if (src_y <= -block_h) {
        src  += (1 - block_h - src_y) * linesize;
        src_y = 1 - block_h;
    }
    /* same for a block lying entirely left/right of the source */
    if (src_x >= w) {
        src  += (w - 1 - src_x);
        src_x = w - 1;
    } else if (src_x <= -block_w) {
        src  += (1 - block_w - src_x);
        src_x = 1 - block_w;
    }

    /* part of the block, in block coordinates, that lies inside the source */
    first_row = src_y < 0 ? -src_y : 0;
    first_col = src_x < 0 ? -src_x : 0;
    stop_row  = block_h > h - src_y ? h - src_y : block_h;
    stop_col  = block_w > w - src_x ? w - src_x : block_w;

    /* copy the part that exists in the source */
    for (row = first_row; row < stop_row; row++) {
        const int off = row * linesize;

        for (col = first_col; col < stop_col; col++)
            buf[off + col] = src[off + col];
    }

    /* rows above the copied part repeat its first row */
    for (row = 0; row < first_row; row++) {
        for (col = first_col; col < stop_col; col++)
            buf[col + row * linesize] = buf[col + first_row * linesize];
    }

    /* rows below the copied part repeat its last row */
    for (row = stop_row; row < block_h; row++) {
        for (col = first_col; col < stop_col; col++)
            buf[col + row * linesize] = buf[col + (stop_row - 1) * linesize];
    }

    /* finally extend every row of the block to the left and to the right */
    for (row = 0; row < block_h; row++) {
        const int off = row * linesize;

        for (col = 0; col < first_col; col++)
            buf[off + col] = buf[off + first_col];

        for (col = stop_col; col < block_w; col++)
            buf[off + col] = buf[off + stop_col - 1];
    }
}
538 | |
1064 | 539 static void get_pixels_c(DCTELEM *restrict block, const uint8_t *pixels, int line_size) |
0 | 540 { |
541 int i; | |
542 | |
543 /* read the pixels */ | |
544 for(i=0;i<8;i++) { | |
516 | 545 block[0] = pixels[0]; |
546 block[1] = pixels[1]; | |
547 block[2] = pixels[2]; | |
548 block[3] = pixels[3]; | |
549 block[4] = pixels[4]; | |
550 block[5] = pixels[5]; | |
551 block[6] = pixels[6]; | |
552 block[7] = pixels[7]; | |
553 pixels += line_size; | |
554 block += 8; | |
0 | 555 } |
556 } | |
557 | |
1064 | 558 static void diff_pixels_c(DCTELEM *restrict block, const uint8_t *s1, |
2979 | 559 const uint8_t *s2, int stride){ |
324 | 560 int i; |
561 | |
562 /* read the pixels */ | |
563 for(i=0;i<8;i++) { | |
516 | 564 block[0] = s1[0] - s2[0]; |
565 block[1] = s1[1] - s2[1]; | |
566 block[2] = s1[2] - s2[2]; | |
567 block[3] = s1[3] - s2[3]; | |
568 block[4] = s1[4] - s2[4]; | |
569 block[5] = s1[5] - s2[5]; | |
570 block[6] = s1[6] - s2[6]; | |
571 block[7] = s1[7] - s2[7]; | |
324 | 572 s1 += stride; |
573 s2 += stride; | |
516 | 574 block += 8; |
324 | 575 } |
576 } | |
577 | |
578 | |
1064 | 579 static void put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels, |
2979 | 580 int line_size) |
0 | 581 { |
582 int i; | |
4176 | 583 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; |
2967 | 584 |
0 | 585 /* read the pixels */ |
586 for(i=0;i<8;i++) { | |
516 | 587 pixels[0] = cm[block[0]]; |
588 pixels[1] = cm[block[1]]; | |
589 pixels[2] = cm[block[2]]; | |
590 pixels[3] = cm[block[3]]; | |
591 pixels[4] = cm[block[4]]; | |
592 pixels[5] = cm[block[5]]; | |
593 pixels[6] = cm[block[6]]; | |
594 pixels[7] = cm[block[7]]; | |
595 | |
596 pixels += line_size; | |
597 block += 8; | |
0 | 598 } |
599 } | |
600 | |
2256 | 601 static void put_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels, |
2979 | 602 int line_size) |
2256 | 603 { |
604 int i; | |
4176 | 605 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; |
2967 | 606 |
2256 | 607 /* read the pixels */ |
608 for(i=0;i<4;i++) { | |
609 pixels[0] = cm[block[0]]; | |
610 pixels[1] = cm[block[1]]; | |
611 pixels[2] = cm[block[2]]; | |
612 pixels[3] = cm[block[3]]; | |
613 | |
614 pixels += line_size; | |
615 block += 8; | |
616 } | |
617 } | |
618 | |
2257 | 619 static void put_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels, |
2979 | 620 int line_size) |
2257 | 621 { |
622 int i; | |
4176 | 623 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; |
2967 | 624 |
2257 | 625 /* read the pixels */ |
626 for(i=0;i<2;i++) { | |
627 pixels[0] = cm[block[0]]; | |
628 pixels[1] = cm[block[1]]; | |
629 | |
630 pixels += line_size; | |
631 block += 8; | |
632 } | |
633 } | |
634 | |
2967 | 635 static void put_signed_pixels_clamped_c(const DCTELEM *block, |
1984
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
636 uint8_t *restrict pixels, |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
637 int line_size) |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
638 { |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
639 int i, j; |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
640 |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
641 for (i = 0; i < 8; i++) { |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
642 for (j = 0; j < 8; j++) { |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
643 if (*block < -128) |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
644 *pixels = 0; |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
645 else if (*block > 127) |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
646 *pixels = 255; |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
647 else |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
648 *pixels = (uint8_t)(*block + 128); |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
649 block++; |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
650 pixels++; |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
651 } |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
652 pixels += (line_size - 8); |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
653 } |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
654 } |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
655 |
1064 | 656 static void add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels, |
516 | 657 int line_size) |
0 | 658 { |
659 int i; | |
4176 | 660 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; |
2967 | 661 |
0 | 662 /* read the pixels */ |
663 for(i=0;i<8;i++) { | |
516 | 664 pixels[0] = cm[pixels[0] + block[0]]; |
665 pixels[1] = cm[pixels[1] + block[1]]; | |
666 pixels[2] = cm[pixels[2] + block[2]]; | |
667 pixels[3] = cm[pixels[3] + block[3]]; | |
668 pixels[4] = cm[pixels[4] + block[4]]; | |
669 pixels[5] = cm[pixels[5] + block[5]]; | |
670 pixels[6] = cm[pixels[6] + block[6]]; | |
671 pixels[7] = cm[pixels[7] + block[7]]; | |
672 pixels += line_size; | |
673 block += 8; | |
0 | 674 } |
675 } | |
2256 | 676 |
677 static void add_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels, | |
678 int line_size) | |
679 { | |
680 int i; | |
4176 | 681 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; |
2967 | 682 |
2256 | 683 /* read the pixels */ |
684 for(i=0;i<4;i++) { | |
685 pixels[0] = cm[pixels[0] + block[0]]; | |
686 pixels[1] = cm[pixels[1] + block[1]]; | |
687 pixels[2] = cm[pixels[2] + block[2]]; | |
688 pixels[3] = cm[pixels[3] + block[3]]; | |
689 pixels += line_size; | |
690 block += 8; | |
691 } | |
692 } | |
2257 | 693 |
694 static void add_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels, | |
695 int line_size) | |
696 { | |
697 int i; | |
4176 | 698 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; |
2967 | 699 |
2257 | 700 /* read the pixels */ |
701 for(i=0;i<2;i++) { | |
702 pixels[0] = cm[pixels[0] + block[0]]; | |
703 pixels[1] = cm[pixels[1] + block[1]]; | |
704 pixels += line_size; | |
705 block += 8; | |
706 } | |
707 } | |
2763 | 708 |
709 static void add_pixels8_c(uint8_t *restrict pixels, DCTELEM *block, int line_size) | |
710 { | |
711 int i; | |
712 for(i=0;i<8;i++) { | |
713 pixels[0] += block[0]; | |
714 pixels[1] += block[1]; | |
715 pixels[2] += block[2]; | |
716 pixels[3] += block[3]; | |
717 pixels[4] += block[4]; | |
718 pixels[5] += block[5]; | |
719 pixels[6] += block[6]; | |
720 pixels[7] += block[7]; | |
721 pixels += line_size; | |
722 block += 8; | |
723 } | |
724 } | |
725 | |
726 static void add_pixels4_c(uint8_t *restrict pixels, DCTELEM *block, int line_size) | |
727 { | |
728 int i; | |
729 for(i=0;i<4;i++) { | |
730 pixels[0] += block[0]; | |
731 pixels[1] += block[1]; | |
732 pixels[2] += block[2]; | |
733 pixels[3] += block[3]; | |
734 pixels += line_size; | |
735 block += 4; | |
736 } | |
737 } | |
738 | |
4988
689490842cf5
factor sum_abs_dctelem out of dct_sad, and simd it.
lorenm
parents:
4749
diff
changeset
|
739 static int sum_abs_dctelem_c(DCTELEM *block) |
689490842cf5
factor sum_abs_dctelem out of dct_sad, and simd it.
lorenm
parents:
4749
diff
changeset
|
740 { |
689490842cf5
factor sum_abs_dctelem out of dct_sad, and simd it.
lorenm
parents:
4749
diff
changeset
|
741 int sum=0, i; |
689490842cf5
factor sum_abs_dctelem out of dct_sad, and simd it.
lorenm
parents:
4749
diff
changeset
|
742 for(i=0; i<64; i++) |
689490842cf5
factor sum_abs_dctelem out of dct_sad, and simd it.
lorenm
parents:
4749
diff
changeset
|
743 sum+= FFABS(block[i]); |
689490842cf5
factor sum_abs_dctelem out of dct_sad, and simd it.
lorenm
parents:
4749
diff
changeset
|
744 return sum; |
689490842cf5
factor sum_abs_dctelem out of dct_sad, and simd it.
lorenm
parents:
4749
diff
changeset
|
745 } |
689490842cf5
factor sum_abs_dctelem out of dct_sad, and simd it.
lorenm
parents:
4749
diff
changeset
|
746 |
385 | 747 #if 0 |
748 | |
/*
 * PIXOP2(OPNAME, OP): 64-bit variant of the 8-pixel-wide copy/interpolation
 * functions. Each row is handled as a single uint64_t holding 8 samples.
 *
 * OPNAME is the prefix of the generated function names. OP(dst, value) is
 * a caller-supplied macro that combines the computed 64-bit value with the
 * destination lvalue.
 *
 * Generated functions, all (uint8_t *block, const uint8_t *pixels,
 * int line_size, int h):
 *   OPNAME_pixels_c             plain row copy through OP
 *   OPNAME_[no_rnd_]pixels_x2_c average of pixels[x] and pixels[x+1]
 *   OPNAME_[no_rnd_]pixels_y2_c average of this row and the next row
 *   OPNAME_[no_rnd_]pixels_xy2_c average of the 2x2 neighbourhood
 *
 * Byte-wise averaging without carries between samples:
 *   (a&b) + (((a^b)&0xFE..)>>1) is the average rounded down (no_rnd),
 *   (a|b) - (((a^b)&0xFE..)>>1) is the average rounded up.
 * For the xy2 forms each sample is split into its low 2 bits (l0/l1) and
 * its high 6 bits pre-shifted right by 2 (h0/h1); the low-bit sums carry
 * the rounding constant (2 per sample for rounding, 1 for no_rnd), giving
 * (a+b+c+d+bias)>>2 per sample.
 *
 * The xy2 loops emit two rows per iteration, so they write h rounded up to
 * an even number of rows and read one source row beyond the rows written.
 *
 * The plain copy is named OPNAME_pixels_c so that it matches the name the
 * CALL_2X_PIXELS(OPNAME_pixels16_c, OPNAME_pixels_c, 8) line below refers
 * to.
 *
 * NOTE(review): destination rows are accessed through a uint64_t pointer
 * cast, which assumes suitable alignment of block and line_size -- confirm
 * before enabling this variant. AV_RN64 and CALL_2X_PIXELS are defined
 * elsewhere; AV_RN64 is presumably a native-endian 64-bit load.
 */
#define PIXOP2(OPNAME, OP) \
static void OPNAME ## _pixels_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    for(i=0; i<h; i++){\
        OP(*((uint64_t*)block), AV_RN64(pixels));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static void OPNAME ## _no_rnd_pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    for(i=0; i<h; i++){\
        const uint64_t a= AV_RN64(pixels  );\
        const uint64_t b= AV_RN64(pixels+1);\
        OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static void OPNAME ## _pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    for(i=0; i<h; i++){\
        const uint64_t a= AV_RN64(pixels  );\
        const uint64_t b= AV_RN64(pixels+1);\
        OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static void OPNAME ## _no_rnd_pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    for(i=0; i<h; i++){\
        const uint64_t a= AV_RN64(pixels          );\
        const uint64_t b= AV_RN64(pixels+line_size);\
        OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static void OPNAME ## _pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    for(i=0; i<h; i++){\
        const uint64_t a= AV_RN64(pixels          );\
        const uint64_t b= AV_RN64(pixels+line_size);\
        OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static void OPNAME ## _pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    uint64_t a= AV_RN64(pixels  );\
    uint64_t b= AV_RN64(pixels+1);\
    uint64_t l0=  (a&0x0303030303030303ULL)\
                + (b&0x0303030303030303ULL)\
                + 0x0202020202020202ULL;\
    uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
               + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
    uint64_t l1,h1;\
\
    pixels+=line_size;\
    for(i=0; i<h; i+=2){\
        a= AV_RN64(pixels  );\
        b= AV_RN64(pixels+1);\
        l1=  (a&0x0303030303030303ULL)\
           + (b&0x0303030303030303ULL);\
        h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
          + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
        OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
        pixels+=line_size;\
        block +=line_size;\
        a= AV_RN64(pixels  );\
        b= AV_RN64(pixels+1);\
        l0=  (a&0x0303030303030303ULL)\
           + (b&0x0303030303030303ULL)\
           + 0x0202020202020202ULL;\
        h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
          + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
        OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static void OPNAME ## _no_rnd_pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    uint64_t a= AV_RN64(pixels  );\
    uint64_t b= AV_RN64(pixels+1);\
    uint64_t l0=  (a&0x0303030303030303ULL)\
                + (b&0x0303030303030303ULL)\
                + 0x0101010101010101ULL;\
    uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
               + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
    uint64_t l1,h1;\
\
    pixels+=line_size;\
    for(i=0; i<h; i+=2){\
        a= AV_RN64(pixels  );\
        b= AV_RN64(pixels+1);\
        l1=  (a&0x0303030303030303ULL)\
           + (b&0x0303030303030303ULL);\
        h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
          + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
        OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
        pixels+=line_size;\
        block +=line_size;\
        a= AV_RN64(pixels  );\
        b= AV_RN64(pixels+1);\
        l0=  (a&0x0303030303030303ULL)\
           + (b&0x0303030303030303ULL)\
           + 0x0101010101010101ULL;\
        h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
          + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
        OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
CALL_2X_PIXELS(OPNAME ## _pixels16_c    , OPNAME ## _pixels_c    , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels_x2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels_y2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels_xy2_c, 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels_x2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels_y2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels_xy2_c, 8)
385 | 887 |
/* Byte-wise average, rounded up, of the eight 8-bit lanes of the 64-bit
 * values a and b; the result is assigned to a, which must be an lvalue. */
#define op_avg(a, b) (a) = (((a) | (b)) - ((((a) ^ (b)) & 0xFEFEFEFEFEFEFEFEULL) >> 1))
889 #else // 64 bit variant | |
890 | |
891 #define PIXOP2(OPNAME, OP) \ | |
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
892 static void OPNAME ## _pixels2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
893 int i;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
894 for(i=0; i<h; i++){\ |
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
895 OP(*((uint16_t*)(block )), AV_RN16(pixels ));\ |
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
896 pixels+=line_size;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
897 block +=line_size;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
898 }\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
899 }\ |
1168 | 900 static void OPNAME ## _pixels4_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
901 int i;\ | |
902 for(i=0; i<h; i++){\ | |
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
903 OP(*((uint32_t*)(block )), AV_RN32(pixels ));\ |
1168 | 904 pixels+=line_size;\ |
905 block +=line_size;\ | |
906 }\ | |
907 }\ | |
859 | 908 static void OPNAME ## _pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
385 | 909 int i;\ |
910 for(i=0; i<h; i++){\ | |
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
911 OP(*((uint32_t*)(block )), AV_RN32(pixels ));\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
912 OP(*((uint32_t*)(block+4)), AV_RN32(pixels+4));\ |
385 | 913 pixels+=line_size;\ |
914 block +=line_size;\ | |
915 }\ | |
916 }\ | |
859 | 917 static inline void OPNAME ## _no_rnd_pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
918 OPNAME ## _pixels8_c(block, pixels, line_size, h);\ | |
651 | 919 }\ |
385 | 920 \ |
651 | 921 static inline void OPNAME ## _no_rnd_pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ |
922 int src_stride1, int src_stride2, int h){\ | |
385 | 923 int i;\ |
924 for(i=0; i<h; i++){\ | |
651 | 925 uint32_t a,b;\ |
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
926 a= AV_RN32(&src1[i*src_stride1 ]);\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
927 b= AV_RN32(&src2[i*src_stride2 ]);\ |
1264 | 928 OP(*((uint32_t*)&dst[i*dst_stride ]), no_rnd_avg32(a, b));\ |
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
929 a= AV_RN32(&src1[i*src_stride1+4]);\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
930 b= AV_RN32(&src2[i*src_stride2+4]);\ |
1264 | 931 OP(*((uint32_t*)&dst[i*dst_stride+4]), no_rnd_avg32(a, b));\ |
385 | 932 }\ |
933 }\ | |
934 \ | |
651 | 935 static inline void OPNAME ## _pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ |
936 int src_stride1, int src_stride2, int h){\ | |
385 | 937 int i;\ |
938 for(i=0; i<h; i++){\ | |
651 | 939 uint32_t a,b;\ |
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
940 a= AV_RN32(&src1[i*src_stride1 ]);\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
941 b= AV_RN32(&src2[i*src_stride2 ]);\ |
1264 | 942 OP(*((uint32_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\ |
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
943 a= AV_RN32(&src1[i*src_stride1+4]);\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
944 b= AV_RN32(&src2[i*src_stride2+4]);\ |
1264 | 945 OP(*((uint32_t*)&dst[i*dst_stride+4]), rnd_avg32(a, b));\ |
385 | 946 }\ |
947 }\ | |
948 \ | |
1168 | 949 static inline void OPNAME ## _pixels4_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ |
950 int src_stride1, int src_stride2, int h){\ | |
951 int i;\ | |
952 for(i=0; i<h; i++){\ | |
953 uint32_t a,b;\ | |
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
954 a= AV_RN32(&src1[i*src_stride1 ]);\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
955 b= AV_RN32(&src2[i*src_stride2 ]);\ |
1264 | 956 OP(*((uint32_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\ |
1168 | 957 }\ |
958 }\ | |
959 \ | |
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
960 static inline void OPNAME ## _pixels2_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
961 int src_stride1, int src_stride2, int h){\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
962 int i;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
963 for(i=0; i<h; i++){\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
964 uint32_t a,b;\ |
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
965 a= AV_RN16(&src1[i*src_stride1 ]);\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
966 b= AV_RN16(&src2[i*src_stride2 ]);\ |
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
967 OP(*((uint16_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
968 }\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
969 }\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
970 \ |
651 | 971 static inline void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ |
972 int src_stride1, int src_stride2, int h){\ | |
973 OPNAME ## _pixels8_l2(dst , src1 , src2 , dst_stride, src_stride1, src_stride2, h);\ | |
974 OPNAME ## _pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\ | |
975 }\ | |
976 \ | |
977 static inline void OPNAME ## _no_rnd_pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ | |
978 int src_stride1, int src_stride2, int h){\ | |
979 OPNAME ## _no_rnd_pixels8_l2(dst , src1 , src2 , dst_stride, src_stride1, src_stride2, h);\ | |
980 OPNAME ## _no_rnd_pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\ | |
981 }\ | |
982 \ | |
859 | 983 static inline void OPNAME ## _no_rnd_pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
651 | 984 OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\ |
985 }\ | |
986 \ | |
859 | 987 static inline void OPNAME ## _pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
651 | 988 OPNAME ## _pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\ |
989 }\ | |
990 \ | |
859 | 991 static inline void OPNAME ## _no_rnd_pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
651 | 992 OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\ |
993 }\ | |
994 \ | |
859 | 995 static inline void OPNAME ## _pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
651 | 996 OPNAME ## _pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\ |
385 | 997 }\ |
998 \ | |
651 | 999 static inline void OPNAME ## _pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\ |
1000 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ | |
1001 int i;\ | |
1002 for(i=0; i<h; i++){\ | |
1003 uint32_t a, b, c, d, l0, l1, h0, h1;\ | |
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
1004 a= AV_RN32(&src1[i*src_stride1]);\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
1005 b= AV_RN32(&src2[i*src_stride2]);\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
1006 c= AV_RN32(&src3[i*src_stride3]);\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
1007 d= AV_RN32(&src4[i*src_stride4]);\ |
651 | 1008 l0= (a&0x03030303UL)\ |
1009 + (b&0x03030303UL)\ | |
1010 + 0x02020202UL;\ | |
1011 h0= ((a&0xFCFCFCFCUL)>>2)\ | |
1012 + ((b&0xFCFCFCFCUL)>>2);\ | |
1013 l1= (c&0x03030303UL)\ | |
1014 + (d&0x03030303UL);\ | |
1015 h1= ((c&0xFCFCFCFCUL)>>2)\ | |
1016 + ((d&0xFCFCFCFCUL)>>2);\ | |
1017 OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ | |
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
1018 a= AV_RN32(&src1[i*src_stride1+4]);\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
1019 b= AV_RN32(&src2[i*src_stride2+4]);\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
1020 c= AV_RN32(&src3[i*src_stride3+4]);\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
1021 d= AV_RN32(&src4[i*src_stride4+4]);\ |
651 | 1022 l0= (a&0x03030303UL)\ |
1023 + (b&0x03030303UL)\ | |
1024 + 0x02020202UL;\ | |
1025 h0= ((a&0xFCFCFCFCUL)>>2)\ | |
1026 + ((b&0xFCFCFCFCUL)>>2);\ | |
1027 l1= (c&0x03030303UL)\ | |
1028 + (d&0x03030303UL);\ | |
1029 h1= ((c&0xFCFCFCFCUL)>>2)\ | |
1030 + ((d&0xFCFCFCFCUL)>>2);\ | |
1031 OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ | |
1032 }\ | |
1033 }\ | |
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1034 \ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1035 static inline void OPNAME ## _pixels4_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1036 OPNAME ## _pixels4_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1037 }\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1038 \ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1039 static inline void OPNAME ## _pixels4_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1040 OPNAME ## _pixels4_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1041 }\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1042 \ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1043 static inline void OPNAME ## _pixels2_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1044 OPNAME ## _pixels2_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1045 }\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1046 \ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1047 static inline void OPNAME ## _pixels2_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1048 OPNAME ## _pixels2_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1049 }\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1050 \ |
651 | 1051 static inline void OPNAME ## _no_rnd_pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\ |
1052 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ | |
385 | 1053 int i;\ |
1054 for(i=0; i<h; i++){\ | |
651 | 1055 uint32_t a, b, c, d, l0, l1, h0, h1;\ |
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
1056 a= AV_RN32(&src1[i*src_stride1]);\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
1057 b= AV_RN32(&src2[i*src_stride2]);\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
1058 c= AV_RN32(&src3[i*src_stride3]);\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
1059 d= AV_RN32(&src4[i*src_stride4]);\ |
651 | 1060 l0= (a&0x03030303UL)\ |
1061 + (b&0x03030303UL)\ | |
1062 + 0x01010101UL;\ | |
1063 h0= ((a&0xFCFCFCFCUL)>>2)\ | |
1064 + ((b&0xFCFCFCFCUL)>>2);\ | |
1065 l1= (c&0x03030303UL)\ | |
1066 + (d&0x03030303UL);\ | |
1067 h1= ((c&0xFCFCFCFCUL)>>2)\ | |
1068 + ((d&0xFCFCFCFCUL)>>2);\ | |
1069 OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ | |
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
1070 a= AV_RN32(&src1[i*src_stride1+4]);\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
1071 b= AV_RN32(&src2[i*src_stride2+4]);\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
1072 c= AV_RN32(&src3[i*src_stride3+4]);\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
1073 d= AV_RN32(&src4[i*src_stride4+4]);\ |
651 | 1074 l0= (a&0x03030303UL)\ |
1075 + (b&0x03030303UL)\ | |
1076 + 0x01010101UL;\ | |
1077 h0= ((a&0xFCFCFCFCUL)>>2)\ | |
1078 + ((b&0xFCFCFCFCUL)>>2);\ | |
1079 l1= (c&0x03030303UL)\ | |
1080 + (d&0x03030303UL);\ | |
1081 h1= ((c&0xFCFCFCFCUL)>>2)\ | |
1082 + ((d&0xFCFCFCFCUL)>>2);\ | |
1083 OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ | |
385 | 1084 }\ |
1085 }\ | |
651 | 1086 static inline void OPNAME ## _pixels16_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\ |
1087 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ | |
1088 OPNAME ## _pixels8_l4(dst , src1 , src2 , src3 , src4 , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\ | |
1089 OPNAME ## _pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\ | |
1090 }\ | |
1091 static inline void OPNAME ## _no_rnd_pixels16_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\ | |
1092 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ | |
1093 OPNAME ## _no_rnd_pixels8_l4(dst , src1 , src2 , src3 , src4 , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\ | |
1094 OPNAME ## _no_rnd_pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\ | |
1095 }\ | |
385 | 1096 \ |
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1097 static inline void OPNAME ## _pixels2_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1098 {\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1099 int i, a0, b0, a1, b1;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1100 a0= pixels[0];\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1101 b0= pixels[1] + 2;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1102 a0 += b0;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1103 b0 += pixels[2];\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1104 \ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1105 pixels+=line_size;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1106 for(i=0; i<h; i+=2){\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1107 a1= pixels[0];\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1108 b1= pixels[1];\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1109 a1 += b1;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1110 b1 += pixels[2];\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1111 \ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1112 block[0]= (a1+a0)>>2; /* FIXME non put */\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1113 block[1]= (b1+b0)>>2;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1114 \ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1115 pixels+=line_size;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1116 block +=line_size;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1117 \ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1118 a0= pixels[0];\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1119 b0= pixels[1] + 2;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1120 a0 += b0;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1121 b0 += pixels[2];\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1122 \ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1123 block[0]= (a1+a0)>>2;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1124 block[1]= (b1+b0)>>2;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1125 pixels+=line_size;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1126 block +=line_size;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1127 }\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1128 }\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1129 \ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1130 static inline void OPNAME ## _pixels4_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1131 {\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1132 int i;\ |
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
1133 const uint32_t a= AV_RN32(pixels );\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
1134 const uint32_t b= AV_RN32(pixels+1);\ |
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1135 uint32_t l0= (a&0x03030303UL)\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1136 + (b&0x03030303UL)\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1137 + 0x02020202UL;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1138 uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1139 + ((b&0xFCFCFCFCUL)>>2);\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1140 uint32_t l1,h1;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1141 \ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1142 pixels+=line_size;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1143 for(i=0; i<h; i+=2){\ |
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
1144 uint32_t a= AV_RN32(pixels );\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
1145 uint32_t b= AV_RN32(pixels+1);\ |
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1146 l1= (a&0x03030303UL)\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1147 + (b&0x03030303UL);\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1148 h1= ((a&0xFCFCFCFCUL)>>2)\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1149 + ((b&0xFCFCFCFCUL)>>2);\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1150 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1151 pixels+=line_size;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1152 block +=line_size;\ |
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
1153 a= AV_RN32(pixels );\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
1154 b= AV_RN32(pixels+1);\ |
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1155 l0= (a&0x03030303UL)\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1156 + (b&0x03030303UL)\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1157 + 0x02020202UL;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1158 h0= ((a&0xFCFCFCFCUL)>>2)\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1159 + ((b&0xFCFCFCFCUL)>>2);\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1160 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1161 pixels+=line_size;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1162 block +=line_size;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1163 }\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1164 }\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1165 \ |
859 | 1166 static inline void OPNAME ## _pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ |
385 | 1167 {\ |
1168 int j;\ | |
1169 for(j=0; j<2; j++){\ | |
1170 int i;\ | |
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
1171 const uint32_t a= AV_RN32(pixels );\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
1172 const uint32_t b= AV_RN32(pixels+1);\ |
385 | 1173 uint32_t l0= (a&0x03030303UL)\ |
1174 + (b&0x03030303UL)\ | |
1175 + 0x02020202UL;\ | |
1176 uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\ | |
1177 + ((b&0xFCFCFCFCUL)>>2);\ | |
1178 uint32_t l1,h1;\ | |
1179 \ | |
1180 pixels+=line_size;\ | |
1181 for(i=0; i<h; i+=2){\ | |
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
1182 uint32_t a= AV_RN32(pixels );\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
1183 uint32_t b= AV_RN32(pixels+1);\ |
385 | 1184 l1= (a&0x03030303UL)\ |
1185 + (b&0x03030303UL);\ | |
1186 h1= ((a&0xFCFCFCFCUL)>>2)\ | |
1187 + ((b&0xFCFCFCFCUL)>>2);\ | |
1188 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ | |
1189 pixels+=line_size;\ | |
1190 block +=line_size;\ | |
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
1191 a= AV_RN32(pixels );\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
1192 b= AV_RN32(pixels+1);\ |
385 | 1193 l0= (a&0x03030303UL)\ |
1194 + (b&0x03030303UL)\ | |
1195 + 0x02020202UL;\ | |
1196 h0= ((a&0xFCFCFCFCUL)>>2)\ | |
1197 + ((b&0xFCFCFCFCUL)>>2);\ | |
1198 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ | |
1199 pixels+=line_size;\ | |
1200 block +=line_size;\ | |
1201 }\ | |
1202 pixels+=4-line_size*(h+1);\ | |
1203 block +=4-line_size*h;\ | |
1204 }\ | |
1205 }\ | |
1206 \ | |
859 | 1207 static inline void OPNAME ## _no_rnd_pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ |
385 | 1208 {\ |
1209 int j;\ | |
1210 for(j=0; j<2; j++){\ | |
1211 int i;\ | |
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
1212 const uint32_t a= AV_RN32(pixels );\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
1213 const uint32_t b= AV_RN32(pixels+1);\ |
385 | 1214 uint32_t l0= (a&0x03030303UL)\ |
1215 + (b&0x03030303UL)\ | |
1216 + 0x01010101UL;\ | |
1217 uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\ | |
1218 + ((b&0xFCFCFCFCUL)>>2);\ | |
1219 uint32_t l1,h1;\ | |
1220 \ | |
1221 pixels+=line_size;\ | |
1222 for(i=0; i<h; i+=2){\ | |
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
1223 uint32_t a= AV_RN32(pixels );\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
1224 uint32_t b= AV_RN32(pixels+1);\ |
385 | 1225 l1= (a&0x03030303UL)\ |
1226 + (b&0x03030303UL);\ | |
1227 h1= ((a&0xFCFCFCFCUL)>>2)\ | |
1228 + ((b&0xFCFCFCFCUL)>>2);\ | |
1229 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ | |
1230 pixels+=line_size;\ | |
1231 block +=line_size;\ | |
5520
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
1232 a= AV_RN32(pixels );\ |
c16a59ef6a86
* renaming (ST|LD)(16|32|64) -> AV_(R|W)N(16|32|64)
romansh
parents:
5411
diff
changeset
|
1233 b= AV_RN32(pixels+1);\ |
385 | 1234 l0= (a&0x03030303UL)\ |
1235 + (b&0x03030303UL)\ | |
1236 + 0x01010101UL;\ | |
1237 h0= ((a&0xFCFCFCFCUL)>>2)\ | |
1238 + ((b&0xFCFCFCFCUL)>>2);\ | |
1239 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ | |
1240 pixels+=line_size;\ | |
1241 block +=line_size;\ | |
1242 }\ | |
1243 pixels+=4-line_size*(h+1);\ | |
1244 block +=4-line_size*h;\ | |
1245 }\ | |
1246 }\ | |
1247 \ | |
859 | 1248 CALL_2X_PIXELS(OPNAME ## _pixels16_c , OPNAME ## _pixels8_c , 8)\ |
1249 CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels8_x2_c , 8)\ | |
1250 CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels8_y2_c , 8)\ | |
1251 CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels8_xy2_c, 8)\ | |
1252 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_c , OPNAME ## _pixels8_c , 8)\ | |
1253 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels8_x2_c , 8)\ | |
1254 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels8_y2_c , 8)\ | |
1255 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels8_xy2_c, 8)\ | |
651 | 1256 |
1264 | 1257 #define op_avg(a, b) a = rnd_avg32(a, b) |
385 | 1258 #endif |
1259 #define op_put(a, b) a = b | |
1260 | |
1261 PIXOP2(avg, op_avg) | |
1262 PIXOP2(put, op_put) | |
1263 #undef op_avg | |
1264 #undef op_put | |
1265 | |
/*
 * Rounded integer averages of two and four values (round half up).
 * Every parameter is parenthesized so that arguments containing operators
 * of lower precedence than '+' (e.g. '&', '|', '?:') are averaged correctly.
 * Note: each argument is evaluated exactly once, but arguments with side
 * effects are still best avoided in macros.
 */
#define avg2(a,b) (((a)+(b)+1)>>1)
#define avg4(a,b,c,d) (((a)+(b)+(c)+(d)+2)>>2)
1268 | |
/**
 * 16-pixel-wide two-source "put, no rounding" helper with a single stride.
 *
 * Forwards to put_no_rnd_pixels16_l2(), using the same stride for the
 * destination and for both sources.
 *
 * @param dst    destination
 * @param a      first source
 * @param b      second source
 * @param stride stride shared by dst, a and b
 * @param h      number of rows
 */
static void put_no_rnd_pixels16_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h)
{
    const int dst_stride = stride;
    const int a_stride   = stride;
    const int b_stride   = stride;

    put_no_rnd_pixels16_l2(dst, a, b, dst_stride, a_stride, b_stride, h);
}
1272 | |
/**
 * 8-pixel-wide two-source "put, no rounding" helper with a single stride.
 *
 * Forwards to put_no_rnd_pixels8_l2(), using the same stride for the
 * destination and for both sources.
 *
 * @param dst    destination
 * @param a      first source
 * @param b      second source
 * @param stride stride shared by dst, a and b
 * @param h      number of rows
 */
static void put_no_rnd_pixels8_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h)
{
    const int dst_stride = stride;
    const int a_stride   = stride;
    const int b_stride   = stride;

    put_no_rnd_pixels8_l2(dst, a, b, dst_stride, a_stride, b_stride, h);
}
753 | 1276 |
/**
 * Bilinear interpolation of an 8-pixel-wide block with one fractional
 * offset shared by all pixels.
 *
 * Each output pixel is a weighted sum of the four neighbouring source
 * pixels (current/right, current row/next row). The four weights are
 * products of (16 - x16 | x16) and (16 - y16 | y16); for offsets in
 * 0..16 they add up to 256, which is why the sum is shifted right by 8.
 *
 * @param dst     destination, 8 pixels written per row
 * @param src     source; 9 pixels are read from each of two adjacent rows
 * @param stride  row stride, used for both dst and src
 * @param h       number of rows
 * @param x16     horizontal weight of the right-hand neighbour
 * @param y16     vertical weight of the row below
 * @param rounder constant added to the weighted sum before the shift
 */
static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder)
{
    const int w_top_left     = (16 - x16) * (16 - y16);
    const int w_top_right    = (     x16) * (16 - y16);
    const int w_bottom_left  = (16 - x16) * (     y16);
    const int w_bottom_right = (     x16) * (     y16);
    int row;

    for (row = 0; row < h; row++) {
        const uint8_t *below = src + stride;
        int col;

        for (col = 0; col < 8; col++) {
            dst[col] = (w_top_left     * src[col]
                      + w_top_right    * src[col + 1]
                      + w_bottom_left  * below[col]
                      + w_bottom_right * below[col + 1]
                      + rounder) >> 8;
        }

        dst += stride;
        src += stride;
    }
}
1299 | |
3248
7aa9f80e7954
mmx implementation of 3-point GMC. (5x faster than C)
lorenm
parents:
3245
diff
changeset
|
/**
 * Motion compensation of an 8-pixel-wide block where the source position
 * changes linearly from pixel to pixel.
 *
 * The source position starts at (ox, oy) for each row, advances by
 * (dxx, dyx) per output column and the row start advances by (dxy, dyy)
 * per output row. After dropping the low 16 bits, the lowest `shift` bits
 * of a position are its fractional part and the remaining bits the integer
 * sample coordinate.
 *
 * Samples are bilinearly interpolated when the integer coordinate lies in
 * [0, width-1) x [0, height-1). If one coordinate is outside that range it
 * is clamped and interpolation is done along the other axis only; if both
 * are outside, the clamped sample is copied unchanged.
 *
 * @param dst    destination, 8 pixels written per row
 * @param src    source picture
 * @param stride row stride, used for both dst and src
 * @param h      number of rows to produce
 * @param ox     start position, horizontal
 * @param oy     start position, vertical
 * @param dxx    horizontal position step per output column
 * @param dxy    horizontal position step per output row
 * @param dyx    vertical position step per output column
 * @param dyy    vertical position step per output row
 * @param shift  number of fractional bits
 * @param r      constant added before the final shift by 2*shift
 * @param width  source width used for edge clamping
 * @param height source height used for edge clamping
 */
void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
              int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
{
    const int s     = 1 << shift;
    const int max_x = width  - 1;   /* last valid column, also the clamp limit */
    const int max_y = height - 1;   /* last valid row, also the clamp limit */
    int row;

    for (row = 0; row < h; row++) {
        int vx = ox;
        int vy = oy;
        int col;

        for (col = 0; col < 8; col++) { //XXX FIXME optimize
            const int pos_x  = vx >> 16;
            const int pos_y  = vy >> 16;
            const int frac_x = pos_x & (s - 1);
            const int frac_y = pos_y & (s - 1);
            const int src_x  = pos_x >> shift;
            const int src_y  = pos_y >> shift;
            /* the unsigned compare rejects negative coordinates as well */
            const int x_inside = (unsigned)src_x < (unsigned)max_x;
            const int y_inside = (unsigned)src_y < (unsigned)max_y;
            uint8_t *out = &dst[row * stride + col];
            int index;

            if (x_inside && y_inside) {
                /* full bilinear interpolation */
                index = src_x + src_y * stride;
                *out = ((src[index             ] * (s - frac_x)
                       + src[index          + 1] *      frac_x) * (s - frac_y)
                      + (src[index + stride    ] * (s - frac_x)
                       + src[index + stride + 1] *      frac_x) *      frac_y
                      + r) >> (shift * 2);
            } else if (x_inside) {
                /* row clamped: interpolate horizontally only */
                index = src_x + av_clip(src_y, 0, max_y) * stride;
                *out = ((src[index    ] * (s - frac_x)
                       + src[index + 1] *      frac_x) * s
                      + r) >> (shift * 2);
            } else if (y_inside) {
                /* column clamped: interpolate vertically only */
                index = av_clip(src_x, 0, max_x) + src_y * stride;
                *out = ((src[index         ] * (s - frac_y)
                       + src[index + stride] *      frac_y) * s
                      + r) >> (shift * 2);
            } else {
                /* both clamped: plain copy */
                index = av_clip(src_x, 0, max_x) + av_clip(src_y, 0, max_y) * stride;
                *out = src[index];
            }

            vx += dxx;
            vy += dyx;
        }

        ox += dxy;
        oy += dyy;
    }
}
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1357 |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
/**
 * Full-pel copy for the tpel function family: dispatches on the block
 * width to the matching put_pixelsN_c() routine.
 *
 * Widths other than 2, 4, 8 and 16 are ignored (nothing is written).
 */
static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    if (width == 2) {
        put_pixels2_c(dst, src, stride, height);
    } else if (width == 4) {
        put_pixels4_c(dst, src, stride, height);
    } else if (width == 8) {
        put_pixels8_c(dst, src, stride, height);
    } else if (width == 16) {
        put_pixels16_c(dst, src, stride, height);
    }
}
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1366 |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
/**
 * Horizontal interpolation weighting the current pixel twice and its
 * right-hand neighbour once.
 *
 * The division by three is done as a multiplication by 683 followed by a
 * right shift by 11 (683/2048 is approximately 1/3).
 *
 * @param dst    destination
 * @param src    source; width+1 pixels are read per row
 * @param stride row stride, used for both dst and src
 * @param width  pixels per row
 * @param height number of rows
 */
static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        int col;

        for (col = 0; col < width; col++) {
            const int weighted = 2*src[col] + src[col+1] + 1;
            dst[col] = (683*weighted) >> 11;
        }
    }
}
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1377 |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
/**
 * Horizontal interpolation weighting the current pixel once and its
 * right-hand neighbour twice.
 *
 * The division by three is done as a multiplication by 683 followed by a
 * right shift by 11 (683/2048 is approximately 1/3).
 *
 * @param dst    destination
 * @param src    source; width+1 pixels are read per row
 * @param stride row stride, used for both dst and src
 * @param width  pixels per row
 * @param height number of rows
 */
static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        int col;

        for (col = 0; col < width; col++) {
            const int weighted = src[col] + 2*src[col+1] + 1;
            dst[col] = (683*weighted) >> 11;
        }
    }
}
2967 | 1388 |
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1389 static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1390 int i,j; |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1391 for (i=0; i < height; i++) { |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1392 for (j=0; j < width; j++) { |
2979 | 1393 dst[j] = (683*(2*src[j] + src[j+stride] + 1)) >> 11; |
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1394 } |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1395 src += stride; |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1396 dst += stride; |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1397 } |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1398 } |
2967 | 1399 |
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1400 static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1401 int i,j; |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1402 for (i=0; i < height; i++) { |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1403 for (j=0; j < width; j++) { |
2979 | 1404 dst[j] = (2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15; |
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1405 } |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1406 src += stride; |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1407 dst += stride; |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1408 } |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1409 } |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1410 |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1411 static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1412 int i,j; |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1413 for (i=0; i < height; i++) { |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1414 for (j=0; j < width; j++) { |
2979 | 1415 dst[j] = (2731*(3*src[j] + 2*src[j+1] + 4*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15; |
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1416 } |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1417 src += stride; |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1418 dst += stride; |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1419 } |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1420 } |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1421 |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1422 static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1423 int i,j; |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1424 for (i=0; i < height; i++) { |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1425 for (j=0; j < width; j++) { |
2979 | 1426 dst[j] = (683*(src[j] + 2*src[j+stride] + 1)) >> 11; |
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1427 } |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1428 src += stride; |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1429 dst += stride; |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1430 } |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1431 } |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1432 |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1433 static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1434 int i,j; |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1435 for (i=0; i < height; i++) { |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1436 for (j=0; j < width; j++) { |
2979 | 1437 dst[j] = (2731*(3*src[j] + 4*src[j+1] + 2*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15; |
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1438 } |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1439 src += stride; |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1440 dst += stride; |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1441 } |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1442 } |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1443 |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1444 static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1445 int i,j; |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1446 for (i=0; i < height; i++) { |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1447 for (j=0; j < width; j++) { |
2979 | 1448 dst[j] = (2731*(2*src[j] + 3*src[j+1] + 3*src[j+stride] + 4*src[j+stride+1] + 6)) >> 15; |
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1449 } |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1450 src += stride; |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1451 dst += stride; |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1452 } |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1453 } |
1319 | 1454 |
1455 static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ | |
1456 switch(width){ | |
1457 case 2: avg_pixels2_c (dst, src, stride, height); break; | |
1458 case 4: avg_pixels4_c (dst, src, stride, height); break; | |
1459 case 8: avg_pixels8_c (dst, src, stride, height); break; | |
1460 case 16:avg_pixels16_c(dst, src, stride, height); break; | |
1461 } | |
1462 } | |
1463 | |
1464 static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ | |
1465 int i,j; | |
1466 for (i=0; i < height; i++) { | |
1467 for (j=0; j < width; j++) { | |
2979 | 1468 dst[j] = (dst[j] + ((683*(2*src[j] + src[j+1] + 1)) >> 11) + 1) >> 1; |
1319 | 1469 } |
1470 src += stride; | |
1471 dst += stride; | |
1472 } | |
1473 } | |
1474 | |
1475 static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ | |
1476 int i,j; | |
1477 for (i=0; i < height; i++) { | |
1478 for (j=0; j < width; j++) { | |
2979 | 1479 dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+1] + 1)) >> 11) + 1) >> 1; |
1319 | 1480 } |
1481 src += stride; | |
1482 dst += stride; | |
1483 } | |
1484 } | |
2967 | 1485 |
1319 | 1486 static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ |
1487 int i,j; | |
1488 for (i=0; i < height; i++) { | |
1489 for (j=0; j < width; j++) { | |
2979 | 1490 dst[j] = (dst[j] + ((683*(2*src[j] + src[j+stride] + 1)) >> 11) + 1) >> 1; |
1319 | 1491 } |
1492 src += stride; | |
1493 dst += stride; | |
1494 } | |
1495 } | |
2967 | 1496 |
1319 | 1497 static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ |
1498 int i,j; | |
1499 for (i=0; i < height; i++) { | |
1500 for (j=0; j < width; j++) { | |
2979 | 1501 dst[j] = (dst[j] + ((2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15) + 1) >> 1; |
1319 | 1502 } |
1503 src += stride; | |
1504 dst += stride; | |
1505 } | |
1506 } | |
1507 | |
1508 static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ | |
1509 int i,j; | |
1510 for (i=0; i < height; i++) { | |
1511 for (j=0; j < width; j++) { | |
2979 | 1512 dst[j] = (dst[j] + ((2731*(3*src[j] + 2*src[j+1] + 4*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1; |
1319 | 1513 } |
1514 src += stride; | |
1515 dst += stride; | |
1516 } | |
1517 } | |
1518 | |
1519 static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ | |
1520 int i,j; | |
1521 for (i=0; i < height; i++) { | |
1522 for (j=0; j < width; j++) { | |
2979 | 1523 dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+stride] + 1)) >> 11) + 1) >> 1; |
1319 | 1524 } |
1525 src += stride; | |
1526 dst += stride; | |
1527 } | |
1528 } | |
1529 | |
1530 static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ | |
1531 int i,j; | |
1532 for (i=0; i < height; i++) { | |
1533 for (j=0; j < width; j++) { | |
2979 | 1534 dst[j] = (dst[j] + ((2731*(3*src[j] + 4*src[j+1] + 2*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1; |
1319 | 1535 } |
1536 src += stride; | |
1537 dst += stride; | |
1538 } | |
1539 } | |
1540 | |
1541 static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ | |
1542 int i,j; | |
1543 for (i=0; i < height; i++) { | |
1544 for (j=0; j < width; j++) { | |
2979 | 1545 dst[j] = (dst[j] + ((2731*(2*src[j] + 3*src[j+1] + 3*src[j+stride] + 4*src[j+stride+1] + 6)) >> 15) + 1) >> 1; |
1319 | 1546 } |
1547 src += stride; | |
1548 dst += stride; | |
1549 } | |
1550 } | |
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1551 #if 0 |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1552 #define TPEL_WIDTH(width)\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1553 static void put_tpel_pixels ## width ## _mc00_c(uint8_t *dst, const uint8_t *src, int stride, int height){\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1554 void put_tpel_pixels_mc00_c(dst, src, stride, width, height);}\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1555 static void put_tpel_pixels ## width ## _mc10_c(uint8_t *dst, const uint8_t *src, int stride, int height){\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1556 void put_tpel_pixels_mc10_c(dst, src, stride, width, height);}\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1557 static void put_tpel_pixels ## width ## _mc20_c(uint8_t *dst, const uint8_t *src, int stride, int height){\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1558 void put_tpel_pixels_mc20_c(dst, src, stride, width, height);}\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1559 static void put_tpel_pixels ## width ## _mc01_c(uint8_t *dst, const uint8_t *src, int stride, int height){\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1560 void put_tpel_pixels_mc01_c(dst, src, stride, width, height);}\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1561 static void put_tpel_pixels ## width ## _mc11_c(uint8_t *dst, const uint8_t *src, int stride, int height){\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1562 void put_tpel_pixels_mc11_c(dst, src, stride, width, height);}\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1563 static void put_tpel_pixels ## width ## _mc21_c(uint8_t *dst, const uint8_t *src, int stride, int height){\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1564 void put_tpel_pixels_mc21_c(dst, src, stride, width, height);}\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1565 static void put_tpel_pixels ## width ## _mc02_c(uint8_t *dst, const uint8_t *src, int stride, int height){\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1566 void put_tpel_pixels_mc02_c(dst, src, stride, width, height);}\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1567 static void put_tpel_pixels ## width ## _mc12_c(uint8_t *dst, const uint8_t *src, int stride, int height){\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1568 void put_tpel_pixels_mc12_c(dst, src, stride, width, height);}\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1569 static void put_tpel_pixels ## width ## _mc22_c(uint8_t *dst, const uint8_t *src, int stride, int height){\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1570 void put_tpel_pixels_mc22_c(dst, src, stride, width, height);} |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1571 #endif |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1572 |
1168 | 1573 #define H264_CHROMA_MC(OPNAME, OP)\ |
1574 static void OPNAME ## h264_chroma_mc2_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\ | |
1575 const int A=(8-x)*(8-y);\ | |
1576 const int B=( x)*(8-y);\ | |
1577 const int C=(8-x)*( y);\ | |
1578 const int D=( x)*( y);\ | |
1579 int i;\ | |
1580 \ | |
1581 assert(x<8 && y<8 && x>=0 && y>=0);\ | |
1582 \ | |
6052
c90798ac28ee
~15% faster h264_chroma_mc2/4_c() these also prevent some possible out
michael
parents:
6051
diff
changeset
|
1583 if(D){\ |
6054 | 1584 for(i=0; i<h; i++){\ |
6053 | 1585 OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\ |
1586 OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\ | |
1587 dst+= stride;\ | |
1588 src+= stride;\ | |
1589 }\ | |
6052
c90798ac28ee
~15% faster h264_chroma_mc2/4_c() these also prevent some possible out
michael
parents:
6051
diff
changeset
|
1590 }else{\ |
c90798ac28ee
~15% faster h264_chroma_mc2/4_c() these also prevent some possible out
michael
parents:
6051
diff
changeset
|
1591 const int E= B+C;\ |
c90798ac28ee
~15% faster h264_chroma_mc2/4_c() these also prevent some possible out
michael
parents:
6051
diff
changeset
|
1592 const int step= C ? stride : 1;\ |
6054 | 1593 for(i=0; i<h; i++){\ |
6052
c90798ac28ee
~15% faster h264_chroma_mc2/4_c() these also prevent some possible out
michael
parents:
6051
diff
changeset
|
1594 OP(dst[0], (A*src[0] + E*src[step+0]));\ |
c90798ac28ee
~15% faster h264_chroma_mc2/4_c() these also prevent some possible out
michael
parents:
6051
diff
changeset
|
1595 OP(dst[1], (A*src[1] + E*src[step+1]));\ |
c90798ac28ee
~15% faster h264_chroma_mc2/4_c() these also prevent some possible out
michael
parents:
6051
diff
changeset
|
1596 dst+= stride;\ |
c90798ac28ee
~15% faster h264_chroma_mc2/4_c() these also prevent some possible out
michael
parents:
6051
diff
changeset
|
1597 src+= stride;\ |
c90798ac28ee
~15% faster h264_chroma_mc2/4_c() these also prevent some possible out
michael
parents:
6051
diff
changeset
|
1598 }\ |
c90798ac28ee
~15% faster h264_chroma_mc2/4_c() these also prevent some possible out
michael
parents:
6051
diff
changeset
|
1599 }\ |
1168 | 1600 }\ |
1601 \ | |
1602 static void OPNAME ## h264_chroma_mc4_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\ | |
1603 const int A=(8-x)*(8-y);\ | |
1604 const int B=( x)*(8-y);\ | |
1605 const int C=(8-x)*( y);\ | |
1606 const int D=( x)*( y);\ | |
1607 int i;\ | |
1608 \ | |
1609 assert(x<8 && y<8 && x>=0 && y>=0);\ | |
1610 \ | |
6052
c90798ac28ee
~15% faster h264_chroma_mc2/4_c() these also prevent some possible out
michael
parents:
6051
diff
changeset
|
1611 if(D){\ |
6054 | 1612 for(i=0; i<h; i++){\ |
6053 | 1613 OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\ |
1614 OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\ | |
1615 OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\ | |
1616 OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\ | |
1617 dst+= stride;\ | |
1618 src+= stride;\ | |
1619 }\ | |
6052
c90798ac28ee
~15% faster h264_chroma_mc2/4_c() these also prevent some possible out
michael
parents:
6051
diff
changeset
|
1620 }else{\ |
c90798ac28ee
~15% faster h264_chroma_mc2/4_c() these also prevent some possible out
michael
parents:
6051
diff
changeset
|
1621 const int E= B+C;\ |
c90798ac28ee
~15% faster h264_chroma_mc2/4_c() these also prevent some possible out
michael
parents:
6051
diff
changeset
|
1622 const int step= C ? stride : 1;\ |
6054 | 1623 for(i=0; i<h; i++){\ |
6052
c90798ac28ee
~15% faster h264_chroma_mc2/4_c() these also prevent some possible out
michael
parents:
6051
diff
changeset
|
1624 OP(dst[0], (A*src[0] + E*src[step+0]));\ |
c90798ac28ee
~15% faster h264_chroma_mc2/4_c() these also prevent some possible out
michael
parents:
6051
diff
changeset
|
1625 OP(dst[1], (A*src[1] + E*src[step+1]));\ |
c90798ac28ee
~15% faster h264_chroma_mc2/4_c() these also prevent some possible out
michael
parents:
6051
diff
changeset
|
1626 OP(dst[2], (A*src[2] + E*src[step+2]));\ |
c90798ac28ee
~15% faster h264_chroma_mc2/4_c() these also prevent some possible out
michael
parents:
6051
diff
changeset
|
1627 OP(dst[3], (A*src[3] + E*src[step+3]));\ |
c90798ac28ee
~15% faster h264_chroma_mc2/4_c() these also prevent some possible out
michael
parents:
6051
diff
changeset
|
1628 dst+= stride;\ |
c90798ac28ee
~15% faster h264_chroma_mc2/4_c() these also prevent some possible out
michael
parents:
6051
diff
changeset
|
1629 src+= stride;\ |
c90798ac28ee
~15% faster h264_chroma_mc2/4_c() these also prevent some possible out
michael
parents:
6051
diff
changeset
|
1630 }\ |
c90798ac28ee
~15% faster h264_chroma_mc2/4_c() these also prevent some possible out
michael
parents:
6051
diff
changeset
|
1631 }\ |
1168 | 1632 }\ |
1633 \ | |
1634 static void OPNAME ## h264_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\ | |
1635 const int A=(8-x)*(8-y);\ | |
1636 const int B=( x)*(8-y);\ | |
1637 const int C=(8-x)*( y);\ | |
1638 const int D=( x)*( y);\ | |
1639 int i;\ | |
1640 \ | |
1641 assert(x<8 && y<8 && x>=0 && y>=0);\ | |
1642 \ | |
6051
1e3b5597505a
30% faster h264_chroma_mc8_c(), this also prevents a possible out of
michael
parents:
6001
diff
changeset
|
1643 if(D){\ |
6054 | 1644 for(i=0; i<h; i++){\ |
6053 | 1645 OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\ |
1646 OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\ | |
1647 OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\ | |
1648 OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\ | |
1649 OP(dst[4], (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5]));\ | |
1650 OP(dst[5], (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6]));\ | |
1651 OP(dst[6], (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7]));\ | |
1652 OP(dst[7], (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8]));\ | |
1653 dst+= stride;\ | |
1654 src+= stride;\ | |
1655 }\ | |
6051
1e3b5597505a
30% faster h264_chroma_mc8_c(), this also prevents a possible out of
michael
parents:
6001
diff
changeset
|
1656 }else{\ |
1e3b5597505a
30% faster h264_chroma_mc8_c(), this also prevents a possible out of
michael
parents:
6001
diff
changeset
|
1657 const int E= B+C;\ |
1e3b5597505a
30% faster h264_chroma_mc8_c(), this also prevents a possible out of
michael
parents:
6001
diff
changeset
|
1658 const int step= C ? stride : 1;\ |
6054 | 1659 for(i=0; i<h; i++){\ |
6051
1e3b5597505a
30% faster h264_chroma_mc8_c(), this also prevents a possible out of
michael
parents:
6001
diff
changeset
|
1660 OP(dst[0], (A*src[0] + E*src[step+0]));\ |
1e3b5597505a
30% faster h264_chroma_mc8_c(), this also prevents a possible out of
michael
parents:
6001
diff
changeset
|
1661 OP(dst[1], (A*src[1] + E*src[step+1]));\ |
1e3b5597505a
30% faster h264_chroma_mc8_c(), this also prevents a possible out of
michael
parents:
6001
diff
changeset
|
1662 OP(dst[2], (A*src[2] + E*src[step+2]));\ |
1e3b5597505a
30% faster h264_chroma_mc8_c(), this also prevents a possible out of
michael
parents:
6001
diff
changeset
|
1663 OP(dst[3], (A*src[3] + E*src[step+3]));\ |
1e3b5597505a
30% faster h264_chroma_mc8_c(), this also prevents a possible out of
michael
parents:
6001
diff
changeset
|
1664 OP(dst[4], (A*src[4] + E*src[step+4]));\ |
1e3b5597505a
30% faster h264_chroma_mc8_c(), this also prevents a possible out of
michael
parents:
6001
diff
changeset
|
1665 OP(dst[5], (A*src[5] + E*src[step+5]));\ |
1e3b5597505a
30% faster h264_chroma_mc8_c(), this also prevents a possible out of
michael
parents:
6001
diff
changeset
|
1666 OP(dst[6], (A*src[6] + E*src[step+6]));\ |
1e3b5597505a
30% faster h264_chroma_mc8_c(), this also prevents a possible out of
michael
parents:
6001
diff
changeset
|
1667 OP(dst[7], (A*src[7] + E*src[step+7]));\ |
1e3b5597505a
30% faster h264_chroma_mc8_c(), this also prevents a possible out of
michael
parents:
6001
diff
changeset
|
1668 dst+= stride;\ |
1e3b5597505a
30% faster h264_chroma_mc8_c(), this also prevents a possible out of
michael
parents:
6001
diff
changeset
|
1669 src+= stride;\ |
1e3b5597505a
30% faster h264_chroma_mc8_c(), this also prevents a possible out of
michael
parents:
6001
diff
changeset
|
1670 }\ |
1e3b5597505a
30% faster h264_chroma_mc8_c(), this also prevents a possible out of
michael
parents:
6001
diff
changeset
|
1671 }\ |
1168 | 1672 } |
1673 | |
1674 #define op_avg(a, b) a = (((a)+(((b) + 32)>>6)+1)>>1) | |
1675 #define op_put(a, b) a = (((b) + 32)>>6) | |
1676 | |
1677 H264_CHROMA_MC(put_ , op_put) | |
1678 H264_CHROMA_MC(avg_ , op_avg) | |
1679 #undef op_avg | |
1680 #undef op_put | |
1681 | |
3663 | 1682 static void put_no_rnd_h264_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){ |
1683 const int A=(8-x)*(8-y); | |
1684 const int B=( x)*(8-y); | |
1685 const int C=(8-x)*( y); | |
1686 const int D=( x)*( y); | |
1687 int i; | |
1688 | |
1689 assert(x<8 && y<8 && x>=0 && y>=0); | |
1690 | |
1691 for(i=0; i<h; i++) | |
1692 { | |
1693 dst[0] = (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1] + 32 - 4) >> 6; | |
1694 dst[1] = (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2] + 32 - 4) >> 6; | |
1695 dst[2] = (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3] + 32 - 4) >> 6; | |
1696 dst[3] = (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4] + 32 - 4) >> 6; | |
1697 dst[4] = (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5] + 32 - 4) >> 6; | |
1698 dst[5] = (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6] + 32 - 4) >> 6; | |
1699 dst[6] = (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7] + 32 - 4) >> 6; | |
1700 dst[7] = (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8] + 32 - 4) >> 6; | |
1701 dst+= stride; | |
1702 src+= stride; | |
1703 } | |
1704 } | |
1705 | |
/**
 * QPEL_MC(r, OPNAME, RND, OP) expands to a family of MPEG-4 qpel helper
 * functions whose names start with OPNAME:
 *
 *  - OPNAME##mpeg4_qpel{8,16}_{h,v}_lowpass: for each output sample, four
 *    pairs of input samples are combined with weights 20, -6, 3 and -1 and
 *    the sum is handed to OP(). Only src[0..N] (N = 8 or 16) is read per
 *    row/column; for the first and last outputs the taps that would fall
 *    outside that range reuse in-range samples instead (src[0], src[N], ...).
 *    The h variants loop over h rows, the v variants over a fixed width w.
 *  - static OPNAME##qpel{8,16}_mcXY_c(dst, src, stride): built from the
 *    lowpass helpers above, copy_block9/copy_block17 into a local buffer
 *    with row stride 16 resp. 24, and the pixels{8,16}_l2 routines.
 *  - non-static ff_##OPNAME##qpel{8,16}_mc{11,31,13,33,12,32}_old_c: the
 *    same positions computed via separate halfH/halfV/halfHV buffers
 *    (pixels{8,16}_l4 for 11/31/13/33, pixels{8,16}_l2 for 12/32).
 *
 * Macro parameters:
 *   r      - not referenced anywhere in the macro body.
 *   OPNAME - prefix of every generated function name and of the
 *            pixelsN_c / pixelsN_l2 / pixelsN_l4 routines used for the
 *            final store into dst.
 *   RND    - pasted between "put" and a helper name (put ## RND ## ...) to
 *            pick the put variant that fills the intermediate buffers.
 *   OP     - OP(dst, sum) store operator used by the lowpass functions; it
 *            is expanded where the local `cm` (ff_cropTbl + MAX_NEG_CROP)
 *            is in scope.
 *
 * NOTE(review): X and Y in mcXY look like the horizontal and vertical
 * quarter-sample phase (0..3) -- confirm against the function tables that
 * reference these names.
 */
651 | 1706 #define QPEL_MC(r, OPNAME, RND, OP) \ |
1064 | 1707 static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\ |
4176 | 1708 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ |
651 | 1709 int i;\ |
1710 for(i=0; i<h; i++)\ | |
1711 {\ | |
1712 OP(dst[0], (src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]));\ | |
1713 OP(dst[1], (src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]));\ | |
1714 OP(dst[2], (src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]));\ | |
1715 OP(dst[3], (src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]));\ | |
1716 OP(dst[4], (src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]));\ | |
1717 OP(dst[5], (src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]));\ | |
1718 OP(dst[6], (src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]));\ | |
1719 OP(dst[7], (src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]));\ | |
1720 dst+=dstStride;\ | |
1721 src+=srcStride;\ | |
1722 }\ | |
1723 }\ | |
1724 \ | |
1064 | 1725 static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ |
984 | 1726 const int w=8;\ |
4176 | 1727 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ |
651 | 1728 int i;\ |
1729 for(i=0; i<w; i++)\ | |
1730 {\ | |
1731 const int src0= src[0*srcStride];\ | |
1732 const int src1= src[1*srcStride];\ | |
1733 const int src2= src[2*srcStride];\ | |
1734 const int src3= src[3*srcStride];\ | |
1735 const int src4= src[4*srcStride];\ | |
1736 const int src5= src[5*srcStride];\ | |
1737 const int src6= src[6*srcStride];\ | |
1738 const int src7= src[7*srcStride];\ | |
1739 const int src8= src[8*srcStride];\ | |
1740 OP(dst[0*dstStride], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\ | |
1741 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\ | |
1742 OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\ | |
1743 OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\ | |
1744 OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\ | |
1745 OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\ | |
1746 OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\ | |
1747 OP(dst[7*dstStride], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\ | |
1748 dst++;\ | |
1749 src++;\ | |
1750 }\ | |
1751 }\ | |
1752 \ | |
1064 | 1753 static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\ |
4176 | 1754 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ |
651 | 1755 int i;\ |
954 | 1756 \ |
651 | 1757 for(i=0; i<h; i++)\ |
1758 {\ | |
1759 OP(dst[ 0], (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]));\ | |
1760 OP(dst[ 1], (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]));\ | |
1761 OP(dst[ 2], (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]));\ | |
1762 OP(dst[ 3], (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]));\ | |
1763 OP(dst[ 4], (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]));\ | |
1764 OP(dst[ 5], (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 9]));\ | |
1765 OP(dst[ 6], (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 9])*3 - (src[ 3]+src[10]));\ | |
1766 OP(dst[ 7], (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 9])*6 + (src[ 5]+src[10])*3 - (src[ 4]+src[11]));\ | |
1767 OP(dst[ 8], (src[ 8]+src[ 9])*20 - (src[ 7]+src[10])*6 + (src[ 6]+src[11])*3 - (src[ 5]+src[12]));\ | |
1768 OP(dst[ 9], (src[ 9]+src[10])*20 - (src[ 8]+src[11])*6 + (src[ 7]+src[12])*3 - (src[ 6]+src[13]));\ | |
1769 OP(dst[10], (src[10]+src[11])*20 - (src[ 9]+src[12])*6 + (src[ 8]+src[13])*3 - (src[ 7]+src[14]));\ | |
1770 OP(dst[11], (src[11]+src[12])*20 - (src[10]+src[13])*6 + (src[ 9]+src[14])*3 - (src[ 8]+src[15]));\ | |
1771 OP(dst[12], (src[12]+src[13])*20 - (src[11]+src[14])*6 + (src[10]+src[15])*3 - (src[ 9]+src[16]));\ | |
1772 OP(dst[13], (src[13]+src[14])*20 - (src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]));\ | |
1773 OP(dst[14], (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]));\ | |
1774 OP(dst[15], (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]));\ | |
1775 dst+=dstStride;\ | |
1776 src+=srcStride;\ | |
1777 }\ | |
255 | 1778 }\ |
1779 \ | |
1064 | 1780 static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ |
4176 | 1781 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ |
651 | 1782 int i;\ |
954 | 1783 const int w=16;\ |
651 | 1784 for(i=0; i<w; i++)\ |
1785 {\ | |
1786 const int src0= src[0*srcStride];\ | |
1787 const int src1= src[1*srcStride];\ | |
1788 const int src2= src[2*srcStride];\ | |
1789 const int src3= src[3*srcStride];\ | |
1790 const int src4= src[4*srcStride];\ | |
1791 const int src5= src[5*srcStride];\ | |
1792 const int src6= src[6*srcStride];\ | |
1793 const int src7= src[7*srcStride];\ | |
1794 const int src8= src[8*srcStride];\ | |
1795 const int src9= src[9*srcStride];\ | |
1796 const int src10= src[10*srcStride];\ | |
1797 const int src11= src[11*srcStride];\ | |
1798 const int src12= src[12*srcStride];\ | |
1799 const int src13= src[13*srcStride];\ | |
1800 const int src14= src[14*srcStride];\ | |
1801 const int src15= src[15*srcStride];\ | |
1802 const int src16= src[16*srcStride];\ | |
1803 OP(dst[ 0*dstStride], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\ | |
1804 OP(dst[ 1*dstStride], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\ | |
1805 OP(dst[ 2*dstStride], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\ | |
1806 OP(dst[ 3*dstStride], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\ | |
1807 OP(dst[ 4*dstStride], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\ | |
1808 OP(dst[ 5*dstStride], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\ | |
1809 OP(dst[ 6*dstStride], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\ | |
1810 OP(dst[ 7*dstStride], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\ | |
1811 OP(dst[ 8*dstStride], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\ | |
1812 OP(dst[ 9*dstStride], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\ | |
1813 OP(dst[10*dstStride], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\ | |
1814 OP(dst[11*dstStride], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\ | |
1815 OP(dst[12*dstStride], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\ | |
1816 OP(dst[13*dstStride], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\ | |
1817 OP(dst[14*dstStride], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\ | |
1818 OP(dst[15*dstStride], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\ | |
1819 dst++;\ | |
1820 src++;\ | |
1821 }\ | |
255 | 1822 }\ |
1823 \ | |
1064 | 1824 static void OPNAME ## qpel8_mc00_c (uint8_t *dst, uint8_t *src, int stride){\ |
859 | 1825 OPNAME ## pixels8_c(dst, src, stride, 8);\ |
255 | 1826 }\ |
1827 \ | |
1064 | 1828 static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){\ |
1829 uint8_t half[64];\ | |
651 | 1830 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\ |
1831 OPNAME ## pixels8_l2(dst, src, half, stride, stride, 8, 8);\ | |
1832 }\ | |
1833 \ | |
1064 | 1834 static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){\ |
651 | 1835 OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\ |
255 | 1836 }\ |
1837 \ | |
1064 | 1838 static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){\ |
1839 uint8_t half[64];\ | |
651 | 1840 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\ |
1841 OPNAME ## pixels8_l2(dst, src+1, half, stride, stride, 8, 8);\ | |
1842 }\ | |
1843 \ | |
1064 | 1844 static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride){\ |
1845 uint8_t full[16*9];\ | |
1846 uint8_t half[64];\ | |
651 | 1847 copy_block9(full, src, 16, stride, 9);\ |
984 | 1848 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\ |
651 | 1849 OPNAME ## pixels8_l2(dst, full, half, stride, 16, 8, 8);\ |
1850 }\ | |
1851 \ | |
1064 | 1852 static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){\ |
1853 uint8_t full[16*9];\ | |
651 | 1854 copy_block9(full, src, 16, stride, 9);\ |
984 | 1855 OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\ |
255 | 1856 }\ |
1857 \ | |
1064 | 1858 static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, int stride){\ |
1859 uint8_t full[16*9];\ | |
1860 uint8_t half[64];\ | |
651 | 1861 copy_block9(full, src, 16, stride, 9);\ |
984 | 1862 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\ |
651 | 1863 OPNAME ## pixels8_l2(dst, full+16, half, stride, 16, 8, 8);\ |
1864 }\ | |
1064 | 1865 void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\ |
1866 uint8_t full[16*9];\ | |
1867 uint8_t halfH[72];\ | |
1868 uint8_t halfV[64];\ | |
1869 uint8_t halfHV[64];\ | |
651 | 1870 copy_block9(full, src, 16, stride, 9);\ |
1871 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ | |
984 | 1872 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\ |
1873 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ | |
651 | 1874 OPNAME ## pixels8_l4(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\ |
255 | 1875 }\ |
1064 | 1876 static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride){\ |
1877 uint8_t full[16*9];\ | |
1878 uint8_t halfH[72];\ | |
1879 uint8_t halfHV[64];\ | |
984 | 1880 copy_block9(full, src, 16, stride, 9);\ |
1881 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ | |
1882 put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\ | |
1883 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ | |
1884 OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\ | |
1885 }\ | |
1064 | 1886 void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\ |
1887 uint8_t full[16*9];\ | |
1888 uint8_t halfH[72];\ | |
1889 uint8_t halfV[64];\ | |
1890 uint8_t halfHV[64];\ | |
651 | 1891 copy_block9(full, src, 16, stride, 9);\ |
1892 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ | |
984 | 1893 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\ |
1894 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ | |
651 | 1895 OPNAME ## pixels8_l4(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\ |
255 | 1896 }\ |
1064 | 1897 static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride){\ |
1898 uint8_t full[16*9];\ | |
1899 uint8_t halfH[72];\ | |
1900 uint8_t halfHV[64];\ | |
984 | 1901 copy_block9(full, src, 16, stride, 9);\ |
1902 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ | |
1903 put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\ | |
1904 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ | |
1905 OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\ | |
1906 }\ | |
1064 | 1907 void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\ |
1908 uint8_t full[16*9];\ | |
1909 uint8_t halfH[72];\ | |
1910 uint8_t halfV[64];\ | |
1911 uint8_t halfHV[64];\ | |
651 | 1912 copy_block9(full, src, 16, stride, 9);\ |
1913 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ | |
984 | 1914 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\ |
1915 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ | |
651 | 1916 OPNAME ## pixels8_l4(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\ |
1917 }\ | |
1064 | 1918 static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride){\ |
1919 uint8_t full[16*9];\ | |
1920 uint8_t halfH[72];\ | |
1921 uint8_t halfHV[64];\ | |
984 | 1922 copy_block9(full, src, 16, stride, 9);\ |
1923 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ | |
1924 put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\ | |
1925 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ | |
1926 OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\ | |
1927 }\ | |
1064 | 1928 void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\ |
1929 uint8_t full[16*9];\ | |
1930 uint8_t halfH[72];\ | |
1931 uint8_t halfV[64];\ | |
1932 uint8_t halfHV[64];\ | |
651 | 1933 copy_block9(full, src, 16, stride, 9);\ |
1934 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full , 8, 16, 9);\ | |
984 | 1935 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\ |
1936 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ | |
651 | 1937 OPNAME ## pixels8_l4(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\ |
255 | 1938 }\ |
1064 | 1939 static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){\ |
1940 uint8_t full[16*9];\ | |
1941 uint8_t halfH[72];\ | |
1942 uint8_t halfHV[64];\ | |
984 | 1943 copy_block9(full, src, 16, stride, 9);\ |
1944 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ | |
1945 put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\ | |
1946 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ | |
1947 OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\ | |
1948 }\ | |
1064 | 1949 static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, int stride){\ |
1950 uint8_t halfH[72];\ | |
1951 uint8_t halfHV[64];\ | |
651 | 1952 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\ |
984 | 1953 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ |
651 | 1954 OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\ |
1955 }\ | |
1064 | 1956 static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, int stride){\ |
1957 uint8_t halfH[72];\ | |
1958 uint8_t halfHV[64];\ | |
651 | 1959 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\ |
984 | 1960 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ |
651 | 1961 OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\ |
1962 }\ | |
1064 | 1963 void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\ |
1964 uint8_t full[16*9];\ | |
1965 uint8_t halfH[72];\ | |
1966 uint8_t halfV[64];\ | |
1967 uint8_t halfHV[64];\ | |
651 | 1968 copy_block9(full, src, 16, stride, 9);\ |
1969 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ | |
984 | 1970 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\ |
1971 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ | |
651 | 1972 OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\ |
255 | 1973 }\ |
1064 | 1974 static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){\ |
1975 uint8_t full[16*9];\ | |
1976 uint8_t halfH[72];\ | |
984 | 1977 copy_block9(full, src, 16, stride, 9);\ |
1978 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ | |
1979 put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\ | |
1980 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\ | |
1981 }\ | |
1064 | 1982 void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\ |
1983 uint8_t full[16*9];\ | |
1984 uint8_t halfH[72];\ | |
1985 uint8_t halfV[64];\ | |
1986 uint8_t halfHV[64];\ | |
651 | 1987 copy_block9(full, src, 16, stride, 9);\ |
1988 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ | |
984 | 1989 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\ |
1990 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ | |
651 | 1991 OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\ |
1992 }\ | |
1064 | 1993 static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){\ |
1994 uint8_t full[16*9];\ | |
1995 uint8_t halfH[72];\ | |
984 | 1996 copy_block9(full, src, 16, stride, 9);\ |
1997 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ | |
1998 put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\ | |
1999 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\ | |
2000 }\ | |
1064 | 2001 static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){\ |
2002 uint8_t halfH[72];\ | |
651 | 2003 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\ |
984 | 2004 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\ |
651 | 2005 }\ |
1064 | 2006 static void OPNAME ## qpel16_mc00_c (uint8_t *dst, uint8_t *src, int stride){\ |
859 | 2007 OPNAME ## pixels16_c(dst, src, stride, 16);\ |
255 | 2008 }\ |
651 | 2009 \ |
1064 | 2010 static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, int stride){\ |
2011 uint8_t half[256];\ | |
651 | 2012 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\ |
2013 OPNAME ## pixels16_l2(dst, src, half, stride, stride, 16, 16);\ | |
2014 }\ | |
2015 \ | |
1064 | 2016 static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride){\ |
651 | 2017 OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\ |
2018 }\ | |
2019 \ | |
1064 | 2020 static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, int stride){\ |
2021 uint8_t half[256];\ | |
651 | 2022 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\ |
2023 OPNAME ## pixels16_l2(dst, src+1, half, stride, stride, 16, 16);\ | |
2024 }\ | |
2025 \ | |
1064 | 2026 static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride){\ |
2027 uint8_t full[24*17];\ | |
2028 uint8_t half[256];\ | |
651 | 2029 copy_block17(full, src, 24, stride, 17);\ |
954 | 2030 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\ |
651 | 2031 OPNAME ## pixels16_l2(dst, full, half, stride, 24, 16, 16);\ |
255 | 2032 }\ |
651 | 2033 \ |
1064 | 2034 static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, int stride){\ |
2035 uint8_t full[24*17];\ | |
651 | 2036 copy_block17(full, src, 24, stride, 17);\ |
954 | 2037 OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\ |
651 | 2038 }\ |
2039 \ | |
1064 | 2040 static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, int stride){\ |
2041 uint8_t full[24*17];\ | |
2042 uint8_t half[256];\ | |
651 | 2043 copy_block17(full, src, 24, stride, 17);\ |
954 | 2044 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\ |
651 | 2045 OPNAME ## pixels16_l2(dst, full+24, half, stride, 24, 16, 16);\ |
255 | 2046 }\ |
1064 | 2047 void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\ |
2048 uint8_t full[24*17];\ | |
2049 uint8_t halfH[272];\ | |
2050 uint8_t halfV[256];\ | |
2051 uint8_t halfHV[256];\ | |
651 | 2052 copy_block17(full, src, 24, stride, 17);\ |
2053 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ | |
954 | 2054 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\ |
2055 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ | |
651 | 2056 OPNAME ## pixels16_l4(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\ |
2057 }\ | |
1064 | 2058 static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride){\ |
2059 uint8_t full[24*17];\ | |
2060 uint8_t halfH[272];\ | |
2061 uint8_t halfHV[256];\ | |
984 | 2062 copy_block17(full, src, 24, stride, 17);\ |
2063 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ | |
2064 put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\ | |
2065 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ | |
2066 OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\ | |
2067 }\ | |
1064 | 2068 void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\ |
2069 uint8_t full[24*17];\ | |
2070 uint8_t halfH[272];\ | |
2071 uint8_t halfV[256];\ | |
2072 uint8_t halfHV[256];\ | |
651 | 2073 copy_block17(full, src, 24, stride, 17);\ |
2074 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ | |
954 | 2075 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\ |
2076 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ | |
651 | 2077 OPNAME ## pixels16_l4(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\ |
2078 }\ | |
1064 | 2079 static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride){\ |
2080 uint8_t full[24*17];\ | |
2081 uint8_t halfH[272];\ | |
2082 uint8_t halfHV[256];\ | |
984 | 2083 copy_block17(full, src, 24, stride, 17);\ |
2084 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ | |
2085 put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\ | |
2086 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ | |
2087 OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\ | |
2088 }\ | |
1064 | 2089 void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\ |
2090 uint8_t full[24*17];\ | |
2091 uint8_t halfH[272];\ | |
2092 uint8_t halfV[256];\ | |
2093 uint8_t halfHV[256];\ | |
651 | 2094 copy_block17(full, src, 24, stride, 17);\ |
2095 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ | |
954 | 2096 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\ |
2097 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ | |
651 | 2098 OPNAME ## pixels16_l4(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\ |
255 | 2099 }\ |
1064 | 2100 static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride){\ |
2101 uint8_t full[24*17];\ | |
2102 uint8_t halfH[272];\ | |
2103 uint8_t halfHV[256];\ | |
984 | 2104 copy_block17(full, src, 24, stride, 17);\ |
2105 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ | |
2106 put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\ | |
2107 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ | |
2108 OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\ | |
2109 }\ | |
1064 | 2110 void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\ |
2111 uint8_t full[24*17];\ | |
2112 uint8_t halfH[272];\ | |
2113 uint8_t halfV[256];\ | |
2114 uint8_t halfHV[256];\ | |
651 | 2115 copy_block17(full, src, 24, stride, 17);\ |
2116 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full , 16, 24, 17);\ | |
954 | 2117 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\ |
2118 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ | |
651 | 2119 OPNAME ## pixels16_l4(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\ |
2120 }\ | |
1064 | 2121 static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){\ |
2122 uint8_t full[24*17];\ | |
2123 uint8_t halfH[272];\ | |
2124 uint8_t halfHV[256];\ | |
984 | 2125 copy_block17(full, src, 24, stride, 17);\ |
2126 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ | |
2127 put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\ | |
2128 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ | |
2129 OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\ | |
2130 }\ | |
1064 | 2131 static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, int stride){\ |
2132 uint8_t halfH[272];\ | |
2133 uint8_t halfHV[256];\ | |
651 | 2134 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\ |
954 | 2135 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ |
651 | 2136 OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\ |
255 | 2137 }\ |
1064 | 2138 static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, int stride){\ |
2139 uint8_t halfH[272];\ | |
2140 uint8_t halfHV[256];\ | |
651 | 2141 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\ |
954 | 2142 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ |
651 | 2143 OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\ |
2144 }\ | |
1064 | 2145 void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\ |
2146 uint8_t full[24*17];\ | |
2147 uint8_t halfH[272];\ | |
2148 uint8_t halfV[256];\ | |
2149 uint8_t halfHV[256];\ | |
651 | 2150 copy_block17(full, src, 24, stride, 17);\ |
2151 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ | |
954 | 2152 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\ |
2153 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ | |
651 | 2154 OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\ |
255 | 2155 }\ |
1064 | 2156 static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, int stride){\ |
2157 uint8_t full[24*17];\ | |
2158 uint8_t halfH[272];\ | |
984 | 2159 copy_block17(full, src, 24, stride, 17);\ |
2160 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ | |
2161 put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\ | |
2162 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\ | |
2163 }\ | |
1064 | 2164 void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\ |
2165 uint8_t full[24*17];\ | |
2166 uint8_t halfH[272];\ | |
2167 uint8_t halfV[256];\ | |
2168 uint8_t halfHV[256];\ | |
651 | 2169 copy_block17(full, src, 24, stride, 17);\ |
2170 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ | |
954 | 2171 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\ |
2172 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ | |
651 | 2173 OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\ |
2174 }\ | |
1064 | 2175 static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, int stride){\ |
2176 uint8_t full[24*17];\ | |
2177 uint8_t halfH[272];\ | |
984 | 2178 copy_block17(full, src, 24, stride, 17);\ |
2179 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ | |
2180 put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\ | |
2181 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\ | |
2182 }\ | |
1064 | 2183 static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, int stride){\ |
2184 uint8_t halfH[272];\ | |
651 | 2185 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\ |
954 | 2186 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\ |
859 | 2187 } |
255 | 2188 |
/*
 * Store operators passed to QPEL_MC as its OP argument. b is the raw
 * lowpass sum; it is shifted right by 5 after adding 16 (round to nearest)
 * or 15 (the "no_rnd" variants), and the result indexes cm[], the local
 * pointer (ff_cropTbl + MAX_NEG_CROP) declared in every generated lowpass
 * function. NOTE(review): cm[] presumably clamps to the 8-bit range --
 * confirm against the ff_cropTbl initialisation.
 *   op_put / op_put_no_rnd: store the table value into a.
 *   op_avg / op_avg_no_rnd: average the table value with the current value
 *                           of a (with / without the +1 bias).
 * op_avg_no_rnd is not used by any active instantiation below; the only
 * avg_no_rnd line is commented out.
 *
 * In the instantiations, the RND argument "_" makes the intermediate
 * helpers resolve to put_mpeg4_qpel*, and "_no_rnd_" to
 * put_no_rnd_mpeg4_qpel*.
 */
651 | 2189 #define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1) |
2190 #define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1) | |
2191 #define op_put(a, b) a = cm[((b) + 16)>>5] | |
2192 #define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5] | |
2193 | |
2194 QPEL_MC(0, put_ , _ , op_put) | |
2195 QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd) | |
2196 QPEL_MC(0, avg_ , _ , op_avg) | |
2197 //QPEL_MC(1, avg_no_rnd , _ , op_avg) | |
2198 #undef op_avg | |
2199 #undef op_avg_no_rnd | |
2200 #undef op_put | |
2201 #undef op_put_no_rnd | |
255 | 2202 |
1168 | 2203 #if 1 |
2204 #define H264_LOWPASS(OPNAME, OP, OP2) \ | |
5151 | 2205 static av_unused void OPNAME ## h264_qpel2_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ |
3020
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2206 const int h=2;\ |
4176 | 2207 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ |
3020
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2208 int i;\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2209 for(i=0; i<h; i++)\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2210 {\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2211 OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2212 OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2213 dst+=dstStride;\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2214 src+=srcStride;\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2215 }\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2216 }\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2217 \ |
5151 | 2218 static av_unused void OPNAME ## h264_qpel2_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ |
3020
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2219 const int w=2;\ |
4176 | 2220 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ |
3020
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2221 int i;\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2222 for(i=0; i<w; i++)\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2223 {\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2224 const int srcB= src[-2*srcStride];\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2225 const int srcA= src[-1*srcStride];\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2226 const int src0= src[0 *srcStride];\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2227 const int src1= src[1 *srcStride];\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2228 const int src2= src[2 *srcStride];\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2229 const int src3= src[3 *srcStride];\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2230 const int src4= src[4 *srcStride];\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2231 OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2232 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2233 dst++;\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2234 src++;\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2235 }\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2236 }\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2237 \ |
5151 | 2238 static av_unused void OPNAME ## h264_qpel2_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\ |
3020
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2239 const int h=2;\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2240 const int w=2;\ |
4176 | 2241 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ |
3020
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2242 int i;\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2243 src -= 2*srcStride;\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2244 for(i=0; i<h+5; i++)\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2245 {\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2246 tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]);\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2247 tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]);\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2248 tmp+=tmpStride;\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2249 src+=srcStride;\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2250 }\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2251 tmp -= tmpStride*(h+5-2);\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2252 for(i=0; i<w; i++)\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2253 {\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2254 const int tmpB= tmp[-2*tmpStride];\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2255 const int tmpA= tmp[-1*tmpStride];\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2256 const int tmp0= tmp[0 *tmpStride];\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2257 const int tmp1= tmp[1 *tmpStride];\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2258 const int tmp2= tmp[2 *tmpStride];\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2259 const int tmp3= tmp[3 *tmpStride];\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2260 const int tmp4= tmp[4 *tmpStride];\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2261 OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2262 OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2263 dst++;\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2264 tmp++;\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2265 }\ |
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
2266 }\ |
1168 | 2267 static void OPNAME ## h264_qpel4_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ |
2268 const int h=4;\ | |
4176 | 2269 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ |
1168 | 2270 int i;\ |
2271 for(i=0; i<h; i++)\ | |
2272 {\ | |
2273 OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\ | |
2274 OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\ | |
2275 OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]));\ | |
2276 OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]));\ | |
2277 dst+=dstStride;\ | |
2278 src+=srcStride;\ | |
2279 }\ | |
2280 }\ | |
2281 \ | |
2282 static void OPNAME ## h264_qpel4_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ | |
2283 const int w=4;\ | |
4176 | 2284 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ |
1168 | 2285 int i;\ |
2286 for(i=0; i<w; i++)\ | |
2287 {\ | |
2288 const int srcB= src[-2*srcStride];\ | |
2289 const int srcA= src[-1*srcStride];\ | |
2290 const int src0= src[0 *srcStride];\ | |
2291 const int src1= src[1 *srcStride];\ | |
2292 const int src2= src[2 *srcStride];\ | |
2293 const int src3= src[3 *srcStride];\ | |
2294 const int src4= src[4 *srcStride];\ | |
2295 const int src5= src[5 *srcStride];\ | |
2296 const int src6= src[6 *srcStride];\ | |
2297 OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\ | |
2298 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\ | |
2299 OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\ | |
2300 OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\ | |
2301 dst++;\ | |
2302 src++;\ | |
2303 }\ | |
2304 }\ | |
2305 \ | |
2306 static void OPNAME ## h264_qpel4_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\ | |
2307 const int h=4;\ | |
2308 const int w=4;\ | |
4176 | 2309 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ |
1168 | 2310 int i;\ |
2311 src -= 2*srcStride;\ | |
2312 for(i=0; i<h+5; i++)\ | |
2313 {\ | |
2314 tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]);\ | |
2315 tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]);\ | |
2316 tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]);\ | |
2317 tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]);\ | |
2318 tmp+=tmpStride;\ | |
2319 src+=srcStride;\ | |
2320 }\ | |
2321 tmp -= tmpStride*(h+5-2);\ | |
2322 for(i=0; i<w; i++)\ | |
2323 {\ | |
2324 const int tmpB= tmp[-2*tmpStride];\ | |
2325 const int tmpA= tmp[-1*tmpStride];\ | |
2326 const int tmp0= tmp[0 *tmpStride];\ | |
2327 const int tmp1= tmp[1 *tmpStride];\ | |
2328 const int tmp2= tmp[2 *tmpStride];\ | |
2329 const int tmp3= tmp[3 *tmpStride];\ | |
2330 const int tmp4= tmp[4 *tmpStride];\ | |
2331 const int tmp5= tmp[5 *tmpStride];\ | |
2332 const int tmp6= tmp[6 *tmpStride];\ | |
2333 OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\ | |
2334 OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\ | |
2335 OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\ | |
2336 OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\ | |
2337 dst++;\ | |
2338 tmp++;\ | |
2339 }\ | |
2340 }\ | |
2341 \ | |
2342 static void OPNAME ## h264_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ | |
2343 const int h=8;\ | |
4176 | 2344 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ |
1168 | 2345 int i;\ |
2346 for(i=0; i<h; i++)\ | |
2347 {\ | |
2348 OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]));\ | |
2349 OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]));\ | |
2350 OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]));\ | |
2351 OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]));\ | |
2352 OP(dst[4], (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]));\ | |
2353 OP(dst[5], (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]));\ | |
2354 OP(dst[6], (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]));\ | |
2355 OP(dst[7], (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]));\ | |
2356 dst+=dstStride;\ | |
2357 src+=srcStride;\ | |
2358 }\ | |
2359 }\ | |
2360 \ | |
2361 static void OPNAME ## h264_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ | |
2362 const int w=8;\ | |
4176 | 2363 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ |
1168 | 2364 int i;\ |
2365 for(i=0; i<w; i++)\ | |
2366 {\ | |
2367 const int srcB= src[-2*srcStride];\ | |
2368 const int srcA= src[-1*srcStride];\ | |
2369 const int src0= src[0 *srcStride];\ | |
2370 const int src1= src[1 *srcStride];\ | |
2371 const int src2= src[2 *srcStride];\ | |
2372 const int src3= src[3 *srcStride];\ | |
2373 const int src4= src[4 *srcStride];\ | |
2374 const int src5= src[5 *srcStride];\ | |
2375 const int src6= src[6 *srcStride];\ | |
2376 const int src7= src[7 *srcStride];\ | |
2377 const int src8= src[8 *srcStride];\ | |
2378 const int src9= src[9 *srcStride];\ | |
2379 const int src10=src[10*srcStride];\ | |
2380 OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\ | |
2381 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\ | |
2382 OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\ | |
2383 OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\ | |
2384 OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*5 + (src2+src7));\ | |
2385 OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*5 + (src3+src8));\ | |
2386 OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*5 + (src4+src9));\ | |
2387 OP(dst[7*dstStride], (src7+src8)*20 - (src6+src9)*5 + (src5+src10));\ | |
2388 dst++;\ | |
2389 src++;\ | |
2390 }\ | |
2391 }\ | |
2392 \ | |
2393 static void OPNAME ## h264_qpel8_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\ | |
2394 const int h=8;\ | |
2395 const int w=8;\ | |
4176 | 2396 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ |
1168 | 2397 int i;\ |
2398 src -= 2*srcStride;\ | |
2399 for(i=0; i<h+5; i++)\ | |
2400 {\ | |
2401 tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]);\ | |
2402 tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]);\ | |
2403 tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]);\ | |
2404 tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]);\ | |
2405 tmp[4]= (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]);\ | |
2406 tmp[5]= (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]);\ | |
2407 tmp[6]= (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]);\ | |
2408 tmp[7]= (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]);\ | |
2409 tmp+=tmpStride;\ | |
2410 src+=srcStride;\ | |
2411 }\ | |
2412 tmp -= tmpStride*(h+5-2);\ | |
2413 for(i=0; i<w; i++)\ | |
2414 {\ | |
2415 const int tmpB= tmp[-2*tmpStride];\ | |
2416 const int tmpA= tmp[-1*tmpStride];\ | |
2417 const int tmp0= tmp[0 *tmpStride];\ | |
2418 const int tmp1= tmp[1 *tmpStride];\ | |
2419 const int tmp2= tmp[2 *tmpStride];\ | |
2420 const int tmp3= tmp[3 *tmpStride];\ | |
2421 const int tmp4= tmp[4 *tmpStride];\ | |
2422 const int tmp5= tmp[5 *tmpStride];\ | |
2423 const int tmp6= tmp[6 *tmpStride];\ | |
2424 const int tmp7= tmp[7 *tmpStride];\ | |
2425 const int tmp8= tmp[8 *tmpStride];\ | |
2426 const int tmp9= tmp[9 *tmpStride];\ | |
2427 const int tmp10=tmp[10*tmpStride];\ | |
2428 OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\ | |
2429 OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\ | |
2430 OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\ | |
2431 OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\ | |
2432 OP2(dst[4*dstStride], (tmp4+tmp5)*20 - (tmp3+tmp6)*5 + (tmp2+tmp7));\ | |
2433 OP2(dst[5*dstStride], (tmp5+tmp6)*20 - (tmp4+tmp7)*5 + (tmp3+tmp8));\ | |
2434 OP2(dst[6*dstStride], (tmp6+tmp7)*20 - (tmp5+tmp8)*5 + (tmp4+tmp9));\ | |
2435 OP2(dst[7*dstStride], (tmp7+tmp8)*20 - (tmp6+tmp9)*5 + (tmp5+tmp10));\ | |
2436 dst++;\ | |
2437 tmp++;\ | |
2438 }\ | |
2439 }\ | |
2440 \ | |
2441 static void OPNAME ## h264_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ | |
2442 OPNAME ## h264_qpel8_v_lowpass(dst , src , dstStride, srcStride);\ | |
2443 OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\ | |
2444 src += 8*srcStride;\ | |
2445 dst += 8*dstStride;\ | |
2446 OPNAME ## h264_qpel8_v_lowpass(dst , src , dstStride, srcStride);\ | |
2447 OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\ | |
2448 }\ | |
2449 \ | |
2450 static void OPNAME ## h264_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ | |
2451 OPNAME ## h264_qpel8_h_lowpass(dst , src , dstStride, srcStride);\ | |
2452 OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\ | |
2453 src += 8*srcStride;\ | |
2454 dst += 8*dstStride;\ | |
2455 OPNAME ## h264_qpel8_h_lowpass(dst , src , dstStride, srcStride);\ | |
2456 OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\ | |
2457 }\ | |
2458 \ | |
2459 static void OPNAME ## h264_qpel16_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\ | |
2460 OPNAME ## h264_qpel8_hv_lowpass(dst , tmp , src , dstStride, tmpStride, srcStride);\ | |
2461 OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\ | |
2462 src += 8*srcStride;\ | |
2463 dst += 8*dstStride;\ | |
2464 OPNAME ## h264_qpel8_hv_lowpass(dst , tmp , src , dstStride, tmpStride, srcStride);\ | |
2465 OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\ | |
2466 }\ | |
2467 | |
/*
 * H264_MC(OPNAME, SIZE): generates the sixteen static functions
 * OPNAME##h264_qpel##SIZE##_mcXY_c(dst, src, stride) for X,Y in 0..3.
 * (Presumably X is the horizontal and Y the vertical quarter-sample
 * offset -- confirm against the H.264 interpolation tables.)
 *
 *  - mc00 forwards to OPNAME##pixels##SIZE##_c.
 *  - mc20 / mc02 / mc22 write the h / v / hv lowpass result straight to dst
 *    using the OPNAME variant of the filter.
 *  - every other case builds one or two SIZE-wide intermediate planes with
 *    the put_ lowpass filters and hands them (or src / src+1 / the copied
 *    block) to OPNAME##pixels##SIZE##_l2, which is defined elsewhere.
 *
 * Vertical cases first copy SIZE+5 rows starting two rows above src into
 * `full` (row pitch SIZE); full_mid = full + SIZE*2 then points at the row
 * corresponding to src. The "+ 1" variants copy from one column to the right.
 * The hv filter needs an int16_t scratch buffer of SIZE*(SIZE+5) entries.
 */
2468 #define H264_MC(OPNAME, SIZE) \ | |
2469 static void OPNAME ## h264_qpel ## SIZE ## _mc00_c (uint8_t *dst, uint8_t *src, int stride){\ | |
2470 OPNAME ## pixels ## SIZE ## _c(dst, src, stride, SIZE);\ | |
2471 }\ | |
2472 \ | |
2473 static void OPNAME ## h264_qpel ## SIZE ## _mc10_c(uint8_t *dst, uint8_t *src, int stride){\ | |
2474 uint8_t half[SIZE*SIZE];\ | |
2475 put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\ | |
2476 OPNAME ## pixels ## SIZE ## _l2(dst, src, half, stride, stride, SIZE, SIZE);\ | |
2477 }\ | |
2478 \ | |
2479 static void OPNAME ## h264_qpel ## SIZE ## _mc20_c(uint8_t *dst, uint8_t *src, int stride){\ | |
2480 OPNAME ## h264_qpel ## SIZE ## _h_lowpass(dst, src, stride, stride);\ | |
2481 }\ | |
2482 \ | |
2483 static void OPNAME ## h264_qpel ## SIZE ## _mc30_c(uint8_t *dst, uint8_t *src, int stride){\ | |
2484 uint8_t half[SIZE*SIZE];\ | |
2485 put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\ | |
2486 OPNAME ## pixels ## SIZE ## _l2(dst, src+1, half, stride, stride, SIZE, SIZE);\ | |
2487 }\ | |
2488 \ | |
2489 static void OPNAME ## h264_qpel ## SIZE ## _mc01_c(uint8_t *dst, uint8_t *src, int stride){\ | |
2490 uint8_t full[SIZE*(SIZE+5)];\ | |
2491 uint8_t * const full_mid= full + SIZE*2;\ | |
2492 uint8_t half[SIZE*SIZE];\ | |
2493 copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\ | |
2494 put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\ | |
2495 OPNAME ## pixels ## SIZE ## _l2(dst, full_mid, half, stride, SIZE, SIZE, SIZE);\ | |
2496 }\ | |
2497 \ | |
2498 static void OPNAME ## h264_qpel ## SIZE ## _mc02_c(uint8_t *dst, uint8_t *src, int stride){\ | |
2499 uint8_t full[SIZE*(SIZE+5)];\ | |
2500 uint8_t * const full_mid= full + SIZE*2;\ | |
2501 copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\ | |
2502 OPNAME ## h264_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE);\ | |
2503 }\ | |
2504 \ | |
2505 static void OPNAME ## h264_qpel ## SIZE ## _mc03_c(uint8_t *dst, uint8_t *src, int stride){\ | |
2506 uint8_t full[SIZE*(SIZE+5)];\ | |
2507 uint8_t * const full_mid= full + SIZE*2;\ | |
2508 uint8_t half[SIZE*SIZE];\ | |
2509 copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\ | |
2510 put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\ | |
2511 OPNAME ## pixels ## SIZE ## _l2(dst, full_mid+SIZE, half, stride, SIZE, SIZE, SIZE);\ | |
2512 }\ | |
2513 \ | |
2514 static void OPNAME ## h264_qpel ## SIZE ## _mc11_c(uint8_t *dst, uint8_t *src, int stride){\ | |
2515 uint8_t full[SIZE*(SIZE+5)];\ | |
2516 uint8_t * const full_mid= full + SIZE*2;\ | |
2517 uint8_t halfH[SIZE*SIZE];\ | |
2518 uint8_t halfV[SIZE*SIZE];\ | |
2519 put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\ | |
2520 copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\ | |
2521 put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\ | |
2522 OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\ | |
2523 }\ | |
2524 \ | |
2525 static void OPNAME ## h264_qpel ## SIZE ## _mc31_c(uint8_t *dst, uint8_t *src, int stride){\ | |
2526 uint8_t full[SIZE*(SIZE+5)];\ | |
2527 uint8_t * const full_mid= full + SIZE*2;\ | |
2528 uint8_t halfH[SIZE*SIZE];\ | |
2529 uint8_t halfV[SIZE*SIZE];\ | |
2530 put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\ | |
2531 copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\ | |
2532 put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\ | |
2533 OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\ | |
2534 }\ | |
2535 \ | |
2536 static void OPNAME ## h264_qpel ## SIZE ## _mc13_c(uint8_t *dst, uint8_t *src, int stride){\ | |
2537 uint8_t full[SIZE*(SIZE+5)];\ | |
2538 uint8_t * const full_mid= full + SIZE*2;\ | |
2539 uint8_t halfH[SIZE*SIZE];\ | |
2540 uint8_t halfV[SIZE*SIZE];\ | |
2541 put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\ | |
2542 copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\ | |
2543 put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\ | |
2544 OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\ | |
2545 }\ | |
2546 \ | |
2547 static void OPNAME ## h264_qpel ## SIZE ## _mc33_c(uint8_t *dst, uint8_t *src, int stride){\ | |
2548 uint8_t full[SIZE*(SIZE+5)];\ | |
2549 uint8_t * const full_mid= full + SIZE*2;\ | |
2550 uint8_t halfH[SIZE*SIZE];\ | |
2551 uint8_t halfV[SIZE*SIZE];\ | |
2552 put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\ | |
2553 copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\ | |
2554 put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\ | |
2555 OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\ | |
2556 }\ | |
2557 \ | |
2558 static void OPNAME ## h264_qpel ## SIZE ## _mc22_c(uint8_t *dst, uint8_t *src, int stride){\ | |
2559 int16_t tmp[SIZE*(SIZE+5)];\ | |
2560 OPNAME ## h264_qpel ## SIZE ## _hv_lowpass(dst, tmp, src, stride, SIZE, stride);\ | |
2561 }\ | |
2562 \ | |
2563 static void OPNAME ## h264_qpel ## SIZE ## _mc21_c(uint8_t *dst, uint8_t *src, int stride){\ | |
2564 int16_t tmp[SIZE*(SIZE+5)];\ | |
2565 uint8_t halfH[SIZE*SIZE];\ | |
2566 uint8_t halfHV[SIZE*SIZE];\ | |
2567 put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\ | |
2568 put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\ | |
2569 OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\ | |
2570 }\ | |
2571 \ | |
2572 static void OPNAME ## h264_qpel ## SIZE ## _mc23_c(uint8_t *dst, uint8_t *src, int stride){\ | |
2573 int16_t tmp[SIZE*(SIZE+5)];\ | |
2574 uint8_t halfH[SIZE*SIZE];\ | |
2575 uint8_t halfHV[SIZE*SIZE];\ | |
2576 put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\ | |
2577 put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\ | |
2578 OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\ | |
2579 }\ | |
2580 \ | |
2581 static void OPNAME ## h264_qpel ## SIZE ## _mc12_c(uint8_t *dst, uint8_t *src, int stride){\ | |
2582 uint8_t full[SIZE*(SIZE+5)];\ | |
2583 uint8_t * const full_mid= full + SIZE*2;\ | |
2584 int16_t tmp[SIZE*(SIZE+5)];\ | |
2585 uint8_t halfV[SIZE*SIZE];\ | |
2586 uint8_t halfHV[SIZE*SIZE];\ | |
2587 copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\ | |
2588 put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\ | |
2589 put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\ | |
2590 OPNAME ## pixels ## SIZE ## _l2(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\ | |
2591 }\ | |
2592 \ | |
2593 static void OPNAME ## h264_qpel ## SIZE ## _mc32_c(uint8_t *dst, uint8_t *src, int stride){\ | |
2594 uint8_t full[SIZE*(SIZE+5)];\ | |
2595 uint8_t * const full_mid= full + SIZE*2;\ | |
2596 int16_t tmp[SIZE*(SIZE+5)];\ | |
2597 uint8_t halfV[SIZE*SIZE];\ | |
2598 uint8_t halfHV[SIZE*SIZE];\ | |
2599 copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\ | |
2600 put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\ | |
2601 put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\ | |
2602 OPNAME ## pixels ## SIZE ## _l2(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\ | |
2603 }\ | |
2604 | |
/*
 * Store operators plugged into H264_LOWPASS as OP (single filter pass) and
 * OP2 (horizontal pass followed by vertical pass).
 *
 * `cm` is the local crop-table pointer (ff_cropTbl + MAX_NEG_CROP) that each
 * lowpass function declares; indexing it clamps the normalized filter sum.
 * The filter taps (1,-5,20,20,-5,1) sum to 32, so one pass is rounded with
 * +16 and shifted by 5; two passes give a gain of 1024, hence +512 and >>10.
 * The *_avg variants additionally average the result with the value already
 * in `a`, rounding up (+1 >> 1).
 */
2605 #define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1) | |
2606 //#define op_avg2(a, b) a = (((a)*w1+cm[((b) + 16)>>5]*w2 + o + 64)>>7) | |
2607 #define op_put(a, b) a = cm[((b) + 16)>>5] | |
2608 #define op2_avg(a, b) a = (((a)+cm[((b) + 512)>>10]+1)>>1) | |
2609 #define op2_put(a, b) a = cm[((b) + 512)>>10] | |
2610 | |
/* Instantiate the lowpass filters for both the put_ and avg_ prefixes. */
2611 H264_LOWPASS(put_ , op_put, op2_put) | |
2612 H264_LOWPASS(avg_ , op_avg, op2_avg) | |
3020
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
/* Note: SIZE 2 is instantiated for put_ only; avg_ starts at SIZE 4. */
2613 H264_MC(put_, 2) |
1168 | 2614 H264_MC(put_, 4) |
2615 H264_MC(put_, 8) | |
2616 H264_MC(put_, 16) | |
2617 H264_MC(avg_, 4) | |
2618 H264_MC(avg_, 8) | |
2619 H264_MC(avg_, 16) | |
2620 | |
/* The operators are only meaningful inside the expansions above. */
2621 #undef op_avg | |
2622 #undef op_put | |
2623 #undef op2_avg | |
2624 #undef op2_put | |
2625 #endif | |
2626 | |
/*
 * H.264 weighted-prediction helpers.
 *
 * op_scale1(x): in-place explicit weighting of one sample of `block`:
 *               clip_uint8((block[x]*weight + offset) >> log2_denom).
 * op_scale2(x): bi-directional weighting of dst[x] with src[x]:
 *               clip_uint8((src[x]*weights + dst[x]*weightd + offset)
 *                          >> (log2_denom+1)).
 * Both rely on the variables of the enclosing generated function.
 *
 * H264_WEIGHT(W,H) generates
 *   weight_h264_pixels<W>x<H>_c(block, stride, log2_denom, weight, offset)
 *   biweight_h264_pixels<W>x<H>_c(dst, src, stride, log2_denom,
 *                                 weightd, weights, offset)
 *
 * In the weight function, `offset` is pre-scaled by log2_denom and, when
 * log2_denom is non-zero, the rounding term 1<<(log2_denom-1) is folded in
 * once before the loop. In the biweight function the combined
 * offset/rounding term is ((offset + 1) | 1) << log2_denom.
 *
 * Each row is fully unrolled for up to 16 samples; the `if(W==n) continue;`
 * tests compare against the macro constant W and end the row after n samples.
 */
4594 | 2627 #define op_scale1(x) block[x] = av_clip_uint8( (block[x]*weight + offset) >> log2_denom )
2628 #define op_scale2(x) dst[x] = av_clip_uint8( (src[x]*weights + dst[x]*weightd + offset) >> (log2_denom+1)) | |
2415 | 2629 #define H264_WEIGHT(W,H) \
2630 static void weight_h264_pixels ## W ## x ## H ## _c(uint8_t *block, int stride, int log2_denom, int weight, int offset){ \ | |
3029 | 2631 int y; \
2415 | 2632 offset <<= log2_denom; \
2633 if(log2_denom) offset += 1<<(log2_denom-1); \ | |
2634 for(y=0; y<H; y++, block += stride){ \ | |
2635 op_scale1(0); \ | |
2636 op_scale1(1); \ | |
2637 if(W==2) continue; \ | |
2638 op_scale1(2); \ | |
2639 op_scale1(3); \ | |
2640 if(W==4) continue; \ | |
2641 op_scale1(4); \ | |
2642 op_scale1(5); \ | |
2643 op_scale1(6); \ | |
2644 op_scale1(7); \ | |
2645 if(W==8) continue; \ | |
2646 op_scale1(8); \ | |
2647 op_scale1(9); \ | |
2648 op_scale1(10); \ | |
2649 op_scale1(11); \ | |
2650 op_scale1(12); \ | |
2651 op_scale1(13); \ | |
2652 op_scale1(14); \ | |
2653 op_scale1(15); \ | |
2654 } \ | |
2655 } \ | |
3029 | 2656 static void biweight_h264_pixels ## W ## x ## H ## _c(uint8_t *dst, uint8_t *src, int stride, int log2_denom, int weightd, int weights, int offset){ \
2657 int y; \ | |
2658 offset = ((offset + 1) | 1) << log2_denom; \ | |
2415 | 2659 for(y=0; y<H; y++, dst += stride, src += stride){ \
2660 op_scale2(0); \ | |
2661 op_scale2(1); \ | |
2662 if(W==2) continue; \ | |
2663 op_scale2(2); \ | |
2664 op_scale2(3); \ | |
2665 if(W==4) continue; \ | |
2666 op_scale2(4); \ | |
2667 op_scale2(5); \ | |
2668 op_scale2(6); \ | |
2669 op_scale2(7); \ | |
2670 if(W==8) continue; \ | |
2671 op_scale2(8); \ | |
2672 op_scale2(9); \ | |
2673 op_scale2(10); \ | |
2674 op_scale2(11); \ | |
2675 op_scale2(12); \ | |
2676 op_scale2(13); \ | |
2677 op_scale2(14); \ | |
2678 op_scale2(15); \ | |
2679 } \ | |
2680 } | |
2681 | |
/* One weight/biweight pair per supported block size (W x H). */
2682 H264_WEIGHT(16,16) | |
2683 H264_WEIGHT(16,8) | |
2684 H264_WEIGHT(8,16) | |
2685 H264_WEIGHT(8,8) | |
2686 H264_WEIGHT(8,4) | |
2687 H264_WEIGHT(4,8) | |
2688 H264_WEIGHT(4,4) | |
2689 H264_WEIGHT(4,2) | |
2690 H264_WEIGHT(2,4) | |
2691 H264_WEIGHT(2,2) | |
2692 | |
2693 #undef op_scale1 | |
2694 #undef op_scale2 | |
2695 #undef H264_WEIGHT | |
2696 | |
/*
 * Horizontal 4-tap (-1, 9, 9, -1) filter over an 8-sample-wide block.
 *
 * For each of the h rows, writes dst[0..7]; output k is
 *   cm[(9*(src[k] + src[k+1]) - (src[k-1] + src[k+2]) + 8) >> 4],
 * i.e. rounded, divided by 16 and clamped through the crop table.
 * Reads src[-1] .. src[9] on every row, so the caller must provide one
 * sample to the left and two to the right of the 8 outputs.
 * dst and src advance by dstStride / srcStride per row.
 */
936 | 2697 static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){
4176 | 2698 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
936 | 2699 int i;
2700 | |
2701 for(i=0; i<h; i++){ | |
2702 dst[0]= cm[(9*(src[0] + src[1]) - (src[-1] + src[2]) + 8)>>4]; | |
2703 dst[1]= cm[(9*(src[1] + src[2]) - (src[ 0] + src[3]) + 8)>>4]; | |
2704 dst[2]= cm[(9*(src[2] + src[3]) - (src[ 1] + src[4]) + 8)>>4]; | |
2705 dst[3]= cm[(9*(src[3] + src[4]) - (src[ 2] + src[5]) + 8)>>4]; | |
2706 dst[4]= cm[(9*(src[4] + src[5]) - (src[ 3] + src[6]) + 8)>>4]; | |
2707 dst[5]= cm[(9*(src[5] + src[6]) - (src[ 4] + src[7]) + 8)>>4]; | |
2708 dst[6]= cm[(9*(src[6] + src[7]) - (src[ 5] + src[8]) + 8)>>4]; | |
2709 dst[7]= cm[(9*(src[7] + src[8]) - (src[ 6] + src[9]) + 8)>>4]; | |
2710 dst+=dstStride; | |
2967 | 2711 src+=srcStride;
936 | 2712 }
2713 } | |
2714 | |
8590 | 2715 #if CONFIG_CAVS_DECODER |
3395
adccbf4a1040
CAVS decoder by (Stefan Gehrer stefan.gehrer gmx.de)
michael
parents:
3373
diff
changeset
|
2716 /* AVS specific */ |
adccbf4a1040
CAVS decoder by (Stefan Gehrer stefan.gehrer gmx.de)
michael
parents:
3373
diff
changeset
|
2717 void ff_cavsdsp_init(DSPContext* c, AVCodecContext *avctx); |
adccbf4a1040
CAVS decoder by (Stefan Gehrer stefan.gehrer gmx.de)
michael
parents:
3373
diff
changeset
|
2718 |
adccbf4a1040
CAVS decoder by (Stefan Gehrer stefan.gehrer gmx.de)
michael
parents:
3373
diff
changeset
|
/* CAVS qpel8 mc00 "put": no filtering here, forwards to put_pixels8_c with 8
 * as the final argument (presumably the row count -- confirm). */
2719 void ff_put_cavs_qpel8_mc00_c(uint8_t *dst, uint8_t *src, int stride) { |
adccbf4a1040
CAVS decoder by (Stefan Gehrer stefan.gehrer gmx.de)
michael
parents:
3373
diff
changeset
|
2720 put_pixels8_c(dst, src, stride, 8); |
adccbf4a1040
CAVS decoder by (Stefan Gehrer stefan.gehrer gmx.de)
michael
parents:
3373
diff
changeset
|
2721 } |
adccbf4a1040
CAVS decoder by (Stefan Gehrer stefan.gehrer gmx.de)
michael
parents:
3373
diff
changeset
|
/* CAVS qpel8 mc00 "avg": no filtering here, forwards to avg_pixels8_c with 8
 * as the final argument (presumably the row count -- confirm). */
2722 void ff_avg_cavs_qpel8_mc00_c(uint8_t *dst, uint8_t *src, int stride) { |
adccbf4a1040
CAVS decoder by (Stefan Gehrer stefan.gehrer gmx.de)
michael
parents:
3373
diff
changeset
|
2723 avg_pixels8_c(dst, src, stride, 8); |
adccbf4a1040
CAVS decoder by (Stefan Gehrer stefan.gehrer gmx.de)
michael
parents:
3373
diff
changeset
|
2724 } |
adccbf4a1040
CAVS decoder by (Stefan Gehrer stefan.gehrer gmx.de)
michael
parents:
3373
diff
changeset
|
/* CAVS qpel16 mc00 "put": no filtering here, forwards to put_pixels16_c with
 * 16 as the final argument (presumably the row count -- confirm). */
2725 void ff_put_cavs_qpel16_mc00_c(uint8_t *dst, uint8_t *src, int stride) { |
adccbf4a1040
CAVS decoder by (Stefan Gehrer stefan.gehrer gmx.de)
michael
parents:
3373
diff
changeset
|
2726 put_pixels16_c(dst, src, stride, 16); |
adccbf4a1040
CAVS decoder by (Stefan Gehrer stefan.gehrer gmx.de)
michael
parents:
3373
diff
changeset
|
2727 } |
adccbf4a1040
CAVS decoder by (Stefan Gehrer stefan.gehrer gmx.de)
michael
parents:
3373
diff
changeset
|
/* CAVS qpel16 mc00 "avg": no filtering here, forwards to avg_pixels16_c with
 * 16 as the final argument (presumably the row count -- confirm). */
2728 void ff_avg_cavs_qpel16_mc00_c(uint8_t *dst, uint8_t *src, int stride) { |
adccbf4a1040
CAVS decoder by (Stefan Gehrer stefan.gehrer gmx.de)
michael
parents:
3373
diff
changeset
|
2729 avg_pixels16_c(dst, src, stride, 16); |
adccbf4a1040
CAVS decoder by (Stefan Gehrer stefan.gehrer gmx.de)
michael
parents:
3373
diff
changeset
|
2730 } |
3432 | 2731 #endif /* CONFIG_CAVS_DECODER */ |
3395
adccbf4a1040
CAVS decoder by (Stefan Gehrer stefan.gehrer gmx.de)
michael
parents:
3373
diff
changeset
|
2732 |
8590 | 2733 #if CONFIG_VC1_DECODER || CONFIG_WMV3_DECODER |
3526 | 2734 /* VC-1 specific */ |
2735 void ff_vc1dsp_init(DSPContext* c, AVCodecContext *avctx); | |
2736 | |
/* VC-1 mspel mc00 "put": forwards to put_pixels8_c(dst, src, stride, 8).
 * The `rnd` parameter is accepted but not used by this function. */
2737 void ff_put_vc1_mspel_mc00_c(uint8_t *dst, uint8_t *src, int stride, int rnd) { | |
2738 put_pixels8_c(dst, src, stride, 8); | |
2739 } | |
2740 #endif /* CONFIG_VC1_DECODER||CONFIG_WMV3_DECODER */ | |
2741 | |
5887 | 2742 void ff_intrax8dsp_init(DSPContext* c, AVCodecContext *avctx); |
5899 | 2743 |
4296 | 2744 /* H264 specific */ |
5411
362aec4ef932
Take care of some renames (Doxygen and function name) after the previous pure rename patch.
takis
parents:
5394
diff
changeset
|
2745 void ff_h264dspenc_init(DSPContext* c, AVCodecContext *avctx); |
4296 | 2746 |
8590 | 2747 #if CONFIG_RV30_DECODER |
8410 | 2748 void ff_rv30dsp_init(DSPContext* c, AVCodecContext *avctx); |
2749 #endif /* CONFIG_RV30_DECODER */ | |
2750 | |
8590 | 2751 #if CONFIG_RV40_DECODER |
/* RV40 qpel16 mc33 "put": implemented by put_pixels16_xy2_c with 16 as the
 * final argument (presumably the row count -- confirm). */
8232 | 2752 static void put_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
2753 put_pixels16_xy2_c(dst, src, stride, 16); | |
2754 } | |
/* RV40 qpel16 mc33 "avg": implemented by avg_pixels16_xy2_c with 16 as the
 * final argument (presumably the row count -- confirm). */
2755 static void avg_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){ | |
2756 avg_pixels16_xy2_c(dst, src, stride, 16); | |
2757 } | |
/* RV40 qpel8 mc33 "put": implemented by put_pixels8_xy2_c with 8 as the
 * final argument (presumably the row count -- confirm). */
2758 static void put_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){ | |
2759 put_pixels8_xy2_c(dst, src, stride, 8); | |
2760 } | |
/* RV40 qpel8 mc33 "avg": implemented by avg_pixels8_xy2_c with 8 as the
 * final argument (presumably the row count -- confirm). */
2761 static void avg_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){ | |
2762 avg_pixels8_xy2_c(dst, src, stride, 8); | |
2763 } | |
2764 | |
2765 void ff_rv40dsp_init(DSPContext* c, AVCodecContext *avctx); | |
2766 #endif /* CONFIG_RV40_DECODER */ | |
2767 | |
/*
 * Vertical 4-tap (-1, 9, 9, -1) filter producing 8 output rows.
 *
 * Processes w columns, one per loop iteration. For each column it loads the
 * eleven samples at rows -1 .. 9 (relative to src, pitch srcStride) and
 * writes rows 0 .. 7 of dst (pitch dstStride); output row k is
 *   cm[(9*(row k + row k+1) - (row k-1 + row k+2) + 8) >> 4],
 * i.e. rounded, divided by 16 and clamped through the crop table.
 * The caller must therefore provide one row above and two rows below the
 * 8 output rows.
 */
936 | 2768 static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){
4176 | 2769 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
936 | 2770 int i;
2771 | |
2772 for(i=0; i<w; i++){ | |
2773 const int src_1= src[ -srcStride]; | |
2774 const int src0 = src[0 ]; | |
2775 const int src1 = src[ srcStride]; | |
2776 const int src2 = src[2*srcStride]; | |
2777 const int src3 = src[3*srcStride]; | |
2778 const int src4 = src[4*srcStride]; | |
2779 const int src5 = src[5*srcStride]; | |
2780 const int src6 = src[6*srcStride]; | |
2781 const int src7 = src[7*srcStride]; | |
2782 const int src8 = src[8*srcStride]; | |
2783 const int src9 = src[9*srcStride]; | |
2784 dst[0*dstStride]= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4]; | |
2785 dst[1*dstStride]= cm[(9*(src1 + src2) - (src0 + src3) + 8)>>4]; | |
2786 dst[2*dstStride]= cm[(9*(src2 + src3) - (src1 + src4) + 8)>>4]; | |
2787 dst[3*dstStride]= cm[(9*(src3 + src4) - (src2 + src5) + 8)>>4]; | |
2788 dst[4*dstStride]= cm[(9*(src4 + src5) - (src3 + src6) + 8)>>4]; | |
2789 dst[5*dstStride]= cm[(9*(src5 + src6) - (src4 + src7) + 8)>>4]; | |
2790 dst[6*dstStride]= cm[(9*(src6 + src7) - (src5 + src8) + 8)>>4]; | |
2791 dst[7*dstStride]= cm[(9*(src7 + src8) - (src6 + src9) + 8)>>4]; | |
2792 src++; | |
2793 dst++; | |
2794 } | |
2795 } | |
2796 | |
/* mspel8 mc00: no filtering, forwards to put_pixels8_c(dst, src, stride, 8). */
2797 static void put_mspel8_mc00_c (uint8_t *dst, uint8_t *src, int stride){ | |
2798 put_pixels8_c(dst, src, stride, 8); | |
2799 } | |
2800 | |
/* mspel8 mc10: horizontally filters 8 rows into the 8x8 scratch `half`
 * (pitch 8), then passes src and half to put_pixels8_l2 to produce dst. */
2801 static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){ | |
2802 uint8_t half[64]; | |
2803 wmv2_mspel8_h_lowpass(half, src, 8, stride, 8); | |
2804 put_pixels8_l2(dst, src, half, stride, stride, 8, 8); | |
2805 } | |
2806 | |
/* mspel8 mc20: writes the horizontal lowpass of 8 rows directly into dst. */
2807 static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){ | |
2808 wmv2_mspel8_h_lowpass(dst, src, stride, stride, 8); | |
2809 } | |
2810 | |
/* mspel8 mc30: same as mc10 except that the unfiltered input handed to
 * put_pixels8_l2 is src+1 (one sample to the right) instead of src. */
2811 static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){ | |
2812 uint8_t half[64]; | |
2813 wmv2_mspel8_h_lowpass(half, src, 8, stride, 8); | |
2814 put_pixels8_l2(dst, src+1, half, stride, stride, 8, 8); | |
2815 } | |
2816 | |
/* mspel8 mc02: writes the vertical lowpass of 8 columns directly into dst. */
2817 static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){ | |
2818 wmv2_mspel8_v_lowpass(dst, src, stride, stride, 8); | |
2819 } | |
2820 | |
/*
 * mspel8 mc12: combines a vertical-only and a horizontal+vertical filtered
 * block via put_pixels8_l2.
 *
 * halfH holds 11 horizontally filtered rows of 8 samples (88 bytes),
 * starting one row above src. halfH+8 is the row matching src, so the
 * vertical pass over it has the rows -1 .. 9 it needs, all inside halfH.
 */
2821 static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){ | |
2822 uint8_t halfH[88]; | |
2823 uint8_t halfV[64]; | |
2824 uint8_t halfHV[64]; | |
2825 wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11); | |
2826 wmv2_mspel8_v_lowpass(halfV, src, 8, stride, 8); | |
2827 wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8); | |
2828 put_pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8); | |
2829 } | |
/*
 * mspel8 mc32: same construction as mc12, except that the vertical-only
 * block halfV is filtered from src+1 (one column to the right).
 * halfH holds 11 horizontally filtered rows starting one row above src;
 * halfH+8 is the row matching src.
 */
2830 static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){ | |
2831 uint8_t halfH[88]; | |
2832 uint8_t halfV[64]; | |
2833 uint8_t halfHV[64]; | |
2834 wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11); | |
2835 wmv2_mspel8_v_lowpass(halfV, src+1, 8, stride, 8); | |
2836 wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8); | |
2837 put_pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8); | |
2838 } | |
/* mspel8 mc22: horizontal lowpass of 11 rows (starting one row above src)
 * into halfH, then vertical lowpass of that, from row 1 (halfH+8), straight
 * into dst. */
2839 static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){ | |
2840 uint8_t halfH[88]; | |
2841 wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11); | |
2842 wmv2_mspel8_v_lowpass(dst, halfH+8, stride, 8, 8); | |
2843 } | |
2844 | |
1644 | 2845 static void h263_v_loop_filter_c(uint8_t *src, int stride, int qscale){ |
8596
68e959302527
replace all occurrence of ENABLE_ by the corresponding CONFIG_, HAVE_ or ARCH_
aurel
parents:
8590
diff
changeset
|
2846 if(CONFIG_ANY_H263) { |
1644 | 2847 int x; |
2848 const int strength= ff_h263_loop_filter_strength[qscale]; | |
2967 | 2849 |
1644 | 2850 for(x=0; x<8; x++){ |
2851 int d1, d2, ad1; | |
2852 int p0= src[x-2*stride]; | |
2853 int p1= src[x-1*stride]; | |
2854 int p2= src[x+0*stride]; | |
2855 int p3= src[x+1*stride]; | |
2856 int d = (p0 - p3 + 4*(p2 - p1)) / 8; | |
2857 | |
2858 if (d<-2*strength) d1= 0; | |
2859 else if(d<- strength) d1=-2*strength - d; | |
2860 else if(d< strength) d1= d; | |
2861 else if(d< 2*strength) d1= 2*strength - d; | |
2862 else d1= 0; | |
2967 | 2863 |
1644 | 2864 p1 += d1; |
2865 p2 -= d1; | |
2866 if(p1&256) p1= ~(p1>>31); | |
2867 if(p2&256) p2= ~(p2>>31); | |
2967 | 2868 |
1644 | 2869 src[x-1*stride] = p1; |
2870 src[x+0*stride] = p2; | |
2871 | |
4001 | 2872 ad1= FFABS(d1)>>1; |
2967 | 2873 |
4594 | 2874 d2= av_clip((p0-p3)/4, -ad1, ad1); |
2967 | 2875 |
1644 | 2876 src[x-2*stride] = p0 - d2; |
2877 src[x+ stride] = p3 + d2; | |
2878 } | |
5394
e9a6215f4e3a
help some gcc version to optimize out those functions
aurel
parents:
5291
diff
changeset
|
2879 } |
1644 | 2880 } |
2881 | |
2882 static void h263_h_loop_filter_c(uint8_t *src, int stride, int qscale){ | |
8596
68e959302527
replace all occurrence of ENABLE_ by the corresponding CONFIG_, HAVE_ or ARCH_
aurel
parents:
8590
diff
changeset
|
2883 if(CONFIG_ANY_H263) { |
1644 | 2884 int y; |
2885 const int strength= ff_h263_loop_filter_strength[qscale]; | |
2967 | 2886 |
1644 | 2887 for(y=0; y<8; y++){ |
2888 int d1, d2, ad1; | |
2889 int p0= src[y*stride-2]; | |
2890 int p1= src[y*stride-1]; | |
2891 int p2= src[y*stride+0]; | |
2892 int p3= src[y*stride+1]; | |
2893 int d = (p0 - p3 + 4*(p2 - p1)) / 8; | |
2894 | |
2895 if (d<-2*strength) d1= 0; | |
2896 else if(d<- strength) d1=-2*strength - d; | |
2897 else if(d< strength) d1= d; | |
2898 else if(d< 2*strength) d1= 2*strength - d; | |
2899 else d1= 0; | |
2967 | 2900 |
1644 | 2901 p1 += d1; |
2902 p2 -= d1; | |
2903 if(p1&256) p1= ~(p1>>31); | |
2904 if(p2&256) p2= ~(p2>>31); | |
2967 | 2905 |
1644 | 2906 src[y*stride-1] = p1; |
2907 src[y*stride+0] = p2; | |
2908 | |
4001 | 2909 ad1= FFABS(d1)>>1; |
2967 | 2910 |
4594 | 2911 d2= av_clip((p0-p3)/4, -ad1, ad1); |
2967 | 2912 |
1644 | 2913 src[y*stride-2] = p0 - d2; |
2914 src[y*stride+1] = p3 + d2; | |
2915 } | |
5394
e9a6215f4e3a
help some gcc version to optimize out those functions
aurel
parents:
5291
diff
changeset
|
2916 } |
1644 | 2917 } |
936 | 2918 |
/**
 * H.261 loop filter: in-place separable (1,2,1) smoothing of an 8x8 block.
 *
 * Pass 1 (vertical) fills an 8x8 int buffer: rows 0 and 7 are copied scaled
 * by 4, rows 1..6 get above + 2*current + below.
 * Pass 2 (horizontal) writes back to src: columns 0 and 7 only undo the
 * vertical scale ((v+2)>>2), columns 1..6 get
 * (left + 2*current + right + 8) >> 4.
 *
 * @param src    top-left sample of the 8x8 block, modified in place
 * @param stride distance in bytes between rows of src
 */
static void h261_loop_filter_c(uint8_t *src, int stride)
{
    int vert[8][8];
    int row, col;

    /* Top and bottom rows are not filtered vertically. */
    for (col = 0; col < 8; col++) {
        vert[0][col] = 4 * src[col];
        vert[7][col] = 4 * src[col + 7 * stride];
    }

    for (row = 1; row < 7; row++) {
        const uint8_t *mid = src + row * stride;

        for (col = 0; col < 8; col++)
            vert[row][col] = mid[col - stride] + 2 * mid[col] + mid[col + stride];
    }

    for (row = 0; row < 8; row++) {
        uint8_t *out = src + row * stride;
        const int *v = vert[row];

        /* Left and right columns are not filtered horizontally. */
        out[0] = (v[0] + 2) >> 2;
        out[7] = (v[7] + 2) >> 2;

        for (col = 1; col < 7; col++)
            out[col] = (v[col - 1] + 2 * v[col] + v[col + 1] + 8) >> 4;
    }
}
b6f2add2511e
h261 decoder by (Maarten Daniels <maarten.daniels at student dot luc dot ac dot be>)
michael
parents:
1984
diff
changeset
|
2945 |
2707
360024d31dab
H.264 deblocking optimizations (mmx for chroma_bS4 case, convert existing cases to 8-bit math)
lorenm
parents:
2696
diff
changeset
|
/**
 * H.264 luma deblocking filter along one 16-sample edge (C reference).
 *
 * The edge is processed as 4 groups of 4 lines; group i uses clipping value
 * tc0[i] and is skipped entirely when tc0[i] is negative. For every line,
 * p0,p1,p2 are the samples at -1,-2,-3 * xstride and q0,q1,q2 the samples at
 * 0,+1,+2 * xstride relative to pix. A line is filtered only when
 * |p0-q0| < alpha, |p1-p0| < beta and |q1-q0| < beta.
 *
 * @param pix     first sample on the q side of the edge
 * @param xstride step between samples across the edge
 * @param ystride step between successive lines along the edge
 * @param alpha   threshold on |p0-q0|
 * @param beta    threshold on the inner differences
 * @param tc0     4 clipping values, one per group of 4 lines
 */
static inline void h264_loop_filter_luma_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0)
{
    int i, d;

    for (i = 0; i < 4; i++) {
        const int tc_base = tc0[i];

        if (tc_base < 0) {
            /* Nothing to filter in this group: jump over its 4 lines. */
            pix += 4 * ystride;
            continue;
        }

        for (d = 0; d < 4; d++, pix += ystride) {
            const int p0 = pix[-1 * xstride];
            const int p1 = pix[-2 * xstride];
            const int p2 = pix[-3 * xstride];
            const int q0 = pix[0];
            const int q1 = pix[1 * xstride];
            const int q2 = pix[2 * xstride];
            const int avg0 = (p0 + q0 + 1) >> 1;
            int tc = tc_base;
            int i_delta;

            if (FFABS(p0 - q0) >= alpha ||
                FFABS(p1 - p0) >= beta  ||
                FFABS(q1 - q0) >= beta)
                continue;

            /* Each side that is smooth enough also gets its second sample
             * corrected and widens the clipping range for p0/q0 by one. */
            if (FFABS(p2 - p0) < beta) {
                pix[-2 * xstride] = p1 + av_clip(((p2 + avg0) >> 1) - p1, -tc_base, tc_base);
                tc++;
            }
            if (FFABS(q2 - q0) < beta) {
                pix[xstride] = q1 + av_clip(((q2 + avg0) >> 1) - q1, -tc_base, tc_base);
                tc++;
            }

            /* Multiply instead of "<< 2": q0 - p0 may be negative and
             * left-shifting a negative int is undefined behaviour in C. */
            i_delta = av_clip(((q0 - p0) * 4 + (p1 - q1) + 4) >> 3, -tc, tc);
            pix[-xstride] = av_clip_uint8(p0 + i_delta);    /* p0' */
            pix[0]        = av_clip_uint8(q0 - i_delta);    /* q0' */
        }
    }
}
2707
360024d31dab
H.264 deblocking optimizations (mmx for chroma_bS4 case, convert existing cases to 8-bit math)
lorenm
parents:
2696
diff
changeset
|
/**
 * Luma deblocking, "v" entry point: calls h264_loop_filter_luma_c() with
 * samples across the edge stride bytes apart and successive lines 1 byte
 * apart.
 */
static void h264_v_loop_filter_luma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
{
    const int across = stride;
    const int along  = 1;

    h264_loop_filter_luma_c(pix, across, along, alpha, beta, tc0);
}
2707
360024d31dab
H.264 deblocking optimizations (mmx for chroma_bS4 case, convert existing cases to 8-bit math)
lorenm
parents:
2696
diff
changeset
|
/**
 * Luma deblocking, "h" entry point: calls h264_loop_filter_luma_c() with
 * samples across the edge 1 byte apart and successive lines stride bytes
 * apart.
 */
static void h264_h_loop_filter_luma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
{
    const int across = 1;
    const int along  = stride;

    h264_loop_filter_luma_c(pix, across, along, alpha, beta, tc0);
}
2994 | |
8395
195cba8f6257
Move filter_luma_intra into dsputil for later addition of asm.
darkshikari
parents:
8375
diff
changeset
|
/**
 * H.264 luma deblocking filter, intra variant, along one 16-sample edge.
 *
 * For every line, p0,p1,p2 are the samples at -1,-2,-3 * xstride and
 * q0,q1,q2 the samples at 0,+1,+2 * xstride relative to pix. A line is
 * filtered only when |p0-q0| < alpha, |p1-p0| < beta and |q1-q0| < beta.
 * When additionally |p0-q0| < (alpha>>2)+2, each side whose third sample is
 * within beta of its first gets three samples rewritten (p3/q3 are read for
 * that); otherwise only p0 respectively q0 is rewritten.
 *
 * @param pix     first sample on the q side of the edge
 * @param xstride step between samples across the edge
 * @param ystride step between successive lines along the edge
 * @param alpha   threshold on |p0-q0|
 * @param beta    threshold on the inner differences
 */
static inline void h264_loop_filter_luma_intra_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta)
{
    int d;

    for (d = 0; d < 16; d++, pix += ystride) {
        const int p2 = pix[-3 * xstride];
        const int p1 = pix[-2 * xstride];
        const int p0 = pix[-1 * xstride];

        const int q0 = pix[ 0 * xstride];
        const int q1 = pix[ 1 * xstride];
        const int q2 = pix[ 2 * xstride];

        const int edge_gap = FFABS(p0 - q0);
        int strong;

        if (edge_gap >= alpha ||
            FFABS(p1 - p0) >= beta ||
            FFABS(q1 - q0) >= beta)
            continue;

        /* All inputs were latched above, so the order of the writes below
         * does not influence the results. */
        strong = edge_gap < ((alpha >> 2) + 2);

        if (strong && FFABS(p2 - p0) < beta) {
            const int p3 = pix[-4 * xstride];
            /* p0', p1', p2' */
            pix[-1 * xstride] = (p2 + 2 * p1 + 2 * p0 + 2 * q0 + q1 + 4) >> 3;
            pix[-2 * xstride] = (p2 + p1 + p0 + q0 + 2) >> 2;
            pix[-3 * xstride] = (2 * p3 + 3 * p2 + p1 + p0 + q0 + 4) >> 3;
        } else {
            /* p0' */
            pix[-1 * xstride] = (2 * p1 + p0 + q1 + 2) >> 2;
        }

        if (strong && FFABS(q2 - q0) < beta) {
            const int q3 = pix[3 * xstride];
            /* q0', q1', q2' */
            pix[0 * xstride] = (p1 + 2 * p0 + 2 * q0 + 2 * q1 + q2 + 4) >> 3;
            pix[1 * xstride] = (p0 + q0 + q1 + q2 + 2) >> 2;
            pix[2 * xstride] = (2 * q3 + 3 * q2 + q1 + q0 + p0 + 4) >> 3;
        } else {
            /* q0' */
            pix[0 * xstride] = (2 * q1 + q0 + p1 + 2) >> 2;
        }
    }
}
195cba8f6257
Move filter_luma_intra into dsputil for later addition of asm.
darkshikari
parents:
8375
diff
changeset
|
/**
 * Intra luma deblocking, "v" entry point: calls
 * h264_loop_filter_luma_intra_c() with samples across the edge stride bytes
 * apart and successive lines 1 byte apart.
 */
static void h264_v_loop_filter_luma_intra_c(uint8_t *pix, int stride, int alpha, int beta)
{
    const int across = stride;
    const int along  = 1;

    h264_loop_filter_luma_intra_c(pix, across, along, alpha, beta);
}
195cba8f6257
Move filter_luma_intra into dsputil for later addition of asm.
darkshikari
parents:
8375
diff
changeset
|
/**
 * Intra luma deblocking, "h" entry point: calls
 * h264_loop_filter_luma_intra_c() with samples across the edge 1 byte apart
 * and successive lines stride bytes apart.
 */
static void h264_h_loop_filter_luma_intra_c(uint8_t *pix, int stride, int alpha, int beta)
{
    const int across = 1;
    const int along  = stride;

    h264_loop_filter_luma_intra_c(pix, across, along, alpha, beta);
}
195cba8f6257
Move filter_luma_intra into dsputil for later addition of asm.
darkshikari
parents:
8375
diff
changeset
|
3051 |
2707
360024d31dab
H.264 deblocking optimizations (mmx for chroma_bS4 case, convert existing cases to 8-bit math)
lorenm
parents:
2696
diff
changeset
|
/**
 * H.264 chroma deblocking filter along one 8-sample edge (C reference).
 *
 * The edge is processed as 4 groups of 2 lines; group i uses clipping value
 * tc0[i] and is skipped entirely when tc0[i] <= 0. For every line, p0,p1 are
 * the samples at -1,-2 * xstride and q0,q1 the samples at 0,+1 * xstride
 * relative to pix. A line is filtered only when |p0-q0| < alpha,
 * |p1-p0| < beta and |q1-q0| < beta; only p0 and q0 are modified.
 *
 * @param pix     first sample on the q side of the edge
 * @param xstride step between samples across the edge
 * @param ystride step between successive lines along the edge
 * @param alpha   threshold on |p0-q0|
 * @param beta    threshold on the inner differences
 * @param tc0     4 clipping values, one per group of 2 lines
 */
static inline void h264_loop_filter_chroma_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0)
{
    int i, d;

    for (i = 0; i < 4; i++) {
        const int tc = tc0[i];

        if (tc <= 0) {
            /* Nothing to filter in this group: jump over its 2 lines. */
            pix += 2 * ystride;
            continue;
        }

        for (d = 0; d < 2; d++, pix += ystride) {
            const int p0 = pix[-1 * xstride];
            const int p1 = pix[-2 * xstride];
            const int q0 = pix[0];
            const int q1 = pix[1 * xstride];
            int delta;

            if (FFABS(p0 - q0) >= alpha ||
                FFABS(p1 - p0) >= beta  ||
                FFABS(q1 - q0) >= beta)
                continue;

            /* Multiply instead of "<< 2": q0 - p0 may be negative and
             * left-shifting a negative int is undefined behaviour in C. */
            delta = av_clip(((q0 - p0) * 4 + (p1 - q1) + 4) >> 3, -tc, tc);

            pix[-xstride] = av_clip_uint8(p0 + delta);    /* p0' */
            pix[0]        = av_clip_uint8(q0 - delta);    /* q0' */
        }
    }
}
2707
360024d31dab
H.264 deblocking optimizations (mmx for chroma_bS4 case, convert existing cases to 8-bit math)
lorenm
parents:
2696
diff
changeset
|
/**
 * Chroma deblocking, "v" entry point: calls h264_loop_filter_chroma_c() with
 * samples across the edge stride bytes apart and successive lines 1 byte
 * apart.
 */
static void h264_v_loop_filter_chroma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
{
    const int across = stride;
    const int along  = 1;

    h264_loop_filter_chroma_c(pix, across, along, alpha, beta, tc0);
}
2707
360024d31dab
H.264 deblocking optimizations (mmx for chroma_bS4 case, convert existing cases to 8-bit math)
lorenm
parents:
2696
diff
changeset
|
/**
 * Chroma deblocking, "h" entry point: calls h264_loop_filter_chroma_c() with
 * samples across the edge 1 byte apart and successive lines stride bytes
 * apart.
 */
static void h264_h_loop_filter_chroma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
{
    const int across = 1;
    const int along  = stride;

    h264_loop_filter_chroma_c(pix, across, along, alpha, beta, tc0);
}
3088 | |
2707
360024d31dab
H.264 deblocking optimizations (mmx for chroma_bS4 case, convert existing cases to 8-bit math)
lorenm
parents:
2696
diff
changeset
|
/**
 * H.264 chroma deblocking filter, intra variant, along one 8-sample edge.
 *
 * For every line, p0,p1 are the samples at -1,-2 * xstride and q0,q1 the
 * samples at 0,+1 * xstride relative to pix. When |p0-q0| < alpha,
 * |p1-p0| < beta and |q1-q0| < beta, p0 and q0 are replaced by
 * (2*p1 + p0 + q1 + 2) >> 2 and (2*q1 + q0 + p1 + 2) >> 2.
 *
 * @param pix     first sample on the q side of the edge
 * @param xstride step between samples across the edge
 * @param ystride step between successive lines along the edge
 * @param alpha   threshold on |p0-q0|
 * @param beta    threshold on the inner differences
 */
static inline void h264_loop_filter_chroma_intra_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta)
{
    int d;

    for (d = 0; d < 8; d++, pix += ystride) {
        const int p0 = pix[-1 * xstride];
        const int p1 = pix[-2 * xstride];
        const int q0 = pix[0];
        const int q1 = pix[1 * xstride];

        if (FFABS(p0 - q0) >= alpha ||
            FFABS(p1 - p0) >= beta  ||
            FFABS(q1 - q0) >= beta)
            continue;

        pix[-xstride] = (2 * p1 + p0 + q1 + 2) >> 2;   /* p0' */
        pix[0]        = (2 * q1 + q0 + p1 + 2) >> 2;   /* q0' */
    }
}
360024d31dab
H.264 deblocking optimizations (mmx for chroma_bS4 case, convert existing cases to 8-bit math)
lorenm
parents:
2696
diff
changeset
|
/**
 * Intra chroma deblocking, "v" entry point: calls
 * h264_loop_filter_chroma_intra_c() with samples across the edge stride
 * bytes apart and successive lines 1 byte apart.
 */
static void h264_v_loop_filter_chroma_intra_c(uint8_t *pix, int stride, int alpha, int beta)
{
    const int across = stride;
    const int along  = 1;

    h264_loop_filter_chroma_intra_c(pix, across, along, alpha, beta);
}
360024d31dab
H.264 deblocking optimizations (mmx for chroma_bS4 case, convert existing cases to 8-bit math)
lorenm
parents:
2696
diff
changeset
|
/**
 * Intra chroma deblocking, "h" entry point: calls
 * h264_loop_filter_chroma_intra_c() with samples across the edge 1 byte
 * apart and successive lines stride bytes apart.
 */
static void h264_h_loop_filter_chroma_intra_c(uint8_t *pix, int stride, int alpha, int beta)
{
    const int across = 1;
    const int along  = stride;

    h264_loop_filter_chroma_intra_c(pix, across, along, alpha, beta);
}
360024d31dab
H.264 deblocking optimizations (mmx for chroma_bS4 case, convert existing cases to 8-bit math)
lorenm
parents:
2696
diff
changeset
|
3116 |
/**
 * Sum of absolute differences between two blocks 16 samples wide.
 *
 * @param v         unused
 * @param pix1      first block
 * @param pix2      second block
 * @param line_size distance in bytes between rows, same for both blocks
 * @param h         number of rows
 * @return sum over h rows and 16 columns of |pix1 - pix2|
 */
static inline int pix_abs16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sad = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 16; col++)
            sad += abs(pix1[col] - pix2[col]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return sad;
}
3144 | |
/**
 * Sum of absolute differences, 16 samples wide, where each pix2 sample is
 * first combined with its right-hand neighbour through avg2().
 * Reads 17 samples per row of pix2.
 *
 * NOTE(review): avg2() is a macro defined elsewhere in this file,
 * presumably a rounded two-sample average -- confirm.
 *
 * @param v         unused
 * @param pix1      first block
 * @param pix2      second block
 * @param line_size distance in bytes between rows, same for both blocks
 * @param h         number of rows
 */
static int pix_abs16_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sad = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 16; col++)
            sad += abs(pix1[col] - avg2(pix2[col], pix2[col + 1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return sad;
}
3172 | |
/**
 * Sum of absolute differences, 16 samples wide, where each pix2 sample is
 * first combined with the sample one row below it through avg2().
 * Reads h+1 rows of pix2.
 *
 * NOTE(review): avg2() is a macro defined elsewhere in this file,
 * presumably a rounded two-sample average -- confirm.
 *
 * @param v         unused
 * @param pix1      first block
 * @param pix2      second block
 * @param line_size distance in bytes between rows, same for both blocks
 * @param h         number of rows
 */
static int pix_abs16_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *below = pix2 + line_size;
    int sad = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 16; col++)
            sad += abs(pix1[col] - avg2(pix2[col], below[col]));
        pix1  += line_size;
        pix2  += line_size;
        below += line_size;
    }
    return sad;
}
3202 | |
/**
 * Sum of absolute differences, 16 samples wide, where each pix2 sample is
 * first combined with its right, lower and lower-right neighbours through
 * avg4(). Reads 17 samples per row over h+1 rows of pix2.
 *
 * NOTE(review): avg4() is a macro defined elsewhere in this file,
 * presumably a rounded four-sample average -- confirm.
 *
 * @param v         unused
 * @param pix1      first block
 * @param pix2      second block
 * @param line_size distance in bytes between rows, same for both blocks
 * @param h         number of rows
 */
static int pix_abs16_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *below = pix2 + line_size;
    int sad = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 16; col++)
            sad += abs(pix1[col] - avg4(pix2[col], pix2[col + 1], below[col], below[col + 1]));
        pix1  += line_size;
        pix2  += line_size;
        below += line_size;
    }
    return sad;
}
3232 | |
/**
 * Sum of absolute differences between two blocks 8 samples wide.
 *
 * @param v         unused
 * @param pix1      first block
 * @param pix2      second block
 * @param line_size distance in bytes between rows, same for both blocks
 * @param h         number of rows
 * @return sum over h rows and 8 columns of |pix1 - pix2|
 */
static inline int pix_abs8_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sad = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++)
            sad += abs(pix1[col] - pix2[col]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return sad;
}
3252 | |
/**
 * Sum of absolute differences, 8 samples wide, where each pix2 sample is
 * first combined with its right-hand neighbour through avg2().
 * Reads 9 samples per row of pix2.
 *
 * NOTE(review): avg2() is a macro defined elsewhere in this file,
 * presumably a rounded two-sample average -- confirm.
 *
 * @param v         unused
 * @param pix1      first block
 * @param pix2      second block
 * @param line_size distance in bytes between rows, same for both blocks
 * @param h         number of rows
 */
static int pix_abs8_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sad = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++)
            sad += abs(pix1[col] - avg2(pix2[col], pix2[col + 1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return sad;
}
3272 | |
/**
 * Sum of absolute differences, 8 samples wide, where each pix2 sample is
 * first combined with the sample one row below it through avg2().
 * Reads h+1 rows of pix2.
 *
 * NOTE(review): avg2() is a macro defined elsewhere in this file,
 * presumably a rounded two-sample average -- confirm.
 *
 * @param v         unused
 * @param pix1      first block
 * @param pix2      second block
 * @param line_size distance in bytes between rows, same for both blocks
 * @param h         number of rows
 */
static int pix_abs8_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *below = pix2 + line_size;
    int sad = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++)
            sad += abs(pix1[col] - avg2(pix2[col], below[col]));
        pix1  += line_size;
        pix2  += line_size;
        below += line_size;
    }
    return sad;
}
3294 | |
/**
 * Sum of absolute differences, 8 samples wide, where each pix2 sample is
 * first combined with its right, lower and lower-right neighbours through
 * avg4(). Reads 9 samples per row over h+1 rows of pix2.
 *
 * NOTE(review): avg4() is a macro defined elsewhere in this file,
 * presumably a rounded four-sample average -- confirm.
 *
 * @param v         unused
 * @param pix1      first block
 * @param pix2      second block
 * @param line_size distance in bytes between rows, same for both blocks
 * @param h         number of rows
 */
static int pix_abs8_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *below = pix2 + line_size;
    int sad = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++)
            sad += abs(pix1[col] - avg4(pix2[col], pix2[col + 1], below[col], below[col + 1]));
        pix1  += line_size;
        pix2  += line_size;
        below += line_size;
    }
    return sad;
}
3316 | |
2834 | 3317 static int nsse16_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){ |
3318 MpegEncContext *c = v; | |
2065
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3319 int score1=0; |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3320 int score2=0; |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3321 int x,y; |
2066 | 3322 |
2065
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3323 for(y=0; y<h; y++){ |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3324 for(x=0; x<16; x++){ |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3325 score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]); |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3326 } |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3327 if(y+1<h){ |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3328 for(x=0; x<15; x++){ |
4001 | 3329 score2+= FFABS( s1[x ] - s1[x +stride] |
2065
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3330 - s1[x+1] + s1[x+1+stride]) |
4001 | 3331 -FFABS( s2[x ] - s2[x +stride] |
2065
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3332 - s2[x+1] + s2[x+1+stride]); |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3333 } |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3334 } |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3335 s1+= stride; |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3336 s2+= stride; |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3337 } |
2066 | 3338 |
4001 | 3339 if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight; |
3340 else return score1 + FFABS(score2)*8; | |
2065
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3341 } |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3342 |
2834 | 3343 static int nsse8_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){ |
3344 MpegEncContext *c = v; | |
2065
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3345 int score1=0; |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3346 int score2=0; |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3347 int x,y; |
2967 | 3348 |
2065
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3349 for(y=0; y<h; y++){ |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3350 for(x=0; x<8; x++){ |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3351 score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]); |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3352 } |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3353 if(y+1<h){ |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3354 for(x=0; x<7; x++){ |
4001 | 3355 score2+= FFABS( s1[x ] - s1[x +stride] |
2065
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3356 - s1[x+1] + s1[x+1+stride]) |
4001 | 3357 -FFABS( s2[x ] - s2[x +stride] |
2065
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3358 - s2[x+1] + s2[x+1+stride]); |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3359 } |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3360 } |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3361 s1+= stride; |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3362 s2+= stride; |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3363 } |
2967 | 3364 |
4001 | 3365 if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight; |
3366 else return score1 + FFABS(score2)*8; | |
2065
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3367 } |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3368 |
1784 | 3369 static int try_8x8basis_c(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale){ |
3370 int i; | |
3371 unsigned int sum=0; | |
3372 | |
3373 for(i=0; i<8*8; i++){ | |
3374 int b= rem[i] + ((basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT)); | |
3375 int w= weight[i]; | |
3376 b>>= RECON_SHIFT; | |
3377 assert(-512<b && b<512); | |
3378 | |
3379 sum += (w*b)*(w*b)>>4; | |
3380 } | |
3381 return sum>>2; | |
3382 } | |
3383 | |
3384 static void add_8x8basis_c(int16_t rem[64], int16_t basis[64], int scale){ | |
3385 int i; | |
3386 | |
3387 for(i=0; i<8*8; i++){ | |
3388 rem[i] += (basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT); | |
2967 | 3389 } |
1784 | 3390 } |
3391 | |
1100 | 3392 /** |
3393 * permutes an 8x8 block. | |
1101 | 3394 * @param block the block which will be permuted according to the given permutation vector |
1100 | 3395 * @param permutation the permutation vector |
3396 * @param last the last non zero coefficient in scantable order, used to speed the permutation up | |
2967 | 3397 * @param scantable the used scantable, this is only used to speed the permutation up, the block is not |
1101 | 3398 * (inverse) permutated to scantable order! |
1100 | 3399 */ |
1064 | 3400 void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last) |
174
ac5075a55488
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
88
diff
changeset
|
3401 { |
764 | 3402 int i; |
945 | 3403 DCTELEM temp[64]; |
2967 | 3404 |
764 | 3405 if(last<=0) return; |
5129 | 3406 //if(permutation[1]==1) return; //FIXME it is ok but not clean and might fail for some permutations |
174
ac5075a55488
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
88
diff
changeset
|
3407 |
764 | 3408 for(i=0; i<=last; i++){ |
3409 const int j= scantable[i]; | |
3410 temp[j]= block[j]; | |
3411 block[j]=0; | |
3412 } | |
2967 | 3413 |
764 | 3414 for(i=0; i<=last; i++){ |
3415 const int j= scantable[i]; | |
3416 const int perm_j= permutation[j]; | |
3417 block[perm_j]= temp[j]; | |
3418 } | |
174
ac5075a55488
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
88
diff
changeset
|
3419 } |
34 | 3420 |
/**
 * Comparison function that ignores its inputs and always reports a score
 * of 0.
 */
static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h)
{
    (void)s;
    (void)a;
    (void)b;
    (void)stride;
    (void)h;

    return 0;
}
3424 | |
3425 void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type){ | |
3426 int i; | |
2967 | 3427 |
8976
e7d87561b42b
Making the arrays accomodate an extra intra 8x8 cmp function
romansh
parents:
8785
diff
changeset
|
3428 memset(cmp, 0, sizeof(void*)*6); |
e7d87561b42b
Making the arrays accomodate an extra intra 8x8 cmp function
romansh
parents:
8785
diff
changeset
|
3429 |
e7d87561b42b
Making the arrays accomodate an extra intra 8x8 cmp function
romansh
parents:
8785
diff
changeset
|
3430 for(i=0; i<6; i++){ |
1729 | 3431 switch(type&0xFF){ |
3432 case FF_CMP_SAD: | |
3433 cmp[i]= c->sad[i]; | |
3434 break; | |
3435 case FF_CMP_SATD: | |
3436 cmp[i]= c->hadamard8_diff[i]; | |
3437 break; | |
3438 case FF_CMP_SSE: | |
3439 cmp[i]= c->sse[i]; | |
3440 break; | |
3441 case FF_CMP_DCT: | |
3442 cmp[i]= c->dct_sad[i]; | |
3443 break; | |
3010
533c6386eca9
8x8 integer dct from x264 as cmp function (under CONFIG_GPL)
michael
parents:
2979
diff
changeset
|
3444 case FF_CMP_DCT264: |
533c6386eca9
8x8 integer dct from x264 as cmp function (under CONFIG_GPL)
michael
parents:
2979
diff
changeset
|
3445 cmp[i]= c->dct264_sad[i]; |
533c6386eca9
8x8 integer dct from x264 as cmp function (under CONFIG_GPL)
michael
parents:
2979
diff
changeset
|
3446 break; |
2382 | 3447 case FF_CMP_DCTMAX: |
3448 cmp[i]= c->dct_max[i]; | |
3449 break; | |
1729 | 3450 case FF_CMP_PSNR: |
3451 cmp[i]= c->quant_psnr[i]; | |
3452 break; | |
3453 case FF_CMP_BIT: | |
3454 cmp[i]= c->bit[i]; | |
3455 break; | |
3456 case FF_CMP_RD: | |
3457 cmp[i]= c->rd[i]; | |
3458 break; | |
3459 case FF_CMP_VSAD: | |
3460 cmp[i]= c->vsad[i]; | |
3461 break; | |
3462 case FF_CMP_VSSE: | |
3463 cmp[i]= c->vsse[i]; | |
3464 break; | |
3465 case FF_CMP_ZERO: | |
3466 cmp[i]= zero_cmp; | |
3467 break; | |
2065
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3468 case FF_CMP_NSSE: |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3469 cmp[i]= c->nsse[i]; |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3470 break; |
8590 | 3471 #if CONFIG_SNOW_ENCODER |
2184 | 3472 case FF_CMP_W53: |
3473 cmp[i]= c->w53[i]; | |
3474 break; | |
3475 case FF_CMP_W97: | |
3476 cmp[i]= c->w97[i]; | |
3477 break; | |
3373
b8996cc5ccae
Disable w53 and w97 cmp methods when snow encoder is disabled
gpoirier
parents:
3323
diff
changeset
|
3478 #endif |
1729 | 3479 default: |
3480 av_log(NULL, AV_LOG_ERROR,"internal error in cmp function selection\n"); | |
3481 } | |
3482 } | |
3483 } | |
3484 | |
8288 | 3485 static void clear_block_c(DCTELEM *block) |
3486 { | |
3487 memset(block, 0, sizeof(DCTELEM)*64); | |
3488 } | |
3489 | |
1101 | 3490 /** |
3491 * memset(blocks, 0, sizeof(DCTELEM)*6*64) | |
3492 */ | |
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
764
diff
changeset
|
3493 static void clear_blocks_c(DCTELEM *blocks) |
296 | 3494 { |
3495 memset(blocks, 0, sizeof(DCTELEM)*6*64); | |
3496 } | |
3497 | |
866 | 3498 static void add_bytes_c(uint8_t *dst, uint8_t *src, int w){ |
6385 | 3499 long i; |
3500 for(i=0; i<=w-sizeof(long); i+=sizeof(long)){ | |
3501 long a = *(long*)(src+i); | |
3502 long b = *(long*)(dst+i); | |
3503 *(long*)(dst+i) = ((a&pb_7f) + (b&pb_7f)) ^ ((a^b)&pb_80); | |
866 | 3504 } |
3505 for(; i<w; i++) | |
3506 dst[i+0] += src[i+0]; | |
3507 } | |
3508 | |
6384 | 3509 static void add_bytes_l2_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){ |
6385 | 3510 long i; |
6384 | 3511 for(i=0; i<=w-sizeof(long); i+=sizeof(long)){ |
3512 long a = *(long*)(src1+i); | |
3513 long b = *(long*)(src2+i); | |
6385 | 3514 *(long*)(dst+i) = ((a&pb_7f) + (b&pb_7f)) ^ ((a^b)&pb_80); |
6384 | 3515 } |
3516 for(; i<w; i++) | |
3517 dst[i] = src1[i]+src2[i]; | |
3518 } | |
3519 | |
866 | 3520 static void diff_bytes_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){ |
6385 | 3521 long i; |
8590 | 3522 #if !HAVE_FAST_UNALIGNED |
6385 | 3523 if((long)src2 & (sizeof(long)-1)){ |
6386 | 3524 for(i=0; i+7<w; i+=8){ |
3525 dst[i+0] = src1[i+0]-src2[i+0]; | |
3526 dst[i+1] = src1[i+1]-src2[i+1]; | |
3527 dst[i+2] = src1[i+2]-src2[i+2]; | |
3528 dst[i+3] = src1[i+3]-src2[i+3]; | |
3529 dst[i+4] = src1[i+4]-src2[i+4]; | |
3530 dst[i+5] = src1[i+5]-src2[i+5]; | |
3531 dst[i+6] = src1[i+6]-src2[i+6]; | |
3532 dst[i+7] = src1[i+7]-src2[i+7]; | |
3533 } | |
6385 | 3534 }else |
3535 #endif | |
3536 for(i=0; i<=w-sizeof(long); i+=sizeof(long)){ | |
3537 long a = *(long*)(src1+i); | |
3538 long b = *(long*)(src2+i); | |
3539 *(long*)(dst+i) = ((a|pb_80) - (b&pb_7f)) ^ ((a^b^pb_80)&pb_80); | |
3540 } | |
866 | 3541 for(; i<w; i++) |
3542 dst[i+0] = src1[i+0]-src2[i+0]; | |
3543 } | |
3544 | |
/**
 * Reconstructs a row from median-prediction residuals.
 *
 * For each position the predictor is mid_pred() of the running left value,
 * src1[i] and (left + src1[i] - left_top) & 0xFF; diff[i] is added to it
 * and the result, truncated to 8 bits, is stored in dst[i].
 *
 * @param dst      reconstructed output
 * @param src1     row supplying the second and third predictor inputs
 * @param diff     residuals to add
 * @param w        number of bytes
 * @param left     in: initial left value; out: last reconstructed value
 * @param left_top in: initial top-left value; out: last src1 value consumed
 */
static void add_hfyu_median_prediction_c(uint8_t *dst, uint8_t *src1, uint8_t *diff, int w, int *left, int *left_top){
    uint8_t cur     = *left;
    uint8_t topleft = *left_top;
    int x;

    for(x = 0; x < w; x++){
        const uint8_t top  = src1[x];
        const int gradient = (cur + top - topleft)&0xFF;

        cur      = mid_pred(cur, top, gradient) + diff[x];
        topleft  = top;
        dst[x]   = cur;
    }

    *left     = cur;
    *left_top = topleft;
}
3561 | |
/**
 * Computes median-prediction residuals for a row.
 *
 * For each position the predictor is mid_pred() of the running left value,
 * src1[i] and (left + src1[i] - left_top) & 0xFF; dst[i] receives
 * src2[i] minus that predictor.
 *
 * @param dst      residual output
 * @param src1     row supplying the second and third predictor inputs
 * @param src2     row being predicted
 * @param w        number of bytes
 * @param left     in: initial left value; out: last src2 value consumed
 * @param left_top in: initial top-left value; out: last src1 value consumed
 */
static void sub_hfyu_median_prediction_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w, int *left, int *left_top){
    uint8_t cur     = *left;
    uint8_t topleft = *left_top;
    int x;

    for(x = 0; x < w; x++){
        const uint8_t top  = src1[x];
        const int gradient = (cur + top - topleft)&0xFF;
        const int pred     = mid_pred(cur, top, gradient);

        topleft = top;
        cur     = src2[x];
        dst[x]  = cur - pred;
    }

    *left     = cur;
    *left_top = topleft;
}
3579 | |
/* Out-of-place butterfly: o1 = i1+i2, o2 = i1-i2.
   Expands to two statements, so it must not be used as the unbraced body
   of an if/for. */
#define BUTTERFLY2(o1,o2,i1,i2) \
o1= (i1)+(i2);\
o2= (i1)-(i2);

/* In-place butterfly: x becomes x+y and y becomes x-y (old values).
   Uses locals named a and b, so the arguments must not be expressions
   involving identifiers a or b. */
#define BUTTERFLY1(x,y) \
{\
    int a,b;\
    a= x;\
    b= y;\
    x= a+b;\
    y= a-b;\
}

/* Final butterfly stage folded into an absolute sum: |x+y| + |x-y|.
   Evaluates each argument more than once. */
#define BUTTERFLYA(x,y) (FFABS((x)+(y)) + FFABS((x)-(y)))
936 | 3594 |
/**
 * Sum of absolute transformed differences of an 8x8 block.
 *
 * The pixel difference src - dst is run through three butterfly stages
 * along each row and three along each column (an 8x8 Hadamard-style
 * transform); the absolute values of the results are summed.
 *
 * @param s      unused here
 * @param dst    first block
 * @param src    second block
 * @param stride distance in bytes between consecutive rows
 * @param h      must be 8 (asserted)
 * @return sum of absolute transform coefficients
 */
static int hadamard8_diff8x8_c(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){
    int i;
    int temp[64];
    int sum=0;

    assert(h==8);

    /* horizontal pass: difference + first stage, then two in-place stages */
    for(i=0; i<8; i++){
        //FIXME try pointer walks
        BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0]-dst[stride*i+0],src[stride*i+1]-dst[stride*i+1]);
        BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2]-dst[stride*i+2],src[stride*i+3]-dst[stride*i+3]);
        BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4]-dst[stride*i+4],src[stride*i+5]-dst[stride*i+5]);
        BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6]-dst[stride*i+6],src[stride*i+7]-dst[stride*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
        BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+5], temp[8*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
        BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
    }

    /* vertical pass: two in-place stages, the third is folded into the
       absolute sum by BUTTERFLYA */
    for(i=0; i<8; i++){
        BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
        BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
        BUTTERFLY1(temp[8*6+i], temp[8*7+i]);

        BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
        BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
        BUTTERFLY1(temp[8*5+i], temp[8*7+i]);

        sum +=
             BUTTERFLYA(temp[8*0+i], temp[8*4+i])
            +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
            +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
            +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
    }
#if 0
static int maxi=0;
if(sum>maxi){
    maxi=sum;
    printf("MAX:%d\n", maxi);
}
#endif
    return sum;
}
3646 | |
/**
 * Sum of absolute transformed values of a single 8x8 block, with the
 * mean term removed.
 *
 * The pixels of src are run through three butterfly stages along each row
 * and three along each column (an 8x8 Hadamard-style transform); the
 * absolute values of the results are summed and the absolute value of
 * temp[0] + temp[32] (marked "-mean" in the code) is subtracted.
 *
 * @param s      unused here
 * @param src    block to measure
 * @param dummy  unused
 * @param stride distance in bytes between consecutive rows
 * @param h      must be 8 (asserted)
 * @return sum of absolute transform coefficients minus the mean term
 */
static int hadamard8_intra8x8_c(/*MpegEncContext*/ void *s, uint8_t *src, uint8_t *dummy, int stride, int h){
    int i;
    int temp[64];
    int sum=0;

    assert(h==8);

    /* horizontal pass: first stage straight from the pixels, then two
       in-place stages */
    for(i=0; i<8; i++){
        //FIXME try pointer walks
        BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0],src[stride*i+1]);
        BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2],src[stride*i+3]);
        BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4],src[stride*i+5]);
        BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6],src[stride*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
        BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+5], temp[8*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
        BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
    }

    /* vertical pass: two in-place stages, the third is folded into the
       absolute sum by BUTTERFLYA */
    for(i=0; i<8; i++){
        BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
        BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
        BUTTERFLY1(temp[8*6+i], temp[8*7+i]);

        BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
        BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
        BUTTERFLY1(temp[8*5+i], temp[8*7+i]);

        sum +=
             BUTTERFLYA(temp[8*0+i], temp[8*4+i])
            +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
            +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
            +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
    }

    sum -= FFABS(temp[8*0] + temp[8*4]); // -mean

    return sum;
}
3694 | |
1708 | 3695 static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){ |
936 | 3696 MpegEncContext * const s= (MpegEncContext *)c; |
4988
689490842cf5
factor sum_abs_dctelem out of dct_sad, and simd it.
lorenm
parents:
4749
diff
changeset
|
3697 DECLARE_ALIGNED_16(uint64_t, aligned_temp[sizeof(DCTELEM)*64/8]); |
1016 | 3698 DCTELEM * const temp= (DCTELEM*)aligned_temp; |
2967 | 3699 |
1708 | 3700 assert(h==8); |
936 | 3701 |
3702 s->dsp.diff_pixels(temp, src1, src2, stride); | |
1092 | 3703 s->dsp.fdct(temp); |
4988
689490842cf5
factor sum_abs_dctelem out of dct_sad, and simd it.
lorenm
parents:
4749
diff
changeset
|
3704 return s->dsp.sum_abs_dctelem(temp); |
936 | 3705 } |
3706 | |
#if CONFIG_GPL
/*
 * One 8-point 1-D integer transform (per the changeset note: the 8x8
 * integer DCT from x264). The caller must define SRC(x) to read input x and
 * DST(x,v) to consume output x with value v before expanding this macro.
 * The expansion is a single braced block, so it can serve as a loop body.
 */
#define DCT8_1D {\
    const int s07 = SRC(0) + SRC(7);\
    const int s16 = SRC(1) + SRC(6);\
    const int s25 = SRC(2) + SRC(5);\
    const int s34 = SRC(3) + SRC(4);\
    const int a0 = s07 + s34;\
    const int a1 = s16 + s25;\
    const int a2 = s07 - s34;\
    const int a3 = s16 - s25;\
    const int d07 = SRC(0) - SRC(7);\
    const int d16 = SRC(1) - SRC(6);\
    const int d25 = SRC(2) - SRC(5);\
    const int d34 = SRC(3) - SRC(4);\
    const int a4 = d16 + d25 + (d07 + (d07>>1));\
    const int a5 = d07 - d34 - (d25 + (d25>>1));\
    const int a6 = d07 + d34 - (d16 + (d16>>1));\
    const int a7 = d16 - d25 + (d34 + (d34>>1));\
    DST(0,  a0 + a1     ) ;\
    DST(1,  a4 + (a7>>2)) ;\
    DST(2,  a2 + (a3>>1)) ;\
    DST(3,  a5 + (a6>>2)) ;\
    DST(4,  a0 - a1     ) ;\
    DST(5,  a6 - (a5>>2)) ;\
    DST(6, (a2>>1) - a3 ) ;\
    DST(7, (a4>>2) - a7 ) ;\
}

/**
 * Sum of absolute values of the DCT8_1D-transformed difference of two 8x8
 * blocks.
 *
 * @param c      MpegEncContext supplying dsp.diff_pixels
 * @param src1   first block
 * @param src2   second block
 * @param stride distance in bytes between consecutive rows
 * @param h      not used by this function
 * @return sum of absolute transform outputs
 */
static int dct264_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    DCTELEM dct[8][8];
    int i;
    int sum=0;

    s->dsp.diff_pixels(dct[0], src1, src2, stride);

    /* first pass: transform each dct[i][0..7] in place */
#define SRC(x) dct[i][x]
#define DST(x,v) dct[i][x]= v
    for( i = 0; i < 8; i++ )
        DCT8_1D
#undef SRC
#undef DST

    /* second pass: transform along the other index; the outputs are not
       stored, only their absolute values are accumulated */
#define SRC(x) dct[x][i]
#define DST(x,v) sum += FFABS(v)
    for( i = 0; i < 8; i++ )
        DCT8_1D
#undef SRC
#undef DST
    return sum;
}
#endif
533c6386eca9
8x8 integer dct from x264 as cmp function (under CONFIG_GPL)
michael
parents:
2979
diff
changeset
|
3759 |
2382 | 3760 static int dct_max8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){ |
3761 MpegEncContext * const s= (MpegEncContext *)c; | |
3089 | 3762 DECLARE_ALIGNED_8(uint64_t, aligned_temp[sizeof(DCTELEM)*64/8]); |
2382 | 3763 DCTELEM * const temp= (DCTELEM*)aligned_temp; |
3764 int sum=0, i; | |
2967 | 3765 |
2382 | 3766 assert(h==8); |
3767 | |
3768 s->dsp.diff_pixels(temp, src1, src2, stride); | |
3769 s->dsp.fdct(temp); | |
3770 | |
3771 for(i=0; i<64; i++) | |
4001 | 3772 sum= FFMAX(sum, FFABS(temp[i])); |
2967 | 3773 |
2382 | 3774 return sum; |
3775 } | |
3776 | |
1708 | 3777 static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){ |
936 | 3778 MpegEncContext * const s= (MpegEncContext *)c; |
3089 | 3779 DECLARE_ALIGNED_8 (uint64_t, aligned_temp[sizeof(DCTELEM)*64*2/8]); |
1016 | 3780 DCTELEM * const temp= (DCTELEM*)aligned_temp; |
3781 DCTELEM * const bak = ((DCTELEM*)aligned_temp)+64; | |
936 | 3782 int sum=0, i; |
3783 | |
1708 | 3784 assert(h==8); |
936 | 3785 s->mb_intra=0; |
2967 | 3786 |
936 | 3787 s->dsp.diff_pixels(temp, src1, src2, stride); |
2967 | 3788 |
936 | 3789 memcpy(bak, temp, 64*sizeof(DCTELEM)); |
2967 | 3790 |
1013 | 3791 s->block_last_index[0/*FIXME*/]= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i); |
1689 | 3792 s->dct_unquantize_inter(s, temp, 0, s->qscale); |
6001 | 3793 ff_simple_idct(temp); //FIXME |
2967 | 3794 |
936 | 3795 for(i=0; i<64; i++) |
3796 sum+= (temp[i]-bak[i])*(temp[i]-bak[i]); | |
2967 | 3797 |
936 | 3798 return sum; |
3799 } | |
3800 | |
/**
 * Rate-distortion score of coding src1 against the prediction src2 for one
 * 8x8 block.
 *
 * The difference src1 - src2 is quantized; the bits needed for the
 * resulting coefficients are estimated from the context's VLC length
 * tables; the block is dequantized and added back onto a copy of src2, and
 * the distortion is the context's sse[1] between that reconstruction and
 * src1.
 *
 * Overwrites s->block_last_index[0].
 *
 * @param c      MpegEncContext
 * @param src1   block being coded
 * @param src2   prediction
 * @param stride distance in bytes between consecutive rows; also sizes the
 *               on-stack copy of src2
 * @param h      must be 8 (asserted)
 * @return distortion + ((bits * qscale^2 * 109 + 64) >> 7)
 */
static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    const uint8_t *scantable= s->intra_scantable.permutated;
    DECLARE_ALIGNED_8 (uint64_t, aligned_temp[sizeof(DCTELEM)*64/8]);
    DECLARE_ALIGNED_8 (uint64_t, aligned_bak[stride]);
    DCTELEM * const temp= (DCTELEM*)aligned_temp;
    uint8_t * const bak= (uint8_t*)aligned_bak;
    int i, last, run, bits, level, distortion, start_i;
    const int esc_length= s->ac_esc_length;
    uint8_t * length;
    uint8_t * last_length;

    assert(h==8);

    /* copy the 8 bytes of each of the 8 prediction rows, keeping the same
       stride, as two 32-bit words per row */
    for(i=0; i<8; i++){
        ((uint32_t*)(bak + i*stride))[0]= ((uint32_t*)(src2 + i*stride))[0];
        ((uint32_t*)(bak + i*stride))[1]= ((uint32_t*)(src2 + i*stride))[1];
    }

    s->dsp.diff_pixels(temp, src1, src2, stride);

    /* i only receives the quantizer's last output argument here; it is
       reused as a loop index below */
    s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);

    bits=0;

    /* intra blocks cost the DC coefficient separately and start the AC
       scan at index 1 */
    if (s->mb_intra) {
        start_i = 1;
        length     = s->intra_ac_vlc_length;
        last_length= s->intra_ac_vlc_last_length;
        bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
    } else {
        start_i = 0;
        length     = s->inter_ac_vlc_length;
        last_length= s->inter_ac_vlc_last_length;
    }

    if(last>=start_i){
        run=0;
        /* every coefficient before the last one: table lookup when
           level+64 fits in 0..127, escape length otherwise */
        for(i=start_i; i<last; i++){
            int j= scantable[i];
            level= temp[j];

            if(level){
                level+=64;
                if((level&(~127)) == 0){
                    bits+= length[UNI_AC_ENC_INDEX(run, level)];
                }else
                    bits+= esc_length;
                run=0;
            }else
                run++;
        }
        i= scantable[last];

        level= temp[i] + 64;

        /* the coefficient at the last index is expected to be non-zero */
        assert(level - 64);

        /* the last coefficient uses its own length table */
        if((level&(~127)) == 0){
            bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
        }else
            bits+= esc_length;

    }

    if(last>=0){
        if(s->mb_intra)
            s->dct_unquantize_intra(s, temp, 0, s->qscale);
        else
            s->dct_unquantize_inter(s, temp, 0, s->qscale);
    }

    /* reconstruct on top of the copied prediction */
    s->dsp.idct_add(bak, stride, temp);

    distortion= s->dsp.sse[1](NULL, bak, src1, stride, 8);

    return distortion + ((bits*s->qscale*s->qscale*109 + 64)>>7);
}
3879 | |
/**
 * Estimated number of bits needed to code the difference of two 8x8 blocks.
 *
 * The difference src1 - src2 is quantized and the coefficient bit cost is
 * summed from the context's VLC length tables.
 *
 * Overwrites s->block_last_index[0].
 *
 * @param c      MpegEncContext
 * @param src1   block being coded
 * @param src2   prediction
 * @param stride distance in bytes between consecutive rows
 * @param h      must be 8 (asserted)
 * @return estimated bit count
 */
static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    const uint8_t *scantable= s->intra_scantable.permutated;
    DECLARE_ALIGNED_8 (uint64_t, aligned_temp[sizeof(DCTELEM)*64/8]);
    DCTELEM * const temp= (DCTELEM*)aligned_temp;
    int i, last, run, bits, level, start_i;
    const int esc_length= s->ac_esc_length;
    uint8_t * length;
    uint8_t * last_length;

    assert(h==8);

    s->dsp.diff_pixels(temp, src1, src2, stride);

    /* i only receives the quantizer's last output argument here; it is
       reused as a loop index below */
    s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);

    bits=0;

    /* intra blocks cost the DC coefficient separately and start the AC
       scan at index 1 */
    if (s->mb_intra) {
        start_i = 1;
        length     = s->intra_ac_vlc_length;
        last_length= s->intra_ac_vlc_last_length;
        bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
    } else {
        start_i = 0;
        length     = s->inter_ac_vlc_length;
        last_length= s->inter_ac_vlc_last_length;
    }

    if(last>=start_i){
        run=0;
        /* every coefficient before the last one: table lookup when
           level+64 fits in 0..127, escape length otherwise */
        for(i=start_i; i<last; i++){
            int j= scantable[i];
            level= temp[j];

            if(level){
                level+=64;
                if((level&(~127)) == 0){
                    bits+= length[UNI_AC_ENC_INDEX(run, level)];
                }else
                    bits+= esc_length;
                run=0;
            }else
                run++;
        }
        i= scantable[last];

        level= temp[i] + 64;

        /* the coefficient at the last index is expected to be non-zero */
        assert(level - 64);

        /* the last coefficient uses its own length table */
        if((level&(~127)) == 0){
            bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
        }else
            bits+= esc_length;
    }

    return bits;
}
3939 | |
/*
 * Defines vsad_intra<size>_c(): the sum of absolute differences between
 * every pixel and the pixel directly below it, over a block that is <size>
 * pixels wide and h rows high (h-1 row pairs). The second pointer argument
 * is unused.
 */
#define VSAD_INTRA(size) \
static int vsad_intra##size##_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
    int total=0; \
    int row, col; \
    \
    for(row=1; row<h; row++){ \
        for(col=0; col<size; col++) \
            total+= FFABS(s[col] - s[col+stride]); \
        s+= stride; \
    } \
    \
    return total; \
}
VSAD_INTRA(8)
VSAD_INTRA(16)
1729 | 3957 |
/**
 * Vertical SAD of the difference signal of two 16-pixel-wide blocks.
 *
 * For every pair of vertically adjacent rows, sums
 * |(s1 - s2) on the upper row - (s1 - s2) on the lower row| over 16 columns.
 *
 * @param c      unused
 * @param s1     first block
 * @param s2     second block
 * @param stride distance in bytes between consecutive rows
 * @param h      number of rows (h-1 row pairs are evaluated)
 */
static int vsad16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
    int total = 0;
    int row, col;

    for(row = 1; row < h; row++){
        for(col = 0; col < 16; col++){
            const int upper = s1[col] - s2[col];
            const int lower = s1[col+stride] - s2[col+stride];
            total += FFABS(upper - lower);
        }
        s1 += stride;
        s2 += stride;
    }

    return total;
}
3972 | |
/* Square of an expression; the argument is evaluated twice. */
#define SQ(a) ((a)*(a))
/*
 * Defines vsse_intra<size>_c(): the sum of squared differences between
 * every pixel and the pixel directly below it, over a block that is <size>
 * pixels wide and h rows high (h-1 row pairs). The second pointer argument
 * is unused.
 */
#define VSSE_INTRA(size) \
static int vsse_intra##size##_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
    int total=0; \
    int row, col; \
    \
    for(row=1; row<h; row++){ \
        for(col=0; col<size; col++) \
            total+= SQ(s[col] - s[col+stride]); \
        s+= stride; \
    } \
    \
    return total; \
}
VSSE_INTRA(8)
VSSE_INTRA(16)
1729 | 3991 |
/**
 * Vertical SSE of the difference signal of two 16-pixel-wide blocks.
 *
 * For every pair of vertically adjacent rows, sums the square of
 * (s1 - s2) on the upper row minus (s1 - s2) on the lower row over
 * 16 columns.
 *
 * @param c      unused
 * @param s1     first block
 * @param s2     second block
 * @param stride distance in bytes between consecutive rows
 * @param h      number of rows (h-1 row pairs are evaluated)
 */
static int vsse16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
    int total = 0;
    int row, col;

    for(row = 1; row < h; row++){
        for(col = 0; col < 16; col++){
            const int upper = s1[col] - s2[col];
            const int lower = s1[col+stride] - s2[col+stride];
            total += SQ(upper - lower);
        }
        s1 += stride;
        s2 += stride;
    }

    return total;
}
4006 | |
/**
 * Sum of squared differences between an int8_t vector and an int16_t vector.
 *
 * @param pix1 first input, `size` elements
 * @param pix2 second input, `size` elements
 * @param size number of elements to compare
 * @return sum over k of (pix1[k] - pix2[k])^2 (0 when size <= 0)
 */
static int ssd_int8_vs_int16_c(const int8_t *pix1, const int16_t *pix2,
                               int size){
    int total = 0;
    int k = 0;

    while (k < size) {
        const int diff = pix1[k] - pix2[k];
        total += diff * diff;
        k++;
    }
    return total;
}
4015 | |
6056
558c1fd0ee72
Fix typo in macro name: WARPER8_16_SQ --> WRAPPER8_16_SQ.
diego
parents:
6054
diff
changeset
|
4016 WRAPPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c) |
558c1fd0ee72
Fix typo in macro name: WARPER8_16_SQ --> WRAPPER8_16_SQ.
diego
parents:
6054
diff
changeset
|
4017 WRAPPER8_16_SQ(hadamard8_intra8x8_c, hadamard8_intra16_c) |
558c1fd0ee72
Fix typo in macro name: WARPER8_16_SQ --> WRAPPER8_16_SQ.
diego
parents:
6054
diff
changeset
|
4018 WRAPPER8_16_SQ(dct_sad8x8_c, dct_sad16_c) |
8590 | 4019 #if CONFIG_GPL |
6056
558c1fd0ee72
Fix typo in macro name: WARPER8_16_SQ --> WRAPPER8_16_SQ.
diego
parents:
6054
diff
changeset
|
4020 WRAPPER8_16_SQ(dct264_sad8x8_c, dct264_sad16_c) |
3013 | 4021 #endif |
6056
558c1fd0ee72
Fix typo in macro name: WARPER8_16_SQ --> WRAPPER8_16_SQ.
diego
parents:
6054
diff
changeset
|
4022 WRAPPER8_16_SQ(dct_max8x8_c, dct_max16_c) |
558c1fd0ee72
Fix typo in macro name: WARPER8_16_SQ --> WRAPPER8_16_SQ.
diego
parents:
6054
diff
changeset
|
4023 WRAPPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c) |
558c1fd0ee72
Fix typo in macro name: WARPER8_16_SQ --> WRAPPER8_16_SQ.
diego
parents:
6054
diff
changeset
|
4024 WRAPPER8_16_SQ(rd8x8_c, rd16_c) |
558c1fd0ee72
Fix typo in macro name: WARPER8_16_SQ --> WRAPPER8_16_SQ.
diego
parents:
6054
diff
changeset
|
4025 WRAPPER8_16_SQ(bit8x8_c, bit16_c) |
936 | 4026 |
3568
945caa35ee9a
sse and 3dnow implementations of float->int conversion and mdct windowing.
lorenm
parents:
3536
diff
changeset
|
/**
 * In-place element-wise product: dst[k] *= src[k] for k in [0, len).
 */
static void vector_fmul_c(float *dst, const float *src, int len){
    int k = 0;

    while (k < len) {
        dst[k] = dst[k] * src[k];
        k++;
    }
}
945caa35ee9a
sse and 3dnow implementations of float->int conversion and mdct windowing.
lorenm
parents:
3536
diff
changeset
|
4032 |
945caa35ee9a
sse and 3dnow implementations of float->int conversion and mdct windowing.
lorenm
parents:
3536
diff
changeset
|
/**
 * Element-wise product of src0 with src1 read back to front:
 * dst[k] = src0[k] * src1[len - 1 - k] for k in [0, len).
 */
static void vector_fmul_reverse_c(float *dst, const float *src0, const float *src1, int len){
    int fwd, rev;

    for (fwd = 0, rev = len - 1; fwd < len; fwd++, rev--)
        dst[fwd] = src0[fwd] * src1[rev];
}
945caa35ee9a
sse and 3dnow implementations of float->int conversion and mdct windowing.
lorenm
parents:
3536
diff
changeset
|
4039 |
945caa35ee9a
sse and 3dnow implementations of float->int conversion and mdct windowing.
lorenm
parents:
3536
diff
changeset
|
/**
 * Multiply-accumulate with a strided destination:
 * dst[k*step] = src0[k] * src1[k] + src2[k] + src3 for k in [0, len).
 *
 * @param src3 integer constant added to every result
 * @param step distance, in elements, between consecutive outputs in dst
 */
void ff_vector_fmul_add_add_c(float *dst, const float *src0, const float *src1, const float *src2, int src3, int len, int step){
    int k = 0;

    while (k < len) {
        dst[k * step] = src0[k] * src1[k] + src2[k] + src3;
        k++;
    }
}
945caa35ee9a
sse and 3dnow implementations of float->int conversion and mdct windowing.
lorenm
parents:
3536
diff
changeset
|
4045 |
/**
 * Windowed overlap of two len-element inputs into a 2*len-element output.
 *
 * For k in [0, len), with m = 2*len - 1 - k (the mirrored output index):
 *   dst[k] = src0[k]*win[m] - src1[len-1-k]*win[k] + add_bias
 *   dst[m] = src0[k]*win[k] + src1[len-1-k]*win[m] + add_bias
 *
 * `win` and `dst` are indexed over [0, 2*len); `src0` and `src1` over [0, len).
 */
void ff_vector_fmul_window_c(float *dst, const float *src0, const float *src1, const float *win, float add_bias, int len){
    int lo, hi;

    for (lo = 0, hi = 2*len - 1; lo < len; lo++, hi--) {
        /* read all inputs before writing, as dst may be updated in place */
        const float a  = src0[lo];
        const float b  = src1[hi - len];
        const float wl = win[lo];
        const float wh = win[hi];

        dst[lo] = a*wh - b*wl + add_bias;
        dst[hi] = a*wl + b*wh + add_bias;
    }
}
4060 | |
/**
 * Converts integers to float and scales them: dst[k] = src[k] * mul
 * for k in [0, len).
 */
static void int32_to_float_fmul_scalar_c(float *dst, const int *src, float mul, int len){
    int k = 0;

    while (k < len) {
        dst[k] = src[k] * mul;
        k++;
    }
}
4066 | |
7261 | 4067 static av_always_inline int float_to_int16_one(const float *src){ |
4068 int_fast32_t tmp = *(const int32_t*)src; | |
4069 if(tmp & 0xf0000){ | |
4070 tmp = (0x43c0ffff - tmp)>>31; | |
4071 // is this faster on some gcc/cpu combinations? | |
4072 // if(tmp > 0x43c0ffff) tmp = 0xFFFF; | |
4073 // else tmp = 0; | |
4074 } | |
4075 return tmp - 0x8000; | |
4076 } | |
4077 | |
/**
 * Converts len float samples to int16_t using float_to_int16_one().
 *
 * @param dst output, len elements
 * @param src input, len elements
 * @param len number of samples
 */
void ff_float_to_int16_c(int16_t *dst, const float *src, long len){
    long i; /* same type as len, so the index cannot overflow before reaching it */

    for(i=0; i<len; i++)
        dst[i] = float_to_int16_one(src+i);
}
4083 | |
7286
e267f2519248
float_to_int16_interleave: change src to an array of pointers instead of assuming it's contiguous.
lorenm
parents:
7263
diff
changeset
|
/**
 * Converts planar float audio to interleaved int16_t.
 *
 * @param dst      interleaved output, len*channels elements;
 *                 sample i of channel c is written to dst[i*channels + c]
 * @param src      array of `channels` pointers, each to len float samples
 * @param len      number of samples per channel
 * @param channels number of channels
 */
void ff_float_to_int16_interleave_c(int16_t *dst, const float **src, long len, int channels){
    long i, j; /* long, like len: i runs up to len and j up to len*channels */
    int c;

    if(channels==2){
        /* dedicated stereo path: both channels handled in a single pass */
        for(i=0; i<len; i++){
            dst[2*i]   = float_to_int16_one(src[0]+i);
            dst[2*i+1] = float_to_int16_one(src[1]+i);
        }
    }else{
        for(c=0; c<channels; c++)
            for(i=0, j=c; i<len; i++, j+=channels)
                dst[j] = float_to_int16_one(src[c]+i);
    }
}
945caa35ee9a
sse and 3dnow implementations of float->int conversion and mdct windowing.
lorenm
parents:
3536
diff
changeset
|
4097 |
7203
87b1dfb5a98d
Add several vector functions used by Monkey's Audio decoder to dsputil
kostya
parents:
6719
diff
changeset
|
/**
 * In-place vector addition: v1[k] += v2[k] for the first `order` elements.
 */
static void add_int16_c(int16_t * v1, int16_t * v2, int order)
{
    int k;

    /* counts `order` down to zero, exactly like while (order--) */
    for (k = 0; order; order--, k++)
        v1[k] = v1[k] + v2[k];
}
87b1dfb5a98d
Add several vector functions used by Monkey's Audio decoder to dsputil
kostya
parents:
6719
diff
changeset
|
4103 |
87b1dfb5a98d
Add several vector functions used by Monkey's Audio decoder to dsputil
kostya
parents:
6719
diff
changeset
|
/**
 * In-place vector subtraction: v1[k] -= v2[k] for the first `order` elements.
 */
static void sub_int16_c(int16_t * v1, int16_t * v2, int order)
{
    int k;

    /* counts `order` down to zero, exactly like while (order--) */
    for (k = 0; order; order--, k++)
        v1[k] = v1[k] - v2[k];
}
87b1dfb5a98d
Add several vector functions used by Monkey's Audio decoder to dsputil
kostya
parents:
6719
diff
changeset
|
4109 |
87b1dfb5a98d
Add several vector functions used by Monkey's Audio decoder to dsputil
kostya
parents:
6719
diff
changeset
|
/**
 * Scalar product of two int16_t vectors with per-term scaling.
 *
 * @param order number of elements
 * @param shift right shift applied to each product before it is accumulated
 * @return sum over k of (v1[k] * v2[k]) >> shift
 */
static int32_t scalarproduct_int16_c(int16_t * v1, int16_t * v2, int order, int shift)
{
    int acc = 0;
    int k;

    /* counts `order` down to zero, exactly like while (order--) */
    for (k = 0; order; order--, k++) {
        const int prod = v1[k] * v2[k];
        acc += prod >> shift;
    }

    return acc;
}
87b1dfb5a98d
Add several vector functions used by Monkey's Audio decoder to dsputil
kostya
parents:
6719
diff
changeset
|
4119 |
#define W0 2048
#define W1 2841 /* 2048*sqrt (2)*cos (1*pi/16) */
#define W2 2676 /* 2048*sqrt (2)*cos (2*pi/16) */
#define W3 2408 /* 2048*sqrt (2)*cos (3*pi/16) */
#define W4 2048 /* 2048*sqrt (2)*cos (4*pi/16) */
#define W5 1609 /* 2048*sqrt (2)*cos (5*pi/16) */
#define W6 1108 /* 2048*sqrt (2)*cos (6*pi/16) */
#define W7 565  /* 2048*sqrt (2)*cos (7*pi/16) */

/**
 * One-dimensional 8-point inverse DCT applied in place to a row of 8
 * consecutive coefficients. Results are rounded and scaled down by 8 bits.
 */
static void wmv2_idct_row(short * b)
{
    /* load the row once */
    const int c0 = b[0], c1 = b[1], c2 = b[2], c3 = b[3];
    const int c4 = b[4], c5 = b[5], c6 = b[6], c7 = b[7];

    /* step 1: rotations of the odd pairs (1,7), (5,3) and even pairs (2,6), (0,4) */
    const int odd1  = W1*c1 + W7*c7;
    const int odd7  = W7*c1 - W1*c7;
    const int odd5  = W5*c5 + W3*c3;
    const int odd3  = W3*c5 - W5*c3;
    const int even2 = W2*c2 + W6*c6;
    const int even6 = W6*c2 - W2*c6;
    const int even0 = W0*c0 + W0*c4;
    const int even4 = W0*c0 - W0*c4;

    /* step 2: combine the odd terms; 181/256 approximates 1/sqrt(2) */
    const int s1 = (181*(odd1 - odd5 + odd7 - odd3) + 128) >> 8;
    const int s2 = (181*(odd1 - odd5 - odd7 + odd3) + 128) >> 8;

    /* step 3: final butterflies with rounding */
    b[0] = (even0 + even2 + odd1 + odd5 + (1<<7)) >> 8;
    b[1] = (even4 + even6 + s1          + (1<<7)) >> 8;
    b[2] = (even4 - even6 + s2          + (1<<7)) >> 8;
    b[3] = (even0 - even2 + odd7 + odd3 + (1<<7)) >> 8;
    b[4] = (even0 - even2 - odd7 - odd3 + (1<<7)) >> 8;
    b[5] = (even4 - even6 - s2          + (1<<7)) >> 8;
    b[6] = (even4 + even6 - s1          + (1<<7)) >> 8;
    b[7] = (even0 + even2 - odd1 - odd5 + (1<<7)) >> 8;
}
4155 static void wmv2_idct_col(short * b) | |
4156 { | |
4157 int s1,s2; | |
4158 int a0,a1,a2,a3,a4,a5,a6,a7; | |
4159 /*step 1, with extended precision*/ | |
4160 a1 = (W1*b[8*1]+W7*b[8*7] + 4)>>3; | |
4161 a7 = (W7*b[8*1]-W1*b[8*7] + 4)>>3; | |
4162 a5 = (W5*b[8*5]+W3*b[8*3] + 4)>>3; | |
4163 a3 = (W3*b[8*5]-W5*b[8*3] + 4)>>3; | |
4164 a2 = (W2*b[8*2]+W6*b[8*6] + 4)>>3; | |
4165 a6 = (W6*b[8*2]-W2*b[8*6] + 4)>>3; | |
4166 a0 = (W0*b[8*0]+W0*b[8*4] )>>3; | |
4167 a4 = (W0*b[8*0]-W0*b[8*4] )>>3; | |
4168 /*step 2*/ | |
4169 s1 = (181*(a1-a5+a7-a3)+128)>>8; | |
4170 s2 = (181*(a1-a5-a7+a3)+128)>>8; | |
4171 /*step 3*/ | |
4172 b[8*0] = (a0+a2+a1+a5 + (1<<13))>>14; | |
4173 b[8*1] = (a4+a6 +s1 + (1<<13))>>14; | |
4174 b[8*2] = (a4-a6 +s2 + (1<<13))>>14; | |
4175 b[8*3] = (a0-a2+a7+a3 + (1<<13))>>14; | |
4176 | |
4177 b[8*4] = (a0-a2-a7-a3 + (1<<13))>>14; | |
4178 b[8*5] = (a4-a6 -s2 + (1<<13))>>14; | |
4179 b[8*6] = (a4+a6 -s1 + (1<<13))>>14; | |
4180 b[8*7] = (a0+a2-a1-a5 + (1<<13))>>14; | |
4181 } | |
/**
 * In-place 8x8 inverse DCT: wmv2_idct_row() on each of the 8 rows,
 * then wmv2_idct_col() on each of the 8 columns.
 *
 * @param block 64 coefficients in row-major order
 */
void ff_wmv2_idct_c(short * block){
    int row, col;

    for (row = 0; row < 8; row++)
        wmv2_idct_row(block + 8*row);

    for (col = 0; col < 8; col++)
        wmv2_idct_col(block + col);
}
1092 | 4192 /* XXX: those functions should be suppressed ASAP when all IDCTs are |
4193 converted */ | |
5887 | 4194 static void ff_wmv2_idct_put_c(uint8_t *dest, int line_size, DCTELEM *block) |
4195 { | |
4196 ff_wmv2_idct_c(block); | |
4197 put_pixels_clamped_c(block, dest, line_size); | |
4198 } | |
4199 static void ff_wmv2_idct_add_c(uint8_t *dest, int line_size, DCTELEM *block) | |
4200 { | |
4201 ff_wmv2_idct_c(block); | |
4202 add_pixels_clamped_c(block, dest, line_size); | |
4203 } | |
1092 | 4204 static void ff_jref_idct_put(uint8_t *dest, int line_size, DCTELEM *block) |
4205 { | |
4206 j_rev_dct (block); | |
4207 put_pixels_clamped_c(block, dest, line_size); | |
4208 } | |
4209 static void ff_jref_idct_add(uint8_t *dest, int line_size, DCTELEM *block) | |
4210 { | |
4211 j_rev_dct (block); | |
4212 add_pixels_clamped_c(block, dest, line_size); | |
4213 } | |
4214 | |
2256 | 4215 static void ff_jref_idct4_put(uint8_t *dest, int line_size, DCTELEM *block) |
4216 { | |
4217 j_rev_dct4 (block); | |
4218 put_pixels_clamped4_c(block, dest, line_size); | |
4219 } | |
4220 static void ff_jref_idct4_add(uint8_t *dest, int line_size, DCTELEM *block) | |
4221 { | |
4222 j_rev_dct4 (block); | |
4223 add_pixels_clamped4_c(block, dest, line_size); | |
4224 } | |
4225 | |
2257 | 4226 static void ff_jref_idct2_put(uint8_t *dest, int line_size, DCTELEM *block) |
4227 { | |
4228 j_rev_dct2 (block); | |
4229 put_pixels_clamped2_c(block, dest, line_size); | |
4230 } | |
4231 static void ff_jref_idct2_add(uint8_t *dest, int line_size, DCTELEM *block) | |
4232 { | |
4233 j_rev_dct2 (block); | |
4234 add_pixels_clamped2_c(block, dest, line_size); | |
4235 } | |
4236 | |
2259 | 4237 static void ff_jref_idct1_put(uint8_t *dest, int line_size, DCTELEM *block) |
4238 { | |
4176 | 4239 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; |
2259 | 4240 |
4241 dest[0] = cm[(block[0] + 4)>>3]; | |
4242 } | |
4243 static void ff_jref_idct1_add(uint8_t *dest, int line_size, DCTELEM *block) | |
4244 { | |
4176 | 4245 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; |
2259 | 4246 |
4247 dest[0] = cm[dest[0] + ((block[0] + 4)>>3)]; | |
4248 } | |
4249 | |
5143 | 4250 static void just_return(void *mem av_unused, int stride av_unused, int h av_unused) { return; } |
3215
06f98047ff26
prefetch pixels for future motion compensation. 2-5% faster h264.
lorenm
parents:
3199
diff
changeset
|
4251 |
1201 | 4252 /* init static data */ |
4197 | 4253 void dsputil_static_init(void) |
0 | 4254 { |
751 | 4255 int i; |
0 | 4256 |
4176 | 4257 for(i=0;i<256;i++) ff_cropTbl[i + MAX_NEG_CROP] = i; |
1201 | 4258 for(i=0;i<MAX_NEG_CROP;i++) { |
4176 | 4259 ff_cropTbl[i] = 0; |
4260 ff_cropTbl[i + MAX_NEG_CROP + 256] = 255; | |
1201 | 4261 } |
2967 | 4262 |
1201 | 4263 for(i=0;i<512;i++) { |
4179 | 4264 ff_squareTbl[i] = (i - 256) * (i - 256); |
1201 | 4265 } |
2967 | 4266 |
4197 | 4267 for(i=0; i<64; i++) inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1; |
1201 | 4268 } |
0 | 4269 |
4281
de525a2b41db
ff_check_alignment to warn the user about a missaligned stack
michael
parents:
4240
diff
changeset
|
4270 int ff_check_alignment(void){ |
de525a2b41db
ff_check_alignment to warn the user about a missaligned stack
michael
parents:
4240
diff
changeset
|
4271 static int did_fail=0; |
de525a2b41db
ff_check_alignment to warn the user about a missaligned stack
michael
parents:
4240
diff
changeset
|
4272 DECLARE_ALIGNED_16(int, aligned); |
de525a2b41db
ff_check_alignment to warn the user about a missaligned stack
michael
parents:
4240
diff
changeset
|
4273 |
5150 | 4274 if((long)&aligned & 15){ |
4281
de525a2b41db
ff_check_alignment to warn the user about a missaligned stack
michael
parents:
4240
diff
changeset
|
4275 if(!did_fail){ |
8590 | 4276 #if HAVE_MMX || HAVE_ALTIVEC |
4281
de525a2b41db
ff_check_alignment to warn the user about a missaligned stack
michael
parents:
4240
diff
changeset
|
4277 av_log(NULL, AV_LOG_ERROR, |
4292 | 4278 "Compiler did not align stack variables. Libavcodec has been miscompiled\n" |
4279 "and may be very slow or crash. This is not a bug in libavcodec,\n" | |
5542
b0a566346fb1
Add attribute that forces alignment of stack to functions that need it.
ramiro
parents:
5520
diff
changeset
|
4280 "but in the compiler. You may try recompiling using gcc >= 4.2.\n" |
b0a566346fb1
Add attribute that forces alignment of stack to functions that need it.
ramiro
parents:
5520
diff
changeset
|
4281 "Do not report crashes to FFmpeg developers.\n"); |
4281
de525a2b41db
ff_check_alignment to warn the user about a missaligned stack
michael
parents:
4240
diff
changeset
|
4282 #endif |
de525a2b41db
ff_check_alignment to warn the user about a missaligned stack
michael
parents:
4240
diff
changeset
|
4283 did_fail=1; |
de525a2b41db
ff_check_alignment to warn the user about a missaligned stack
michael
parents:
4240
diff
changeset
|
4284 } |
de525a2b41db
ff_check_alignment to warn the user about a missaligned stack
michael
parents:
4240
diff
changeset
|
4285 return -1; |
de525a2b41db
ff_check_alignment to warn the user about a missaligned stack
michael
parents:
4240
diff
changeset
|
4286 } |
de525a2b41db
ff_check_alignment to warn the user about a missaligned stack
michael
parents:
4240
diff
changeset
|
4287 return 0; |
de525a2b41db
ff_check_alignment to warn the user about a missaligned stack
michael
parents:
4240
diff
changeset
|
4288 } |
861 | 4289 |
1201 | 4290 void dsputil_init(DSPContext* c, AVCodecContext *avctx) |
4291 { | |
4292 int i; | |
0 | 4293 |
4281
de525a2b41db
ff_check_alignment to warn the user about a missaligned stack
michael
parents:
4240
diff
changeset
|
4294 ff_check_alignment(); |
de525a2b41db
ff_check_alignment to warn the user about a missaligned stack
michael
parents:
4240
diff
changeset
|
4295 |
8590 | 4296 #if CONFIG_ENCODERS |
1567 | 4297 if(avctx->dct_algo==FF_DCT_FASTINT) { |
1092 | 4298 c->fdct = fdct_ifast; |
2979 | 4299 c->fdct248 = fdct_ifast248; |
2967 | 4300 } |
1567 | 4301 else if(avctx->dct_algo==FF_DCT_FAAN) { |
1557 | 4302 c->fdct = ff_faandct; |
2979 | 4303 c->fdct248 = ff_faandct248; |
2967 | 4304 } |
1567 | 4305 else { |
1092 | 4306 c->fdct = ff_jpeg_fdct_islow; //slow/accurate/default |
2979 | 4307 c->fdct248 = ff_fdct248_islow; |
1567 | 4308 } |
1092 | 4309 #endif //CONFIG_ENCODERS |
4310 | |
2256 | 4311 if(avctx->lowres==1){ |
8596
68e959302527
replace all occurrence of ENABLE_ by the corresponding CONFIG_, HAVE_ or ARCH_
aurel
parents:
8590
diff
changeset
|
4312 if(avctx->idct_algo==FF_IDCT_INT || avctx->idct_algo==FF_IDCT_AUTO || !CONFIG_H264_DECODER){ |
2272
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
2259
diff
changeset
|
4313 c->idct_put= ff_jref_idct4_put; |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
2259
diff
changeset
|
4314 c->idct_add= ff_jref_idct4_add; |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
2259
diff
changeset
|
4315 }else{ |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
2259
diff
changeset
|
4316 c->idct_put= ff_h264_lowres_idct_put_c; |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
2259
diff
changeset
|
4317 c->idct_add= ff_h264_lowres_idct_add_c; |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
2259
diff
changeset
|
4318 } |
2256 | 4319 c->idct = j_rev_dct4; |
1092 | 4320 c->idct_permutation_type= FF_NO_IDCT_PERM; |
2257 | 4321 }else if(avctx->lowres==2){ |
4322 c->idct_put= ff_jref_idct2_put; | |
4323 c->idct_add= ff_jref_idct2_add; | |
4324 c->idct = j_rev_dct2; | |
4325 c->idct_permutation_type= FF_NO_IDCT_PERM; | |
2259 | 4326 }else if(avctx->lowres==3){ |
4327 c->idct_put= ff_jref_idct1_put; | |
4328 c->idct_add= ff_jref_idct1_add; | |
4329 c->idct = j_rev_dct1; | |
4330 c->idct_permutation_type= FF_NO_IDCT_PERM; | |
2256 | 4331 }else{ |
4332 if(avctx->idct_algo==FF_IDCT_INT){ | |
4333 c->idct_put= ff_jref_idct_put; | |
4334 c->idct_add= ff_jref_idct_add; | |
4335 c->idct = j_rev_dct; | |
4336 c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM; | |
8596
68e959302527
replace all occurrence of ENABLE_ by the corresponding CONFIG_, HAVE_ or ARCH_
aurel
parents:
8590
diff
changeset
|
4337 }else if((CONFIG_VP3_DECODER || CONFIG_VP5_DECODER || CONFIG_VP6_DECODER || CONFIG_THEORA_DECODER ) && |
5007 | 4338 avctx->idct_algo==FF_IDCT_VP3){ |
2693 | 4339 c->idct_put= ff_vp3_idct_put_c; |
4340 c->idct_add= ff_vp3_idct_add_c; | |
4341 c->idct = ff_vp3_idct_c; | |
4342 c->idct_permutation_type= FF_NO_IDCT_PERM; | |
5887 | 4343 }else if(avctx->idct_algo==FF_IDCT_WMV2){ |
4344 c->idct_put= ff_wmv2_idct_put_c; | |
4345 c->idct_add= ff_wmv2_idct_add_c; | |
4346 c->idct = ff_wmv2_idct_c; | |
4347 c->idct_permutation_type= FF_NO_IDCT_PERM; | |
6407 | 4348 }else if(avctx->idct_algo==FF_IDCT_FAAN){ |
4349 c->idct_put= ff_faanidct_put; | |
4350 c->idct_add= ff_faanidct_add; | |
4351 c->idct = ff_faanidct; | |
4352 c->idct_permutation_type= FF_NO_IDCT_PERM; | |
8596
68e959302527
replace all occurrence of ENABLE_ by the corresponding CONFIG_, HAVE_ or ARCH_
aurel
parents:
8590
diff
changeset
|
4353 }else if(CONFIG_EATGQ_DECODER && avctx->idct_algo==FF_IDCT_EA) { |
8120 | 4354 c->idct_put= ff_ea_idct_put_c; |
4355 c->idct_permutation_type= FF_NO_IDCT_PERM; | |
2256 | 4356 }else{ //accurate/default |
6001 | 4357 c->idct_put= ff_simple_idct_put; |
4358 c->idct_add= ff_simple_idct_add; | |
4359 c->idct = ff_simple_idct; | |
2256 | 4360 c->idct_permutation_type= FF_NO_IDCT_PERM; |
4361 } | |
1092 | 4362 } |
4363 | |
8596
68e959302527
replace all occurrence of ENABLE_ by the corresponding CONFIG_, HAVE_ or ARCH_
aurel
parents:
8590
diff
changeset
|
4364 if (CONFIG_H264_DECODER) { |
5065 | 4365 c->h264_idct_add= ff_h264_idct_add_c; |
4366 c->h264_idct8_add= ff_h264_idct8_add_c; | |
4367 c->h264_idct_dc_add= ff_h264_idct_dc_add_c; | |
4368 c->h264_idct8_dc_add= ff_h264_idct8_dc_add_c; | |
8375
de2509cf3c44
H.264 idct functions that include the chroma, inter luma and intra16 luma loops
michael
parents:
8359
diff
changeset
|
4369 c->h264_idct_add16 = ff_h264_idct_add16_c; |
de2509cf3c44
H.264 idct functions that include the chroma, inter luma and intra16 luma loops
michael
parents:
8359
diff
changeset
|
4370 c->h264_idct8_add4 = ff_h264_idct8_add4_c; |
de2509cf3c44
H.264 idct functions that include the chroma, inter luma and intra16 luma loops
michael
parents:
8359
diff
changeset
|
4371 c->h264_idct_add8 = ff_h264_idct_add8_c; |
de2509cf3c44
H.264 idct functions that include the chroma, inter luma and intra16 luma loops
michael
parents:
8359
diff
changeset
|
4372 c->h264_idct_add16intra= ff_h264_idct_add16intra_c; |
5064 | 4373 } |
2272
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
2259
diff
changeset
|
4374 |
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
764
diff
changeset
|
4375 c->get_pixels = get_pixels_c; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
764
diff
changeset
|
4376 c->diff_pixels = diff_pixels_c; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
764
diff
changeset
|
4377 c->put_pixels_clamped = put_pixels_clamped_c; |
1984
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
4378 c->put_signed_pixels_clamped = put_signed_pixels_clamped_c; |
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
764
diff
changeset
|
4379 c->add_pixels_clamped = add_pixels_clamped_c; |
2763 | 4380 c->add_pixels8 = add_pixels8_c; |
4381 c->add_pixels4 = add_pixels4_c; | |
4988
689490842cf5
factor sum_abs_dctelem out of dct_sad, and simd it.
lorenm
parents:
4749
diff
changeset
|
4382 c->sum_abs_dctelem = sum_abs_dctelem_c; |
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
764
diff
changeset
|
4383 c->gmc1 = gmc1_c; |
3248
7aa9f80e7954
mmx implementation of 3-point GMC. (5x faster than C)
lorenm
parents:
3245
diff
changeset
|
4384 c->gmc = ff_gmc_c; |
8288 | 4385 c->clear_block = clear_block_c; |
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
764
diff
changeset
|
4386 c->clear_blocks = clear_blocks_c; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
764
diff
changeset
|
4387 c->pix_sum = pix_sum_c; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
764
diff
changeset
|
4388 c->pix_norm1 = pix_norm1_c; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
764
diff
changeset
|
4389 |
859 | 4390 /* TODO [0] 16 [1] 8 */ |
1708 | 4391 c->pix_abs[0][0] = pix_abs16_c; |
4392 c->pix_abs[0][1] = pix_abs16_x2_c; | |
4393 c->pix_abs[0][2] = pix_abs16_y2_c; | |
4394 c->pix_abs[0][3] = pix_abs16_xy2_c; | |
4395 c->pix_abs[1][0] = pix_abs8_c; | |
4396 c->pix_abs[1][1] = pix_abs8_x2_c; | |
4397 c->pix_abs[1][2] = pix_abs8_y2_c; | |
4398 c->pix_abs[1][3] = pix_abs8_xy2_c; | |
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
764
diff
changeset
|
4399 |
859 | 4400 #define dspfunc(PFX, IDX, NUM) \ |
4401 c->PFX ## _pixels_tab[IDX][0] = PFX ## _pixels ## NUM ## _c; \ | |
4402 c->PFX ## _pixels_tab[IDX][1] = PFX ## _pixels ## NUM ## _x2_c; \ | |
4403 c->PFX ## _pixels_tab[IDX][2] = PFX ## _pixels ## NUM ## _y2_c; \ | |
4404 c->PFX ## _pixels_tab[IDX][3] = PFX ## _pixels ## NUM ## _xy2_c | |
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
764
diff
changeset
|
4405 |
859 | 4406 dspfunc(put, 0, 16); |
4407 dspfunc(put_no_rnd, 0, 16); | |
4408 dspfunc(put, 1, 8); | |
4409 dspfunc(put_no_rnd, 1, 8); | |
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
4410 dspfunc(put, 2, 4); |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
4411 dspfunc(put, 3, 2); |
0 | 4412 |
859 | 4413 dspfunc(avg, 0, 16); |
4414 dspfunc(avg_no_rnd, 0, 16); | |
4415 dspfunc(avg, 1, 8); | |
4416 dspfunc(avg_no_rnd, 1, 8); | |
1319 | 4417 dspfunc(avg, 2, 4); |
4418 dspfunc(avg, 3, 2); | |
859 | 4419 #undef dspfunc |
857 | 4420 |
1864 | 4421 c->put_no_rnd_pixels_l2[0]= put_no_rnd_pixels16_l2_c; |
4422 c->put_no_rnd_pixels_l2[1]= put_no_rnd_pixels8_l2_c; | |
4423 | |
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
4424 c->put_tpel_pixels_tab[ 0] = put_tpel_pixels_mc00_c; |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
4425 c->put_tpel_pixels_tab[ 1] = put_tpel_pixels_mc10_c; |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
4426 c->put_tpel_pixels_tab[ 2] = put_tpel_pixels_mc20_c; |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
4427 c->put_tpel_pixels_tab[ 4] = put_tpel_pixels_mc01_c; |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
4428 c->put_tpel_pixels_tab[ 5] = put_tpel_pixels_mc11_c; |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
4429 c->put_tpel_pixels_tab[ 6] = put_tpel_pixels_mc21_c; |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
4430 c->put_tpel_pixels_tab[ 8] = put_tpel_pixels_mc02_c; |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
4431 c->put_tpel_pixels_tab[ 9] = put_tpel_pixels_mc12_c; |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
4432 c->put_tpel_pixels_tab[10] = put_tpel_pixels_mc22_c; |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
4433 |
1319 | 4434 c->avg_tpel_pixels_tab[ 0] = avg_tpel_pixels_mc00_c; |
4435 c->avg_tpel_pixels_tab[ 1] = avg_tpel_pixels_mc10_c; | |
4436 c->avg_tpel_pixels_tab[ 2] = avg_tpel_pixels_mc20_c; | |
4437 c->avg_tpel_pixels_tab[ 4] = avg_tpel_pixels_mc01_c; | |
4438 c->avg_tpel_pixels_tab[ 5] = avg_tpel_pixels_mc11_c; | |
4439 c->avg_tpel_pixels_tab[ 6] = avg_tpel_pixels_mc21_c; | |
4440 c->avg_tpel_pixels_tab[ 8] = avg_tpel_pixels_mc02_c; | |
4441 c->avg_tpel_pixels_tab[ 9] = avg_tpel_pixels_mc12_c; | |
4442 c->avg_tpel_pixels_tab[10] = avg_tpel_pixels_mc22_c; | |
4443 | |
859 | 4444 #define dspfunc(PFX, IDX, NUM) \ |
4445 c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_c; \ | |
4446 c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_c; \ | |
4447 c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_c; \ | |
4448 c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_c; \ | |
4449 c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_c; \ | |
4450 c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_c; \ | |
4451 c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_c; \ | |
4452 c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_c; \ | |
4453 c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_c; \ | |
4454 c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_c; \ | |
4455 c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_c; \ | |
4456 c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_c; \ | |
4457 c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_c; \ | |
4458 c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_c; \ | |
4459 c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_c; \ | |
4460 c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_c | |
857 | 4461 |
859 | 4462 dspfunc(put_qpel, 0, 16); |
4463 dspfunc(put_no_rnd_qpel, 0, 16); | |
4464 | |
4465 dspfunc(avg_qpel, 0, 16); | |
4466 /* dspfunc(avg_no_rnd_qpel, 0, 16); */ | |
857 | 4467 |
859 | 4468 dspfunc(put_qpel, 1, 8); |
4469 dspfunc(put_no_rnd_qpel, 1, 8); | |
4470 | |
4471 dspfunc(avg_qpel, 1, 8); | |
4472 /* dspfunc(avg_no_rnd_qpel, 1, 8); */ | |
1168 | 4473 |
4474 dspfunc(put_h264_qpel, 0, 16); | |
4475 dspfunc(put_h264_qpel, 1, 8); | |
4476 dspfunc(put_h264_qpel, 2, 4); | |
3020
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
4477 dspfunc(put_h264_qpel, 3, 2); |
1168 | 4478 dspfunc(avg_h264_qpel, 0, 16); |
4479 dspfunc(avg_h264_qpel, 1, 8); | |
4480 dspfunc(avg_h264_qpel, 2, 4); | |
4481 | |
859 | 4482 #undef dspfunc |
1168 | 4483 c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_c; |
4484 c->put_h264_chroma_pixels_tab[1]= put_h264_chroma_mc4_c; | |
4485 c->put_h264_chroma_pixels_tab[2]= put_h264_chroma_mc2_c; | |
4486 c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_c; | |
4487 c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_c; | |
4488 c->avg_h264_chroma_pixels_tab[2]= avg_h264_chroma_mc2_c; | |
3663 | 4489 c->put_no_rnd_h264_chroma_pixels_tab[0]= put_no_rnd_h264_chroma_mc8_c; |
857 | 4490 |
2415 | 4491 c->weight_h264_pixels_tab[0]= weight_h264_pixels16x16_c; |
4492 c->weight_h264_pixels_tab[1]= weight_h264_pixels16x8_c; | |
4493 c->weight_h264_pixels_tab[2]= weight_h264_pixels8x16_c; | |
4494 c->weight_h264_pixels_tab[3]= weight_h264_pixels8x8_c; | |
4495 c->weight_h264_pixels_tab[4]= weight_h264_pixels8x4_c; | |
4496 c->weight_h264_pixels_tab[5]= weight_h264_pixels4x8_c; | |
4497 c->weight_h264_pixels_tab[6]= weight_h264_pixels4x4_c; | |
4498 c->weight_h264_pixels_tab[7]= weight_h264_pixels4x2_c; | |
4499 c->weight_h264_pixels_tab[8]= weight_h264_pixels2x4_c; | |
4500 c->weight_h264_pixels_tab[9]= weight_h264_pixels2x2_c; | |
4501 c->biweight_h264_pixels_tab[0]= biweight_h264_pixels16x16_c; | |
4502 c->biweight_h264_pixels_tab[1]= biweight_h264_pixels16x8_c; | |
4503 c->biweight_h264_pixels_tab[2]= biweight_h264_pixels8x16_c; | |
4504 c->biweight_h264_pixels_tab[3]= biweight_h264_pixels8x8_c; | |
4505 c->biweight_h264_pixels_tab[4]= biweight_h264_pixels8x4_c; | |
4506 c->biweight_h264_pixels_tab[5]= biweight_h264_pixels4x8_c; | |
4507 c->biweight_h264_pixels_tab[6]= biweight_h264_pixels4x4_c; | |
4508 c->biweight_h264_pixels_tab[7]= biweight_h264_pixels4x2_c; | |
4509 c->biweight_h264_pixels_tab[8]= biweight_h264_pixels2x4_c; | |
4510 c->biweight_h264_pixels_tab[9]= biweight_h264_pixels2x2_c; | |
4511 | |
6437 | 4512 c->draw_edges = draw_edges_c; |
4513 | |
8590 | 4514 #if CONFIG_CAVS_DECODER |
3395
adccbf4a1040
CAVS decoder by (Stefan Gehrer stefan.gehrer gmx.de)
michael
parents:
3373
diff
changeset
|
4515 ff_cavsdsp_init(c,avctx); |
3432 | 4516 #endif |
8590 | 4517 #if CONFIG_VC1_DECODER || CONFIG_WMV3_DECODER |
3526 | 4518 ff_vc1dsp_init(c,avctx); |
4519 #endif | |
8590 | 4520 #if CONFIG_WMV2_DECODER || CONFIG_VC1_DECODER || CONFIG_WMV3_DECODER |
5887 | 4521 ff_intrax8dsp_init(c,avctx); |
4522 #endif | |
8590 | 4523 #if CONFIG_RV30_DECODER |
8410 | 4524 ff_rv30dsp_init(c,avctx); |
4525 #endif | |
8590 | 4526 #if CONFIG_RV40_DECODER |
8232 | 4527 ff_rv40dsp_init(c,avctx); |
4528 c->put_rv40_qpel_pixels_tab[0][15] = put_rv40_qpel16_mc33_c; | |
4529 c->avg_rv40_qpel_pixels_tab[0][15] = avg_rv40_qpel16_mc33_c; | |
4530 c->put_rv40_qpel_pixels_tab[1][15] = put_rv40_qpel8_mc33_c; | |
4531 c->avg_rv40_qpel_pixels_tab[1][15] = avg_rv40_qpel8_mc33_c; | |
4532 #endif | |
3395
adccbf4a1040
CAVS decoder by (Stefan Gehrer stefan.gehrer gmx.de)
michael
parents:
3373
diff
changeset
|
4533 |
936 | 4534 c->put_mspel_pixels_tab[0]= put_mspel8_mc00_c; |
4535 c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c; | |
4536 c->put_mspel_pixels_tab[2]= put_mspel8_mc20_c; | |
4537 c->put_mspel_pixels_tab[3]= put_mspel8_mc30_c; | |
4538 c->put_mspel_pixels_tab[4]= put_mspel8_mc02_c; | |
4539 c->put_mspel_pixels_tab[5]= put_mspel8_mc12_c; | |
4540 c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c; | |
4541 c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c; | |
2967 | 4542 |
1708 | 4543 #define SET_CMP_FUNC(name) \ |
4544 c->name[0]= name ## 16_c;\ | |
4545 c->name[1]= name ## 8x8_c; | |
2967 | 4546 |
1708 | 4547 SET_CMP_FUNC(hadamard8_diff) |
1729 | 4548 c->hadamard8_diff[4]= hadamard8_intra16_c; |
8978 | 4549 c->hadamard8_diff[5]= hadamard8_intra8x8_c; |
1708 | 4550 SET_CMP_FUNC(dct_sad) |
2382 | 4551 SET_CMP_FUNC(dct_max) |
8590 | 4552 #if CONFIG_GPL |
3010
533c6386eca9
8x8 integer dct from x264 as cmp function (under CONFIG_GPL)
michael
parents:
2979
diff
changeset
|
4553 SET_CMP_FUNC(dct264_sad) |
3013 | 4554 #endif |
1708 | 4555 c->sad[0]= pix_abs16_c; |
4556 c->sad[1]= pix_abs8_c; | |
4557 c->sse[0]= sse16_c; | |
4558 c->sse[1]= sse8_c; | |
2184 | 4559 c->sse[2]= sse4_c; |
1708 | 4560 SET_CMP_FUNC(quant_psnr) |
4561 SET_CMP_FUNC(rd) | |
4562 SET_CMP_FUNC(bit) | |
1729 | 4563 c->vsad[0]= vsad16_c; |
4564 c->vsad[4]= vsad_intra16_c; | |
8978 | 4565 c->vsad[5]= vsad_intra8_c; |
1729 | 4566 c->vsse[0]= vsse16_c; |
4567 c->vsse[4]= vsse_intra16_c; | |
8978 | 4568 c->vsse[5]= vsse_intra8_c; |
2065
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
4569 c->nsse[0]= nsse16_c; |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
4570 c->nsse[1]= nsse8_c; |
8590 | 4571 #if CONFIG_SNOW_ENCODER |
2184 | 4572 c->w53[0]= w53_16_c; |
4573 c->w53[1]= w53_8_c; | |
4574 c->w97[0]= w97_16_c; | |
4575 c->w97[1]= w97_8_c; | |
3373
b8996cc5ccae
Disable w53 and w97 cmp methods when snow encoder is disabled
gpoirier
parents:
3323
diff
changeset
|
4576 #endif |
2184 | 4577 |
4749 | 4578 c->ssd_int8_vs_int16 = ssd_int8_vs_int16_c; |
4579 | |
866 | 4580 c->add_bytes= add_bytes_c; |
6384 | 4581 c->add_bytes_l2= add_bytes_l2_c; |
866 | 4582 c->diff_bytes= diff_bytes_c; |
8760 | 4583 c->add_hfyu_median_prediction= add_hfyu_median_prediction_c; |
1527 | 4584 c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_c; |
1273 | 4585 c->bswap_buf= bswap_buf; |
8590 | 4586 #if CONFIG_PNG_DECODER |
6384 | 4587 c->add_png_paeth_prediction= ff_add_png_paeth_prediction; |
4588 #endif | |
2633 | 4589 |
4590 c->h264_v_loop_filter_luma= h264_v_loop_filter_luma_c; | |
4591 c->h264_h_loop_filter_luma= h264_h_loop_filter_luma_c; | |
8395
195cba8f6257
Move filter_luma_intra into dsputil for later addition of asm.
darkshikari
parents:
8375
diff
changeset
|
4592 c->h264_v_loop_filter_luma_intra= h264_v_loop_filter_luma_intra_c; |
195cba8f6257
Move filter_luma_intra into dsputil for later addition of asm.
darkshikari
parents:
8375
diff
changeset
|
4593 c->h264_h_loop_filter_luma_intra= h264_h_loop_filter_luma_intra_c; |
2633 | 4594 c->h264_v_loop_filter_chroma= h264_v_loop_filter_chroma_c; |
4595 c->h264_h_loop_filter_chroma= h264_h_loop_filter_chroma_c; | |
2707
360024d31dab
H.264 deblocking optimizations (mmx for chroma_bS4 case, convert existing cases to 8-bit math)
lorenm
parents:
2696
diff
changeset
|
4596 c->h264_v_loop_filter_chroma_intra= h264_v_loop_filter_chroma_intra_c; |
360024d31dab
H.264 deblocking optimizations (mmx for chroma_bS4 case, convert existing cases to 8-bit math)
lorenm
parents:
2696
diff
changeset
|
4597 c->h264_h_loop_filter_chroma_intra= h264_h_loop_filter_chroma_intra_c; |
3645
47821be55b6c
mmx implementation of deblocking strength decision.
lorenm
parents:
3568
diff
changeset
|
4598 c->h264_loop_filter_strength= NULL; |
2967 | 4599 |
8596
68e959302527
replace all occurrence of ENABLE_ by the corresponding CONFIG_, HAVE_ or ARCH_
aurel
parents:
8590
diff
changeset
|
4600 if (CONFIG_ANY_H263) { |
5278 | 4601 c->h263_h_loop_filter= h263_h_loop_filter_c; |
4602 c->h263_v_loop_filter= h263_v_loop_filter_c; | |
5277
7b3fcb7c61ce
Avoid linking with h263.c functions when the relevant codecs
aurel
parents:
5256
diff
changeset
|
4603 } |
2967 | 4604 |
8596
68e959302527
replace all occurrence of ENABLE_ by the corresponding CONFIG_, HAVE_ or ARCH_
aurel
parents:
8590
diff
changeset
|
4605 if (CONFIG_VP3_DECODER || CONFIG_THEORA_DECODER) { |
7995 | 4606 c->vp3_h_loop_filter= ff_vp3_h_loop_filter_c; |
4607 c->vp3_v_loop_filter= ff_vp3_v_loop_filter_c; | |
4608 } | |
8785
bee83b3f9a6b
move vp6_filter_diag4() to a new vp6dsp.c file and use it throught dsputil
aurel
parents:
8760
diff
changeset
|
4609 if (CONFIG_VP6_DECODER) { |
bee83b3f9a6b
move vp6_filter_diag4() to a new vp6dsp.c file and use it throught dsputil
aurel
parents:
8760
diff
changeset
|
4610 c->vp6_filter_diag4= ff_vp6_filter_diag4_c; |
bee83b3f9a6b
move vp6_filter_diag4() to a new vp6dsp.c file and use it throught dsputil
aurel
parents:
8760
diff
changeset
|
4611 } |
7995 | 4612 |
2045 | 4613 c->h261_loop_filter= h261_loop_filter_c; |
2967 | 4614 |
1784 | 4615 c->try_8x8basis= try_8x8basis_c; |
4616 c->add_8x8basis= add_8x8basis_c; | |
866 | 4617 |
8590 | 4618 #if CONFIG_SNOW_DECODER |
3198
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3105
diff
changeset
|
4619 c->vertical_compose97i = ff_snow_vertical_compose97i; |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3105
diff
changeset
|
4620 c->horizontal_compose97i = ff_snow_horizontal_compose97i; |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3105
diff
changeset
|
4621 c->inner_add_yblock = ff_snow_inner_add_yblock; |
3199
1651e69b9f7a
10l: Only set *compose97i *add_yblock to dsputils context if we are building with Snow enabled
gpoirier
parents:
3198
diff
changeset
|
4622 #endif |
3198
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3105
diff
changeset
|
4623 |
8590 | 4624 #if CONFIG_VORBIS_DECODER |
3536
545a15c19c91
sse & sse2 implementations of vorbis channel coupling.
lorenm
parents:
3526
diff
changeset
|
4625 c->vorbis_inverse_coupling = vorbis_inverse_coupling; |
545a15c19c91
sse & sse2 implementations of vorbis channel coupling.
lorenm
parents:
3526
diff
changeset
|
4626 #endif |
8590 | 4627 #if CONFIG_AC3_DECODER |
7563 | 4628 c->ac3_downmix = ff_ac3_downmix_c; |
4629 #endif | |
8590 | 4630 #if CONFIG_FLAC_ENCODER |
5737 | 4631 c->flac_compute_autocorr = ff_flac_compute_autocorr; |
4632 #endif | |
3568
945caa35ee9a
sse and 3dnow implementations of float->int conversion and mdct windowing.
lorenm
parents:
3536
diff
changeset
|
4633 c->vector_fmul = vector_fmul_c; |
945caa35ee9a
sse and 3dnow implementations of float->int conversion and mdct windowing.
lorenm
parents:
3536
diff
changeset
|
4634 c->vector_fmul_reverse = vector_fmul_reverse_c; |
945caa35ee9a
sse and 3dnow implementations of float->int conversion and mdct windowing.
lorenm
parents:
3536
diff
changeset
|
4635 c->vector_fmul_add_add = ff_vector_fmul_add_add_c; |
7261 | 4636 c->vector_fmul_window = ff_vector_fmul_window_c; |
7564 | 4637 c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_c; |
3568
945caa35ee9a
sse and 3dnow implementations of float->int conversion and mdct windowing.
lorenm
parents:
3536
diff
changeset
|
4638 c->float_to_int16 = ff_float_to_int16_c; |
7261 | 4639 c->float_to_int16_interleave = ff_float_to_int16_interleave_c; |
7203
87b1dfb5a98d
Add several vector functions used by Monkey's Audio decoder to dsputil
kostya
parents:
6719
diff
changeset
|
4640 c->add_int16 = add_int16_c; |
87b1dfb5a98d
Add several vector functions used by Monkey's Audio decoder to dsputil
kostya
parents:
6719
diff
changeset
|
4641 c->sub_int16 = sub_int16_c; |
87b1dfb5a98d
Add several vector functions used by Monkey's Audio decoder to dsputil
kostya
parents:
6719
diff
changeset
|
4642 c->scalarproduct_int16 = scalarproduct_int16_c; |
3536
545a15c19c91
sse & sse2 implementations of vorbis channel coupling.
lorenm
parents:
3526
diff
changeset
|
4643 |
3245 | 4644 c->shrink[0]= ff_img_copy_plane; |
4645 c->shrink[1]= ff_shrink22; | |
4646 c->shrink[2]= ff_shrink44; | |
4647 c->shrink[3]= ff_shrink88; | |
4648 | |
3215
06f98047ff26
prefetch pixels for future motion compensation. 2-5% faster h264.
lorenm
parents:
3199
diff
changeset
|
4649 c->prefetch= just_return; |
06f98047ff26
prefetch pixels for future motion compensation. 2-5% faster h264.
lorenm
parents:
3199
diff
changeset
|
4650 |
3807
6a40092eb9e6
approximate qpel functions: sacrifice some quality for some decoding speed. enabled on B-frames with -lavdopts fast.
lorenm
parents:
3728
diff
changeset
|
4651 memset(c->put_2tap_qpel_pixels_tab, 0, sizeof(c->put_2tap_qpel_pixels_tab)); |
6a40092eb9e6
approximate qpel functions: sacrifice some quality for some decoding speed. enabled on B-frames with -lavdopts fast.
lorenm
parents:
3728
diff
changeset
|
4652 memset(c->avg_2tap_qpel_pixels_tab, 0, sizeof(c->avg_2tap_qpel_pixels_tab)); |
6a40092eb9e6
approximate qpel functions: sacrifice some quality for some decoding speed. enabled on B-frames with -lavdopts fast.
lorenm
parents:
3728
diff
changeset
|
4653 |
8596
68e959302527
replace all occurrence of ENABLE_ by the corresponding CONFIG_, HAVE_ or ARCH_
aurel
parents:
8590
diff
changeset
|
4654 if (HAVE_MMX) dsputil_init_mmx (c, avctx); |
68e959302527
replace all occurrence of ENABLE_ by the corresponding CONFIG_, HAVE_ or ARCH_
aurel
parents:
8590
diff
changeset
|
4655 if (ARCH_ARM) dsputil_init_arm (c, avctx); |
68e959302527
replace all occurrence of ENABLE_ by the corresponding CONFIG_, HAVE_ or ARCH_
aurel
parents:
8590
diff
changeset
|
4656 if (CONFIG_MLIB) dsputil_init_mlib (c, avctx); |
68e959302527
replace all occurrence of ENABLE_ by the corresponding CONFIG_, HAVE_ or ARCH_
aurel
parents:
8590
diff
changeset
|
4657 if (HAVE_VIS) dsputil_init_vis (c, avctx); |
68e959302527
replace all occurrence of ENABLE_ by the corresponding CONFIG_, HAVE_ or ARCH_
aurel
parents:
8590
diff
changeset
|
4658 if (ARCH_ALPHA) dsputil_init_alpha (c, avctx); |
68e959302527
replace all occurrence of ENABLE_ by the corresponding CONFIG_, HAVE_ or ARCH_
aurel
parents:
8590
diff
changeset
|
4659 if (ARCH_PPC) dsputil_init_ppc (c, avctx); |
68e959302527
replace all occurrence of ENABLE_ by the corresponding CONFIG_, HAVE_ or ARCH_
aurel
parents:
8590
diff
changeset
|
4660 if (HAVE_MMI) dsputil_init_mmi (c, avctx); |
68e959302527
replace all occurrence of ENABLE_ by the corresponding CONFIG_, HAVE_ or ARCH_
aurel
parents:
8590
diff
changeset
|
4661 if (ARCH_SH4) dsputil_init_sh4 (c, avctx); |
68e959302527
replace all occurrence of ENABLE_ by the corresponding CONFIG_, HAVE_ or ARCH_
aurel
parents:
8590
diff
changeset
|
4662 if (ARCH_BFIN) dsputil_init_bfin (c, avctx); |
1092 | 4663 |
3807
6a40092eb9e6
approximate qpel functions: sacrifice some quality for some decoding speed. enabled on B-frames with -lavdopts fast.
lorenm
parents:
3728
diff
changeset
|
4664 for(i=0; i<64; i++){ |
6a40092eb9e6
approximate qpel functions: sacrifice some quality for some decoding speed. enabled on B-frames with -lavdopts fast.
lorenm
parents:
3728
diff
changeset
|
4665 if(!c->put_2tap_qpel_pixels_tab[0][i]) |
6a40092eb9e6
approximate qpel functions: sacrifice some quality for some decoding speed. enabled on B-frames with -lavdopts fast.
lorenm
parents:
3728
diff
changeset
|
4666 c->put_2tap_qpel_pixels_tab[0][i]= c->put_h264_qpel_pixels_tab[0][i]; |
6a40092eb9e6
approximate qpel functions: sacrifice some quality for some decoding speed. enabled on B-frames with -lavdopts fast.
lorenm
parents:
3728
diff
changeset
|
4667 if(!c->avg_2tap_qpel_pixels_tab[0][i]) |
6a40092eb9e6
approximate qpel functions: sacrifice some quality for some decoding speed. enabled on B-frames with -lavdopts fast.
lorenm
parents:
3728
diff
changeset
|
4668 c->avg_2tap_qpel_pixels_tab[0][i]= c->avg_h264_qpel_pixels_tab[0][i]; |
6a40092eb9e6
approximate qpel functions: sacrifice some quality for some decoding speed. enabled on B-frames with -lavdopts fast.
lorenm
parents:
3728
diff
changeset
|
4669 } |
6a40092eb9e6
approximate qpel functions: sacrifice some quality for some decoding speed. enabled on B-frames with -lavdopts fast.
lorenm
parents:
3728
diff
changeset
|
4670 |
1092 | 4671 switch(c->idct_permutation_type){ |
4672 case FF_NO_IDCT_PERM: | |
4673 for(i=0; i<64; i++) | |
4674 c->idct_permutation[i]= i; | |
4675 break; | |
4676 case FF_LIBMPEG2_IDCT_PERM: | |
4677 for(i=0; i<64; i++) | |
4678 c->idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2); | |
4679 break; | |
4680 case FF_SIMPLE_IDCT_PERM: | |
4681 for(i=0; i<64; i++) | |
4682 c->idct_permutation[i]= simple_mmx_permutation[i]; | |
4683 break; | |
4684 case FF_TRANSPOSE_IDCT_PERM: | |
4685 for(i=0; i<64; i++) | |
4686 c->idct_permutation[i]= ((i&7)<<3) | (i>>3); | |
4687 break; | |
2696
9699d325049d
porting the mmx&sse2 (sse2 untested) vp3 idcts to the lavc idct API
michael
parents:
2693
diff
changeset
|
4688 case FF_PARTTRANS_IDCT_PERM: |
9699d325049d
porting the mmx&sse2 (sse2 untested) vp3 idcts to the lavc idct API
michael
parents:
2693
diff
changeset
|
4689 for(i=0; i<64; i++) |
9699d325049d
porting the mmx&sse2 (sse2 untested) vp3 idcts to the lavc idct API
michael
parents:
2693
diff
changeset
|
4690 c->idct_permutation[i]= (i&0x24) | ((i&3)<<3) | ((i>>3)&3); |
9699d325049d
porting the mmx&sse2 (sse2 untested) vp3 idcts to the lavc idct API
michael
parents:
2693
diff
changeset
|
4691 break; |
6600
c3213c91124c
Add a new IDCT permutation, used in xvid_sse2 and possibly future similar IDCTs.
astrange
parents:
6450
diff
changeset
|
4692 case FF_SSE2_IDCT_PERM: |
c3213c91124c
Add a new IDCT permutation, used in xvid_sse2 and possibly future similar IDCTs.
astrange
parents:
6450
diff
changeset
|
4693 for(i=0; i<64; i++) |
c3213c91124c
Add a new IDCT permutation, used in xvid_sse2 and possibly future similar IDCTs.
astrange
parents:
6450
diff
changeset
|
4694 c->idct_permutation[i]= (i&0x38) | idct_sse2_row_perm[i&7]; |
c3213c91124c
Add a new IDCT permutation, used in xvid_sse2 and possibly future similar IDCTs.
astrange
parents:
6450
diff
changeset
|
4695 break; |
1092 | 4696 default: |
1598
932d306bf1dc
av_log() patch by (Michel Bardiaux <mbardiaux at peaktime dot be>)
michael
parents:
1571
diff
changeset
|
4697 av_log(avctx, AV_LOG_ERROR, "Internal error, IDCT permutation not set\n"); |
1092 | 4698 } |
0 | 4699 } |
252
ddb1a0e94cf4
- Added PSNR feature to libavcodec and ffmpeg. By now just Y PSNR until I'm
pulento
parents:
220
diff
changeset
|
4700 |