Mercurial > libavcodec.hg
annotate dsputil.c @ 3198:6b9f0c4fbdbe libavcodec
First part of a series of speed-enhancing patches.
This one sets up a snow.h and makes snow use the dsputil function pointer
framework to access the three functions that will be implemented in asm
in the other parts of the patchset.
Patch by Robert Edele < yartrebo AH earthlink POIS net>
Original thread:
Subject: [Ffmpeg-devel] [PATCH] Snow mmx+sse2 asm optimizations
Date: Sun, 05 Feb 2006 12:47:14 -0500
author | gpoirier |
---|---|
date | Thu, 16 Mar 2006 19:18:18 +0000 |
parents | 2d35fb3cb940 |
children | 1651e69b9f7a |
rev | line source |
---|---|
0 | 1 /* |
2 * DSP utils | |
429 | 3 * Copyright (c) 2000, 2001 Fabrice Bellard. |
1739
07a484280a82
copyright year update of the files i touched and remembered, things look annoyingly unmaintained otherwise
michael
parents:
1729
diff
changeset
|
4 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> |
0 | 5 * |
429 | 6 * This library is free software; you can redistribute it and/or |
7 * modify it under the terms of the GNU Lesser General Public | |
8 * License as published by the Free Software Foundation; either | |
9 * version 2 of the License, or (at your option) any later version. | |
0 | 10 * |
429 | 11 * This library is distributed in the hope that it will be useful, |
0 | 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
429 | 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 * Lesser General Public License for more details. | |
0 | 15 * |
429 | 16 * You should have received a copy of the GNU Lesser General Public |
17 * License along with this library; if not, write to the Free Software | |
3036
0b546eab515d
Update licensing information: The FSF changed postal address.
diego
parents:
3029
diff
changeset
|
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
256 | 19 * |
385 | 20 * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at> |
0 | 21 */ |
2967 | 22 |
1106 | 23 /** |
24 * @file dsputil.c | |
25 * DSP utils | |
26 */ | |
2967 | 27 |
0 | 28 #include "avcodec.h" |
29 #include "dsputil.h" | |
936 | 30 #include "mpegvideo.h" |
1092 | 31 #include "simple_idct.h" |
1557 | 32 #include "faandct.h" |
3198
6b9f0c4fbdbe
First part of a series of speed-enhancing patches.
gpoirier
parents:
3105
diff
changeset
|
33 #include "snow.h" |
676 | 34 |
2522
e25782262d7d
kill warnings patch by (Måns Rullgård <mru inprovide com>)
michael
parents:
2448
diff
changeset
|
35 /* snow.c */ |
e25782262d7d
kill warnings patch by (Måns Rullgård <mru inprovide com>)
michael
parents:
2448
diff
changeset
|
36 void ff_spatial_dwt(int *buffer, int width, int height, int stride, int type, int decomposition_count); |
e25782262d7d
kill warnings patch by (Mns Rullgrd <mru inprovide com>)
michael
parents:
2448
diff
changeset
|
37 |
2169
db8baace74d8
Minor Patch for shared libs on Mac OSX by (Bill May <wmay at cisco dot com>)
michael
parents:
2066
diff
changeset
|
38 uint8_t cropTbl[256 + 2 * MAX_NEG_CROP] = {0, }; |
db8baace74d8
Minor Patch for shared libs on Mac OSX by (Bill May <wmay at cisco dot com>)
michael
parents:
2066
diff
changeset
|
39 uint32_t squareTbl[512] = {0, }; |
0 | 40 |
1064 | 41 const uint8_t ff_zigzag_direct[64] = { |
706
e65798d228ea
idct permutation cleanup, idct can be selected per context now
michaelni
parents:
689
diff
changeset
|
42 0, 1, 8, 16, 9, 2, 3, 10, |
e65798d228ea
idct permutation cleanup, idct can be selected per context now
michaelni
parents:
689
diff
changeset
|
43 17, 24, 32, 25, 18, 11, 4, 5, |
34 | 44 12, 19, 26, 33, 40, 48, 41, 34, |
706
e65798d228ea
idct permutation cleanup, idct can be selected per context now
michaelni
parents:
689
diff
changeset
|
45 27, 20, 13, 6, 7, 14, 21, 28, |
34 | 46 35, 42, 49, 56, 57, 50, 43, 36, |
47 29, 22, 15, 23, 30, 37, 44, 51, | |
48 58, 59, 52, 45, 38, 31, 39, 46, | |
49 53, 60, 61, 54, 47, 55, 62, 63 | |
50 }; | |
51 | |
1567 | 52 /* Specific zigzag scan for 248 idct. NOTE that unlike the |
53 specification, we interleave the fields */ | |
54 const uint8_t ff_zigzag248_direct[64] = { | |
55 0, 8, 1, 9, 16, 24, 2, 10, | |
56 17, 25, 32, 40, 48, 56, 33, 41, | |
57 18, 26, 3, 11, 4, 12, 19, 27, | |
58 34, 42, 49, 57, 50, 58, 35, 43, | |
59 20, 28, 5, 13, 6, 14, 21, 29, | |
60 36, 44, 51, 59, 52, 60, 37, 45, | |
61 22, 30, 7, 15, 23, 31, 38, 46, | |
62 53, 61, 54, 62, 39, 47, 55, 63, | |
63 }; | |
64 | |
220 | 65 /* not permutated inverse zigzag_direct + 1 for MMX quantizer */ |
3089 | 66 DECLARE_ALIGNED_8(uint16_t, inv_zigzag_direct16[64]) = {0, }; |
220 | 67 |
1064 | 68 const uint8_t ff_alternate_horizontal_scan[64] = { |
2967 | 69 0, 1, 2, 3, 8, 9, 16, 17, |
34 | 70 10, 11, 4, 5, 6, 7, 15, 14, |
2967 | 71 13, 12, 19, 18, 24, 25, 32, 33, |
34 | 72 26, 27, 20, 21, 22, 23, 28, 29, |
2967 | 73 30, 31, 34, 35, 40, 41, 48, 49, |
34 | 74 42, 43, 36, 37, 38, 39, 44, 45, |
2967 | 75 46, 47, 50, 51, 56, 57, 58, 59, |
34 | 76 52, 53, 54, 55, 60, 61, 62, 63, |
77 }; | |
78 | |
1064 | 79 const uint8_t ff_alternate_vertical_scan[64] = { |
2967 | 80 0, 8, 16, 24, 1, 9, 2, 10, |
34 | 81 17, 25, 32, 40, 48, 56, 57, 49, |
2967 | 82 41, 33, 26, 18, 3, 11, 4, 12, |
34 | 83 19, 27, 34, 42, 50, 58, 35, 43, |
2967 | 84 51, 59, 20, 28, 5, 13, 6, 14, |
34 | 85 21, 29, 36, 44, 52, 60, 37, 45, |
2967 | 86 53, 61, 22, 30, 7, 15, 23, 31, |
34 | 87 38, 46, 54, 62, 39, 47, 55, 63, |
88 }; | |
89 | |
220 | 90 /* a*inverse[b]>>32 == a/b for all 0<=a<=65536 && 2<=b<=255 */ |
1064 | 91 const uint32_t inverse[256]={ |
2967 | 92 0, 4294967295U,2147483648U,1431655766, 1073741824, 858993460, 715827883, 613566757, |
93 536870912, 477218589, 429496730, 390451573, 357913942, 330382100, 306783379, 286331154, | |
94 268435456, 252645136, 238609295, 226050911, 214748365, 204522253, 195225787, 186737709, | |
95 178956971, 171798692, 165191050, 159072863, 153391690, 148102321, 143165577, 138547333, | |
96 134217728, 130150525, 126322568, 122713352, 119304648, 116080198, 113025456, 110127367, | |
97 107374183, 104755300, 102261127, 99882961, 97612894, 95443718, 93368855, 91382283, | |
98 89478486, 87652394, 85899346, 84215046, 82595525, 81037119, 79536432, 78090315, | |
99 76695845, 75350304, 74051161, 72796056, 71582789, 70409300, 69273667, 68174085, | |
100 67108864, 66076420, 65075263, 64103990, 63161284, 62245903, 61356676, 60492498, | |
101 59652324, 58835169, 58040099, 57266231, 56512728, 55778797, 55063684, 54366675, | |
102 53687092, 53024288, 52377650, 51746594, 51130564, 50529028, 49941481, 49367441, | |
103 48806447, 48258060, 47721859, 47197443, 46684428, 46182445, 45691142, 45210183, | |
104 44739243, 44278014, 43826197, 43383509, 42949673, 42524429, 42107523, 41698712, | |
105 41297763, 40904451, 40518560, 40139882, 39768216, 39403370, 39045158, 38693400, | |
106 38347923, 38008561, 37675152, 37347542, 37025581, 36709123, 36398028, 36092163, | |
107 35791395, 35495598, 35204650, 34918434, 34636834, 34359739, 34087043, 33818641, | |
108 33554432, 33294321, 33038210, 32786010, 32537632, 32292988, 32051995, 31814573, | |
109 31580642, 31350127, 31122952, 30899046, 30678338, 30460761, 30246249, 30034737, | |
110 29826162, 29620465, 29417585, 29217465, 29020050, 28825284, 28633116, 28443493, | |
111 28256364, 28071682, 27889399, 27709467, 27531842, 27356480, 27183338, 27012373, | |
112 26843546, 26676816, 26512144, 26349493, 26188825, 26030105, 25873297, 25718368, | |
113 25565282, 25414008, 25264514, 25116768, 24970741, 24826401, 24683721, 24542671, | |
114 24403224, 24265352, 24129030, 23994231, 23860930, 23729102, 23598722, 23469767, | |
115 23342214, 23216040, 23091223, 22967740, 22845571, 22724695, 22605092, 22486740, | |
116 22369622, 22253717, 22139007, 22025474, 21913099, 21801865, 21691755, 21582751, | |
117 21474837, 21367997, 21262215, 21157475, 21053762, 20951060, 20849356, 20748635, | |
118 20648882, 20550083, 20452226, 20355296, 20259280, 20164166, 20069941, 19976593, | |
119 19884108, 19792477, 19701685, 19611723, 19522579, 19434242, 19346700, 19259944, | |
120 19173962, 19088744, 19004281, 18920561, 18837576, 18755316, 18673771, 18592933, | |
121 18512791, 18433337, 18354562, 18276457, 18199014, 18122225, 18046082, 17970575, | |
122 17895698, 17821442, 17747799, 17674763, 17602325, 17530479, 17459217, 17388532, | |
220 | 123 17318417, 17248865, 17179870, 17111424, 17043522, 16976156, 16909321, 16843010, |
124 }; | |
125 | |
1092 | 126 /* Input permutation for the simple_idct_mmx */ |
127 static const uint8_t simple_mmx_permutation[64]={ | |
2979 | 128 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D, |
129 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D, | |
130 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D, | |
131 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F, | |
132 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F, | |
133 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D, | |
134 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F, | |
135 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F, | |
1092 | 136 }; |
137 | |
/**
 * Sum all 256 pixel values of a 16x16 block.
 * @param pix       top-left pixel of the block
 * @param line_size byte stride between successive rows
 * @return sum of the 256 pixel values
 */
static int pix_sum_c(uint8_t * pix, int line_size)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        for (col = 0; col < 16; col++)
            total += pix[col];
        pix += line_size; /* advance to the next row */
    }
    return total;
}
159 | |
1064 | 160 static int pix_norm1_c(uint8_t * pix, int line_size) |
612 | 161 { |
162 int s, i, j; | |
1064 | 163 uint32_t *sq = squareTbl + 256; |
612 | 164 |
165 s = 0; | |
166 for (i = 0; i < 16; i++) { | |
2979 | 167 for (j = 0; j < 16; j += 8) { |
997
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
168 #if 0 |
2979 | 169 s += sq[pix[0]]; |
170 s += sq[pix[1]]; | |
171 s += sq[pix[2]]; | |
172 s += sq[pix[3]]; | |
173 s += sq[pix[4]]; | |
174 s += sq[pix[5]]; | |
175 s += sq[pix[6]]; | |
176 s += sq[pix[7]]; | |
997
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
177 #else |
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
178 #if LONG_MAX > 2147483647 |
2979 | 179 register uint64_t x=*(uint64_t*)pix; |
180 s += sq[x&0xff]; | |
181 s += sq[(x>>8)&0xff]; | |
182 s += sq[(x>>16)&0xff]; | |
183 s += sq[(x>>24)&0xff]; | |
997
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
184 s += sq[(x>>32)&0xff]; |
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
185 s += sq[(x>>40)&0xff]; |
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
186 s += sq[(x>>48)&0xff]; |
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
187 s += sq[(x>>56)&0xff]; |
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
188 #else |
2979 | 189 register uint32_t x=*(uint32_t*)pix; |
190 s += sq[x&0xff]; | |
191 s += sq[(x>>8)&0xff]; | |
192 s += sq[(x>>16)&0xff]; | |
193 s += sq[(x>>24)&0xff]; | |
997
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
194 x=*(uint32_t*)(pix+4); |
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
195 s += sq[x&0xff]; |
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
196 s += sq[(x>>8)&0xff]; |
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
197 s += sq[(x>>16)&0xff]; |
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
198 s += sq[(x>>24)&0xff]; |
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
199 #endif |
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
200 #endif |
2979 | 201 pix += 8; |
202 } | |
203 pix += line_size - 16; | |
612 | 204 } |
205 return s; | |
206 } | |
207 | |
/**
 * Byte-swap an array of 32-bit words from src into dst.
 * The original unrolled the loop by eight with a scalar tail; a single
 * element-wise loop performs the identical sequence of stores.
 * @param dst destination array (may equal src)
 * @param src source array
 * @param w   number of 32-bit words to swap
 */
static void bswap_buf(uint32_t *dst, uint32_t *src, int w){
    int idx;

    for(idx = 0; idx < w; idx++)
        dst[idx] = bswap_32(src[idx]);
}
612 | 225 |
2184 | 226 static int sse4_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h) |
227 { | |
228 int s, i; | |
229 uint32_t *sq = squareTbl + 256; | |
230 | |
231 s = 0; | |
232 for (i = 0; i < h; i++) { | |
233 s += sq[pix1[0] - pix2[0]]; | |
234 s += sq[pix1[1] - pix2[1]]; | |
235 s += sq[pix1[2] - pix2[2]]; | |
236 s += sq[pix1[3] - pix2[3]]; | |
237 pix1 += line_size; | |
238 pix2 += line_size; | |
239 } | |
240 return s; | |
241 } | |
242 | |
1708 | 243 static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h) |
936 | 244 { |
245 int s, i; | |
1064 | 246 uint32_t *sq = squareTbl + 256; |
936 | 247 |
248 s = 0; | |
1708 | 249 for (i = 0; i < h; i++) { |
936 | 250 s += sq[pix1[0] - pix2[0]]; |
251 s += sq[pix1[1] - pix2[1]]; | |
252 s += sq[pix1[2] - pix2[2]]; | |
253 s += sq[pix1[3] - pix2[3]]; | |
254 s += sq[pix1[4] - pix2[4]]; | |
255 s += sq[pix1[5] - pix2[5]]; | |
256 s += sq[pix1[6] - pix2[6]]; | |
257 s += sq[pix1[7] - pix2[7]]; | |
258 pix1 += line_size; | |
259 pix2 += line_size; | |
260 } | |
261 return s; | |
262 } | |
263 | |
1708 | 264 static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) |
884 | 265 { |
1012
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
266 int s, i; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
267 uint32_t *sq = squareTbl + 256; |
884 | 268 |
269 s = 0; | |
1708 | 270 for (i = 0; i < h; i++) { |
1012
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
271 s += sq[pix1[ 0] - pix2[ 0]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
272 s += sq[pix1[ 1] - pix2[ 1]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
273 s += sq[pix1[ 2] - pix2[ 2]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
274 s += sq[pix1[ 3] - pix2[ 3]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
275 s += sq[pix1[ 4] - pix2[ 4]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
276 s += sq[pix1[ 5] - pix2[ 5]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
277 s += sq[pix1[ 6] - pix2[ 6]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
278 s += sq[pix1[ 7] - pix2[ 7]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
279 s += sq[pix1[ 8] - pix2[ 8]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
280 s += sq[pix1[ 9] - pix2[ 9]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
281 s += sq[pix1[10] - pix2[10]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
282 s += sq[pix1[11] - pix2[11]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
283 s += sq[pix1[12] - pix2[12]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
284 s += sq[pix1[13] - pix2[13]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
285 s += sq[pix1[14] - pix2[14]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
286 s += sq[pix1[15] - pix2[15]]; |
997
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
287 |
1012
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
288 pix1 += line_size; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
289 pix2 += line_size; |
884 | 290 } |
291 return s; | |
292 } | |
293 | |
2184 | 294 |
/**
 * Wavelet-based distortion metric for snow: transform the pixel difference
 * with ff_spatial_dwt() and sum the absolute coefficients.
 * @param v         unused context pointer (DSP function-pointer signature)
 * @param pix1      first block
 * @param pix2      second block
 * @param line_size byte stride between rows
 * @param w         block width, 8 or 16 (selects 3 or 4 decomposition levels)
 * @param h         block height
 * @param type      wavelet type passed through to ff_spatial_dwt()
 * @return sum of absolute transformed coefficients, >>2
 */
static inline int w_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int w, int h, int type){
#ifdef CONFIG_SNOW_ENCODER //idwt is in snow.c
    int s, i, j;
    const int dec_count= w==8 ? 3 : 4;
    int tmp[16*16];

    /* Scale the differences up by 16 into a 16-wide work buffer.
     * ("* 16" instead of "<<4": left-shifting a negative difference is
     * undefined behaviour.) */
    for (i = 0; i < h; i++) {
        for (j = 0; j < w; j+=4) {
            tmp[16*i+j+0] = (pix1[j+0] - pix2[j+0]) * 16;
            tmp[16*i+j+1] = (pix1[j+1] - pix2[j+1]) * 16;
            tmp[16*i+j+2] = (pix1[j+2] - pix2[j+2]) * 16;
            tmp[16*i+j+3] = (pix1[j+3] - pix2[j+3]) * 16;
        }
        pix1 += line_size;
        pix2 += line_size;
    }

    ff_spatial_dwt(tmp, w, h, 16, type, dec_count);

    /* Sum of absolute transformed coefficients. */
    s=0;
    for (i = 0; i < h; i++) {
        for (j = 0; j < w; j+=4) {
            s+= ABS(tmp[16*i+j+0]);
            s+= ABS(tmp[16*i+j+1]);
            s+= ABS(tmp[16*i+j+2]);
            s+= ABS(tmp[16*i+j+3]);
        }
    }
    assert(s>=0);

    return s>>2;
#else
    /* The (i)DWT lives in snow.c; without the snow encoder this comparator
     * must never be selected.  Returning 0 avoids falling off the end of a
     * non-void function (undefined behaviour in the original). */
    return 0;
#endif
}
377 | |
/* 5/3-wavelet distortion metric on an 8-wide block (type 1 = 5/3). */
static int w53_8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
    return w_c(v, pix1, pix2, line_size, 8, h, 1);
}
381 | |
/* 9/7-wavelet distortion metric on an 8-wide block (type 0 = 9/7). */
static int w97_8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
    return w_c(v, pix1, pix2, line_size, 8, h, 0);
}
385 | |
/* 5/3-wavelet distortion metric on a 16-wide block (type 1 = 5/3). */
static int w53_16_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
    return w_c(v, pix1, pix2, line_size, 16, h, 1);
}
389 | |
/* 9/7-wavelet distortion metric on a 16-wide block (type 0 = 9/7). */
static int w97_16_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
    return w_c(v, pix1, pix2, line_size, 16, h, 0);
}
393 | |
1064 | 394 static void get_pixels_c(DCTELEM *restrict block, const uint8_t *pixels, int line_size) |
0 | 395 { |
396 int i; | |
397 | |
398 /* read the pixels */ | |
399 for(i=0;i<8;i++) { | |
516 | 400 block[0] = pixels[0]; |
401 block[1] = pixels[1]; | |
402 block[2] = pixels[2]; | |
403 block[3] = pixels[3]; | |
404 block[4] = pixels[4]; | |
405 block[5] = pixels[5]; | |
406 block[6] = pixels[6]; | |
407 block[7] = pixels[7]; | |
408 pixels += line_size; | |
409 block += 8; | |
0 | 410 } |
411 } | |
412 | |
1064 | 413 static void diff_pixels_c(DCTELEM *restrict block, const uint8_t *s1, |
2979 | 414 const uint8_t *s2, int stride){ |
324 | 415 int i; |
416 | |
417 /* read the pixels */ | |
418 for(i=0;i<8;i++) { | |
516 | 419 block[0] = s1[0] - s2[0]; |
420 block[1] = s1[1] - s2[1]; | |
421 block[2] = s1[2] - s2[2]; | |
422 block[3] = s1[3] - s2[3]; | |
423 block[4] = s1[4] - s2[4]; | |
424 block[5] = s1[5] - s2[5]; | |
425 block[6] = s1[6] - s2[6]; | |
426 block[7] = s1[7] - s2[7]; | |
324 | 427 s1 += stride; |
428 s2 += stride; | |
516 | 429 block += 8; |
324 | 430 } |
431 } | |
432 | |
433 | |
1064 | 434 static void put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels, |
2979 | 435 int line_size) |
0 | 436 { |
437 int i; | |
1064 | 438 uint8_t *cm = cropTbl + MAX_NEG_CROP; |
2967 | 439 |
0 | 440 /* read the pixels */ |
441 for(i=0;i<8;i++) { | |
516 | 442 pixels[0] = cm[block[0]]; |
443 pixels[1] = cm[block[1]]; | |
444 pixels[2] = cm[block[2]]; | |
445 pixels[3] = cm[block[3]]; | |
446 pixels[4] = cm[block[4]]; | |
447 pixels[5] = cm[block[5]]; | |
448 pixels[6] = cm[block[6]]; | |
449 pixels[7] = cm[block[7]]; | |
450 | |
451 pixels += line_size; | |
452 block += 8; | |
0 | 453 } |
454 } | |
455 | |
2256 | 456 static void put_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels, |
2979 | 457 int line_size) |
2256 | 458 { |
459 int i; | |
460 uint8_t *cm = cropTbl + MAX_NEG_CROP; | |
2967 | 461 |
2256 | 462 /* read the pixels */ |
463 for(i=0;i<4;i++) { | |
464 pixels[0] = cm[block[0]]; | |
465 pixels[1] = cm[block[1]]; | |
466 pixels[2] = cm[block[2]]; | |
467 pixels[3] = cm[block[3]]; | |
468 | |
469 pixels += line_size; | |
470 block += 8; | |
471 } | |
472 } | |
473 | |
2257 | 474 static void put_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels, |
2979 | 475 int line_size) |
2257 | 476 { |
477 int i; | |
478 uint8_t *cm = cropTbl + MAX_NEG_CROP; | |
2967 | 479 |
2257 | 480 /* read the pixels */ |
481 for(i=0;i<2;i++) { | |
482 pixels[0] = cm[block[0]]; | |
483 pixels[1] = cm[block[1]]; | |
484 | |
485 pixels += line_size; | |
486 block += 8; | |
487 } | |
488 } | |
489 | |
2967 | 490 static void put_signed_pixels_clamped_c(const DCTELEM *block, |
1984
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
491 uint8_t *restrict pixels, |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
492 int line_size) |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
493 { |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
494 int i, j; |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
495 |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
496 for (i = 0; i < 8; i++) { |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
497 for (j = 0; j < 8; j++) { |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
498 if (*block < -128) |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
499 *pixels = 0; |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
500 else if (*block > 127) |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
501 *pixels = 255; |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
502 else |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
503 *pixels = (uint8_t)(*block + 128); |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
504 block++; |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
505 pixels++; |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
506 } |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
507 pixels += (line_size - 8); |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
508 } |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
509 } |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
510 |
1064 | 511 static void add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels, |
516 | 512 int line_size) |
0 | 513 { |
514 int i; | |
1064 | 515 uint8_t *cm = cropTbl + MAX_NEG_CROP; |
2967 | 516 |
0 | 517 /* read the pixels */ |
518 for(i=0;i<8;i++) { | |
516 | 519 pixels[0] = cm[pixels[0] + block[0]]; |
520 pixels[1] = cm[pixels[1] + block[1]]; | |
521 pixels[2] = cm[pixels[2] + block[2]]; | |
522 pixels[3] = cm[pixels[3] + block[3]]; | |
523 pixels[4] = cm[pixels[4] + block[4]]; | |
524 pixels[5] = cm[pixels[5] + block[5]]; | |
525 pixels[6] = cm[pixels[6] + block[6]]; | |
526 pixels[7] = cm[pixels[7] + block[7]]; | |
527 pixels += line_size; | |
528 block += 8; | |
0 | 529 } |
530 } | |
2256 | 531 |
532 static void add_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels, | |
533 int line_size) | |
534 { | |
535 int i; | |
536 uint8_t *cm = cropTbl + MAX_NEG_CROP; | |
2967 | 537 |
2256 | 538 /* read the pixels */ |
539 for(i=0;i<4;i++) { | |
540 pixels[0] = cm[pixels[0] + block[0]]; | |
541 pixels[1] = cm[pixels[1] + block[1]]; | |
542 pixels[2] = cm[pixels[2] + block[2]]; | |
543 pixels[3] = cm[pixels[3] + block[3]]; | |
544 pixels += line_size; | |
545 block += 8; | |
546 } | |
547 } | |
2257 | 548 |
549 static void add_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels, | |
550 int line_size) | |
551 { | |
552 int i; | |
553 uint8_t *cm = cropTbl + MAX_NEG_CROP; | |
2967 | 554 |
2257 | 555 /* read the pixels */ |
556 for(i=0;i<2;i++) { | |
557 pixels[0] = cm[pixels[0] + block[0]]; | |
558 pixels[1] = cm[pixels[1] + block[1]]; | |
559 pixels += line_size; | |
560 block += 8; | |
561 } | |
562 } | |
2763 | 563 |
564 static void add_pixels8_c(uint8_t *restrict pixels, DCTELEM *block, int line_size) | |
565 { | |
566 int i; | |
567 for(i=0;i<8;i++) { | |
568 pixels[0] += block[0]; | |
569 pixels[1] += block[1]; | |
570 pixels[2] += block[2]; | |
571 pixels[3] += block[3]; | |
572 pixels[4] += block[4]; | |
573 pixels[5] += block[5]; | |
574 pixels[6] += block[6]; | |
575 pixels[7] += block[7]; | |
576 pixels += line_size; | |
577 block += 8; | |
578 } | |
579 } | |
580 | |
581 static void add_pixels4_c(uint8_t *restrict pixels, DCTELEM *block, int line_size) | |
582 { | |
583 int i; | |
584 for(i=0;i<4;i++) { | |
585 pixels[0] += block[0]; | |
586 pixels[1] += block[1]; | |
587 pixels[2] += block[2]; | |
588 pixels[3] += block[3]; | |
589 pixels += line_size; | |
590 block += 4; | |
591 } | |
592 } | |
593 | |
385 | 594 #if 0 |
595 | |
596 #define PIXOP2(OPNAME, OP) \ | |
651 | 597 static void OPNAME ## _pixels(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ |
385 | 598 {\ |
599 int i;\ | |
600 for(i=0; i<h; i++){\ | |
601 OP(*((uint64_t*)block), LD64(pixels));\ | |
602 pixels+=line_size;\ | |
603 block +=line_size;\ | |
604 }\ | |
605 }\ | |
606 \ | |
859 | 607 static void OPNAME ## _no_rnd_pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ |
385 | 608 {\ |
609 int i;\ | |
610 for(i=0; i<h; i++){\ | |
611 const uint64_t a= LD64(pixels );\ | |
612 const uint64_t b= LD64(pixels+1);\ | |
613 OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\ | |
614 pixels+=line_size;\ | |
615 block +=line_size;\ | |
616 }\ | |
617 }\ | |
618 \ | |
859 | 619 static void OPNAME ## _pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ |
385 | 620 {\ |
621 int i;\ | |
622 for(i=0; i<h; i++){\ | |
623 const uint64_t a= LD64(pixels );\ | |
624 const uint64_t b= LD64(pixels+1);\ | |
625 OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\ | |
626 pixels+=line_size;\ | |
627 block +=line_size;\ | |
628 }\ | |
629 }\ | |
630 \ | |
859 | 631 static void OPNAME ## _no_rnd_pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ |
385 | 632 {\ |
633 int i;\ | |
634 for(i=0; i<h; i++){\ | |
635 const uint64_t a= LD64(pixels );\ | |
636 const uint64_t b= LD64(pixels+line_size);\ | |
637 OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\ | |
638 pixels+=line_size;\ | |
639 block +=line_size;\ | |
640 }\ | |
641 }\ | |
642 \ | |
859 | 643 static void OPNAME ## _pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ |
385 | 644 {\ |
645 int i;\ | |
646 for(i=0; i<h; i++){\ | |
647 const uint64_t a= LD64(pixels );\ | |
648 const uint64_t b= LD64(pixels+line_size);\ | |
649 OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\ | |
650 pixels+=line_size;\ | |
651 block +=line_size;\ | |
652 }\ | |
653 }\ | |
654 \ | |
859 | 655 static void OPNAME ## _pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ |
385 | 656 {\ |
657 int i;\ | |
658 const uint64_t a= LD64(pixels );\ | |
659 const uint64_t b= LD64(pixels+1);\ | |
660 uint64_t l0= (a&0x0303030303030303ULL)\ | |
661 + (b&0x0303030303030303ULL)\ | |
662 + 0x0202020202020202ULL;\ | |
663 uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ | |
664 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ | |
665 uint64_t l1,h1;\ | |
666 \ | |
667 pixels+=line_size;\ | |
668 for(i=0; i<h; i+=2){\ | |
669 uint64_t a= LD64(pixels );\ | |
670 uint64_t b= LD64(pixels+1);\ | |
671 l1= (a&0x0303030303030303ULL)\ | |
672 + (b&0x0303030303030303ULL);\ | |
673 h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ | |
674 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ | |
675 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\ | |
676 pixels+=line_size;\ | |
677 block +=line_size;\ | |
678 a= LD64(pixels );\ | |
679 b= LD64(pixels+1);\ | |
680 l0= (a&0x0303030303030303ULL)\ | |
681 + (b&0x0303030303030303ULL)\ | |
682 + 0x0202020202020202ULL;\ | |
683 h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ | |
684 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ | |
685 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\ | |
686 pixels+=line_size;\ | |
687 block +=line_size;\ | |
688 }\ | |
689 }\ | |
690 \ | |
859 | 691 static void OPNAME ## _no_rnd_pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ |
385 | 692 {\ |
693 int i;\ | |
694 const uint64_t a= LD64(pixels );\ | |
695 const uint64_t b= LD64(pixels+1);\ | |
696 uint64_t l0= (a&0x0303030303030303ULL)\ | |
697 + (b&0x0303030303030303ULL)\ | |
698 + 0x0101010101010101ULL;\ | |
699 uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ | |
700 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ | |
701 uint64_t l1,h1;\ | |
702 \ | |
703 pixels+=line_size;\ | |
704 for(i=0; i<h; i+=2){\ | |
705 uint64_t a= LD64(pixels );\ | |
706 uint64_t b= LD64(pixels+1);\ | |
707 l1= (a&0x0303030303030303ULL)\ | |
708 + (b&0x0303030303030303ULL);\ | |
709 h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ | |
710 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ | |
711 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\ | |
712 pixels+=line_size;\ | |
713 block +=line_size;\ | |
714 a= LD64(pixels );\ | |
715 b= LD64(pixels+1);\ | |
716 l0= (a&0x0303030303030303ULL)\ | |
717 + (b&0x0303030303030303ULL)\ | |
718 + 0x0101010101010101ULL;\ | |
719 h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ | |
720 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ | |
721 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\ | |
722 pixels+=line_size;\ | |
723 block +=line_size;\ | |
724 }\ | |
725 }\ | |
726 \ | |
859 | 727 CALL_2X_PIXELS(OPNAME ## _pixels16_c , OPNAME ## _pixels_c , 8)\ |
728 CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels_x2_c , 8)\ | |
729 CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels_y2_c , 8)\ | |
730 CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels_xy2_c, 8)\ | |
731 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels_x2_c , 8)\ | |
732 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels_y2_c , 8)\ | |
733 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels_xy2_c, 8) | |
385 | 734 |
735 #define op_avg(a, b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEFEFEFEFEULL)>>1) ) | |
736 #else // 64 bit variant | |
737 | |
738 #define PIXOP2(OPNAME, OP) \ | |
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
739 static void OPNAME ## _pixels2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
740 int i;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
741 for(i=0; i<h; i++){\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
742 OP(*((uint16_t*)(block )), LD16(pixels ));\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
743 pixels+=line_size;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
744 block +=line_size;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
745 }\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
746 }\ |
1168 | 747 static void OPNAME ## _pixels4_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
748 int i;\ | |
749 for(i=0; i<h; i++){\ | |
750 OP(*((uint32_t*)(block )), LD32(pixels ));\ | |
751 pixels+=line_size;\ | |
752 block +=line_size;\ | |
753 }\ | |
754 }\ | |
859 | 755 static void OPNAME ## _pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
385 | 756 int i;\ |
757 for(i=0; i<h; i++){\ | |
758 OP(*((uint32_t*)(block )), LD32(pixels ));\ | |
759 OP(*((uint32_t*)(block+4)), LD32(pixels+4));\ | |
760 pixels+=line_size;\ | |
761 block +=line_size;\ | |
762 }\ | |
763 }\ | |
859 | 764 static inline void OPNAME ## _no_rnd_pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
765 OPNAME ## _pixels8_c(block, pixels, line_size, h);\ | |
651 | 766 }\ |
385 | 767 \ |
651 | 768 static inline void OPNAME ## _no_rnd_pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ |
769 int src_stride1, int src_stride2, int h){\ | |
385 | 770 int i;\ |
771 for(i=0; i<h; i++){\ | |
651 | 772 uint32_t a,b;\ |
773 a= LD32(&src1[i*src_stride1 ]);\ | |
774 b= LD32(&src2[i*src_stride2 ]);\ | |
1264 | 775 OP(*((uint32_t*)&dst[i*dst_stride ]), no_rnd_avg32(a, b));\ |
651 | 776 a= LD32(&src1[i*src_stride1+4]);\ |
777 b= LD32(&src2[i*src_stride2+4]);\ | |
1264 | 778 OP(*((uint32_t*)&dst[i*dst_stride+4]), no_rnd_avg32(a, b));\ |
385 | 779 }\ |
780 }\ | |
781 \ | |
651 | 782 static inline void OPNAME ## _pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ |
783 int src_stride1, int src_stride2, int h){\ | |
385 | 784 int i;\ |
785 for(i=0; i<h; i++){\ | |
651 | 786 uint32_t a,b;\ |
787 a= LD32(&src1[i*src_stride1 ]);\ | |
788 b= LD32(&src2[i*src_stride2 ]);\ | |
1264 | 789 OP(*((uint32_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\ |
651 | 790 a= LD32(&src1[i*src_stride1+4]);\ |
791 b= LD32(&src2[i*src_stride2+4]);\ | |
1264 | 792 OP(*((uint32_t*)&dst[i*dst_stride+4]), rnd_avg32(a, b));\ |
385 | 793 }\ |
794 }\ | |
795 \ | |
1168 | 796 static inline void OPNAME ## _pixels4_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ |
797 int src_stride1, int src_stride2, int h){\ | |
798 int i;\ | |
799 for(i=0; i<h; i++){\ | |
800 uint32_t a,b;\ | |
801 a= LD32(&src1[i*src_stride1 ]);\ | |
802 b= LD32(&src2[i*src_stride2 ]);\ | |
1264 | 803 OP(*((uint32_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\ |
1168 | 804 }\ |
805 }\ | |
806 \ | |
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
807 static inline void OPNAME ## _pixels2_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
808 int src_stride1, int src_stride2, int h){\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
809 int i;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
810 for(i=0; i<h; i++){\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
811 uint32_t a,b;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
812 a= LD16(&src1[i*src_stride1 ]);\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
813 b= LD16(&src2[i*src_stride2 ]);\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
814 OP(*((uint16_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
815 }\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
816 }\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
817 \ |
651 | 818 static inline void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ |
819 int src_stride1, int src_stride2, int h){\ | |
820 OPNAME ## _pixels8_l2(dst , src1 , src2 , dst_stride, src_stride1, src_stride2, h);\ | |
821 OPNAME ## _pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\ | |
822 }\ | |
823 \ | |
824 static inline void OPNAME ## _no_rnd_pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ | |
825 int src_stride1, int src_stride2, int h){\ | |
826 OPNAME ## _no_rnd_pixels8_l2(dst , src1 , src2 , dst_stride, src_stride1, src_stride2, h);\ | |
827 OPNAME ## _no_rnd_pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\ | |
828 }\ | |
829 \ | |
859 | 830 static inline void OPNAME ## _no_rnd_pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
651 | 831 OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\ |
832 }\ | |
833 \ | |
859 | 834 static inline void OPNAME ## _pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
651 | 835 OPNAME ## _pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\ |
836 }\ | |
837 \ | |
859 | 838 static inline void OPNAME ## _no_rnd_pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
651 | 839 OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\ |
840 }\ | |
841 \ | |
859 | 842 static inline void OPNAME ## _pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
651 | 843 OPNAME ## _pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\ |
385 | 844 }\ |
845 \ | |
651 | 846 static inline void OPNAME ## _pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\ |
847 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ | |
848 int i;\ | |
849 for(i=0; i<h; i++){\ | |
850 uint32_t a, b, c, d, l0, l1, h0, h1;\ | |
851 a= LD32(&src1[i*src_stride1]);\ | |
852 b= LD32(&src2[i*src_stride2]);\ | |
853 c= LD32(&src3[i*src_stride3]);\ | |
854 d= LD32(&src4[i*src_stride4]);\ | |
855 l0= (a&0x03030303UL)\ | |
856 + (b&0x03030303UL)\ | |
857 + 0x02020202UL;\ | |
858 h0= ((a&0xFCFCFCFCUL)>>2)\ | |
859 + ((b&0xFCFCFCFCUL)>>2);\ | |
860 l1= (c&0x03030303UL)\ | |
861 + (d&0x03030303UL);\ | |
862 h1= ((c&0xFCFCFCFCUL)>>2)\ | |
863 + ((d&0xFCFCFCFCUL)>>2);\ | |
864 OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ | |
865 a= LD32(&src1[i*src_stride1+4]);\ | |
866 b= LD32(&src2[i*src_stride2+4]);\ | |
867 c= LD32(&src3[i*src_stride3+4]);\ | |
868 d= LD32(&src4[i*src_stride4+4]);\ | |
869 l0= (a&0x03030303UL)\ | |
870 + (b&0x03030303UL)\ | |
871 + 0x02020202UL;\ | |
872 h0= ((a&0xFCFCFCFCUL)>>2)\ | |
873 + ((b&0xFCFCFCFCUL)>>2);\ | |
874 l1= (c&0x03030303UL)\ | |
875 + (d&0x03030303UL);\ | |
876 h1= ((c&0xFCFCFCFCUL)>>2)\ | |
877 + ((d&0xFCFCFCFCUL)>>2);\ | |
878 OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ | |
879 }\ | |
880 }\ | |
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
881 \ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
882 static inline void OPNAME ## _pixels4_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
883 OPNAME ## _pixels4_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
884 }\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
885 \ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
886 static inline void OPNAME ## _pixels4_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
887 OPNAME ## _pixels4_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
888 }\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
889 \ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
890 static inline void OPNAME ## _pixels2_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
891 OPNAME ## _pixels2_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
892 }\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
893 \ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
894 static inline void OPNAME ## _pixels2_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
895 OPNAME ## _pixels2_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
896 }\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
897 \ |
651 | 898 static inline void OPNAME ## _no_rnd_pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\ |
899 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ | |
385 | 900 int i;\ |
901 for(i=0; i<h; i++){\ | |
651 | 902 uint32_t a, b, c, d, l0, l1, h0, h1;\ |
903 a= LD32(&src1[i*src_stride1]);\ | |
904 b= LD32(&src2[i*src_stride2]);\ | |
905 c= LD32(&src3[i*src_stride3]);\ | |
906 d= LD32(&src4[i*src_stride4]);\ | |
907 l0= (a&0x03030303UL)\ | |
908 + (b&0x03030303UL)\ | |
909 + 0x01010101UL;\ | |
910 h0= ((a&0xFCFCFCFCUL)>>2)\ | |
911 + ((b&0xFCFCFCFCUL)>>2);\ | |
912 l1= (c&0x03030303UL)\ | |
913 + (d&0x03030303UL);\ | |
914 h1= ((c&0xFCFCFCFCUL)>>2)\ | |
915 + ((d&0xFCFCFCFCUL)>>2);\ | |
916 OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ | |
917 a= LD32(&src1[i*src_stride1+4]);\ | |
918 b= LD32(&src2[i*src_stride2+4]);\ | |
919 c= LD32(&src3[i*src_stride3+4]);\ | |
920 d= LD32(&src4[i*src_stride4+4]);\ | |
921 l0= (a&0x03030303UL)\ | |
922 + (b&0x03030303UL)\ | |
923 + 0x01010101UL;\ | |
924 h0= ((a&0xFCFCFCFCUL)>>2)\ | |
925 + ((b&0xFCFCFCFCUL)>>2);\ | |
926 l1= (c&0x03030303UL)\ | |
927 + (d&0x03030303UL);\ | |
928 h1= ((c&0xFCFCFCFCUL)>>2)\ | |
929 + ((d&0xFCFCFCFCUL)>>2);\ | |
930 OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ | |
385 | 931 }\ |
932 }\ | |
651 | 933 static inline void OPNAME ## _pixels16_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\ |
934 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ | |
935 OPNAME ## _pixels8_l4(dst , src1 , src2 , src3 , src4 , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\ | |
936 OPNAME ## _pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\ | |
937 }\ | |
938 static inline void OPNAME ## _no_rnd_pixels16_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\ | |
939 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ | |
940 OPNAME ## _no_rnd_pixels8_l4(dst , src1 , src2 , src3 , src4 , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\ | |
941 OPNAME ## _no_rnd_pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\ | |
942 }\ | |
385 | 943 \ |
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
944 static inline void OPNAME ## _pixels2_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
945 {\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
946 int i, a0, b0, a1, b1;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
947 a0= pixels[0];\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
948 b0= pixels[1] + 2;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
949 a0 += b0;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
950 b0 += pixels[2];\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
951 \ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
952 pixels+=line_size;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
953 for(i=0; i<h; i+=2){\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
954 a1= pixels[0];\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
955 b1= pixels[1];\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
956 a1 += b1;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
957 b1 += pixels[2];\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
958 \ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
959 block[0]= (a1+a0)>>2; /* FIXME non put */\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
960 block[1]= (b1+b0)>>2;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
961 \ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
962 pixels+=line_size;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
963 block +=line_size;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
964 \ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
965 a0= pixels[0];\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
966 b0= pixels[1] + 2;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
967 a0 += b0;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
968 b0 += pixels[2];\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
969 \ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
970 block[0]= (a1+a0)>>2;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
971 block[1]= (b1+b0)>>2;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
972 pixels+=line_size;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
973 block +=line_size;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
974 }\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
975 }\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
976 \ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
977 static inline void OPNAME ## _pixels4_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
978 {\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
979 int i;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
980 const uint32_t a= LD32(pixels );\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
981 const uint32_t b= LD32(pixels+1);\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
982 uint32_t l0= (a&0x03030303UL)\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
983 + (b&0x03030303UL)\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
984 + 0x02020202UL;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
985 uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
986 + ((b&0xFCFCFCFCUL)>>2);\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
987 uint32_t l1,h1;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
988 \ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
989 pixels+=line_size;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
990 for(i=0; i<h; i+=2){\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
991 uint32_t a= LD32(pixels );\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
992 uint32_t b= LD32(pixels+1);\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
993 l1= (a&0x03030303UL)\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
994 + (b&0x03030303UL);\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
995 h1= ((a&0xFCFCFCFCUL)>>2)\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
996 + ((b&0xFCFCFCFCUL)>>2);\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
997 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
998 pixels+=line_size;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
999 block +=line_size;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1000 a= LD32(pixels );\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1001 b= LD32(pixels+1);\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1002 l0= (a&0x03030303UL)\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1003 + (b&0x03030303UL)\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1004 + 0x02020202UL;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1005 h0= ((a&0xFCFCFCFCUL)>>2)\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1006 + ((b&0xFCFCFCFCUL)>>2);\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1007 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1008 pixels+=line_size;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1009 block +=line_size;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1010 }\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1011 }\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1012 \ |
859 | 1013 static inline void OPNAME ## _pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ |
385 | 1014 {\ |
1015 int j;\ | |
1016 for(j=0; j<2; j++){\ | |
1017 int i;\ | |
1018 const uint32_t a= LD32(pixels );\ | |
1019 const uint32_t b= LD32(pixels+1);\ | |
1020 uint32_t l0= (a&0x03030303UL)\ | |
1021 + (b&0x03030303UL)\ | |
1022 + 0x02020202UL;\ | |
1023 uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\ | |
1024 + ((b&0xFCFCFCFCUL)>>2);\ | |
1025 uint32_t l1,h1;\ | |
1026 \ | |
1027 pixels+=line_size;\ | |
1028 for(i=0; i<h; i+=2){\ | |
1029 uint32_t a= LD32(pixels );\ | |
1030 uint32_t b= LD32(pixels+1);\ | |
1031 l1= (a&0x03030303UL)\ | |
1032 + (b&0x03030303UL);\ | |
1033 h1= ((a&0xFCFCFCFCUL)>>2)\ | |
1034 + ((b&0xFCFCFCFCUL)>>2);\ | |
1035 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ | |
1036 pixels+=line_size;\ | |
1037 block +=line_size;\ | |
1038 a= LD32(pixels );\ | |
1039 b= LD32(pixels+1);\ | |
1040 l0= (a&0x03030303UL)\ | |
1041 + (b&0x03030303UL)\ | |
1042 + 0x02020202UL;\ | |
1043 h0= ((a&0xFCFCFCFCUL)>>2)\ | |
1044 + ((b&0xFCFCFCFCUL)>>2);\ | |
1045 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ | |
1046 pixels+=line_size;\ | |
1047 block +=line_size;\ | |
1048 }\ | |
1049 pixels+=4-line_size*(h+1);\ | |
1050 block +=4-line_size*h;\ | |
1051 }\ | |
1052 }\ | |
1053 \ | |
859 | 1054 static inline void OPNAME ## _no_rnd_pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ |
385 | 1055 {\ |
1056 int j;\ | |
1057 for(j=0; j<2; j++){\ | |
1058 int i;\ | |
1059 const uint32_t a= LD32(pixels );\ | |
1060 const uint32_t b= LD32(pixels+1);\ | |
1061 uint32_t l0= (a&0x03030303UL)\ | |
1062 + (b&0x03030303UL)\ | |
1063 + 0x01010101UL;\ | |
1064 uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\ | |
1065 + ((b&0xFCFCFCFCUL)>>2);\ | |
1066 uint32_t l1,h1;\ | |
1067 \ | |
1068 pixels+=line_size;\ | |
1069 for(i=0; i<h; i+=2){\ | |
1070 uint32_t a= LD32(pixels );\ | |
1071 uint32_t b= LD32(pixels+1);\ | |
1072 l1= (a&0x03030303UL)\ | |
1073 + (b&0x03030303UL);\ | |
1074 h1= ((a&0xFCFCFCFCUL)>>2)\ | |
1075 + ((b&0xFCFCFCFCUL)>>2);\ | |
1076 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ | |
1077 pixels+=line_size;\ | |
1078 block +=line_size;\ | |
1079 a= LD32(pixels );\ | |
1080 b= LD32(pixels+1);\ | |
1081 l0= (a&0x03030303UL)\ | |
1082 + (b&0x03030303UL)\ | |
1083 + 0x01010101UL;\ | |
1084 h0= ((a&0xFCFCFCFCUL)>>2)\ | |
1085 + ((b&0xFCFCFCFCUL)>>2);\ | |
1086 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ | |
1087 pixels+=line_size;\ | |
1088 block +=line_size;\ | |
1089 }\ | |
1090 pixels+=4-line_size*(h+1);\ | |
1091 block +=4-line_size*h;\ | |
1092 }\ | |
1093 }\ | |
1094 \ | |
859 | 1095 CALL_2X_PIXELS(OPNAME ## _pixels16_c , OPNAME ## _pixels8_c , 8)\ |
1096 CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels8_x2_c , 8)\ | |
1097 CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels8_y2_c , 8)\ | |
1098 CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels8_xy2_c, 8)\ | |
1099 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_c , OPNAME ## _pixels8_c , 8)\ | |
1100 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels8_x2_c , 8)\ | |
1101 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels8_y2_c , 8)\ | |
1102 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels8_xy2_c, 8)\ | |
651 | 1103 |
1264 | 1104 #define op_avg(a, b) a = rnd_avg32(a, b) |
385 | 1105 #endif |
1106 #define op_put(a, b) a = b | |
1107 | |
1108 PIXOP2(avg, op_avg) | |
1109 PIXOP2(put, op_put) | |
1110 #undef op_avg | |
1111 #undef op_put | |
1112 | |
0 | 1113 #define avg2(a,b) ((a+b+1)>>1) |
1114 #define avg4(a,b,c,d) ((a+b+c+d+2)>>2) | |
1115 | |
/* Adapter with the common 5-argument l2 signature: averages the two
 * 16-pixel-wide sources a and b into dst without rounding, using one
 * common stride for all three buffers (the underlying helper takes
 * three separate strides). */
static void put_no_rnd_pixels16_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h){
    put_no_rnd_pixels16_l2(dst, a, b, stride, stride, stride, h);
}
1119 | |
/* 8-pixel-wide variant of the l2 adapter above: forwards the single
 * stride argument as dst/src1/src2 stride to the 6-stride helper. */
static void put_no_rnd_pixels8_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h){
    put_no_rnd_pixels8_l2(dst, a, b, stride, stride, stride, h);
}
753 | 1123 |
/* One-parameter global motion compensation, 1/16-pel precision.
 * Bilinearly interpolates an 8-pixel-wide block from src at fractional
 * offset (x16/16, y16/16).  The four corner weights A..D sum to 256,
 * so (weighted sum + rounder) >> 8 normalizes back to pixel range.
 */
static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder)
{
    const int A = (16 - x16) * (16 - y16);
    const int B = (     x16) * (16 - y16);
    const int C = (16 - x16) * (     y16);
    const int D = (     x16) * (     y16);
    int y, x;

    for (y = 0; y < h; y++) {
        /* same four-tap weighting for each of the 8 columns */
        for (x = 0; x < 8; x++) {
            dst[x] = (A * src[x]          + B * src[x + 1] +
                      C * src[stride + x] + D * src[stride + x + 1] +
                      rounder) >> 8;
        }
        dst += stride;
        src += stride;
    }
}
1146 | |
/* Affine global motion compensation for one 8-pixel-wide block of h rows.
 * (ox,oy) is the fixed-point source position of the first sample; (dxx,dyx)
 * is added per output column and (dxy,dyy) per output row.  s = 1<<shift is
 * the number of interpolation sub-positions per pixel, r the rounding
 * constant for the (shift*2)-bit renormalization, and width/height the valid
 * source dimensions used for edge clamping via clip() (defined elsewhere in
 * this file). */
static void gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
                  int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
{
    int y, vx, vy;
    const int s= 1<<shift;

    /* pre-decrement so that "(unsigned)src_x < width" below implies that
       src_x+1 is still a valid column (likewise for rows) */
    width--;
    height--;

    for(y=0; y<h; y++){
        int x;

        vx= ox;
        vy= oy;
        for(x=0; x<8; x++){ //XXX FIXME optimize
            int src_x, src_y, frac_x, frac_y, index;

            src_x= vx>>16;
            src_y= vy>>16;
            /* fractional part is taken from the low 'shift' bits of vx>>16,
               i.e. BEFORE the extra >>shift below
               -- NOTE(review): looks intentional; confirm against callers */
            frac_x= src_x&(s-1);
            frac_y= src_y&(s-1);
            src_x>>=shift;
            src_y>>=shift;

            /* the unsigned casts also reject negative coordinates */
            if((unsigned)src_x < width){
                if((unsigned)src_y < height){
                    /* fully inside: bilinear blend of the 4 neighbours */
                    index= src_x + src_y*stride;
                    dst[y*stride + x]= ( ( src[index         ]*(s-frac_x)
                                        + src[index       +1]*   frac_x )*(s-frac_y)
                                      + ( src[index+stride  ]*(s-frac_x)
                                        + src[index+stride+1]*   frac_x )*   frac_y
                                        + r)>>(shift*2);
                }else{
                    /* row out of range: clamp it, interpolate horizontally only */
                    index= src_x + clip(src_y, 0, height)*stride;
                    dst[y*stride + x]= ( ( src[index         ]*(s-frac_x)
                                         + src[index       +1]*   frac_x )*s
                                        + r)>>(shift*2);
                }
            }else{
                if((unsigned)src_y < height){
                    /* column out of range: clamp it, interpolate vertically only */
                    index= clip(src_x, 0, width) + src_y*stride;
                    dst[y*stride + x]= ( ( src[index         ]*(s-frac_y)
                                         + src[index+stride  ]*   frac_y )*s
                                        + r)>>(shift*2);
                }else{
                    /* both out of range: nearest clamped sample, no blending */
                    index= clip(src_x, 0, width) + clip(src_y, 0, height)*stride;
                    dst[y*stride + x]=    src[index         ];
                }
            }

            vx+= dxx;
            vy+= dyx;
        }
        ox += dxy;
        oy += dyy;
    }
}
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1204 |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
/* Thirdpel MC, integer position (no interpolation): dispatch to the plain
 * put_pixels copy routine of the matching block width.  Widths other than
 * 2/4/8/16 are silently ignored (dst is left untouched). */
static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    switch(width){
    case 2: put_pixels2_c (dst, src, stride, height); break;
    case 4: put_pixels4_c (dst, src, stride, height); break;
    case 8: put_pixels8_c (dst, src, stride, height); break;
    case 16:put_pixels16_c(dst, src, stride, height); break;
    }
}
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1213 |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
/* Thirdpel MC, horizontal offset 1/3: out = round((2*left + right)/3),
 * computed fixed-point as *683 (~2^11/3, weights sum to 3) then >>11. */
static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y, x;
    for (y = 0; y < height; y++) {
        const uint8_t *s = src + y*stride;
        uint8_t       *d = dst + y*stride;
        for (x = 0; x < width; x++)
            d[x] = (683*(2*s[x] + s[x+1] + 1)) >> 11;
    }
}
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1224 |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
/* Thirdpel MC, horizontal offset 2/3: out = round((left + 2*right)/3),
 * fixed-point via *683 (~2^11/3) and >>11. */
static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y, x;
    for (y = 0; y < height; y++) {
        const uint8_t *s = src + y*stride;
        uint8_t       *d = dst + y*stride;
        for (x = 0; x < width; x++)
            d[x] = (683*(s[x] + 2*s[x+1] + 1)) >> 11;
    }
}
2967 | 1235 |
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
/* Thirdpel MC, vertical offset 1/3: out = round((2*top + bottom)/3),
 * fixed-point via *683 (~2^11/3) and >>11. */
static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y, x;
    for (y = 0; y < height; y++) {
        const uint8_t *s = src + y*stride;
        uint8_t       *d = dst + y*stride;
        for (x = 0; x < width; x++)
            d[x] = (683*(2*s[x] + s[x+stride] + 1)) >> 11;
    }
}
2967 | 1246 |
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
/* Thirdpel MC, offset (1/3, 1/3): 2-D blend with taps (4,3,3,2)/12 over
 * the 2x2 neighbourhood, fixed-point via *2731 (~2^15/12) and >>15. */
static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y, x;
    for (y = 0; y < height; y++) {
        const uint8_t *s = src + y*stride;
        uint8_t       *d = dst + y*stride;
        for (x = 0; x < width; x++)
            d[x] = (2731*(4*s[x] + 3*s[x+1] + 3*s[x+stride] + 2*s[x+stride+1] + 6)) >> 15;
    }
}
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1257 |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
/* Thirdpel MC, offset (1/3, 2/3): 2-D blend with taps (3,2,4,3)/12 over
 * the 2x2 neighbourhood, fixed-point via *2731 (~2^15/12) and >>15. */
static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y, x;
    for (y = 0; y < height; y++) {
        const uint8_t *s = src + y*stride;
        uint8_t       *d = dst + y*stride;
        for (x = 0; x < width; x++)
            d[x] = (2731*(3*s[x] + 2*s[x+1] + 4*s[x+stride] + 3*s[x+stride+1] + 6)) >> 15;
    }
}
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1268 |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
/* Thirdpel MC, vertical offset 2/3: out = round((top + 2*bottom)/3),
 * fixed-point via *683 (~2^11/3) and >>11. */
static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y, x;
    for (y = 0; y < height; y++) {
        const uint8_t *s = src + y*stride;
        uint8_t       *d = dst + y*stride;
        for (x = 0; x < width; x++)
            d[x] = (683*(s[x] + 2*s[x+stride] + 1)) >> 11;
    }
}
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1279 |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
/* Thirdpel MC, offset (2/3, 1/3): 2-D blend with taps (3,4,2,3)/12 over
 * the 2x2 neighbourhood, fixed-point via *2731 (~2^15/12) and >>15. */
static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y, x;
    for (y = 0; y < height; y++) {
        const uint8_t *s = src + y*stride;
        uint8_t       *d = dst + y*stride;
        for (x = 0; x < width; x++)
            d[x] = (2731*(3*s[x] + 4*s[x+1] + 2*s[x+stride] + 3*s[x+stride+1] + 6)) >> 15;
    }
}
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1290 |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
/* Thirdpel MC, offset (2/3, 2/3): 2-D blend with taps (2,3,3,4)/12 over
 * the 2x2 neighbourhood, fixed-point via *2731 (~2^15/12) and >>15. */
static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y, x;
    for (y = 0; y < height; y++) {
        const uint8_t *s = src + y*stride;
        uint8_t       *d = dst + y*stride;
        for (x = 0; x < width; x++)
            d[x] = (2731*(2*s[x] + 3*s[x+1] + 3*s[x+stride] + 4*s[x+stride+1] + 6)) >> 15;
    }
}
1319 | 1301 |
/* Thirdpel MC with averaging, integer position: dispatch to the plain
 * avg_pixels routine of the matching block width (rounded average of src
 * with the existing dst contents).  Widths other than 2/4/8/16 are
 * silently ignored. */
static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    switch(width){
    case 2: avg_pixels2_c (dst, src, stride, height); break;
    case 4: avg_pixels4_c (dst, src, stride, height); break;
    case 8: avg_pixels8_c (dst, src, stride, height); break;
    case 16:avg_pixels16_c(dst, src, stride, height); break;
    }
}
1310 | |
/* Averaging thirdpel MC, horizontal offset 1/3: interpolate as in the
 * put variant ((2*left+right)/3 via *683, >>11), then take the rounded
 * average with the pixel already stored in dst. */
static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y, x;
    for (y = 0; y < height; y++) {
        const uint8_t *s = src + y*stride;
        uint8_t       *d = dst + y*stride;
        for (x = 0; x < width; x++) {
            const int t = (683*(2*s[x] + s[x+1] + 1)) >> 11;
            d[x] = (d[x] + t + 1) >> 1;
        }
    }
}
1321 | |
/* Averaging thirdpel MC, horizontal offset 2/3: interpolate
 * (left+2*right)/3 via *683 and >>11, then rounded-average with dst. */
static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y, x;
    for (y = 0; y < height; y++) {
        const uint8_t *s = src + y*stride;
        uint8_t       *d = dst + y*stride;
        for (x = 0; x < width; x++) {
            const int t = (683*(s[x] + 2*s[x+1] + 1)) >> 11;
            d[x] = (d[x] + t + 1) >> 1;
        }
    }
}
2967 | 1332 |
/* Averaging thirdpel MC, vertical offset 1/3: interpolate
 * (2*top+bottom)/3 via *683 and >>11, then rounded-average with dst. */
static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y, x;
    for (y = 0; y < height; y++) {
        const uint8_t *s = src + y*stride;
        uint8_t       *d = dst + y*stride;
        for (x = 0; x < width; x++) {
            const int t = (683*(2*s[x] + s[x+stride] + 1)) >> 11;
            d[x] = (d[x] + t + 1) >> 1;
        }
    }
}
2967 | 1343 |
/* Averaging thirdpel MC, offset (1/3,1/3): 2-D taps (4,3,3,2)/12 via
 * *2731 and >>15, then rounded-average with dst. */
static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y, x;
    for (y = 0; y < height; y++) {
        const uint8_t *s = src + y*stride;
        uint8_t       *d = dst + y*stride;
        for (x = 0; x < width; x++) {
            const int t = (2731*(4*s[x] + 3*s[x+1] + 3*s[x+stride] + 2*s[x+stride+1] + 6)) >> 15;
            d[x] = (d[x] + t + 1) >> 1;
        }
    }
}
1354 | |
/* Averaging thirdpel MC, offset (1/3,2/3): 2-D taps (3,2,4,3)/12 via
 * *2731 and >>15, then rounded-average with dst. */
static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y, x;
    for (y = 0; y < height; y++) {
        const uint8_t *s = src + y*stride;
        uint8_t       *d = dst + y*stride;
        for (x = 0; x < width; x++) {
            const int t = (2731*(3*s[x] + 2*s[x+1] + 4*s[x+stride] + 3*s[x+stride+1] + 6)) >> 15;
            d[x] = (d[x] + t + 1) >> 1;
        }
    }
}
1365 | |
/* Averaging thirdpel MC, vertical offset 2/3: interpolate
 * (top+2*bottom)/3 via *683 and >>11, then rounded-average with dst. */
static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y, x;
    for (y = 0; y < height; y++) {
        const uint8_t *s = src + y*stride;
        uint8_t       *d = dst + y*stride;
        for (x = 0; x < width; x++) {
            const int t = (683*(s[x] + 2*s[x+stride] + 1)) >> 11;
            d[x] = (d[x] + t + 1) >> 1;
        }
    }
}
1376 | |
/* Averaging thirdpel MC, offset (2/3,1/3): 2-D taps (3,4,2,3)/12 via
 * *2731 and >>15, then rounded-average with dst. */
static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y, x;
    for (y = 0; y < height; y++) {
        const uint8_t *s = src + y*stride;
        uint8_t       *d = dst + y*stride;
        for (x = 0; x < width; x++) {
            const int t = (2731*(3*s[x] + 4*s[x+1] + 2*s[x+stride] + 3*s[x+stride+1] + 6)) >> 15;
            d[x] = (d[x] + t + 1) >> 1;
        }
    }
}
1387 | |
/* Averaging thirdpel MC, offset (2/3,2/3): 2-D taps (2,3,3,4)/12 via
 * *2731 and >>15, then rounded-average with dst. */
static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y, x;
    for (y = 0; y < height; y++) {
        const uint8_t *s = src + y*stride;
        uint8_t       *d = dst + y*stride;
        for (x = 0; x < width; x++) {
            const int t = (2731*(2*s[x] + 3*s[x+1] + 3*s[x+stride] + 4*s[x+stride+1] + 6)) >> 15;
            d[x] = (d[x] + t + 1) >> 1;
        }
    }
}
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1398 #if 0 |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1399 #define TPEL_WIDTH(width)\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1400 static void put_tpel_pixels ## width ## _mc00_c(uint8_t *dst, const uint8_t *src, int stride, int height){\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1401 void put_tpel_pixels_mc00_c(dst, src, stride, width, height);}\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1402 static void put_tpel_pixels ## width ## _mc10_c(uint8_t *dst, const uint8_t *src, int stride, int height){\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1403 void put_tpel_pixels_mc10_c(dst, src, stride, width, height);}\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1404 static void put_tpel_pixels ## width ## _mc20_c(uint8_t *dst, const uint8_t *src, int stride, int height){\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1405 void put_tpel_pixels_mc20_c(dst, src, stride, width, height);}\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1406 static void put_tpel_pixels ## width ## _mc01_c(uint8_t *dst, const uint8_t *src, int stride, int height){\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1407 void put_tpel_pixels_mc01_c(dst, src, stride, width, height);}\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1408 static void put_tpel_pixels ## width ## _mc11_c(uint8_t *dst, const uint8_t *src, int stride, int height){\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1409 void put_tpel_pixels_mc11_c(dst, src, stride, width, height);}\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1410 static void put_tpel_pixels ## width ## _mc21_c(uint8_t *dst, const uint8_t *src, int stride, int height){\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1411 void put_tpel_pixels_mc21_c(dst, src, stride, width, height);}\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1412 static void put_tpel_pixels ## width ## _mc02_c(uint8_t *dst, const uint8_t *src, int stride, int height){\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1413 void put_tpel_pixels_mc02_c(dst, src, stride, width, height);}\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1414 static void put_tpel_pixels ## width ## _mc12_c(uint8_t *dst, const uint8_t *src, int stride, int height){\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1415 void put_tpel_pixels_mc12_c(dst, src, stride, width, height);}\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1416 static void put_tpel_pixels ## width ## _mc22_c(uint8_t *dst, const uint8_t *src, int stride, int height){\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1417 void put_tpel_pixels_mc22_c(dst, src, stride, width, height);} |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1418 #endif |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1419 |
/* Bilinear chroma motion compensation for 2/4/8-wide blocks.
 * x,y are eighth-pel fractional offsets (0..7); the four corner weights
 * A,B,C,D sum to 64, and OP rounds/normalizes the 6-bit-scaled result. */
#define H264_CHROMA_MC(OPNAME, OP)\
static void OPNAME ## h264_chroma_mc2_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
    const int A=(8-x)*(8-y);\
    const int B=(  x)*(8-y);\
    const int C=(8-x)*(  y);\
    const int D=(  x)*(  y);\
    int i, j;\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
    \
    for(i=0; i<h; i++){\
        for(j=0; j<2; j++){\
            OP(dst[j], (A*src[j] + B*src[j+1] + C*src[stride+j] + D*src[stride+j+1]));\
        }\
        dst+= stride;\
        src+= stride;\
    }\
}\
\
static void OPNAME ## h264_chroma_mc4_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
    const int A=(8-x)*(8-y);\
    const int B=(  x)*(8-y);\
    const int C=(8-x)*(  y);\
    const int D=(  x)*(  y);\
    int i, j;\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
    \
    for(i=0; i<h; i++){\
        for(j=0; j<4; j++){\
            OP(dst[j], (A*src[j] + B*src[j+1] + C*src[stride+j] + D*src[stride+j+1]));\
        }\
        dst+= stride;\
        src+= stride;\
    }\
}\
\
static void OPNAME ## h264_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
    const int A=(8-x)*(8-y);\
    const int B=(  x)*(8-y);\
    const int C=(8-x)*(  y);\
    const int D=(  x)*(  y);\
    int i, j;\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
    \
    for(i=0; i<h; i++){\
        for(j=0; j<8; j++){\
            OP(dst[j], (A*src[j] + B*src[j+1] + C*src[stride+j] + D*src[stride+j+1]));\
        }\
        dst+= stride;\
        src+= stride;\
    }\
}

#define op_avg(a, b) a = (((a)+(((b) + 32)>>6)+1)>>1)
#define op_put(a, b) a = (((b) + 32)>>6)

H264_CHROMA_MC(put_ , op_put)
H264_CHROMA_MC(avg_ , op_avg)
#undef op_avg
#undef op_put
1490 | |
3020
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
/* Copy h rows of 2 bytes each via 16-bit load/store macros. */
static inline void copy_block2(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
{
    while (h-- > 0) {
        ST16(dst, LD16(src));
        dst += dstStride;
        src += srcStride;
    }
}
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3013
diff
changeset
|
1501 |
/* Copy h rows of 4 bytes each via 32-bit load/store macros. */
static inline void copy_block4(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
{
    while (h-- > 0) {
        ST32(dst, LD32(src));
        dst += dstStride;
        src += srcStride;
    }
}
1512 | |
/* Copy h rows of 8 bytes each as two 32-bit load/store pairs. */
static inline void copy_block8(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
{
    while (h-- > 0) {
        ST32(dst    , LD32(src    ));
        ST32(dst + 4, LD32(src + 4));
        dst += dstStride;
        src += srcStride;
    }
}
1524 | |
/* Copy h rows of 16 bytes each as four 32-bit load/store pairs. */
static inline void copy_block16(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
{
    while (h-- > 0) {
        ST32(dst     , LD32(src     ));
        ST32(dst +  4, LD32(src +  4));
        ST32(dst +  8, LD32(src +  8));
        ST32(dst + 12, LD32(src + 12));
        dst += dstStride;
        src += srcStride;
    }
}
753 | 1538 |
/* Copy h rows of 17 bytes each: four 32-bit stores plus one tail byte
 * (17-wide source blocks are needed for 16-wide qpel filtering). */
static inline void copy_block17(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
{
    while (h-- > 0) {
        ST32(dst     , LD32(src     ));
        ST32(dst +  4, LD32(src +  4));
        ST32(dst +  8, LD32(src +  8));
        ST32(dst + 12, LD32(src + 12));
        dst[16] = src[16];
        dst += dstStride;
        src += srcStride;
    }
}
1553 | |
/* Copy h rows of 9 bytes each: two 32-bit stores plus one tail byte
 * (9-wide source blocks are needed for 8-wide qpel filtering). */
static inline void copy_block9(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
{
    while (h-- > 0) {
        ST32(dst    , LD32(src    ));
        ST32(dst + 4, LD32(src + 4));
        dst[8] = src[8];
        dst += dstStride;
        src += srcStride;
    }
}
1566 | |
954 | 1567 |
651 | 1568 #define QPEL_MC(r, OPNAME, RND, OP) \ |
1064 | 1569 static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\ |
1570 uint8_t *cm = cropTbl + MAX_NEG_CROP;\ | |
651 | 1571 int i;\ |
1572 for(i=0; i<h; i++)\ | |
1573 {\ | |
1574 OP(dst[0], (src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]));\ | |
1575 OP(dst[1], (src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]));\ | |
1576 OP(dst[2], (src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]));\ | |
1577 OP(dst[3], (src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]));\ | |
1578 OP(dst[4], (src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]));\ | |
1579 OP(dst[5], (src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]));\ | |
1580 OP(dst[6], (src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]));\ | |
1581 OP(dst[7], (src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]));\ | |
1582 dst+=dstStride;\ | |
1583 src+=srcStride;\ | |
1584 }\ | |
1585 }\ | |
1586 \ | |
1064 | 1587 static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ |
984 | 1588 const int w=8;\ |
1064 | 1589 uint8_t *cm = cropTbl + MAX_NEG_CROP;\ |
651 | 1590 int i;\ |
1591 for(i=0; i<w; i++)\ | |
1592 {\ | |
1593 const int src0= src[0*srcStride];\ | |
1594 const int src1= src[1*srcStride];\ | |
1595 const int src2= src[2*srcStride];\ | |
1596 const int src3= src[3*srcStride];\ | |
1597 const int src4= src[4*srcStride];\ | |
1598 const int src5= src[5*srcStride];\ | |
1599 const int src6= src[6*srcStride];\ | |
1600 const int src7= src[7*srcStride];\ | |
1601 const int src8= src[8*srcStride];\ | |
1602 OP(dst[0*dstStride], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\ | |
1603 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\ | |
1604 OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\ | |
1605 OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\ | |
1606 OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\ | |
1607 OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\ | |
1608 OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\ | |
1609 OP(dst[7*dstStride], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\ | |
1610 dst++;\ | |
1611 src++;\ | |
1612 }\ | |
1613 }\ | |
1614 \ | |
1064 | 1615 static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\ |
1616 uint8_t *cm = cropTbl + MAX_NEG_CROP;\ | |
651 | 1617 int i;\ |
954 | 1618 \ |
651 | 1619 for(i=0; i<h; i++)\ |
1620 {\ | |
1621 OP(dst[ 0], (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]));\ | |
1622 OP(dst[ 1], (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]));\ | |
1623 OP(dst[ 2], (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]));\ | |
1624 OP(dst[ 3], (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]));\ | |
1625 OP(dst[ 4], (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]));\ | |
1626 OP(dst[ 5], (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 9]));\ | |
1627 OP(dst[ 6], (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 9])*3 - (src[ 3]+src[10]));\ | |
1628 OP(dst[ 7], (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 9])*6 + (src[ 5]+src[10])*3 - (src[ 4]+src[11]));\ | |
1629 OP(dst[ 8], (src[ 8]+src[ 9])*20 - (src[ 7]+src[10])*6 + (src[ 6]+src[11])*3 - (src[ 5]+src[12]));\ | |
1630 OP(dst[ 9], (src[ 9]+src[10])*20 - (src[ 8]+src[11])*6 + (src[ 7]+src[12])*3 - (src[ 6]+src[13]));\ | |
1631 OP(dst[10], (src[10]+src[11])*20 - (src[ 9]+src[12])*6 + (src[ 8]+src[13])*3 - (src[ 7]+src[14]));\ | |
1632 OP(dst[11], (src[11]+src[12])*20 - (src[10]+src[13])*6 + (src[ 9]+src[14])*3 - (src[ 8]+src[15]));\ | |
1633 OP(dst[12], (src[12]+src[13])*20 - (src[11]+src[14])*6 + (src[10]+src[15])*3 - (src[ 9]+src[16]));\ | |
1634 OP(dst[13], (src[13]+src[14])*20 - (src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]));\ | |
1635 OP(dst[14], (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]));\ | |
1636 OP(dst[15], (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]));\ | |
1637 dst+=dstStride;\ | |
1638 src+=srcStride;\ | |
1639 }\ | |
255 | 1640 }\ |
1641 \ | |
1064 | 1642 static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ |
1643 uint8_t *cm = cropTbl + MAX_NEG_CROP;\ | |
651 | 1644 int i;\ |
954 | 1645 const int w=16;\ |
651 | 1646 for(i=0; i<w; i++)\ |
1647 {\ | |
1648 const int src0= src[0*srcStride];\ | |
1649 const int src1= src[1*srcStride];\ | |
1650 const int src2= src[2*srcStride];\ | |
1651 const int src3= src[3*srcStride];\ | |
1652 const int src4= src[4*srcStride];\ | |
1653 const int src5= src[5*srcStride];\ | |
1654 const int src6= src[6*srcStride];\ | |
1655 const int src7= src[7*srcStride];\ | |
1656 const int src8= src[8*srcStride];\ | |
1657 const int src9= src[9*srcStride];\ | |
1658 const int src10= src[10*srcStride];\ | |
1659 const int src11= src[11*srcStride];\ | |
1660 const int src12= src[12*srcStride];\ | |
1661 const int src13= src[13*srcStride];\ | |
1662 const int src14= src[14*srcStride];\ | |
1663 const int src15= src[15*srcStride];\ | |
1664 const int src16= src[16*srcStride];\ | |
1665 OP(dst[ 0*dstStride], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\ | |
1666 OP(dst[ 1*dstStride], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\ | |
1667 OP(dst[ 2*dstStride], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\ | |
1668 OP(dst[ 3*dstStride], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\ | |
1669 OP(dst[ 4*dstStride], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\ | |
1670 OP(dst[ 5*dstStride], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\ | |
1671 OP(dst[ 6*dstStride], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\ | |
1672 OP(dst[ 7*dstStride], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\ | |
1673 OP(dst[ 8*dstStride], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\ | |
1674 OP(dst[ 9*dstStride], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\ | |
1675 OP(dst[10*dstStride], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\ | |
1676 OP(dst[11*dstStride], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\ | |
1677 OP(dst[12*dstStride], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\ | |
1678 OP(dst[13*dstStride], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\ | |
1679 OP(dst[14*dstStride], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\ | |
1680 OP(dst[15*dstStride], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\ | |
1681 dst++;\ | |
1682 src++;\ | |
1683 }\ | |
255 | 1684 }\ |
1685 \ | |
1064 | 1686 static void OPNAME ## qpel8_mc00_c (uint8_t *dst, uint8_t *src, int stride){\ |
859 | 1687 OPNAME ## pixels8_c(dst, src, stride, 8);\ |
255 | 1688 }\ |
1689 \ | |
1064 | 1690 static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){\ |
1691 uint8_t half[64];\ | |
651 | 1692 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\ |
1693 OPNAME ## pixels8_l2(dst, src, half, stride, stride, 8, 8);\ | |
1694 }\ | |
1695 \ | |
1064 | 1696 static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){\ |
651 | 1697 OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\ |
255 | 1698 }\ |
1699 \ | |
1064 | 1700 static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){\ |
1701 uint8_t half[64];\ | |
651 | 1702 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\ |
1703 OPNAME ## pixels8_l2(dst, src+1, half, stride, stride, 8, 8);\ | |
1704 }\ | |
1705 \ | |
1064 | 1706 static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride){\ |
1707 uint8_t full[16*9];\ | |
1708 uint8_t half[64];\ | |
651 | 1709 copy_block9(full, src, 16, stride, 9);\ |
984 | 1710 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\ |
651 | 1711 OPNAME ## pixels8_l2(dst, full, half, stride, 16, 8, 8);\ |
1712 }\ | |
1713 \ | |
1064 | 1714 static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){\ |
1715 uint8_t full[16*9];\ | |
651 | 1716 copy_block9(full, src, 16, stride, 9);\ |
984 | 1717 OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\ |
255 | 1718 }\ |
1719 \ | |
1064 | 1720 static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, int stride){\ |
1721 uint8_t full[16*9];\ | |
1722 uint8_t half[64];\ | |
651 | 1723 copy_block9(full, src, 16, stride, 9);\ |
984 | 1724 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\ |
651 | 1725 OPNAME ## pixels8_l2(dst, full+16, half, stride, 16, 8, 8);\ |
1726 }\ | |
1064 | 1727 void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\ |
1728 uint8_t full[16*9];\ | |
1729 uint8_t halfH[72];\ | |
1730 uint8_t halfV[64];\ | |
1731 uint8_t halfHV[64];\ | |
651 | 1732 copy_block9(full, src, 16, stride, 9);\ |
1733 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ | |
984 | 1734 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\ |
1735 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ | |
651 | 1736 OPNAME ## pixels8_l4(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\ |
255 | 1737 }\ |
1064 | 1738 static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride){\ |
1739 uint8_t full[16*9];\ | |
1740 uint8_t halfH[72];\ | |
1741 uint8_t halfHV[64];\ | |
984 | 1742 copy_block9(full, src, 16, stride, 9);\ |
1743 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ | |
1744 put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\ | |
1745 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ | |
1746 OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\ | |
1747 }\ | |
1064 | 1748 void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\ |
1749 uint8_t full[16*9];\ | |
1750 uint8_t halfH[72];\ | |
1751 uint8_t halfV[64];\ | |
1752 uint8_t halfHV[64];\ | |
651 | 1753 copy_block9(full, src, 16, stride, 9);\ |
1754 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ | |
984 | 1755 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\ |
1756 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ | |
651 | 1757 OPNAME ## pixels8_l4(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\ |
255 | 1758 }\ |
1064 | 1759 static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride){\ |
1760 uint8_t full[16*9];\ | |
1761 uint8_t halfH[72];\ | |
1762 uint8_t halfHV[64];\ | |
984 | 1763 copy_block9(full, src, 16, stride, 9);\ |
1764 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ | |
1765 put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\ | |
1766 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ | |
1767 OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\ | |
1768 }\ | |
1064 | 1769 void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\ |
1770 uint8_t full[16*9];\ | |
1771 uint8_t halfH[72];\ | |
1772 uint8_t halfV[64];\ | |
1773 uint8_t halfHV[64];\ | |
651 | 1774 copy_block9(full, src, 16, stride, 9);\ |
1775 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ | |
984 | 1776 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\ |
1777 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ | |
651 | 1778 OPNAME ## pixels8_l4(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\ |
1779 }\ | |
1064 | 1780 static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride){\ |
1781 uint8_t full[16*9];\ | |
1782 uint8_t halfH[72];\ | |
1783 uint8_t halfHV[64];\ | |
984 | 1784 copy_block9(full, src, 16, stride, 9);\ |
1785 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ | |
1786 put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\ | |
1787 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ | |
1788 OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\ | |
1789 }\ | |
1064 | 1790 void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\ |
1791 uint8_t full[16*9];\ | |
1792 uint8_t halfH[72];\ | |
1793 uint8_t halfV[64];\ | |
1794 uint8_t halfHV[64];\ | |
651 | 1795 copy_block9(full, src, 16, stride, 9);\ |
1796 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full , 8, 16, 9);\ | |
984 | 1797 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\ |
1798 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ | |
651 | 1799 OPNAME ## pixels8_l4(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\ |
255 | 1800 }\ |
1064 | 1801 static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){\ |
1802 uint8_t full[16*9];\ | |
1803 uint8_t halfH[72];\ | |
1804 uint8_t halfHV[64];\ | |
984 | 1805 copy_block9(full, src, 16, stride, 9);\ |
1806 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ | |
1807 put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\ | |
1808 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ | |
1809 OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\ | |
1810 }\ | |
1064 | 1811 static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, int stride){\ |
1812 uint8_t halfH[72];\ | |
1813 uint8_t halfHV[64];\ | |
651 | 1814 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\ |
984 | 1815 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ |
651 | 1816 OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\ |
1817 }\ | |
1064 | 1818 static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, int stride){\ |
1819 uint8_t halfH[72];\ | |
1820 uint8_t halfHV[64];\ | |
651 | 1821 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\ |
984 | 1822 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ |
651 | 1823 OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\ |
1824 }\ | |
1064 | 1825 void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\ |
1826 uint8_t full[16*9];\ | |
1827 uint8_t halfH[72];\ | |
1828 uint8_t halfV[64];\ | |
1829 uint8_t halfHV[64];\ | |
651 | 1830 copy_block9(full, src, 16, stride, 9);\ |
1831 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ | |
984 | 1832 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\ |
1833 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ | |
651 | 1834 OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\ |
255 | 1835 }\ |
1064 | 1836 static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){\ |
1837 uint8_t full[16*9];\ | |
1838 uint8_t halfH[72];\ | |
984 | 1839 copy_block9(full, src, 16, stride, 9);\ |
1840 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ | |
1841 put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\ | |
1842 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\ | |
1843 }\ | |
1064 | 1844 void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\ |
1845 uint8_t full[16*9];\ | |
1846 uint8_t halfH[72];\ | |
1847 uint8_t halfV[64];\ | |
1848 uint8_t halfHV[64];\ | |
651 | 1849 copy_block9(full, src, 16, stride, 9);\ |
1850 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ | |
984 | 1851 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\ |
1852 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ | |
651 | 1853 OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\ |
1854 }\ | |
1064 | 1855 static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){\ |
1856 uint8_t full[16*9];\ | |
1857 uint8_t halfH[72];\ | |
984 | 1858 copy_block9(full, src, 16, stride, 9);\ |
1859 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ | |
1860 put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\ | |
1861 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\ | |
1862 }\ | |
1064 | 1863 static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){\ |
1864 uint8_t halfH[72];\ | |
651 | 1865 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\ |
984 | 1866 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\ |
651 | 1867 }\ |
1064 | 1868 static void OPNAME ## qpel16_mc00_c (uint8_t *dst, uint8_t *src, int stride){\ |
859 | 1869 OPNAME ## pixels16_c(dst, src, stride, 16);\ |
255 | 1870 }\ |
651 | 1871 \ |
1064 | 1872 static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, int stride){\ |
1873 uint8_t half[256];\ | |
651 | 1874 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\ |
1875 OPNAME ## pixels16_l2(dst, src, half, stride, stride, 16, 16);\ | |
1876 }\ | |
1877 \ | |
1064 | 1878 static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride){\ |
651 | 1879 OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\ |
1880 }\ | |
1881 \ | |
1064 | 1882 static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, int stride){\ |
1883 uint8_t half[256];\ | |
651 | 1884 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\ |
1885 OPNAME ## pixels16_l2(dst, src+1, half, stride, stride, 16, 16);\ | |
1886 }\ | |
1887 \ | |
1064 | 1888 static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride){\ |
1889 uint8_t full[24*17];\ | |
1890 uint8_t half[256];\ | |
651 | 1891 copy_block17(full, src, 24, stride, 17);\ |
954 | 1892 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\ |
651 | 1893 OPNAME ## pixels16_l2(dst, full, half, stride, 24, 16, 16);\ |
255 | 1894 }\ |
651 | 1895 \ |
1064 | 1896 static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, int stride){\ |
1897 uint8_t full[24*17];\ | |
651 | 1898 copy_block17(full, src, 24, stride, 17);\ |
954 | 1899 OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\ |
651 | 1900 }\ |
1901 \ | |
1064 | 1902 static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, int stride){\ |
1903 uint8_t full[24*17];\ | |
1904 uint8_t half[256];\ | |
651 | 1905 copy_block17(full, src, 24, stride, 17);\ |
954 | 1906 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\ |
651 | 1907 OPNAME ## pixels16_l2(dst, full+24, half, stride, 24, 16, 16);\ |
255 | 1908 }\ |
1064 | 1909 void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\ |
1910 uint8_t full[24*17];\ | |
1911 uint8_t halfH[272];\ | |
1912 uint8_t halfV[256];\ | |
1913 uint8_t halfHV[256];\ | |
651 | 1914 copy_block17(full, src, 24, stride, 17);\ |
1915 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ | |
954 | 1916 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\ |
1917 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ | |
651 | 1918 OPNAME ## pixels16_l4(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\ |
1919 }\ | |
1064 | 1920 static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride){\ |
1921 uint8_t full[24*17];\ | |
1922 uint8_t halfH[272];\ | |
1923 uint8_t halfHV[256];\ | |
984 | 1924 copy_block17(full, src, 24, stride, 17);\ |
1925 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ | |
1926 put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\ | |
1927 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ | |
1928 OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\ | |
1929 }\ | |
1064 | 1930 void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\ |
1931 uint8_t full[24*17];\ | |
1932 uint8_t halfH[272];\ | |
1933 uint8_t halfV[256];\ | |
1934 uint8_t halfHV[256];\ | |
651 | 1935 copy_block17(full, src, 24, stride, 17);\ |
1936 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ | |
954 | 1937 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\ |
1938 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ | |
651 | 1939 OPNAME ## pixels16_l4(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\ |
1940 }\ | |
1064 | 1941 static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride){\ |
1942 uint8_t full[24*17];\ | |
1943 uint8_t halfH[272];\ | |
1944 uint8_t halfHV[256];\ | |
984 | 1945 copy_block17(full, src, 24, stride, 17);\ |
1946 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ | |
1947 put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\ | |
1948 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ | |
1949 OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\ | |
1950 }\ | |
1064 | 1951 void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\ |
1952 uint8_t full[24*17];\ | |
1953 uint8_t halfH[272];\ | |
1954 uint8_t halfV[256];\ | |
1955 uint8_t halfHV[256];\ | |
651 | 1956 copy_block17(full, src, 24, stride, 17);\ |
1957 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ | |
954 | 1958 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\ |
1959 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ | |
651 | 1960 OPNAME ## pixels16_l4(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\ |
255 | 1961 }\ |
1064 | 1962 static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride){\ |
1963 uint8_t full[24*17];\ | |
1964 uint8_t halfH[272];\ | |
1965 uint8_t halfHV[256];\ | |
984 | 1966 copy_block17(full, src, 24, stride, 17);\ |
1967 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ | |
1968 put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\ | |
1969 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ | |
1970 OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\ | |
1971 }\ | |
1064 | 1972 void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\ |
1973 uint8_t full[24*17];\ | |
1974 uint8_t halfH[272];\ | |
1975 uint8_t halfV[256];\ | |
1976 uint8_t halfHV[256];\ | |
651 | 1977 copy_block17(full, src, 24, stride, 17);\ |
1978 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full , 16, 24, 17);\ | |
954 | 1979 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\ |
1980 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ | |
651 | 1981 OPNAME ## pixels16_l4(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\ |
1982 }\ | |
1064 | 1983 static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){\ |
1984 uint8_t full[24*17];\ | |
1985 uint8_t halfH[272];\ | |
1986 uint8_t halfHV[256];\ | |
984 | 1987 copy_block17(full, src, 24, stride, 17);\ |
1988 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ | |
1989 put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\ | |
1990 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ | |
1991 OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\ | |
1992 }\ | |
1064 | 1993 static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, int stride){\ |
1994 uint8_t halfH[272];\ | |
1995 uint8_t halfHV[256];\ | |
651 | 1996 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\ |
954 | 1997 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ |
651 | 1998 OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\ |
255 | 1999 }\ |
1064 | 2000 static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, int stride){\ |
2001 uint8_t halfH[272];\ | |
2002 uint8_t halfHV[256];\ | |
651 | 2003 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\ |
954 | 2004 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ |
651 | 2005 OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\ |
2006 }\ | |
1064 | 2007 void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\ |
2008 uint8_t full[24*17];\ | |
2009 uint8_t halfH[272];\ | |
2010 uint8_t halfV[256];\ | |
2011 uint8_t halfHV[256];\ | |
651 | 2012 copy_block17(full, src, 24, stride, 17);\ |
2013 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ | |
954 | 2014 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\ |
2015 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ | |
651 | 2016 OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\ |
255 | 2017 }\ |
1064 | 2018 static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, int stride){\ |
2019 uint8_t full[24*17];\ | |
2020 uint8_t halfH[272];\ | |
984 | 2021 copy_block17(full, src, 24, stride, 17);\ |
2022 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ | |
2023 put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\ | |
2024 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\ | |
2025 }\ | |
1064 | 2026 void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\ |
2027 uint8_t full[24*17];\ | |
2028 uint8_t halfH[272];\ | |
2029 uint8_t halfV[256];\ | |
2030 uint8_t halfHV[256];\ | |
651 | 2031 copy_block17(full, src, 24, stride, 17);\ |
2032 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ | |
954 | 2033 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\ |
2034 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ | |
651 | 2035 OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\ |
2036 }\ | |
1064 | 2037 static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, int stride){\ |
2038 uint8_t full[24*17];\ | |
2039 uint8_t halfH[272];\ | |
984 | 2040 copy_block17(full, src, 24, stride, 17);\ |
2041 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ | |
2042 put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\ | |
2043 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\ | |
2044 }\ | |
1064 | 2045 static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, int stride){\ |
2046 uint8_t halfH[272];\ | |
651 | 2047 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\ |
954 | 2048 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\ |
859 | 2049 } |
255 | 2050 |
/*
 * Pixel ops plugged into QPEL_MC. The filter sum b is scaled back with
 * a round-to-nearest (+16) or no-rounding (+15) shift by 5, then clipped
 * to 0..255 through the cm[] lookup table; op_avg additionally averages
 * the result with the pixel already in the destination.
 */
#define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
#define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1)
#define op_put(a, b) a = cm[((b) + 16)>>5]
#define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5]

/* Instantiate the MPEG-4 qpel MC function sets: rounded put, no-rounding
 * put, and rounded avg.  An avg_no_rnd set is left commented out. */
QPEL_MC(0, put_       , _       , op_put)
QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd)
QPEL_MC(0, avg_       , _       , op_avg)
//QPEL_MC(1, avg_no_rnd , _       , op_avg)
#undef op_avg
#undef op_avg_no_rnd
#undef op_put
#undef op_put_no_rnd
255 | 2064 |
#if 1
/*
 * H.264 half-pel interpolation primitives.
 *
 * H264_LOWPASS(OPNAME, OP, OP2) expands to the 6-tap (1,-5,20,20,-5,1)
 * filters for block sizes 2, 4, 8 and 16:
 *   - h264_qpelN_h_lowpass : horizontal filter over an N-wide, N-high block
 *   - h264_qpelN_v_lowpass : vertical filter over an N-wide, N-high block
 *   - h264_qpelN_hv_lowpass: horizontal pass written unclipped into the
 *     int16_t tmp[] buffer (N+5 rows, to give the vertical filter its
 *     2-above/3-below context), then a vertical pass combined with OP2
 * OP normalizes one filter pass ((b)+16)>>5, OP2 normalizes the cascaded
 * pass ((b)+512)>>10; both clip through cm[] (cropTbl + MAX_NEG_CROP,
 * defined elsewhere in this file).
 * The qpel16 variants are assembled from four 8x8 calls.
 */
#define H264_LOWPASS(OPNAME, OP, OP2) \
static void OPNAME ## h264_qpel2_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int h=2;\
    uint8_t *cm = cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<h; i++)\
    {\
        OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\
        OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\
        dst+=dstStride;\
        src+=srcStride;\
    }\
}\
\
static void OPNAME ## h264_qpel2_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int w=2;\
    uint8_t *cm = cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<w; i++)\
    {\
        const int srcB= src[-2*srcStride];\
        const int srcA= src[-1*srcStride];\
        const int src0= src[0 *srcStride];\
        const int src1= src[1 *srcStride];\
        const int src2= src[2 *srcStride];\
        const int src3= src[3 *srcStride];\
        const int src4= src[4 *srcStride];\
        OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
        OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
        dst++;\
        src++;\
    }\
}\
\
static void OPNAME ## h264_qpel2_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    const int h=2;\
    const int w=2;\
    uint8_t *cm = cropTbl + MAX_NEG_CROP;\
    int i;\
    src -= 2*srcStride; /* start 2 rows above so the vertical pass has context */\
    for(i=0; i<h+5; i++)\
    {\
        tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]);\
        tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]);\
        tmp+=tmpStride;\
        src+=srcStride;\
    }\
    tmp -= tmpStride*(h+5-2); /* rewind to row 2, i.e. the block's first row */\
    for(i=0; i<w; i++)\
    {\
        const int tmpB= tmp[-2*tmpStride];\
        const int tmpA= tmp[-1*tmpStride];\
        const int tmp0= tmp[0 *tmpStride];\
        const int tmp1= tmp[1 *tmpStride];\
        const int tmp2= tmp[2 *tmpStride];\
        const int tmp3= tmp[3 *tmpStride];\
        const int tmp4= tmp[4 *tmpStride];\
        OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
        OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
        dst++;\
        tmp++;\
    }\
}\
static void OPNAME ## h264_qpel4_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int h=4;\
    uint8_t *cm = cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<h; i++)\
    {\
        OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\
        OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\
        OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]));\
        OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]));\
        dst+=dstStride;\
        src+=srcStride;\
    }\
}\
\
static void OPNAME ## h264_qpel4_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int w=4;\
    uint8_t *cm = cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<w; i++)\
    {\
        const int srcB= src[-2*srcStride];\
        const int srcA= src[-1*srcStride];\
        const int src0= src[0 *srcStride];\
        const int src1= src[1 *srcStride];\
        const int src2= src[2 *srcStride];\
        const int src3= src[3 *srcStride];\
        const int src4= src[4 *srcStride];\
        const int src5= src[5 *srcStride];\
        const int src6= src[6 *srcStride];\
        OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
        OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
        OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
        OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
        dst++;\
        src++;\
    }\
}\
\
static void OPNAME ## h264_qpel4_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    const int h=4;\
    const int w=4;\
    uint8_t *cm = cropTbl + MAX_NEG_CROP;\
    int i;\
    src -= 2*srcStride;\
    for(i=0; i<h+5; i++)\
    {\
        tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]);\
        tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]);\
        tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]);\
        tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]);\
        tmp+=tmpStride;\
        src+=srcStride;\
    }\
    tmp -= tmpStride*(h+5-2);\
    for(i=0; i<w; i++)\
    {\
        const int tmpB= tmp[-2*tmpStride];\
        const int tmpA= tmp[-1*tmpStride];\
        const int tmp0= tmp[0 *tmpStride];\
        const int tmp1= tmp[1 *tmpStride];\
        const int tmp2= tmp[2 *tmpStride];\
        const int tmp3= tmp[3 *tmpStride];\
        const int tmp4= tmp[4 *tmpStride];\
        const int tmp5= tmp[5 *tmpStride];\
        const int tmp6= tmp[6 *tmpStride];\
        OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
        OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
        OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\
        OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\
        dst++;\
        tmp++;\
    }\
}\
\
static void OPNAME ## h264_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int h=8;\
    uint8_t *cm = cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<h; i++)\
    {\
        OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]));\
        OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]));\
        OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]));\
        OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]));\
        OP(dst[4], (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]));\
        OP(dst[5], (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]));\
        OP(dst[6], (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]));\
        OP(dst[7], (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]));\
        dst+=dstStride;\
        src+=srcStride;\
    }\
}\
\
static void OPNAME ## h264_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int w=8;\
    uint8_t *cm = cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<w; i++)\
    {\
        const int srcB= src[-2*srcStride];\
        const int srcA= src[-1*srcStride];\
        const int src0= src[0 *srcStride];\
        const int src1= src[1 *srcStride];\
        const int src2= src[2 *srcStride];\
        const int src3= src[3 *srcStride];\
        const int src4= src[4 *srcStride];\
        const int src5= src[5 *srcStride];\
        const int src6= src[6 *srcStride];\
        const int src7= src[7 *srcStride];\
        const int src8= src[8 *srcStride];\
        const int src9= src[9 *srcStride];\
        const int src10=src[10*srcStride];\
        OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
        OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
        OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
        OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
        OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*5 + (src2+src7));\
        OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*5 + (src3+src8));\
        OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*5 + (src4+src9));\
        OP(dst[7*dstStride], (src7+src8)*20 - (src6+src9)*5 + (src5+src10));\
        dst++;\
        src++;\
    }\
}\
\
static void OPNAME ## h264_qpel8_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    const int h=8;\
    const int w=8;\
    uint8_t *cm = cropTbl + MAX_NEG_CROP;\
    int i;\
    src -= 2*srcStride;\
    for(i=0; i<h+5; i++)\
    {\
        tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]);\
        tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]);\
        tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]);\
        tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]);\
        tmp[4]= (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]);\
        tmp[5]= (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]);\
        tmp[6]= (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]);\
        tmp[7]= (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]);\
        tmp+=tmpStride;\
        src+=srcStride;\
    }\
    tmp -= tmpStride*(h+5-2);\
    for(i=0; i<w; i++)\
    {\
        const int tmpB= tmp[-2*tmpStride];\
        const int tmpA= tmp[-1*tmpStride];\
        const int tmp0= tmp[0 *tmpStride];\
        const int tmp1= tmp[1 *tmpStride];\
        const int tmp2= tmp[2 *tmpStride];\
        const int tmp3= tmp[3 *tmpStride];\
        const int tmp4= tmp[4 *tmpStride];\
        const int tmp5= tmp[5 *tmpStride];\
        const int tmp6= tmp[6 *tmpStride];\
        const int tmp7= tmp[7 *tmpStride];\
        const int tmp8= tmp[8 *tmpStride];\
        const int tmp9= tmp[9 *tmpStride];\
        const int tmp10=tmp[10*tmpStride];\
        OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
        OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
        OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\
        OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\
        OP2(dst[4*dstStride], (tmp4+tmp5)*20 - (tmp3+tmp6)*5 + (tmp2+tmp7));\
        OP2(dst[5*dstStride], (tmp5+tmp6)*20 - (tmp4+tmp7)*5 + (tmp3+tmp8));\
        OP2(dst[6*dstStride], (tmp6+tmp7)*20 - (tmp5+tmp8)*5 + (tmp4+tmp9));\
        OP2(dst[7*dstStride], (tmp7+tmp8)*20 - (tmp6+tmp9)*5 + (tmp5+tmp10));\
        dst++;\
        tmp++;\
    }\
}\
\
static void OPNAME ## h264_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    OPNAME ## h264_qpel8_v_lowpass(dst  , src  , dstStride, srcStride);\
    OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\
    src += 8*srcStride;\
    dst += 8*dstStride;\
    OPNAME ## h264_qpel8_v_lowpass(dst  , src  , dstStride, srcStride);\
    OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\
}\
\
static void OPNAME ## h264_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    OPNAME ## h264_qpel8_h_lowpass(dst  , src  , dstStride, srcStride);\
    OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\
    src += 8*srcStride;\
    dst += 8*dstStride;\
    OPNAME ## h264_qpel8_h_lowpass(dst  , src  , dstStride, srcStride);\
    OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\
}\
\
static void OPNAME ## h264_qpel16_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    OPNAME ## h264_qpel8_hv_lowpass(dst  , tmp  , src  , dstStride, tmpStride, srcStride);\
    OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\
    src += 8*srcStride;\
    dst += 8*dstStride;\
    OPNAME ## h264_qpel8_hv_lowpass(dst  , tmp  , src  , dstStride, tmpStride, srcStride);\
    OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\
}\
2329 | |
/*
 * H264_MC(OPNAME, SIZE) expands to the 16 quarter-pel motion compensation
 * entry points mcXY_c for a SIZE x SIZE block, where X,Y in 0..3 is the
 * quarter-pel offset (in units of 1/4 pixel, horizontal/vertical):
 *   - mc00 applies OPNAME's pixel op (put/avg) with no filtering;
 *   - half-pel positions (mc20, mc02, mc22) use one lowpass call directly;
 *   - quarter-pel positions average two planes with pixels_l2 (e.g. mc10
 *     averages the source with the horizontal half-pel plane, mc11 averages
 *     the horizontal and vertical half-pel planes, mc21 the horizontal and
 *     the hv half-pel planes, etc.).
 * full[] is a copy of the source with 2 extra rows above and 3 below
 * (SIZE+5 rows total) so the vertical 6-tap filter has its context;
 * full_mid points at the block's own first row inside it.
 * tmp[] is the int16_t intermediate buffer for the hv (center) filter.
 */
#define H264_MC(OPNAME, SIZE) \
static void OPNAME ## h264_qpel ## SIZE ## _mc00_c (uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## pixels ## SIZE ## _c(dst, src, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc10_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t half[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, src, half, stride, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc20_c(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## h264_qpel ## SIZE ## _h_lowpass(dst, src, stride, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc30_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t half[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, src+1, half, stride, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc01_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t half[SIZE*SIZE];\
    copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, full_mid, half, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc02_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
    OPNAME ## h264_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc03_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t half[SIZE*SIZE];\
    copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, full_mid+SIZE, half, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc11_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
    copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc31_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
    copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc13_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
    copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc33_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
    copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc22_c(uint8_t *dst, uint8_t *src, int stride){\
    int16_t tmp[SIZE*(SIZE+5)];\
    OPNAME ## h264_qpel ## SIZE ## _hv_lowpass(dst, tmp, src, stride, SIZE, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc21_c(uint8_t *dst, uint8_t *src, int stride){\
    int16_t tmp[SIZE*(SIZE+5)];\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfHV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc23_c(uint8_t *dst, uint8_t *src, int stride){\
    int16_t tmp[SIZE*(SIZE+5)];\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfHV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc12_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    int16_t tmp[SIZE*(SIZE+5)];\
    uint8_t halfV[SIZE*SIZE];\
    uint8_t halfHV[SIZE*SIZE];\
    copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc32_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    int16_t tmp[SIZE*(SIZE+5)];\
    uint8_t halfV[SIZE*SIZE];\
    uint8_t halfHV[SIZE*SIZE];\
    copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\
}\
2466 | |
/*
 * Pixel ops plugged into H264_LOWPASS: op/op2 differ only in the
 * normalization shift (one filter pass: +16>>5; cascaded h+v passes in the
 * hv filter: +512>>10); both clip via cm[], and the avg variants average
 * with the existing destination pixel.
 */
#define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
//#define op_avg2(a, b) a = (((a)*w1+cm[((b) + 16)>>5]*w2 + o + 64)>>7)
#define op_put(a, b) a = cm[((b) + 16)>>5]
#define op2_avg(a, b) a = (((a)+cm[((b) + 512)>>10]+1)>>1)
#define op2_put(a, b) a = cm[((b) + 512)>>10]

H264_LOWPASS(put_       , op_put, op2_put)
H264_LOWPASS(avg_       , op_avg, op2_avg)
/* MC entry points; note that no avg_ set is instantiated for SIZE 2 here. */
H264_MC(put_, 2)
H264_MC(put_, 4)
H264_MC(put_, 8)
H264_MC(put_, 16)
H264_MC(avg_, 4)
H264_MC(avg_, 8)
H264_MC(avg_, 16)

#undef op_avg
#undef op_put
#undef op2_avg
#undef op2_put
#endif
2488 | |
/* Unidirectional weighted sample: block[x] = clip((block[x]*weight + offset) >> log2_denom). */
#define op_scale1(x)  block[x] = clip_uint8( (block[x]*weight + offset) >> log2_denom )
/* Bidirectional weighted sample combining src and dst with separate weights. */
#define op_scale2(x)  dst[x] = clip_uint8( (src[x]*weights + dst[x]*weightd + offset) >> (log2_denom+1))

/* Generates the explicit/implicit weighted-prediction functions for a WxH
 * block.  The unrolled op_scale calls with "if(W==n) continue;" cut-offs let
 * one macro body serve widths 2, 4, 8 and 16. */
#define H264_WEIGHT(W,H) \
static void weight_h264_pixels ## W ## x ## H ## _c(uint8_t *block, int stride, int log2_denom, int weight, int offset){ \
    int y; \
    /* pre-scale the offset and fold in the rounding term */ \
    offset <<= log2_denom; \
    if(log2_denom) offset += 1<<(log2_denom-1); \
    for(y=0; y<H; y++, block += stride){ \
        op_scale1(0); \
        op_scale1(1); \
        if(W==2) continue; \
        op_scale1(2); \
        op_scale1(3); \
        if(W==4) continue; \
        op_scale1(4); \
        op_scale1(5); \
        op_scale1(6); \
        op_scale1(7); \
        if(W==8) continue; \
        op_scale1(8); \
        op_scale1(9); \
        op_scale1(10); \
        op_scale1(11); \
        op_scale1(12); \
        op_scale1(13); \
        op_scale1(14); \
        op_scale1(15); \
    } \
} \
static void biweight_h264_pixels ## W ## x ## H ## _c(uint8_t *dst, uint8_t *src, int stride, int log2_denom, int weightd, int weights, int offset){ \
    int y; \
    /* rounding offset: ((offset+1)|1) forces an odd value before scaling */ \
    offset = ((offset + 1) | 1) << log2_denom; \
    for(y=0; y<H; y++, dst += stride, src += stride){ \
        op_scale2(0); \
        op_scale2(1); \
        if(W==2) continue; \
        op_scale2(2); \
        op_scale2(3); \
        if(W==4) continue; \
        op_scale2(4); \
        op_scale2(5); \
        op_scale2(6); \
        op_scale2(7); \
        if(W==8) continue; \
        op_scale2(8); \
        op_scale2(9); \
        op_scale2(10); \
        op_scale2(11); \
        op_scale2(12); \
        op_scale2(13); \
        op_scale2(14); \
        op_scale2(15); \
    } \
}

H264_WEIGHT(16,16)
H264_WEIGHT(16,8)
H264_WEIGHT(8,16)
H264_WEIGHT(8,8)
H264_WEIGHT(8,4)
H264_WEIGHT(4,8)
H264_WEIGHT(4,4)
H264_WEIGHT(4,2)
H264_WEIGHT(2,4)
H264_WEIGHT(2,2)

#undef op_scale1
#undef op_scale2
#undef H264_WEIGHT
2558 | |
936 | 2559 static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){ |
2560 uint8_t *cm = cropTbl + MAX_NEG_CROP; | |
2561 int i; | |
2562 | |
2563 for(i=0; i<h; i++){ | |
2564 dst[0]= cm[(9*(src[0] + src[1]) - (src[-1] + src[2]) + 8)>>4]; | |
2565 dst[1]= cm[(9*(src[1] + src[2]) - (src[ 0] + src[3]) + 8)>>4]; | |
2566 dst[2]= cm[(9*(src[2] + src[3]) - (src[ 1] + src[4]) + 8)>>4]; | |
2567 dst[3]= cm[(9*(src[3] + src[4]) - (src[ 2] + src[5]) + 8)>>4]; | |
2568 dst[4]= cm[(9*(src[4] + src[5]) - (src[ 3] + src[6]) + 8)>>4]; | |
2569 dst[5]= cm[(9*(src[5] + src[6]) - (src[ 4] + src[7]) + 8)>>4]; | |
2570 dst[6]= cm[(9*(src[6] + src[7]) - (src[ 5] + src[8]) + 8)>>4]; | |
2571 dst[7]= cm[(9*(src[7] + src[8]) - (src[ 6] + src[9]) + 8)>>4]; | |
2572 dst+=dstStride; | |
2967 | 2573 src+=srcStride; |
936 | 2574 } |
2575 } | |
2576 | |
2577 static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){ | |
2578 uint8_t *cm = cropTbl + MAX_NEG_CROP; | |
2579 int i; | |
2580 | |
2581 for(i=0; i<w; i++){ | |
2582 const int src_1= src[ -srcStride]; | |
2583 const int src0 = src[0 ]; | |
2584 const int src1 = src[ srcStride]; | |
2585 const int src2 = src[2*srcStride]; | |
2586 const int src3 = src[3*srcStride]; | |
2587 const int src4 = src[4*srcStride]; | |
2588 const int src5 = src[5*srcStride]; | |
2589 const int src6 = src[6*srcStride]; | |
2590 const int src7 = src[7*srcStride]; | |
2591 const int src8 = src[8*srcStride]; | |
2592 const int src9 = src[9*srcStride]; | |
2593 dst[0*dstStride]= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4]; | |
2594 dst[1*dstStride]= cm[(9*(src1 + src2) - (src0 + src3) + 8)>>4]; | |
2595 dst[2*dstStride]= cm[(9*(src2 + src3) - (src1 + src4) + 8)>>4]; | |
2596 dst[3*dstStride]= cm[(9*(src3 + src4) - (src2 + src5) + 8)>>4]; | |
2597 dst[4*dstStride]= cm[(9*(src4 + src5) - (src3 + src6) + 8)>>4]; | |
2598 dst[5*dstStride]= cm[(9*(src5 + src6) - (src4 + src7) + 8)>>4]; | |
2599 dst[6*dstStride]= cm[(9*(src6 + src7) - (src5 + src8) + 8)>>4]; | |
2600 dst[7*dstStride]= cm[(9*(src7 + src8) - (src6 + src9) + 8)>>4]; | |
2601 src++; | |
2602 dst++; | |
2603 } | |
2604 } | |
2605 | |
/* WMV2 mspel motion-compensation entry points.  The mcXY suffix selects the
 * sub-pel position: X = horizontal, Y = vertical phase.  Half-pel taps come
 * from the wmv2_mspel8 lowpass filters; the _l2 variants average the filtered
 * result with a shifted copy of the source. */
static void put_mspel8_mc00_c (uint8_t *dst, uint8_t *src, int stride){
    put_pixels8_c(dst, src, stride, 8);
}

static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t half[64];
    wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
    put_pixels8_l2(dst, src, half, stride, stride, 8, 8);
}

static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){
    wmv2_mspel8_h_lowpass(dst, src, stride, stride, 8);
}

static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t half[64];
    wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
    put_pixels8_l2(dst, src+1, half, stride, stride, 8, 8);
}

static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){
    wmv2_mspel8_v_lowpass(dst, src, stride, stride, 8);
}

static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t halfH[88];   /* 8x11: one extra row above, two below, for the V pass */
    uint8_t halfV[64];
    uint8_t halfHV[64];
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(halfV, src, 8, stride, 8);
    wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
    put_pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);
}
static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t halfH[88];
    uint8_t halfV[64];
    uint8_t halfHV[64];
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(halfV, src+1, 8, stride, 8);
    wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
    put_pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);
}
static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t halfH[88];
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(dst, halfH+8, stride, 8, 8);
}
2653 | |
1644 | 2654 static void h263_v_loop_filter_c(uint8_t *src, int stride, int qscale){ |
2655 int x; | |
2656 const int strength= ff_h263_loop_filter_strength[qscale]; | |
2967 | 2657 |
1644 | 2658 for(x=0; x<8; x++){ |
2659 int d1, d2, ad1; | |
2660 int p0= src[x-2*stride]; | |
2661 int p1= src[x-1*stride]; | |
2662 int p2= src[x+0*stride]; | |
2663 int p3= src[x+1*stride]; | |
2664 int d = (p0 - p3 + 4*(p2 - p1)) / 8; | |
2665 | |
2666 if (d<-2*strength) d1= 0; | |
2667 else if(d<- strength) d1=-2*strength - d; | |
2668 else if(d< strength) d1= d; | |
2669 else if(d< 2*strength) d1= 2*strength - d; | |
2670 else d1= 0; | |
2967 | 2671 |
1644 | 2672 p1 += d1; |
2673 p2 -= d1; | |
2674 if(p1&256) p1= ~(p1>>31); | |
2675 if(p2&256) p2= ~(p2>>31); | |
2967 | 2676 |
1644 | 2677 src[x-1*stride] = p1; |
2678 src[x+0*stride] = p2; | |
2679 | |
1645 | 2680 ad1= ABS(d1)>>1; |
2967 | 2681 |
1644 | 2682 d2= clip((p0-p3)/4, -ad1, ad1); |
2967 | 2683 |
1644 | 2684 src[x-2*stride] = p0 - d2; |
2685 src[x+ stride] = p3 + d2; | |
2686 } | |
2687 } | |
2688 | |
2689 static void h263_h_loop_filter_c(uint8_t *src, int stride, int qscale){ | |
2690 int y; | |
2691 const int strength= ff_h263_loop_filter_strength[qscale]; | |
2967 | 2692 |
1644 | 2693 for(y=0; y<8; y++){ |
2694 int d1, d2, ad1; | |
2695 int p0= src[y*stride-2]; | |
2696 int p1= src[y*stride-1]; | |
2697 int p2= src[y*stride+0]; | |
2698 int p3= src[y*stride+1]; | |
2699 int d = (p0 - p3 + 4*(p2 - p1)) / 8; | |
2700 | |
2701 if (d<-2*strength) d1= 0; | |
2702 else if(d<- strength) d1=-2*strength - d; | |
2703 else if(d< strength) d1= d; | |
2704 else if(d< 2*strength) d1= 2*strength - d; | |
2705 else d1= 0; | |
2967 | 2706 |
1644 | 2707 p1 += d1; |
2708 p2 -= d1; | |
2709 if(p1&256) p1= ~(p1>>31); | |
2710 if(p2&256) p2= ~(p2>>31); | |
2967 | 2711 |
1644 | 2712 src[y*stride-1] = p1; |
2713 src[y*stride+0] = p2; | |
2714 | |
2715 ad1= ABS(d1)>>1; | |
2967 | 2716 |
1644 | 2717 d2= clip((p0-p3)/4, -ad1, ad1); |
2967 | 2718 |
1644 | 2719 src[y*stride-2] = p0 - d2; |
2720 src[y*stride+1] = p3 + d2; | |
2721 } | |
2722 } | |
936 | 2723 |
/* H.261 in-loop filter: separable (1,2,1)/4 smoothing of an 8x8 block.
 * The vertical pass runs into temp[] at 4x precision (edge rows are passed
 * through as 4*src), then the horizontal pass normalizes back to 8 bits
 * (edge columns divided by 4, interior by 16, with rounding). */
static void h261_loop_filter_c(uint8_t *src, int stride){
    int x, y;
    int temp[64];

    /* vertical pass: top and bottom rows copied (scaled by 4) */
    for(x=0; x<8; x++){
        temp[x      ] = 4*src[x           ];
        temp[x + 7*8] = 4*src[x + 7*stride];
    }
    for(y=1; y<7; y++){
        for(x=0; x<8; x++){
            const int src_pos = y * stride + x;
            temp[y*8 + x] = src[src_pos - stride] + 2*src[src_pos] + src[src_pos + stride];
        }
    }

    /* horizontal pass with rounding, writing back over src */
    for(y=0; y<8; y++){
        src[  y*stride] = (temp[  y*8] + 2)>>2;
        src[7+y*stride] = (temp[7+y*8] + 2)>>2;
        for(x=1; x<7; x++){
            const int t = y * 8 + x;
            src[y*stride + x] = (temp[t-1] + 2*temp[t] + temp[t+1] + 8)>>4;
        }
    }
}
2707
360024d31dab
H.264 deblocking optimizations (mmx for chroma_bS4 case, convert existing cases to 8-bit math)
lorenm
parents:
2696
diff
changeset
|
2751 static inline void h264_loop_filter_luma_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0) |
2633 | 2752 { |
2753 int i, d; | |
2754 for( i = 0; i < 4; i++ ) { | |
2755 if( tc0[i] < 0 ) { | |
2756 pix += 4*ystride; | |
2757 continue; | |
2758 } | |
2759 for( d = 0; d < 4; d++ ) { | |
2760 const int p0 = pix[-1*xstride]; | |
2761 const int p1 = pix[-2*xstride]; | |
2762 const int p2 = pix[-3*xstride]; | |
2763 const int q0 = pix[0]; | |
2764 const int q1 = pix[1*xstride]; | |
2765 const int q2 = pix[2*xstride]; | |
2967 | 2766 |
2633 | 2767 if( ABS( p0 - q0 ) < alpha && |
2768 ABS( p1 - p0 ) < beta && | |
2769 ABS( q1 - q0 ) < beta ) { | |
2967 | 2770 |
2633 | 2771 int tc = tc0[i]; |
2772 int i_delta; | |
2967 | 2773 |
2633 | 2774 if( ABS( p2 - p0 ) < beta ) { |
2651 | 2775 pix[-2*xstride] = p1 + clip( (( p2 + ( ( p0 + q0 + 1 ) >> 1 ) ) >> 1) - p1, -tc0[i], tc0[i] ); |
2633 | 2776 tc++; |
2777 } | |
2778 if( ABS( q2 - q0 ) < beta ) { | |
2651 | 2779 pix[ xstride] = q1 + clip( (( q2 + ( ( p0 + q0 + 1 ) >> 1 ) ) >> 1) - q1, -tc0[i], tc0[i] ); |
2633 | 2780 tc++; |
2781 } | |
2967 | 2782 |
2633 | 2783 i_delta = clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc ); |
2784 pix[-xstride] = clip_uint8( p0 + i_delta ); /* p0' */ | |
2785 pix[0] = clip_uint8( q0 - i_delta ); /* q0' */ | |
2786 } | |
2787 pix += ystride; | |
2788 } | |
2789 } | |
2790 } | |
/* Luma deblocking entry points: the two orientations differ only in which
 * direction crosses the edge (xstride) and which runs along it (ystride). */
static void h264_v_loop_filter_luma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
{
    h264_loop_filter_luma_c(pix, stride, 1, alpha, beta, tc0);
}
static void h264_h_loop_filter_luma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
{
    h264_loop_filter_luma_c(pix, 1, stride, alpha, beta, tc0);
}
2799 | |
/* H.264 chroma deblocking (normal mode): 8 lines across the edge in four
 * groups of 2; only p0/q0 are corrected, clipped to +-tc0[i]. */
static inline void h264_loop_filter_chroma_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0)
{
    int i, d;
    for( i = 0; i < 4; i++ ) {
        const int tc = tc0[i];
        if( tc <= 0 ) {
            pix += 2*ystride;
            continue;
        }
        for( d = 0; d < 2; d++, pix += ystride ) {
            const int p0 = pix[-1*xstride];
            const int p1 = pix[-2*xstride];
            const int q0 = pix[0];
            const int q1 = pix[1*xstride];
            int delta;

            if( ABS( p0 - q0 ) >= alpha ||
                ABS( p1 - p0 ) >= beta  ||
                ABS( q1 - q0 ) >= beta )
                continue;

            delta = clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
            pix[-xstride] = clip_uint8( p0 + delta );  /* p0' */
            pix[0]        = clip_uint8( q0 - delta );  /* q0' */
        }
    }
}
/* Chroma deblocking entry points for the two edge orientations. */
static void h264_v_loop_filter_chroma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
{
    h264_loop_filter_chroma_c(pix, stride, 1, alpha, beta, tc0);
}
static void h264_h_loop_filter_chroma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
{
    h264_loop_filter_chroma_c(pix, 1, stride, alpha, beta, tc0);
}
2836 | |
/* H.264 chroma deblocking without tc clipping: filters 8 lines across the
 * edge, replacing p0/q0 with (2,1,1)/4 averages when the activity thresholds
 * pass.  No per-group skip logic in this variant. */
static inline void h264_loop_filter_chroma_intra_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta)
{
    int d;
    for( d = 0; d < 8; d++, pix += ystride ) {
        const int p0 = pix[-1*xstride];
        const int p1 = pix[-2*xstride];
        const int q0 = pix[0];
        const int q1 = pix[1*xstride];

        if( ABS( p0 - q0 ) < alpha &&
            ABS( p1 - p0 ) < beta &&
            ABS( q1 - q0 ) < beta ) {
            pix[-xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2;   /* p0' */
            pix[0]        = ( 2*q1 + q0 + p1 + 2 ) >> 2;   /* q0' */
        }
    }
}
/* Entry points for the no-tc chroma filter, one per edge orientation. */
static void h264_v_loop_filter_chroma_intra_c(uint8_t *pix, int stride, int alpha, int beta)
{
    h264_loop_filter_chroma_intra_c(pix, stride, 1, alpha, beta);
}
static void h264_h_loop_filter_chroma_intra_c(uint8_t *pix, int stride, int alpha, int beta)
{
    h264_loop_filter_chroma_intra_c(pix, 1, stride, alpha, beta);
}
2864 |
/* Sum of absolute differences over a 16-pixel-wide block, h rows.
 * First argument is an unused context pointer (comparison-function ABI). */
static inline int pix_abs16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sad = 0;
    int row, j;

    for(row=0; row<h; row++) {
        for(j=0; j<16; j++)
            sad += abs(pix1[j] - pix2[j]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return sad;
}
2892 | |
/* SAD of a 16-wide block against the half-pel horizontally interpolated
 * reference (rounded average of each pixel pair; reads pix2[0..16]). */
static int pix_abs16_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sad = 0;
    int row, j;

    for(row=0; row<h; row++) {
        for(j=0; j<16; j++)
            sad += abs(pix1[j] - avg2(pix2[j], pix2[j+1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return sad;
}
2920 | |
/* SAD of a 16-wide block against the half-pel vertically interpolated
 * reference (average of each pixel with the one a row below). */
static int pix_abs16_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *pix3 = pix2 + line_size;   /* next reference row */
    int sad = 0;
    int row, j;

    for(row=0; row<h; row++) {
        for(j=0; j<16; j++)
            sad += abs(pix1[j] - avg2(pix2[j], pix3[j]));
        pix1 += line_size;
        pix2 += line_size;
        pix3 += line_size;
    }
    return sad;
}
2950 | |
/* SAD of a 16-wide block against the half-pel diagonally interpolated
 * reference (rounded average of the 2x2 neighborhood; reads pix2[0..16]
 * on two rows). */
static int pix_abs16_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *pix3 = pix2 + line_size;   /* next reference row */
    int sad = 0;
    int row, j;

    for(row=0; row<h; row++) {
        for(j=0; j<16; j++)
            sad += abs(pix1[j] - avg4(pix2[j], pix2[j+1], pix3[j], pix3[j+1]));
        pix1 += line_size;
        pix2 += line_size;
        pix3 += line_size;
    }
    return sad;
}
2980 | |
/* Sum of absolute differences over an 8-pixel-wide block, h rows. */
static inline int pix_abs8_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sad = 0;
    int row, j;

    for(row=0; row<h; row++) {
        for(j=0; j<8; j++)
            sad += abs(pix1[j] - pix2[j]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return sad;
}
3000 | |
/* SAD of an 8-wide block against the half-pel horizontally interpolated
 * reference (reads pix2[0..8]). */
static int pix_abs8_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sad = 0;
    int row, j;

    for(row=0; row<h; row++) {
        for(j=0; j<8; j++)
            sad += abs(pix1[j] - avg2(pix2[j], pix2[j+1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return sad;
}
3020 | |
/* SAD of an 8-wide block against the half-pel vertically interpolated
 * reference. */
static int pix_abs8_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *pix3 = pix2 + line_size;   /* next reference row */
    int sad = 0;
    int row, j;

    for(row=0; row<h; row++) {
        for(j=0; j<8; j++)
            sad += abs(pix1[j] - avg2(pix2[j], pix3[j]));
        pix1 += line_size;
        pix2 += line_size;
        pix3 += line_size;
    }
    return sad;
}
3042 | |
/* SAD of an 8-wide block against the half-pel diagonally interpolated
 * reference (2x2 rounded average; reads pix2[0..8] on two rows). */
static int pix_abs8_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *pix3 = pix2 + line_size;   /* next reference row */
    int sad = 0;
    int row, j;

    for(row=0; row<h; row++) {
        for(j=0; j<8; j++)
            sad += abs(pix1[j] - avg4(pix2[j], pix2[j+1], pix3[j], pix3[j+1]));
        pix1 += line_size;
        pix2 += line_size;
        pix3 += line_size;
    }
    return sad;
}
3064 | |
2834 | 3065 static int nsse16_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){ |
3066 MpegEncContext *c = v; | |
2065
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3067 int score1=0; |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3068 int score2=0; |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3069 int x,y; |
2066 | 3070 |
2065
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3071 for(y=0; y<h; y++){ |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3072 for(x=0; x<16; x++){ |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3073 score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]); |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3074 } |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3075 if(y+1<h){ |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3076 for(x=0; x<15; x++){ |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3077 score2+= ABS( s1[x ] - s1[x +stride] |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3078 - s1[x+1] + s1[x+1+stride]) |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3079 -ABS( s2[x ] - s2[x +stride] |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3080 - s2[x+1] + s2[x+1+stride]); |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3081 } |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3082 } |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3083 s1+= stride; |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3084 s2+= stride; |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3085 } |
2066 | 3086 |
3087 if(c) return score1 + ABS(score2)*c->avctx->nsse_weight; | |
3088 else return score1 + ABS(score2)*8; | |
2065
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3089 } |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3090 |
2834 | 3091 static int nsse8_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){ |
3092 MpegEncContext *c = v; | |
2065
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3093 int score1=0; |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3094 int score2=0; |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3095 int x,y; |
2967 | 3096 |
2065
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3097 for(y=0; y<h; y++){ |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3098 for(x=0; x<8; x++){ |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3099 score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]); |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3100 } |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3101 if(y+1<h){ |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3102 for(x=0; x<7; x++){ |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3103 score2+= ABS( s1[x ] - s1[x +stride] |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3104 - s1[x+1] + s1[x+1+stride]) |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3105 -ABS( s2[x ] - s2[x +stride] |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3106 - s2[x+1] + s2[x+1+stride]); |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3107 } |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3108 } |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3109 s1+= stride; |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3110 s2+= stride; |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3111 } |
2967 | 3112 |
2066 | 3113 if(c) return score1 + ABS(score2)*c->avctx->nsse_weight; |
3114 else return score1 + ABS(score2)*8; | |
2065
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3115 } |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3116 |
1784 | 3117 static int try_8x8basis_c(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale){ |
3118 int i; | |
3119 unsigned int sum=0; | |
3120 | |
3121 for(i=0; i<8*8; i++){ | |
3122 int b= rem[i] + ((basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT)); | |
3123 int w= weight[i]; | |
3124 b>>= RECON_SHIFT; | |
3125 assert(-512<b && b<512); | |
3126 | |
3127 sum += (w*b)*(w*b)>>4; | |
3128 } | |
3129 return sum>>2; | |
3130 } | |
3131 | |
3132 static void add_8x8basis_c(int16_t rem[64], int16_t basis[64], int scale){ | |
3133 int i; | |
3134 | |
3135 for(i=0; i<8*8; i++){ | |
3136 rem[i] += (basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT); | |
2967 | 3137 } |
1784 | 3138 } |
3139 | |
1100 | 3140 /** |
3141 * permutes an 8x8 block. | |
1101 | 3142 * @param block the block which will be permuted according to the given permutation vector |
1100 | 3143 * @param permutation the permutation vector |
3144 * @param last the last non zero coefficient in scantable order, used to speed the permutation up | |
2967 | 3145 * @param scantable the used scantable, this is only used to speed the permutation up, the block is not |
1101 | 3146 * (inverse) permutated to scantable order! |
1100 | 3147 */ |
1064 | 3148 void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last) |
174
ac5075a55488
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
88
diff
changeset
|
3149 { |
764 | 3150 int i; |
945 | 3151 DCTELEM temp[64]; |
2967 | 3152 |
764 | 3153 if(last<=0) return; |
882 | 3154 //if(permutation[1]==1) return; //FIXME its ok but not clean and might fail for some perms |
174
ac5075a55488
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
88
diff
changeset
|
3155 |
764 | 3156 for(i=0; i<=last; i++){ |
3157 const int j= scantable[i]; | |
3158 temp[j]= block[j]; | |
3159 block[j]=0; | |
3160 } | |
2967 | 3161 |
764 | 3162 for(i=0; i<=last; i++){ |
3163 const int j= scantable[i]; | |
3164 const int perm_j= permutation[j]; | |
3165 block[perm_j]= temp[j]; | |
3166 } | |
174
ac5075a55488
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
88
diff
changeset
|
3167 } |
34 | 3168 |
/** Comparison function that ignores its input and always reports 0. */
static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h){
    return 0;
}
3172 | |
3173 void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type){ | |
3174 int i; | |
2967 | 3175 |
1729 | 3176 memset(cmp, 0, sizeof(void*)*5); |
2967 | 3177 |
1729 | 3178 for(i=0; i<5; i++){ |
3179 switch(type&0xFF){ | |
3180 case FF_CMP_SAD: | |
3181 cmp[i]= c->sad[i]; | |
3182 break; | |
3183 case FF_CMP_SATD: | |
3184 cmp[i]= c->hadamard8_diff[i]; | |
3185 break; | |
3186 case FF_CMP_SSE: | |
3187 cmp[i]= c->sse[i]; | |
3188 break; | |
3189 case FF_CMP_DCT: | |
3190 cmp[i]= c->dct_sad[i]; | |
3191 break; | |
3010
533c6386eca9
8x8 integer dct from x264 as cmp function (under CONFIG_GPL)
michael
parents:
2979
diff
changeset
|
3192 case FF_CMP_DCT264: |
533c6386eca9
8x8 integer dct from x264 as cmp function (under CONFIG_GPL)
michael
parents:
2979
diff
changeset
|
3193 cmp[i]= c->dct264_sad[i]; |
533c6386eca9
8x8 integer dct from x264 as cmp function (under CONFIG_GPL)
michael
parents:
2979
diff
changeset
|
3194 break; |
2382 | 3195 case FF_CMP_DCTMAX: |
3196 cmp[i]= c->dct_max[i]; | |
3197 break; | |
1729 | 3198 case FF_CMP_PSNR: |
3199 cmp[i]= c->quant_psnr[i]; | |
3200 break; | |
3201 case FF_CMP_BIT: | |
3202 cmp[i]= c->bit[i]; | |
3203 break; | |
3204 case FF_CMP_RD: | |
3205 cmp[i]= c->rd[i]; | |
3206 break; | |
3207 case FF_CMP_VSAD: | |
3208 cmp[i]= c->vsad[i]; | |
3209 break; | |
3210 case FF_CMP_VSSE: | |
3211 cmp[i]= c->vsse[i]; | |
3212 break; | |
3213 case FF_CMP_ZERO: | |
3214 cmp[i]= zero_cmp; | |
3215 break; | |
2065
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3216 case FF_CMP_NSSE: |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3217 cmp[i]= c->nsse[i]; |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3218 break; |
2184 | 3219 case FF_CMP_W53: |
3220 cmp[i]= c->w53[i]; | |
3221 break; | |
3222 case FF_CMP_W97: | |
3223 cmp[i]= c->w97[i]; | |
3224 break; | |
1729 | 3225 default: |
3226 av_log(NULL, AV_LOG_ERROR,"internal error in cmp function selection\n"); | |
3227 } | |
3228 } | |
3229 } | |
3230 | |
1101 | 3231 /** |
3232 * memset(blocks, 0, sizeof(DCTELEM)*6*64) | |
3233 */ | |
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
764
diff
changeset
|
3234 static void clear_blocks_c(DCTELEM *blocks) |
296 | 3235 { |
3236 memset(blocks, 0, sizeof(DCTELEM)*6*64); | |
3237 } | |
3238 | |
/** dst[i] += src[i] for i in [0,w); main loop unrolled by 8, bytes wrap mod 256. */
static void add_bytes_c(uint8_t *dst, uint8_t *src, int w){
    int i = 0;

    for (; i + 7 < w; i += 8) {
        dst[i]   += src[i];
        dst[i+1] += src[i+1];
        dst[i+2] += src[i+2];
        dst[i+3] += src[i+3];
        dst[i+4] += src[i+4];
        dst[i+5] += src[i+5];
        dst[i+6] += src[i+6];
        dst[i+7] += src[i+7];
    }
    for (; i < w; i++)
        dst[i] += src[i];
}
3254 | |
/** dst[i] = src1[i] - src2[i] for i in [0,w); main loop unrolled by 8, wraps mod 256. */
static void diff_bytes_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
    int i = 0;

    for (; i + 7 < w; i += 8) {
        dst[i]   = src1[i]   - src2[i];
        dst[i+1] = src1[i+1] - src2[i+1];
        dst[i+2] = src1[i+2] - src2[i+2];
        dst[i+3] = src1[i+3] - src2[i+3];
        dst[i+4] = src1[i+4] - src2[i+4];
        dst[i+5] = src1[i+5] - src2[i+5];
        dst[i+6] = src1[i+6] - src2[i+6];
        dst[i+7] = src1[i+7] - src2[i+7];
    }
    for (; i < w; i++)
        dst[i] = src1[i] - src2[i];
}
3270 | |
/**
 * Subtract HuffYUV-style median prediction of src2 (predicted from src1 and
 * the running left/left_top state) and store the residual in dst.
 * left / left_top carry prediction state across calls and are updated.
 */
static void sub_hfyu_median_prediction_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w, int *left, int *left_top){
    uint8_t l  = *left;
    uint8_t lt = *left_top;
    int i;

    for (i = 0; i < w; i++) {
        const int pred = mid_pred(l, src1[i], (l + src1[i] - lt) & 0xFF);
        lt = src1[i];
        l  = src2[i];
        dst[i] = l - pred;
    }

    *left     = l;
    *left_top = lt;
}
3288 | |
/* o1/o2 = sum and difference of i1,i2 */
#define BUTTERFLY2(o1,o2,i1,i2) \
    o1= (i1)+(i2);\
    o2= (i1)-(i2);

/* in-place butterfly on x,y */
#define BUTTERFLY1(x,y) \
{\
    int a,b;\
    a= x;\
    b= y;\
    x= a+b;\
    y= a-b;\
}

/* |x+y| + |x-y|: last butterfly stage folded into the absolute sum */
#define BUTTERFLYA(x,y) (ABS((x)+(y)) + ABS((x)-(y)))

/** SATD: sum of absolute values of the 8x8 Hadamard transform of src-dst. */
static int hadamard8_diff8x8_c(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){
    int temp[64];
    int sum = 0;
    int i;

    assert(h==8);

    /* horizontal 8-point Hadamard transform of each difference row */
    for (i = 0; i < 8; i++) {
        //FIXME try pointer walks
        BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0]-dst[stride*i+0], src[stride*i+1]-dst[stride*i+1]);
        BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2]-dst[stride*i+2], src[stride*i+3]-dst[stride*i+3]);
        BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4]-dst[stride*i+4], src[stride*i+5]-dst[stride*i+5]);
        BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6]-dst[stride*i+6], src[stride*i+7]-dst[stride*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
        BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+5], temp[8*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
        BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
    }

    /* vertical transform, accumulating absolute values */
    for (i = 0; i < 8; i++) {
        BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
        BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
        BUTTERFLY1(temp[8*6+i], temp[8*7+i]);

        BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
        BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
        BUTTERFLY1(temp[8*5+i], temp[8*7+i]);

        sum += BUTTERFLYA(temp[8*0+i], temp[8*4+i])
             + BUTTERFLYA(temp[8*1+i], temp[8*5+i])
             + BUTTERFLYA(temp[8*2+i], temp[8*6+i])
             + BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
    }
    return sum;
}
3355 | |
/** Intra SATD: Hadamard absolute sum of src itself, with the DC term removed. */
static int hadamard8_intra8x8_c(/*MpegEncContext*/ void *s, uint8_t *src, uint8_t *dummy, int stride, int h){
    int temp[64];
    int sum = 0;
    int i;

    assert(h==8);

    /* horizontal 8-point Hadamard transform of each source row */
    for (i = 0; i < 8; i++) {
        //FIXME try pointer walks
        BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0], src[stride*i+1]);
        BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2], src[stride*i+3]);
        BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4], src[stride*i+5]);
        BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6], src[stride*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
        BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+5], temp[8*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
        BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
    }

    /* vertical transform, accumulating absolute values */
    for (i = 0; i < 8; i++) {
        BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
        BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
        BUTTERFLY1(temp[8*6+i], temp[8*7+i]);

        BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
        BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
        BUTTERFLY1(temp[8*5+i], temp[8*7+i]);

        sum += BUTTERFLYA(temp[8*0+i], temp[8*4+i])
             + BUTTERFLYA(temp[8*1+i], temp[8*5+i])
             + BUTTERFLYA(temp[8*2+i], temp[8*6+i])
             + BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
    }

    sum -= ABS(temp[8*0] + temp[8*4]); // -mean

    return sum;
}
3403 | |
1708 | 3404 static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){ |
936 | 3405 MpegEncContext * const s= (MpegEncContext *)c; |
3089 | 3406 DECLARE_ALIGNED_8(uint64_t, aligned_temp[sizeof(DCTELEM)*64/8]); |
1016 | 3407 DCTELEM * const temp= (DCTELEM*)aligned_temp; |
936 | 3408 int sum=0, i; |
2967 | 3409 |
1708 | 3410 assert(h==8); |
936 | 3411 |
3412 s->dsp.diff_pixels(temp, src1, src2, stride); | |
1092 | 3413 s->dsp.fdct(temp); |
936 | 3414 |
3415 for(i=0; i<64; i++) | |
3416 sum+= ABS(temp[i]); | |
2967 | 3417 |
936 | 3418 return sum; |
3419 } | |
3420 | |
3010
533c6386eca9
8x8 integer dct from x264 as cmp function (under CONFIG_GPL)
michael
parents:
2979
diff
changeset
|
#ifdef CONFIG_GPL
/* one 8-point H.264-style integer transform (from x264);
   SRC/DST are (re)defined before each use to select rows or columns */
#define DCT8_1D {\
    const int s07 = SRC(0) + SRC(7);\
    const int s16 = SRC(1) + SRC(6);\
    const int s25 = SRC(2) + SRC(5);\
    const int s34 = SRC(3) + SRC(4);\
    const int a0 = s07 + s34;\
    const int a1 = s16 + s25;\
    const int a2 = s07 - s34;\
    const int a3 = s16 - s25;\
    const int d07 = SRC(0) - SRC(7);\
    const int d16 = SRC(1) - SRC(6);\
    const int d25 = SRC(2) - SRC(5);\
    const int d34 = SRC(3) - SRC(4);\
    const int a4 = d16 + d25 + (d07 + (d07>>1));\
    const int a5 = d07 - d34 - (d25 + (d25>>1));\
    const int a6 = d07 + d34 - (d16 + (d16>>1));\
    const int a7 = d16 - d25 + (d34 + (d34>>1));\
    DST(0,  a0 + a1     ) ;\
    DST(1,  a4 + (a7>>2)) ;\
    DST(2,  a2 + (a3>>1)) ;\
    DST(3,  a5 + (a6>>2)) ;\
    DST(4,  a0 - a1     ) ;\
    DST(5,  a6 - (a5>>2)) ;\
    DST(6, (a2>>1) - a3 ) ;\
    DST(7, (a4>>2) - a7 ) ;\
}

/** SAD of the 8x8 H.264 (x264) integer DCT of the difference of two blocks. */
static int dct264_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    int16_t dct[8][8];
    int i;
    int sum=0;

    s->dsp.diff_pixels(dct, src1, src2, stride);

    /* transform rows in place */
#define SRC(x) dct[i][x]
#define DST(x,v) dct[i][x]= v
    for( i = 0; i < 8; i++ )
        DCT8_1D
#undef SRC
#undef DST

    /* transform columns, folding |coefficient| directly into the sum */
#define SRC(x) dct[x][i]
#define DST(x,v) sum += ABS(v)
    for( i = 0; i < 8; i++ )
        DCT8_1D
#undef SRC
#undef DST
    return sum;
}
#endif
533c6386eca9
8x8 integer dct from x264 as cmp function (under CONFIG_GPL)
michael
parents:
2979
diff
changeset
|
3473 |
2382 | 3474 static int dct_max8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){ |
3475 MpegEncContext * const s= (MpegEncContext *)c; | |
3089 | 3476 DECLARE_ALIGNED_8(uint64_t, aligned_temp[sizeof(DCTELEM)*64/8]); |
2382 | 3477 DCTELEM * const temp= (DCTELEM*)aligned_temp; |
3478 int sum=0, i; | |
2967 | 3479 |
2382 | 3480 assert(h==8); |
3481 | |
3482 s->dsp.diff_pixels(temp, src1, src2, stride); | |
3483 s->dsp.fdct(temp); | |
3484 | |
3485 for(i=0; i<64; i++) | |
3486 sum= FFMAX(sum, ABS(temp[i])); | |
2967 | 3487 |
2382 | 3488 return sum; |
3489 } | |
3490 | |
1008 | 3491 void simple_idct(DCTELEM *block); //FIXME |
936 | 3492 |
1708 | 3493 static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){ |
936 | 3494 MpegEncContext * const s= (MpegEncContext *)c; |
3089 | 3495 DECLARE_ALIGNED_8 (uint64_t, aligned_temp[sizeof(DCTELEM)*64*2/8]); |
1016 | 3496 DCTELEM * const temp= (DCTELEM*)aligned_temp; |
3497 DCTELEM * const bak = ((DCTELEM*)aligned_temp)+64; | |
936 | 3498 int sum=0, i; |
3499 | |
1708 | 3500 assert(h==8); |
936 | 3501 s->mb_intra=0; |
2967 | 3502 |
936 | 3503 s->dsp.diff_pixels(temp, src1, src2, stride); |
2967 | 3504 |
936 | 3505 memcpy(bak, temp, 64*sizeof(DCTELEM)); |
2967 | 3506 |
1013 | 3507 s->block_last_index[0/*FIXME*/]= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i); |
1689 | 3508 s->dct_unquantize_inter(s, temp, 0, s->qscale); |
2967 | 3509 simple_idct(temp); //FIXME |
3510 | |
936 | 3511 for(i=0; i<64; i++) |
3512 sum+= (temp[i]-bak[i])*(temp[i]-bak[i]); | |
2967 | 3513 |
936 | 3514 return sum; |
3515 } | |
3516 | |
1708 | 3517 static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){ |
1007 | 3518 MpegEncContext * const s= (MpegEncContext *)c; |
1064 | 3519 const uint8_t *scantable= s->intra_scantable.permutated; |
3089 | 3520 DECLARE_ALIGNED_8 (uint64_t, aligned_temp[sizeof(DCTELEM)*64/8]); |
3521 DECLARE_ALIGNED_8 (uint64_t, aligned_bak[stride]); | |
1016 | 3522 DCTELEM * const temp= (DCTELEM*)aligned_temp; |
3523 uint8_t * const bak= (uint8_t*)aligned_bak; | |
1007 | 3524 int i, last, run, bits, level, distoration, start_i; |
3525 const int esc_length= s->ac_esc_length; | |
3526 uint8_t * length; | |
3527 uint8_t * last_length; | |
2967 | 3528 |
1708 | 3529 assert(h==8); |
3530 | |
1007 | 3531 for(i=0; i<8; i++){ |
3532 ((uint32_t*)(bak + i*stride))[0]= ((uint32_t*)(src2 + i*stride))[0]; | |
3533 ((uint32_t*)(bak + i*stride))[1]= ((uint32_t*)(src2 + i*stride))[1]; | |
3534 } | |
3535 | |
3536 s->dsp.diff_pixels(temp, src1, src2, stride); | |
3537 | |
1013 | 3538 s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i); |
3539 | |
3540 bits=0; | |
2967 | 3541 |
1013 | 3542 if (s->mb_intra) { |
2967 | 3543 start_i = 1; |
1013 | 3544 length = s->intra_ac_vlc_length; |
3545 last_length= s->intra_ac_vlc_last_length; | |
3546 bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma | |
3547 } else { | |
3548 start_i = 0; | |
3549 length = s->inter_ac_vlc_length; | |
3550 last_length= s->inter_ac_vlc_last_length; | |
3551 } | |
2967 | 3552 |
1013 | 3553 if(last>=start_i){ |
1007 | 3554 run=0; |
3555 for(i=start_i; i<last; i++){ | |
3556 int j= scantable[i]; | |
3557 level= temp[j]; | |
2967 | 3558 |
1007 | 3559 if(level){ |
3560 level+=64; | |
3561 if((level&(~127)) == 0){ | |
3562 bits+= length[UNI_AC_ENC_INDEX(run, level)]; | |
3563 }else | |
3564 bits+= esc_length; | |
3565 run=0; | |
3566 }else | |
3567 run++; | |
3568 } | |
3569 i= scantable[last]; | |
2967 | 3570 |
1011 | 3571 level= temp[i] + 64; |
3572 | |
3573 assert(level - 64); | |
2967 | 3574 |
1007 | 3575 if((level&(~127)) == 0){ |
3576 bits+= last_length[UNI_AC_ENC_INDEX(run, level)]; | |
3577 }else | |
3578 bits+= esc_length; | |
2967 | 3579 |
1013 | 3580 } |
3581 | |
3582 if(last>=0){ | |
1689 | 3583 if(s->mb_intra) |
3584 s->dct_unquantize_intra(s, temp, 0, s->qscale); | |
3585 else | |
3586 s->dct_unquantize_inter(s, temp, 0, s->qscale); | |
1007 | 3587 } |
2967 | 3588 |
1092 | 3589 s->dsp.idct_add(bak, stride, temp); |
2967 | 3590 |
1708 | 3591 distoration= s->dsp.sse[1](NULL, bak, src1, stride, 8); |
1007 | 3592 |
1013 | 3593 return distoration + ((bits*s->qscale*s->qscale*109 + 64)>>7); |
1007 | 3594 } |
3595 | |
1708 | 3596 static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){ |
1007 | 3597 MpegEncContext * const s= (MpegEncContext *)c; |
1064 | 3598 const uint8_t *scantable= s->intra_scantable.permutated; |
3089 | 3599 DECLARE_ALIGNED_8 (uint64_t, aligned_temp[sizeof(DCTELEM)*64/8]); |
1016 | 3600 DCTELEM * const temp= (DCTELEM*)aligned_temp; |
1007 | 3601 int i, last, run, bits, level, start_i; |
3602 const int esc_length= s->ac_esc_length; | |
3603 uint8_t * length; | |
3604 uint8_t * last_length; | |
1708 | 3605 |
3606 assert(h==8); | |
2967 | 3607 |
1013 | 3608 s->dsp.diff_pixels(temp, src1, src2, stride); |
1007 | 3609 |
1013 | 3610 s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i); |
3611 | |
3612 bits=0; | |
2967 | 3613 |
1007 | 3614 if (s->mb_intra) { |
2967 | 3615 start_i = 1; |
1007 | 3616 length = s->intra_ac_vlc_length; |
3617 last_length= s->intra_ac_vlc_last_length; | |
1013 | 3618 bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma |
1007 | 3619 } else { |
3620 start_i = 0; | |
3621 length = s->inter_ac_vlc_length; | |
3622 last_length= s->inter_ac_vlc_last_length; | |
3623 } | |
2967 | 3624 |
1013 | 3625 if(last>=start_i){ |
1007 | 3626 run=0; |
3627 for(i=start_i; i<last; i++){ | |
3628 int j= scantable[i]; | |
3629 level= temp[j]; | |
2967 | 3630 |
1007 | 3631 if(level){ |
3632 level+=64; | |
3633 if((level&(~127)) == 0){ | |
3634 bits+= length[UNI_AC_ENC_INDEX(run, level)]; | |
3635 }else | |
3636 bits+= esc_length; | |
3637 run=0; | |
3638 }else | |
3639 run++; | |
3640 } | |
3641 i= scantable[last]; | |
2967 | 3642 |
1013 | 3643 level= temp[i] + 64; |
2967 | 3644 |
1013 | 3645 assert(level - 64); |
2967 | 3646 |
1007 | 3647 if((level&(~127)) == 0){ |
3648 bits+= last_length[UNI_AC_ENC_INDEX(run, level)]; | |
3649 }else | |
3650 bits+= esc_length; | |
3651 } | |
3652 | |
3653 return bits; | |
3654 } | |
3655 | |
/** Vertical SAD of a single block: sum of |s[x,y] - s[x,y+1]|, 16 pixels wide. */
static int vsad_intra16_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){
    int score = 0;
    int x, y;

    for (y = 1; y < h; y++) {
        for (x = 0; x < 16; x += 4) {
            score += ABS(s[x  ] - s[x  +stride]) + ABS(s[x+1] - s[x+1+stride])
                   + ABS(s[x+2] - s[x+2+stride]) + ABS(s[x+3] - s[x+3+stride]);
        }
        s += stride;
    }

    return score;
}

/** Vertical SAD of the difference of two blocks, 16 pixels wide. */
static int vsad16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
    int score = 0;
    int x, y;

    for (y = 1; y < h; y++) {
        for (x = 0; x < 16; x++)
            score += ABS(s1[x] - s2[x] - s1[x+stride] + s2[x+stride]);
        s1 += stride;
        s2 += stride;
    }

    return score;
}
3685 | |
#define SQ(a) ((a)*(a))

/** Vertical SSE of a single block: sum of (s[x,y] - s[x,y+1])^2, 16 pixels wide. */
static int vsse_intra16_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){
    int score = 0;
    int x, y;

    for (y = 1; y < h; y++) {
        for (x = 0; x < 16; x += 4) {
            score += SQ(s[x  ] - s[x  +stride]) + SQ(s[x+1] - s[x+1+stride])
                   + SQ(s[x+2] - s[x+2+stride]) + SQ(s[x+3] - s[x+3+stride]);
        }
        s += stride;
    }

    return score;
}

/** Vertical SSE of the difference of two blocks, 16 pixels wide. */
static int vsse16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
    int score = 0;
    int x, y;

    for (y = 1; y < h; y++) {
        for (x = 0; x < 16; x++)
            score += SQ(s1[x] - s2[x] - s1[x+stride] + s2[x+stride]);
        s1 += stride;
        s2 += stride;
    }

    return score;
}
3716 | |
1708 | 3717 WARPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c) |
1729 | 3718 WARPER8_16_SQ(hadamard8_intra8x8_c, hadamard8_intra16_c) |
1708 | 3719 WARPER8_16_SQ(dct_sad8x8_c, dct_sad16_c) |
3013 | 3720 #ifdef CONFIG_GPL |
3010
533c6386eca9
8x8 integer dct from x264 as cmp function (under CONFIG_GPL)
michael
parents:
2979
diff
changeset
|
3721 WARPER8_16_SQ(dct264_sad8x8_c, dct264_sad16_c) |
3013 | 3722 #endif |
2382 | 3723 WARPER8_16_SQ(dct_max8x8_c, dct_max16_c) |
1708 | 3724 WARPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c) |
3725 WARPER8_16_SQ(rd8x8_c, rd16_c) | |
3726 WARPER8_16_SQ(bit8x8_c, bit16_c) | |
936 | 3727 |
1092 | 3728 /* XXX: those functions should be suppressed ASAP when all IDCTs are |
3729 converted */ | |
3730 static void ff_jref_idct_put(uint8_t *dest, int line_size, DCTELEM *block) | |
3731 { | |
3732 j_rev_dct (block); | |
3733 put_pixels_clamped_c(block, dest, line_size); | |
3734 } | |
3735 static void ff_jref_idct_add(uint8_t *dest, int line_size, DCTELEM *block) | |
3736 { | |
3737 j_rev_dct (block); | |
3738 add_pixels_clamped_c(block, dest, line_size); | |
3739 } | |
3740 | |
2256 | 3741 static void ff_jref_idct4_put(uint8_t *dest, int line_size, DCTELEM *block) |
3742 { | |
3743 j_rev_dct4 (block); | |
3744 put_pixels_clamped4_c(block, dest, line_size); | |
3745 } | |
3746 static void ff_jref_idct4_add(uint8_t *dest, int line_size, DCTELEM *block) | |
3747 { | |
3748 j_rev_dct4 (block); | |
3749 add_pixels_clamped4_c(block, dest, line_size); | |
3750 } | |
3751 | |
2257 | 3752 static void ff_jref_idct2_put(uint8_t *dest, int line_size, DCTELEM *block) |
3753 { | |
3754 j_rev_dct2 (block); | |
3755 put_pixels_clamped2_c(block, dest, line_size); | |
3756 } | |
3757 static void ff_jref_idct2_add(uint8_t *dest, int line_size, DCTELEM *block) | |
3758 { | |
3759 j_rev_dct2 (block); | |
3760 add_pixels_clamped2_c(block, dest, line_size); | |
3761 } | |
3762 | |
2259 | 3763 static void ff_jref_idct1_put(uint8_t *dest, int line_size, DCTELEM *block) |
3764 { | |
3765 uint8_t *cm = cropTbl + MAX_NEG_CROP; | |
3766 | |
3767 dest[0] = cm[(block[0] + 4)>>3]; | |
3768 } | |
3769 static void ff_jref_idct1_add(uint8_t *dest, int line_size, DCTELEM *block) | |
3770 { | |
3771 uint8_t *cm = cropTbl + MAX_NEG_CROP; | |
3772 | |
3773 dest[0] = cm[dest[0] + ((block[0] + 4)>>3)]; | |
3774 } | |
3775 | |
1201 | 3776 /* init static data */ |
3777 void dsputil_static_init(void) | |
0 | 3778 { |
751 | 3779 int i; |
0 | 3780 |
1201 | 3781 for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i; |
3782 for(i=0;i<MAX_NEG_CROP;i++) { | |
3783 cropTbl[i] = 0; | |
3784 cropTbl[i + MAX_NEG_CROP + 256] = 255; | |
3785 } | |
2967 | 3786 |
1201 | 3787 for(i=0;i<512;i++) { |
3788 squareTbl[i] = (i - 256) * (i - 256); | |
3789 } | |
2967 | 3790 |
1201 | 3791 for(i=0; i<64; i++) inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1; |
3792 } | |
0 | 3793 |
861 | 3794 |
/**
 * Fill a DSPContext with the C reference implementations, then let the
 * architecture-specific init functions override individual pointers with
 * optimized versions.  Selection of the (I)DCT implementations is driven
 * by avctx->dct_algo, avctx->idct_algo and avctx->lowres.
 */
void dsputil_init(DSPContext* c, AVCodecContext *avctx)
{
    int i;

#ifdef CONFIG_ENCODERS
    /* forward DCT selection (encoders only) */
    if(avctx->dct_algo==FF_DCT_FASTINT) {
        c->fdct = fdct_ifast;
        c->fdct248 = fdct_ifast248;
    }
    else if(avctx->dct_algo==FF_DCT_FAAN) {
        c->fdct = ff_faandct;
        c->fdct248 = ff_faandct248;
    }
    else {
        c->fdct = ff_jpeg_fdct_islow; //slow/accurate/default
        c->fdct248 = ff_fdct248_islow;
    }
#endif //CONFIG_ENCODERS

    /* inverse DCT selection: lowres decoding uses reduced-size IDCTs
     * (4x4, 2x2 or DC-only) with no coefficient permutation */
    if(avctx->lowres==1){
        if(avctx->idct_algo==FF_IDCT_INT || avctx->idct_algo==FF_IDCT_AUTO){
            c->idct_put= ff_jref_idct4_put;
            c->idct_add= ff_jref_idct4_add;
        }else{
            c->idct_put= ff_h264_lowres_idct_put_c;
            c->idct_add= ff_h264_lowres_idct_add_c;
        }
        c->idct    = j_rev_dct4;
        c->idct_permutation_type= FF_NO_IDCT_PERM;
    }else if(avctx->lowres==2){
        c->idct_put= ff_jref_idct2_put;
        c->idct_add= ff_jref_idct2_add;
        c->idct    = j_rev_dct2;
        c->idct_permutation_type= FF_NO_IDCT_PERM;
    }else if(avctx->lowres==3){
        c->idct_put= ff_jref_idct1_put;
        c->idct_add= ff_jref_idct1_add;
        c->idct    = j_rev_dct1;
        c->idct_permutation_type= FF_NO_IDCT_PERM;
    }else{
        if(avctx->idct_algo==FF_IDCT_INT){
            c->idct_put= ff_jref_idct_put;
            c->idct_add= ff_jref_idct_add;
            c->idct    = j_rev_dct;
            c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
        }else if(avctx->idct_algo==FF_IDCT_VP3){
            c->idct_put= ff_vp3_idct_put_c;
            c->idct_add= ff_vp3_idct_add_c;
            c->idct    = ff_vp3_idct_c;
            c->idct_permutation_type= FF_NO_IDCT_PERM;
        }else{ //accurate/default
            c->idct_put= simple_idct_put;
            c->idct_add= simple_idct_add;
            c->idct    = simple_idct;
            c->idct_permutation_type= FF_NO_IDCT_PERM;
        }
    }

    c->h264_idct_add= ff_h264_idct_add_c;
    c->h264_idct8_add= ff_h264_idct8_add_c;
    c->h264_idct_dc_add= ff_h264_idct_dc_add_c;
    c->h264_idct8_dc_add= ff_h264_idct8_dc_add_c;

    c->get_pixels = get_pixels_c;
    c->diff_pixels = diff_pixels_c;
    c->put_pixels_clamped = put_pixels_clamped_c;
    c->put_signed_pixels_clamped = put_signed_pixels_clamped_c;
    c->add_pixels_clamped = add_pixels_clamped_c;
    c->add_pixels8 = add_pixels8_c;
    c->add_pixels4 = add_pixels4_c;
    c->gmc1 = gmc1_c;
    c->gmc = gmc_c;
    c->clear_blocks = clear_blocks_c;
    c->pix_sum = pix_sum_c;
    c->pix_norm1 = pix_norm1_c;

    /* TODO [0] 16 [1] 8 */
    c->pix_abs[0][0] = pix_abs16_c;
    c->pix_abs[0][1] = pix_abs16_x2_c;
    c->pix_abs[0][2] = pix_abs16_y2_c;
    c->pix_abs[0][3] = pix_abs16_xy2_c;
    c->pix_abs[1][0] = pix_abs8_c;
    c->pix_abs[1][1] = pix_abs8_x2_c;
    c->pix_abs[1][2] = pix_abs8_y2_c;
    c->pix_abs[1][3] = pix_abs8_xy2_c;

    /* half-pel motion compensation tables: [IDX] selects the block width
     * (0:16 1:8 2:4 3:2), the second index the half-pel position */
#define dspfunc(PFX, IDX, NUM) \
    c->PFX ## _pixels_tab[IDX][0] = PFX ## _pixels ## NUM ## _c; \
    c->PFX ## _pixels_tab[IDX][1] = PFX ## _pixels ## NUM ## _x2_c; \
    c->PFX ## _pixels_tab[IDX][2] = PFX ## _pixels ## NUM ## _y2_c; \
    c->PFX ## _pixels_tab[IDX][3] = PFX ## _pixels ## NUM ## _xy2_c

    dspfunc(put, 0, 16);
    dspfunc(put_no_rnd, 0, 16);
    dspfunc(put, 1, 8);
    dspfunc(put_no_rnd, 1, 8);
    dspfunc(put, 2, 4);
    dspfunc(put, 3, 2);

    dspfunc(avg, 0, 16);
    dspfunc(avg_no_rnd, 0, 16);
    dspfunc(avg, 1, 8);
    dspfunc(avg_no_rnd, 1, 8);
    dspfunc(avg, 2, 4);
    dspfunc(avg, 3, 2);
#undef dspfunc

    c->put_no_rnd_pixels_l2[0]= put_no_rnd_pixels16_l2_c;
    c->put_no_rnd_pixels_l2[1]= put_no_rnd_pixels8_l2_c;

    /* third-pel MC (SVQ3); entries 3, 7 and 11 are intentionally left
     * unset — presumably those fractional positions are never used.
     * TODO confirm against the SVQ3 decoder. */
    c->put_tpel_pixels_tab[ 0] = put_tpel_pixels_mc00_c;
    c->put_tpel_pixels_tab[ 1] = put_tpel_pixels_mc10_c;
    c->put_tpel_pixels_tab[ 2] = put_tpel_pixels_mc20_c;
    c->put_tpel_pixels_tab[ 4] = put_tpel_pixels_mc01_c;
    c->put_tpel_pixels_tab[ 5] = put_tpel_pixels_mc11_c;
    c->put_tpel_pixels_tab[ 6] = put_tpel_pixels_mc21_c;
    c->put_tpel_pixels_tab[ 8] = put_tpel_pixels_mc02_c;
    c->put_tpel_pixels_tab[ 9] = put_tpel_pixels_mc12_c;
    c->put_tpel_pixels_tab[10] = put_tpel_pixels_mc22_c;

    c->avg_tpel_pixels_tab[ 0] = avg_tpel_pixels_mc00_c;
    c->avg_tpel_pixels_tab[ 1] = avg_tpel_pixels_mc10_c;
    c->avg_tpel_pixels_tab[ 2] = avg_tpel_pixels_mc20_c;
    c->avg_tpel_pixels_tab[ 4] = avg_tpel_pixels_mc01_c;
    c->avg_tpel_pixels_tab[ 5] = avg_tpel_pixels_mc11_c;
    c->avg_tpel_pixels_tab[ 6] = avg_tpel_pixels_mc21_c;
    c->avg_tpel_pixels_tab[ 8] = avg_tpel_pixels_mc02_c;
    c->avg_tpel_pixels_tab[ 9] = avg_tpel_pixels_mc12_c;
    c->avg_tpel_pixels_tab[10] = avg_tpel_pixels_mc22_c;

    /* quarter-pel MC tables: 16 entries, one per (x,y) quarter-pel phase */
#define dspfunc(PFX, IDX, NUM) \
    c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_c; \
    c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_c; \
    c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_c; \
    c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_c; \
    c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_c; \
    c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_c; \
    c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_c; \
    c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_c; \
    c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_c; \
    c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_c; \
    c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_c; \
    c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_c; \
    c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_c; \
    c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_c; \
    c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_c; \
    c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_c

    dspfunc(put_qpel, 0, 16);
    dspfunc(put_no_rnd_qpel, 0, 16);

    dspfunc(avg_qpel, 0, 16);
    /* dspfunc(avg_no_rnd_qpel, 0, 16); */

    dspfunc(put_qpel, 1, 8);
    dspfunc(put_no_rnd_qpel, 1, 8);

    dspfunc(avg_qpel, 1, 8);
    /* dspfunc(avg_no_rnd_qpel, 1, 8); */

    dspfunc(put_h264_qpel, 0, 16);
    dspfunc(put_h264_qpel, 1, 8);
    dspfunc(put_h264_qpel, 2, 4);
    dspfunc(put_h264_qpel, 3, 2);
    dspfunc(avg_h264_qpel, 0, 16);
    dspfunc(avg_h264_qpel, 1, 8);
    dspfunc(avg_h264_qpel, 2, 4);

#undef dspfunc
    c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_c;
    c->put_h264_chroma_pixels_tab[1]= put_h264_chroma_mc4_c;
    c->put_h264_chroma_pixels_tab[2]= put_h264_chroma_mc2_c;
    c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_c;
    c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_c;
    c->avg_h264_chroma_pixels_tab[2]= avg_h264_chroma_mc2_c;

    /* H.264 weighted prediction, one entry per block size 16x16..2x2 */
    c->weight_h264_pixels_tab[0]= weight_h264_pixels16x16_c;
    c->weight_h264_pixels_tab[1]= weight_h264_pixels16x8_c;
    c->weight_h264_pixels_tab[2]= weight_h264_pixels8x16_c;
    c->weight_h264_pixels_tab[3]= weight_h264_pixels8x8_c;
    c->weight_h264_pixels_tab[4]= weight_h264_pixels8x4_c;
    c->weight_h264_pixels_tab[5]= weight_h264_pixels4x8_c;
    c->weight_h264_pixels_tab[6]= weight_h264_pixels4x4_c;
    c->weight_h264_pixels_tab[7]= weight_h264_pixels4x2_c;
    c->weight_h264_pixels_tab[8]= weight_h264_pixels2x4_c;
    c->weight_h264_pixels_tab[9]= weight_h264_pixels2x2_c;
    c->biweight_h264_pixels_tab[0]= biweight_h264_pixels16x16_c;
    c->biweight_h264_pixels_tab[1]= biweight_h264_pixels16x8_c;
    c->biweight_h264_pixels_tab[2]= biweight_h264_pixels8x16_c;
    c->biweight_h264_pixels_tab[3]= biweight_h264_pixels8x8_c;
    c->biweight_h264_pixels_tab[4]= biweight_h264_pixels8x4_c;
    c->biweight_h264_pixels_tab[5]= biweight_h264_pixels4x8_c;
    c->biweight_h264_pixels_tab[6]= biweight_h264_pixels4x4_c;
    c->biweight_h264_pixels_tab[7]= biweight_h264_pixels4x2_c;
    c->biweight_h264_pixels_tab[8]= biweight_h264_pixels2x4_c;
    c->biweight_h264_pixels_tab[9]= biweight_h264_pixels2x2_c;

    c->put_mspel_pixels_tab[0]= put_mspel8_mc00_c;
    c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c;
    c->put_mspel_pixels_tab[2]= put_mspel8_mc20_c;
    c->put_mspel_pixels_tab[3]= put_mspel8_mc30_c;
    c->put_mspel_pixels_tab[4]= put_mspel8_mc02_c;
    c->put_mspel_pixels_tab[5]= put_mspel8_mc12_c;
    c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c;
    c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c;

    /* comparison functions: [0] is the 16x16 variant, [1] the 8x8 one */
#define SET_CMP_FUNC(name) \
    c->name[0]= name ## 16_c;\
    c->name[1]= name ## 8x8_c;

    SET_CMP_FUNC(hadamard8_diff)
    c->hadamard8_diff[4]= hadamard8_intra16_c;
    SET_CMP_FUNC(dct_sad)
    SET_CMP_FUNC(dct_max)
#ifdef CONFIG_GPL
    SET_CMP_FUNC(dct264_sad)
#endif
    c->sad[0]= pix_abs16_c;
    c->sad[1]= pix_abs8_c;
    c->sse[0]= sse16_c;
    c->sse[1]= sse8_c;
    c->sse[2]= sse4_c;
    SET_CMP_FUNC(quant_psnr)
    SET_CMP_FUNC(rd)
    SET_CMP_FUNC(bit)
    c->vsad[0]= vsad16_c;
    c->vsad[4]= vsad_intra16_c;
    c->vsse[0]= vsse16_c;
    c->vsse[4]= vsse_intra16_c;
    c->nsse[0]= nsse16_c;
    c->nsse[1]= nsse8_c;
    c->w53[0]= w53_16_c;
    c->w53[1]= w53_8_c;
    c->w97[0]= w97_16_c;
    c->w97[1]= w97_8_c;

    c->add_bytes= add_bytes_c;
    c->diff_bytes= diff_bytes_c;
    c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_c;
    c->bswap_buf= bswap_buf;

    /* in-loop deblocking filters */
    c->h264_v_loop_filter_luma= h264_v_loop_filter_luma_c;
    c->h264_h_loop_filter_luma= h264_h_loop_filter_luma_c;
    c->h264_v_loop_filter_chroma= h264_v_loop_filter_chroma_c;
    c->h264_h_loop_filter_chroma= h264_h_loop_filter_chroma_c;
    c->h264_v_loop_filter_chroma_intra= h264_v_loop_filter_chroma_intra_c;
    c->h264_h_loop_filter_chroma_intra= h264_h_loop_filter_chroma_intra_c;

    c->h263_h_loop_filter= h263_h_loop_filter_c;
    c->h263_v_loop_filter= h263_v_loop_filter_c;

    c->h261_loop_filter= h261_loop_filter_c;

    c->try_8x8basis= try_8x8basis_c;
    c->add_8x8basis= add_8x8basis_c;

    /* Snow wavelet helpers (candidates for asm replacements) */
    c->vertical_compose97i = ff_snow_vertical_compose97i;
    c->horizontal_compose97i = ff_snow_horizontal_compose97i;
    c->inner_add_yblock = ff_snow_inner_add_yblock;

    /* architecture-specific initializers may override any pointer above */
#ifdef HAVE_MMX
    dsputil_init_mmx(c, avctx);
#endif
#ifdef ARCH_ARMV4L
    dsputil_init_armv4l(c, avctx);
#endif
#ifdef HAVE_MLIB
    dsputil_init_mlib(c, avctx);
#endif
#ifdef ARCH_SPARC
    dsputil_init_vis(c,avctx);
#endif
#ifdef ARCH_ALPHA
    dsputil_init_alpha(c, avctx);
#endif
#ifdef ARCH_POWERPC
    dsputil_init_ppc(c, avctx);
#endif
#ifdef HAVE_MMI
    dsputil_init_mmi(c, avctx);
#endif
#ifdef ARCH_SH4
    dsputil_init_sh4(c,avctx);
#endif

    /* build the coefficient permutation matching the chosen IDCT
     * (idct_permutation_type may have been changed by the arch init) */
    switch(c->idct_permutation_type){
    case FF_NO_IDCT_PERM:
        for(i=0; i<64; i++)
            c->idct_permutation[i]= i;
        break;
    case FF_LIBMPEG2_IDCT_PERM:
        for(i=0; i<64; i++)
            c->idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
        break;
    case FF_SIMPLE_IDCT_PERM:
        for(i=0; i<64; i++)
            c->idct_permutation[i]= simple_mmx_permutation[i];
        break;
    case FF_TRANSPOSE_IDCT_PERM:
        for(i=0; i<64; i++)
            c->idct_permutation[i]= ((i&7)<<3) | (i>>3);
        break;
    case FF_PARTTRANS_IDCT_PERM:
        for(i=0; i<64; i++)
            c->idct_permutation[i]= (i&0x24) | ((i&3)<<3) | ((i>>3)&3);
        break;
    default:
        av_log(avctx, AV_LOG_ERROR, "Internal error, IDCT permutation not set\n");
    }
}
252
ddb1a0e94cf4
- Added PSNR feature to libavcodec and ffmpeg. By now just Y PSNR until I'm
pulento
parents:
220
diff
changeset
|
4105 |