Mercurial > libavcodec.hg
annotate dsputil.c @ 1708:dea5b2946999 libavcodec
interlaced motion estimation
interlaced mpeg2 encoding
P & B frames
rate distorted interlaced mb decision
alternate scantable support
4mv encoding fixes (that's also why the regression tests change)
passing height to most dsp functions
interlaced mpeg4 encoding (no direct mode MBs yet)
various related cleanups
disabled old motion estimation algorithms (log, full, ...) they will either be fixed or removed
author | michael |
---|---|
date | Tue, 30 Dec 2003 16:07:57 +0000 |
parents | 1a2db2073848 |
children | a4a5e7521339 |
rev | line source |
---|---|
0 | 1 /* |
2 * DSP utils | |
429 | 3 * Copyright (c) 2000, 2001 Fabrice Bellard. |
0 | 4 * |
429 | 5 * This library is free software; you can redistribute it and/or |
6 * modify it under the terms of the GNU Lesser General Public | |
7 * License as published by the Free Software Foundation; either | |
8 * version 2 of the License, or (at your option) any later version. | |
0 | 9 * |
429 | 10 * This library is distributed in the hope that it will be useful, |
0 | 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
429 | 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
13 * Lesser General Public License for more details. | |
0 | 14 * |
429 | 15 * You should have received a copy of the GNU Lesser General Public |
16 * License along with this library; if not, write to the Free Software | |
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
256 | 18 * |
385 | 19 * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at> |
0 | 20 */ |
1106 | 21 |
22 /** | |
23 * @file dsputil.c | |
24 * DSP utils | |
25 */ | |
26 | |
0 | 27 #include "avcodec.h" |
28 #include "dsputil.h" | |
936 | 29 #include "mpegvideo.h" |
1092 | 30 #include "simple_idct.h" |
1557 | 31 #include "faandct.h" |
676 | 32 |
1064 | 33 uint8_t cropTbl[256 + 2 * MAX_NEG_CROP]; |
34 uint32_t squareTbl[512]; | |
0 | 35 |
1064 | 36 const uint8_t ff_zigzag_direct[64] = { |
706
e65798d228ea
idct permutation cleanup, idct can be selected per context now
michaelni
parents:
689
diff
changeset
|
37 0, 1, 8, 16, 9, 2, 3, 10, |
e65798d228ea
idct permutation cleanup, idct can be selected per context now
michaelni
parents:
689
diff
changeset
|
38 17, 24, 32, 25, 18, 11, 4, 5, |
34 | 39 12, 19, 26, 33, 40, 48, 41, 34, |
706
e65798d228ea
idct permutation cleanup, idct can be selected per context now
michaelni
parents:
689
diff
changeset
|
40 27, 20, 13, 6, 7, 14, 21, 28, |
34 | 41 35, 42, 49, 56, 57, 50, 43, 36, |
42 29, 22, 15, 23, 30, 37, 44, 51, | |
43 58, 59, 52, 45, 38, 31, 39, 46, | |
44 53, 60, 61, 54, 47, 55, 62, 63 | |
45 }; | |
46 | |
1567 | 47 /* Specific zigzag scan for 248 idct. NOTE that unlike the |
48 specification, we interleave the fields */ | |
49 const uint8_t ff_zigzag248_direct[64] = { | |
50 0, 8, 1, 9, 16, 24, 2, 10, | |
51 17, 25, 32, 40, 48, 56, 33, 41, | |
52 18, 26, 3, 11, 4, 12, 19, 27, | |
53 34, 42, 49, 57, 50, 58, 35, 43, | |
54 20, 28, 5, 13, 6, 14, 21, 29, | |
55 36, 44, 51, 59, 52, 60, 37, 45, | |
56 22, 30, 7, 15, 23, 31, 38, 46, | |
57 53, 61, 54, 62, 39, 47, 55, 63, | |
58 }; | |
59 | |
220 | 60 /* not permutated inverse zigzag_direct + 1 for MMX quantizer */ |
1064 | 61 uint16_t __align8 inv_zigzag_direct16[64]; |
220 | 62 |
1064 | 63 const uint8_t ff_alternate_horizontal_scan[64] = { |
706
e65798d228ea
idct permutation cleanup, idct can be selected per context now
michaelni
parents:
689
diff
changeset
|
64 0, 1, 2, 3, 8, 9, 16, 17, |
34 | 65 10, 11, 4, 5, 6, 7, 15, 14, |
66 13, 12, 19, 18, 24, 25, 32, 33, | |
67 26, 27, 20, 21, 22, 23, 28, 29, | |
68 30, 31, 34, 35, 40, 41, 48, 49, | |
69 42, 43, 36, 37, 38, 39, 44, 45, | |
70 46, 47, 50, 51, 56, 57, 58, 59, | |
71 52, 53, 54, 55, 60, 61, 62, 63, | |
72 }; | |
73 | |
1064 | 74 const uint8_t ff_alternate_vertical_scan[64] = { |
706
e65798d228ea
idct permutation cleanup, idct can be selected per context now
michaelni
parents:
689
diff
changeset
|
75 0, 8, 16, 24, 1, 9, 2, 10, |
34 | 76 17, 25, 32, 40, 48, 56, 57, 49, |
77 41, 33, 26, 18, 3, 11, 4, 12, | |
78 19, 27, 34, 42, 50, 58, 35, 43, | |
79 51, 59, 20, 28, 5, 13, 6, 14, | |
80 21, 29, 36, 44, 52, 60, 37, 45, | |
81 53, 61, 22, 30, 7, 15, 23, 31, | |
82 38, 46, 54, 62, 39, 47, 55, 63, | |
83 }; | |
84 | |
220 | 85 /* a*inverse[b]>>32 == a/b for all 0<=a<=65536 && 2<=b<=255 */ |
1064 | 86 const uint32_t inverse[256]={ |
220 | 87 0, 4294967295U,2147483648U,1431655766, 1073741824, 858993460, 715827883, 613566757, |
88 536870912, 477218589, 429496730, 390451573, 357913942, 330382100, 306783379, 286331154, | |
89 268435456, 252645136, 238609295, 226050911, 214748365, 204522253, 195225787, 186737709, | |
90 178956971, 171798692, 165191050, 159072863, 153391690, 148102321, 143165577, 138547333, | |
91 134217728, 130150525, 126322568, 122713352, 119304648, 116080198, 113025456, 110127367, | |
92 107374183, 104755300, 102261127, 99882961, 97612894, 95443718, 93368855, 91382283, | |
93 89478486, 87652394, 85899346, 84215046, 82595525, 81037119, 79536432, 78090315, | |
94 76695845, 75350304, 74051161, 72796056, 71582789, 70409300, 69273667, 68174085, | |
95 67108864, 66076420, 65075263, 64103990, 63161284, 62245903, 61356676, 60492498, | |
96 59652324, 58835169, 58040099, 57266231, 56512728, 55778797, 55063684, 54366675, | |
97 53687092, 53024288, 52377650, 51746594, 51130564, 50529028, 49941481, 49367441, | |
98 48806447, 48258060, 47721859, 47197443, 46684428, 46182445, 45691142, 45210183, | |
99 44739243, 44278014, 43826197, 43383509, 42949673, 42524429, 42107523, 41698712, | |
100 41297763, 40904451, 40518560, 40139882, 39768216, 39403370, 39045158, 38693400, | |
101 38347923, 38008561, 37675152, 37347542, 37025581, 36709123, 36398028, 36092163, | |
102 35791395, 35495598, 35204650, 34918434, 34636834, 34359739, 34087043, 33818641, | |
103 33554432, 33294321, 33038210, 32786010, 32537632, 32292988, 32051995, 31814573, | |
104 31580642, 31350127, 31122952, 30899046, 30678338, 30460761, 30246249, 30034737, | |
105 29826162, 29620465, 29417585, 29217465, 29020050, 28825284, 28633116, 28443493, | |
106 28256364, 28071682, 27889399, 27709467, 27531842, 27356480, 27183338, 27012373, | |
107 26843546, 26676816, 26512144, 26349493, 26188825, 26030105, 25873297, 25718368, | |
108 25565282, 25414008, 25264514, 25116768, 24970741, 24826401, 24683721, 24542671, | |
109 24403224, 24265352, 24129030, 23994231, 23860930, 23729102, 23598722, 23469767, | |
110 23342214, 23216040, 23091223, 22967740, 22845571, 22724695, 22605092, 22486740, | |
111 22369622, 22253717, 22139007, 22025474, 21913099, 21801865, 21691755, 21582751, | |
112 21474837, 21367997, 21262215, 21157475, 21053762, 20951060, 20849356, 20748635, | |
113 20648882, 20550083, 20452226, 20355296, 20259280, 20164166, 20069941, 19976593, | |
114 19884108, 19792477, 19701685, 19611723, 19522579, 19434242, 19346700, 19259944, | |
115 19173962, 19088744, 19004281, 18920561, 18837576, 18755316, 18673771, 18592933, | |
116 18512791, 18433337, 18354562, 18276457, 18199014, 18122225, 18046082, 17970575, | |
117 17895698, 17821442, 17747799, 17674763, 17602325, 17530479, 17459217, 17388532, | |
118 17318417, 17248865, 17179870, 17111424, 17043522, 16976156, 16909321, 16843010, | |
119 }; | |
120 | |
1092 | 121 /* Input permutation for the simple_idct_mmx */ |
122 static const uint8_t simple_mmx_permutation[64]={ | |
123 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D, | |
124 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D, | |
125 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D, | |
126 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F, | |
127 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F, | |
128 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D, | |
129 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F, | |
130 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F, | |
131 }; | |
132 | |
1064 | 133 static int pix_sum_c(uint8_t * pix, int line_size) |
612 | 134 { |
135 int s, i, j; | |
136 | |
137 s = 0; | |
138 for (i = 0; i < 16; i++) { | |
139 for (j = 0; j < 16; j += 8) { | |
140 s += pix[0]; | |
141 s += pix[1]; | |
142 s += pix[2]; | |
143 s += pix[3]; | |
144 s += pix[4]; | |
145 s += pix[5]; | |
146 s += pix[6]; | |
147 s += pix[7]; | |
148 pix += 8; | |
149 } | |
150 pix += line_size - 16; | |
151 } | |
152 return s; | |
153 } | |
154 | |
1064 | 155 static int pix_norm1_c(uint8_t * pix, int line_size) |
612 | 156 { |
157 int s, i, j; | |
1064 | 158 uint32_t *sq = squareTbl + 256; |
612 | 159 |
160 s = 0; | |
161 for (i = 0; i < 16; i++) { | |
162 for (j = 0; j < 16; j += 8) { | |
997
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
163 #if 0 |
612 | 164 s += sq[pix[0]]; |
165 s += sq[pix[1]]; | |
166 s += sq[pix[2]]; | |
167 s += sq[pix[3]]; | |
168 s += sq[pix[4]]; | |
169 s += sq[pix[5]]; | |
170 s += sq[pix[6]]; | |
171 s += sq[pix[7]]; | |
997
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
172 #else |
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
173 #if LONG_MAX > 2147483647 |
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
174 register uint64_t x=*(uint64_t*)pix; |
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
175 s += sq[x&0xff]; |
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
176 s += sq[(x>>8)&0xff]; |
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
177 s += sq[(x>>16)&0xff]; |
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
178 s += sq[(x>>24)&0xff]; |
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
179 s += sq[(x>>32)&0xff]; |
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
180 s += sq[(x>>40)&0xff]; |
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
181 s += sq[(x>>48)&0xff]; |
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
182 s += sq[(x>>56)&0xff]; |
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
183 #else |
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
184 register uint32_t x=*(uint32_t*)pix; |
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
185 s += sq[x&0xff]; |
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
186 s += sq[(x>>8)&0xff]; |
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
187 s += sq[(x>>16)&0xff]; |
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
188 s += sq[(x>>24)&0xff]; |
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
189 x=*(uint32_t*)(pix+4); |
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
190 s += sq[x&0xff]; |
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
191 s += sq[(x>>8)&0xff]; |
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
192 s += sq[(x>>16)&0xff]; |
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
193 s += sq[(x>>24)&0xff]; |
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
194 #endif |
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
195 #endif |
612 | 196 pix += 8; |
197 } | |
198 pix += line_size - 16; | |
199 } | |
200 return s; | |
201 } | |
202 | |
1273 | 203 static void bswap_buf(uint32_t *dst, uint32_t *src, int w){ |
204 int i; | |
205 | |
206 for(i=0; i+8<=w; i+=8){ | |
207 dst[i+0]= bswap_32(src[i+0]); | |
208 dst[i+1]= bswap_32(src[i+1]); | |
209 dst[i+2]= bswap_32(src[i+2]); | |
210 dst[i+3]= bswap_32(src[i+3]); | |
211 dst[i+4]= bswap_32(src[i+4]); | |
212 dst[i+5]= bswap_32(src[i+5]); | |
213 dst[i+6]= bswap_32(src[i+6]); | |
214 dst[i+7]= bswap_32(src[i+7]); | |
215 } | |
216 for(;i<w; i++){ | |
217 dst[i+0]= bswap_32(src[i+0]); | |
218 } | |
219 } | |
612 | 220 |
1708 | 221 static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h) |
936 | 222 { |
223 int s, i; | |
1064 | 224 uint32_t *sq = squareTbl + 256; |
936 | 225 |
226 s = 0; | |
1708 | 227 for (i = 0; i < h; i++) { |
936 | 228 s += sq[pix1[0] - pix2[0]]; |
229 s += sq[pix1[1] - pix2[1]]; | |
230 s += sq[pix1[2] - pix2[2]]; | |
231 s += sq[pix1[3] - pix2[3]]; | |
232 s += sq[pix1[4] - pix2[4]]; | |
233 s += sq[pix1[5] - pix2[5]]; | |
234 s += sq[pix1[6] - pix2[6]]; | |
235 s += sq[pix1[7] - pix2[7]]; | |
236 pix1 += line_size; | |
237 pix2 += line_size; | |
238 } | |
239 return s; | |
240 } | |
241 | |
1708 | 242 static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) |
884 | 243 { |
1012
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
244 int s, i; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
245 uint32_t *sq = squareTbl + 256; |
884 | 246 |
247 s = 0; | |
1708 | 248 for (i = 0; i < h; i++) { |
1012
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
249 s += sq[pix1[ 0] - pix2[ 0]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
250 s += sq[pix1[ 1] - pix2[ 1]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
251 s += sq[pix1[ 2] - pix2[ 2]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
252 s += sq[pix1[ 3] - pix2[ 3]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
253 s += sq[pix1[ 4] - pix2[ 4]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
254 s += sq[pix1[ 5] - pix2[ 5]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
255 s += sq[pix1[ 6] - pix2[ 6]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
256 s += sq[pix1[ 7] - pix2[ 7]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
257 s += sq[pix1[ 8] - pix2[ 8]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
258 s += sq[pix1[ 9] - pix2[ 9]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
259 s += sq[pix1[10] - pix2[10]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
260 s += sq[pix1[11] - pix2[11]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
261 s += sq[pix1[12] - pix2[12]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
262 s += sq[pix1[13] - pix2[13]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
263 s += sq[pix1[14] - pix2[14]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
264 s += sq[pix1[15] - pix2[15]]; |
997
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
265 |
1012
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
266 pix1 += line_size; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
267 pix2 += line_size; |
884 | 268 } |
269 return s; | |
270 } | |
271 | |
1064 | 272 static void get_pixels_c(DCTELEM *restrict block, const uint8_t *pixels, int line_size) |
0 | 273 { |
274 int i; | |
275 | |
276 /* read the pixels */ | |
277 for(i=0;i<8;i++) { | |
516 | 278 block[0] = pixels[0]; |
279 block[1] = pixels[1]; | |
280 block[2] = pixels[2]; | |
281 block[3] = pixels[3]; | |
282 block[4] = pixels[4]; | |
283 block[5] = pixels[5]; | |
284 block[6] = pixels[6]; | |
285 block[7] = pixels[7]; | |
286 pixels += line_size; | |
287 block += 8; | |
0 | 288 } |
289 } | |
290 | |
1064 | 291 static void diff_pixels_c(DCTELEM *restrict block, const uint8_t *s1, |
292 const uint8_t *s2, int stride){ | |
324 | 293 int i; |
294 | |
295 /* read the pixels */ | |
296 for(i=0;i<8;i++) { | |
516 | 297 block[0] = s1[0] - s2[0]; |
298 block[1] = s1[1] - s2[1]; | |
299 block[2] = s1[2] - s2[2]; | |
300 block[3] = s1[3] - s2[3]; | |
301 block[4] = s1[4] - s2[4]; | |
302 block[5] = s1[5] - s2[5]; | |
303 block[6] = s1[6] - s2[6]; | |
304 block[7] = s1[7] - s2[7]; | |
324 | 305 s1 += stride; |
306 s2 += stride; | |
516 | 307 block += 8; |
324 | 308 } |
309 } | |
310 | |
311 | |
1064 | 312 static void put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels, |
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
764
diff
changeset
|
313 int line_size) |
0 | 314 { |
315 int i; | |
1064 | 316 uint8_t *cm = cropTbl + MAX_NEG_CROP; |
0 | 317 |
318 /* read the pixels */ | |
319 for(i=0;i<8;i++) { | |
516 | 320 pixels[0] = cm[block[0]]; |
321 pixels[1] = cm[block[1]]; | |
322 pixels[2] = cm[block[2]]; | |
323 pixels[3] = cm[block[3]]; | |
324 pixels[4] = cm[block[4]]; | |
325 pixels[5] = cm[block[5]]; | |
326 pixels[6] = cm[block[6]]; | |
327 pixels[7] = cm[block[7]]; | |
328 | |
329 pixels += line_size; | |
330 block += 8; | |
0 | 331 } |
332 } | |
333 | |
1064 | 334 static void add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels, |
516 | 335 int line_size) |
0 | 336 { |
337 int i; | |
1064 | 338 uint8_t *cm = cropTbl + MAX_NEG_CROP; |
0 | 339 |
340 /* read the pixels */ | |
341 for(i=0;i<8;i++) { | |
516 | 342 pixels[0] = cm[pixels[0] + block[0]]; |
343 pixels[1] = cm[pixels[1] + block[1]]; | |
344 pixels[2] = cm[pixels[2] + block[2]]; | |
345 pixels[3] = cm[pixels[3] + block[3]]; | |
346 pixels[4] = cm[pixels[4] + block[4]]; | |
347 pixels[5] = cm[pixels[5] + block[5]]; | |
348 pixels[6] = cm[pixels[6] + block[6]]; | |
349 pixels[7] = cm[pixels[7] + block[7]]; | |
350 pixels += line_size; | |
351 block += 8; | |
0 | 352 } |
353 } | |
385 | 354 #if 0 |
355 | |
356 #define PIXOP2(OPNAME, OP) \ | |
651 | 357 static void OPNAME ## _pixels(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ |
385 | 358 {\ |
359 int i;\ | |
360 for(i=0; i<h; i++){\ | |
361 OP(*((uint64_t*)block), LD64(pixels));\ | |
362 pixels+=line_size;\ | |
363 block +=line_size;\ | |
364 }\ | |
365 }\ | |
366 \ | |
859 | 367 static void OPNAME ## _no_rnd_pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ |
385 | 368 {\ |
369 int i;\ | |
370 for(i=0; i<h; i++){\ | |
371 const uint64_t a= LD64(pixels );\ | |
372 const uint64_t b= LD64(pixels+1);\ | |
373 OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\ | |
374 pixels+=line_size;\ | |
375 block +=line_size;\ | |
376 }\ | |
377 }\ | |
378 \ | |
859 | 379 static void OPNAME ## _pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ |
385 | 380 {\ |
381 int i;\ | |
382 for(i=0; i<h; i++){\ | |
383 const uint64_t a= LD64(pixels );\ | |
384 const uint64_t b= LD64(pixels+1);\ | |
385 OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\ | |
386 pixels+=line_size;\ | |
387 block +=line_size;\ | |
388 }\ | |
389 }\ | |
390 \ | |
859 | 391 static void OPNAME ## _no_rnd_pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ |
385 | 392 {\ |
393 int i;\ | |
394 for(i=0; i<h; i++){\ | |
395 const uint64_t a= LD64(pixels );\ | |
396 const uint64_t b= LD64(pixels+line_size);\ | |
397 OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\ | |
398 pixels+=line_size;\ | |
399 block +=line_size;\ | |
400 }\ | |
401 }\ | |
402 \ | |
859 | 403 static void OPNAME ## _pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ |
385 | 404 {\ |
405 int i;\ | |
406 for(i=0; i<h; i++){\ | |
407 const uint64_t a= LD64(pixels );\ | |
408 const uint64_t b= LD64(pixels+line_size);\ | |
409 OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\ | |
410 pixels+=line_size;\ | |
411 block +=line_size;\ | |
412 }\ | |
413 }\ | |
414 \ | |
859 | 415 static void OPNAME ## _pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ |
385 | 416 {\ |
417 int i;\ | |
418 const uint64_t a= LD64(pixels );\ | |
419 const uint64_t b= LD64(pixels+1);\ | |
420 uint64_t l0= (a&0x0303030303030303ULL)\ | |
421 + (b&0x0303030303030303ULL)\ | |
422 + 0x0202020202020202ULL;\ | |
423 uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ | |
424 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ | |
425 uint64_t l1,h1;\ | |
426 \ | |
427 pixels+=line_size;\ | |
428 for(i=0; i<h; i+=2){\ | |
429 uint64_t a= LD64(pixels );\ | |
430 uint64_t b= LD64(pixels+1);\ | |
431 l1= (a&0x0303030303030303ULL)\ | |
432 + (b&0x0303030303030303ULL);\ | |
433 h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ | |
434 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ | |
435 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\ | |
436 pixels+=line_size;\ | |
437 block +=line_size;\ | |
438 a= LD64(pixels );\ | |
439 b= LD64(pixels+1);\ | |
440 l0= (a&0x0303030303030303ULL)\ | |
441 + (b&0x0303030303030303ULL)\ | |
442 + 0x0202020202020202ULL;\ | |
443 h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ | |
444 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ | |
445 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\ | |
446 pixels+=line_size;\ | |
447 block +=line_size;\ | |
448 }\ | |
449 }\ | |
450 \ | |
859 | 451 static void OPNAME ## _no_rnd_pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ |
385 | 452 {\ |
453 int i;\ | |
454 const uint64_t a= LD64(pixels );\ | |
455 const uint64_t b= LD64(pixels+1);\ | |
456 uint64_t l0= (a&0x0303030303030303ULL)\ | |
457 + (b&0x0303030303030303ULL)\ | |
458 + 0x0101010101010101ULL;\ | |
459 uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ | |
460 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ | |
461 uint64_t l1,h1;\ | |
462 \ | |
463 pixels+=line_size;\ | |
464 for(i=0; i<h; i+=2){\ | |
465 uint64_t a= LD64(pixels );\ | |
466 uint64_t b= LD64(pixels+1);\ | |
467 l1= (a&0x0303030303030303ULL)\ | |
468 + (b&0x0303030303030303ULL);\ | |
469 h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ | |
470 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ | |
471 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\ | |
472 pixels+=line_size;\ | |
473 block +=line_size;\ | |
474 a= LD64(pixels );\ | |
475 b= LD64(pixels+1);\ | |
476 l0= (a&0x0303030303030303ULL)\ | |
477 + (b&0x0303030303030303ULL)\ | |
478 + 0x0101010101010101ULL;\ | |
479 h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ | |
480 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ | |
481 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\ | |
482 pixels+=line_size;\ | |
483 block +=line_size;\ | |
484 }\ | |
485 }\ | |
486 \ | |
859 | 487 CALL_2X_PIXELS(OPNAME ## _pixels16_c , OPNAME ## _pixels_c , 8)\ |
488 CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels_x2_c , 8)\ | |
489 CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels_y2_c , 8)\ | |
490 CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels_xy2_c, 8)\ | |
491 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels_x2_c , 8)\ | |
492 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels_y2_c , 8)\ | |
493 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels_xy2_c, 8) | |
385 | 494 |
495 #define op_avg(a, b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEFEFEFEFEULL)>>1) ) | |
496 #else // 64 bit variant | |
497 | |
498 #define PIXOP2(OPNAME, OP) \ | |
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
499 static void OPNAME ## _pixels2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
500 int i;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
501 for(i=0; i<h; i++){\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
502 OP(*((uint16_t*)(block )), LD16(pixels ));\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
503 pixels+=line_size;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
504 block +=line_size;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
505 }\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
506 }\ |
1168 | 507 static void OPNAME ## _pixels4_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
508 int i;\ | |
509 for(i=0; i<h; i++){\ | |
510 OP(*((uint32_t*)(block )), LD32(pixels ));\ | |
511 pixels+=line_size;\ | |
512 block +=line_size;\ | |
513 }\ | |
514 }\ | |
859 | 515 static void OPNAME ## _pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
385 | 516 int i;\ |
517 for(i=0; i<h; i++){\ | |
518 OP(*((uint32_t*)(block )), LD32(pixels ));\ | |
519 OP(*((uint32_t*)(block+4)), LD32(pixels+4));\ | |
520 pixels+=line_size;\ | |
521 block +=line_size;\ | |
522 }\ | |
523 }\ | |
859 | 524 static inline void OPNAME ## _no_rnd_pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
525 OPNAME ## _pixels8_c(block, pixels, line_size, h);\ | |
651 | 526 }\ |
385 | 527 \ |
651 | 528 static inline void OPNAME ## _no_rnd_pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ |
529 int src_stride1, int src_stride2, int h){\ | |
385 | 530 int i;\ |
531 for(i=0; i<h; i++){\ | |
651 | 532 uint32_t a,b;\ |
533 a= LD32(&src1[i*src_stride1 ]);\ | |
534 b= LD32(&src2[i*src_stride2 ]);\ | |
1264 | 535 OP(*((uint32_t*)&dst[i*dst_stride ]), no_rnd_avg32(a, b));\ |
651 | 536 a= LD32(&src1[i*src_stride1+4]);\ |
537 b= LD32(&src2[i*src_stride2+4]);\ | |
1264 | 538 OP(*((uint32_t*)&dst[i*dst_stride+4]), no_rnd_avg32(a, b));\ |
385 | 539 }\ |
540 }\ | |
541 \ | |
651 | 542 static inline void OPNAME ## _pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ |
543 int src_stride1, int src_stride2, int h){\ | |
385 | 544 int i;\ |
545 for(i=0; i<h; i++){\ | |
651 | 546 uint32_t a,b;\ |
547 a= LD32(&src1[i*src_stride1 ]);\ | |
548 b= LD32(&src2[i*src_stride2 ]);\ | |
1264 | 549 OP(*((uint32_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\ |
651 | 550 a= LD32(&src1[i*src_stride1+4]);\ |
551 b= LD32(&src2[i*src_stride2+4]);\ | |
1264 | 552 OP(*((uint32_t*)&dst[i*dst_stride+4]), rnd_avg32(a, b));\ |
385 | 553 }\ |
554 }\ | |
555 \ | |
1168 | 556 static inline void OPNAME ## _pixels4_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ |
557 int src_stride1, int src_stride2, int h){\ | |
558 int i;\ | |
559 for(i=0; i<h; i++){\ | |
560 uint32_t a,b;\ | |
561 a= LD32(&src1[i*src_stride1 ]);\ | |
562 b= LD32(&src2[i*src_stride2 ]);\ | |
1264 | 563 OP(*((uint32_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\ |
1168 | 564 }\ |
565 }\ | |
566 \ | |
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
567 static inline void OPNAME ## _pixels2_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
568 int src_stride1, int src_stride2, int h){\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
569 int i;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
570 for(i=0; i<h; i++){\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
571 uint32_t a,b;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
572 a= LD16(&src1[i*src_stride1 ]);\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
573 b= LD16(&src2[i*src_stride2 ]);\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
574 OP(*((uint16_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
575 }\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
576 }\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
577 \ |
651 | 578 static inline void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ |
579 int src_stride1, int src_stride2, int h){\ | |
580 OPNAME ## _pixels8_l2(dst , src1 , src2 , dst_stride, src_stride1, src_stride2, h);\ | |
581 OPNAME ## _pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\ | |
582 }\ | |
583 \ | |
584 static inline void OPNAME ## _no_rnd_pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ | |
585 int src_stride1, int src_stride2, int h){\ | |
586 OPNAME ## _no_rnd_pixels8_l2(dst , src1 , src2 , dst_stride, src_stride1, src_stride2, h);\ | |
587 OPNAME ## _no_rnd_pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\ | |
588 }\ | |
589 \ | |
859 | 590 static inline void OPNAME ## _no_rnd_pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
651 | 591 OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\ |
592 }\ | |
593 \ | |
859 | 594 static inline void OPNAME ## _pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
651 | 595 OPNAME ## _pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\ |
596 }\ | |
597 \ | |
859 | 598 static inline void OPNAME ## _no_rnd_pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
651 | 599 OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\ |
600 }\ | |
601 \ | |
859 | 602 static inline void OPNAME ## _pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
651 | 603 OPNAME ## _pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\ |
385 | 604 }\ |
605 \ | |
651 | 606 static inline void OPNAME ## _pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\ |
607 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ | |
608 int i;\ | |
609 for(i=0; i<h; i++){\ | |
610 uint32_t a, b, c, d, l0, l1, h0, h1;\ | |
611 a= LD32(&src1[i*src_stride1]);\ | |
612 b= LD32(&src2[i*src_stride2]);\ | |
613 c= LD32(&src3[i*src_stride3]);\ | |
614 d= LD32(&src4[i*src_stride4]);\ | |
615 l0= (a&0x03030303UL)\ | |
616 + (b&0x03030303UL)\ | |
617 + 0x02020202UL;\ | |
618 h0= ((a&0xFCFCFCFCUL)>>2)\ | |
619 + ((b&0xFCFCFCFCUL)>>2);\ | |
620 l1= (c&0x03030303UL)\ | |
621 + (d&0x03030303UL);\ | |
622 h1= ((c&0xFCFCFCFCUL)>>2)\ | |
623 + ((d&0xFCFCFCFCUL)>>2);\ | |
624 OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ | |
625 a= LD32(&src1[i*src_stride1+4]);\ | |
626 b= LD32(&src2[i*src_stride2+4]);\ | |
627 c= LD32(&src3[i*src_stride3+4]);\ | |
628 d= LD32(&src4[i*src_stride4+4]);\ | |
629 l0= (a&0x03030303UL)\ | |
630 + (b&0x03030303UL)\ | |
631 + 0x02020202UL;\ | |
632 h0= ((a&0xFCFCFCFCUL)>>2)\ | |
633 + ((b&0xFCFCFCFCUL)>>2);\ | |
634 l1= (c&0x03030303UL)\ | |
635 + (d&0x03030303UL);\ | |
636 h1= ((c&0xFCFCFCFCUL)>>2)\ | |
637 + ((d&0xFCFCFCFCUL)>>2);\ | |
638 OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ | |
639 }\ | |
640 }\ | |
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
641 \ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
642 static inline void OPNAME ## _pixels4_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
643 OPNAME ## _pixels4_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
644 }\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
645 \ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
646 static inline void OPNAME ## _pixels4_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
647 OPNAME ## _pixels4_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
648 }\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
649 \ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
650 static inline void OPNAME ## _pixels2_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
651 OPNAME ## _pixels2_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
652 }\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
653 \ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
654 static inline void OPNAME ## _pixels2_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
655 OPNAME ## _pixels2_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
656 }\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
657 \ |
651 | 658 static inline void OPNAME ## _no_rnd_pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\ |
659 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ | |
385 | 660 int i;\ |
661 for(i=0; i<h; i++){\ | |
651 | 662 uint32_t a, b, c, d, l0, l1, h0, h1;\ |
663 a= LD32(&src1[i*src_stride1]);\ | |
664 b= LD32(&src2[i*src_stride2]);\ | |
665 c= LD32(&src3[i*src_stride3]);\ | |
666 d= LD32(&src4[i*src_stride4]);\ | |
667 l0= (a&0x03030303UL)\ | |
668 + (b&0x03030303UL)\ | |
669 + 0x01010101UL;\ | |
670 h0= ((a&0xFCFCFCFCUL)>>2)\ | |
671 + ((b&0xFCFCFCFCUL)>>2);\ | |
672 l1= (c&0x03030303UL)\ | |
673 + (d&0x03030303UL);\ | |
674 h1= ((c&0xFCFCFCFCUL)>>2)\ | |
675 + ((d&0xFCFCFCFCUL)>>2);\ | |
676 OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ | |
677 a= LD32(&src1[i*src_stride1+4]);\ | |
678 b= LD32(&src2[i*src_stride2+4]);\ | |
679 c= LD32(&src3[i*src_stride3+4]);\ | |
680 d= LD32(&src4[i*src_stride4+4]);\ | |
681 l0= (a&0x03030303UL)\ | |
682 + (b&0x03030303UL)\ | |
683 + 0x01010101UL;\ | |
684 h0= ((a&0xFCFCFCFCUL)>>2)\ | |
685 + ((b&0xFCFCFCFCUL)>>2);\ | |
686 l1= (c&0x03030303UL)\ | |
687 + (d&0x03030303UL);\ | |
688 h1= ((c&0xFCFCFCFCUL)>>2)\ | |
689 + ((d&0xFCFCFCFCUL)>>2);\ | |
690 OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ | |
385 | 691 }\ |
692 }\ | |
651 | 693 static inline void OPNAME ## _pixels16_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\ |
694 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ | |
695 OPNAME ## _pixels8_l4(dst , src1 , src2 , src3 , src4 , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\ | |
696 OPNAME ## _pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\ | |
697 }\ | |
698 static inline void OPNAME ## _no_rnd_pixels16_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\ | |
699 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ | |
700 OPNAME ## _no_rnd_pixels8_l4(dst , src1 , src2 , src3 , src4 , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\ | |
701 OPNAME ## _no_rnd_pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\ | |
702 }\ | |
385 | 703 \ |
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
704 static inline void OPNAME ## _pixels2_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
705 {\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
706 int i, a0, b0, a1, b1;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
707 a0= pixels[0];\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
708 b0= pixels[1] + 2;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
709 a0 += b0;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
710 b0 += pixels[2];\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
711 \ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
712 pixels+=line_size;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
713 for(i=0; i<h; i+=2){\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
714 a1= pixels[0];\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
715 b1= pixels[1];\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
716 a1 += b1;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
717 b1 += pixels[2];\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
718 \ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
719 block[0]= (a1+a0)>>2; /* FIXME non put */\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
720 block[1]= (b1+b0)>>2;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
721 \ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
722 pixels+=line_size;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
723 block +=line_size;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
724 \ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
725 a0= pixels[0];\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
726 b0= pixels[1] + 2;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
727 a0 += b0;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
728 b0 += pixels[2];\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
729 \ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
730 block[0]= (a1+a0)>>2;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
731 block[1]= (b1+b0)>>2;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
732 pixels+=line_size;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
733 block +=line_size;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
734 }\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
735 }\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
736 \ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
737 static inline void OPNAME ## _pixels4_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
738 {\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
739 int i;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
740 const uint32_t a= LD32(pixels );\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
741 const uint32_t b= LD32(pixels+1);\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
742 uint32_t l0= (a&0x03030303UL)\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
743 + (b&0x03030303UL)\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
744 + 0x02020202UL;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
745 uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
746 + ((b&0xFCFCFCFCUL)>>2);\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
747 uint32_t l1,h1;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
748 \ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
749 pixels+=line_size;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
750 for(i=0; i<h; i+=2){\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
751 uint32_t a= LD32(pixels );\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
752 uint32_t b= LD32(pixels+1);\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
753 l1= (a&0x03030303UL)\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
754 + (b&0x03030303UL);\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
755 h1= ((a&0xFCFCFCFCUL)>>2)\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
756 + ((b&0xFCFCFCFCUL)>>2);\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
757 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
758 pixels+=line_size;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
759 block +=line_size;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
760 a= LD32(pixels );\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
761 b= LD32(pixels+1);\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
762 l0= (a&0x03030303UL)\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
763 + (b&0x03030303UL)\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
764 + 0x02020202UL;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
765 h0= ((a&0xFCFCFCFCUL)>>2)\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
766 + ((b&0xFCFCFCFCUL)>>2);\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
767 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
768 pixels+=line_size;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
769 block +=line_size;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
770 }\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
771 }\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
772 \ |
859 | 773 static inline void OPNAME ## _pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ |
385 | 774 {\ |
775 int j;\ | |
776 for(j=0; j<2; j++){\ | |
777 int i;\ | |
778 const uint32_t a= LD32(pixels );\ | |
779 const uint32_t b= LD32(pixels+1);\ | |
780 uint32_t l0= (a&0x03030303UL)\ | |
781 + (b&0x03030303UL)\ | |
782 + 0x02020202UL;\ | |
783 uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\ | |
784 + ((b&0xFCFCFCFCUL)>>2);\ | |
785 uint32_t l1,h1;\ | |
786 \ | |
787 pixels+=line_size;\ | |
788 for(i=0; i<h; i+=2){\ | |
789 uint32_t a= LD32(pixels );\ | |
790 uint32_t b= LD32(pixels+1);\ | |
791 l1= (a&0x03030303UL)\ | |
792 + (b&0x03030303UL);\ | |
793 h1= ((a&0xFCFCFCFCUL)>>2)\ | |
794 + ((b&0xFCFCFCFCUL)>>2);\ | |
795 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ | |
796 pixels+=line_size;\ | |
797 block +=line_size;\ | |
798 a= LD32(pixels );\ | |
799 b= LD32(pixels+1);\ | |
800 l0= (a&0x03030303UL)\ | |
801 + (b&0x03030303UL)\ | |
802 + 0x02020202UL;\ | |
803 h0= ((a&0xFCFCFCFCUL)>>2)\ | |
804 + ((b&0xFCFCFCFCUL)>>2);\ | |
805 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ | |
806 pixels+=line_size;\ | |
807 block +=line_size;\ | |
808 }\ | |
809 pixels+=4-line_size*(h+1);\ | |
810 block +=4-line_size*h;\ | |
811 }\ | |
812 }\ | |
813 \ | |
859 | 814 static inline void OPNAME ## _no_rnd_pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ |
385 | 815 {\ |
816 int j;\ | |
817 for(j=0; j<2; j++){\ | |
818 int i;\ | |
819 const uint32_t a= LD32(pixels );\ | |
820 const uint32_t b= LD32(pixels+1);\ | |
821 uint32_t l0= (a&0x03030303UL)\ | |
822 + (b&0x03030303UL)\ | |
823 + 0x01010101UL;\ | |
824 uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\ | |
825 + ((b&0xFCFCFCFCUL)>>2);\ | |
826 uint32_t l1,h1;\ | |
827 \ | |
828 pixels+=line_size;\ | |
829 for(i=0; i<h; i+=2){\ | |
830 uint32_t a= LD32(pixels );\ | |
831 uint32_t b= LD32(pixels+1);\ | |
832 l1= (a&0x03030303UL)\ | |
833 + (b&0x03030303UL);\ | |
834 h1= ((a&0xFCFCFCFCUL)>>2)\ | |
835 + ((b&0xFCFCFCFCUL)>>2);\ | |
836 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ | |
837 pixels+=line_size;\ | |
838 block +=line_size;\ | |
839 a= LD32(pixels );\ | |
840 b= LD32(pixels+1);\ | |
841 l0= (a&0x03030303UL)\ | |
842 + (b&0x03030303UL)\ | |
843 + 0x01010101UL;\ | |
844 h0= ((a&0xFCFCFCFCUL)>>2)\ | |
845 + ((b&0xFCFCFCFCUL)>>2);\ | |
846 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ | |
847 pixels+=line_size;\ | |
848 block +=line_size;\ | |
849 }\ | |
850 pixels+=4-line_size*(h+1);\ | |
851 block +=4-line_size*h;\ | |
852 }\ | |
853 }\ | |
854 \ | |
859 | 855 CALL_2X_PIXELS(OPNAME ## _pixels16_c , OPNAME ## _pixels8_c , 8)\ |
856 CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels8_x2_c , 8)\ | |
857 CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels8_y2_c , 8)\ | |
858 CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels8_xy2_c, 8)\ | |
859 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_c , OPNAME ## _pixels8_c , 8)\ | |
860 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels8_x2_c , 8)\ | |
861 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels8_y2_c , 8)\ | |
862 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels8_xy2_c, 8)\ | |
651 | 863 |
1264 | 864 #define op_avg(a, b) a = rnd_avg32(a, b) |
385 | 865 #endif |
866 #define op_put(a, b) a = b | |
867 | |
868 PIXOP2(avg, op_avg) | |
869 PIXOP2(put, op_put) | |
870 #undef op_avg | |
871 #undef op_put | |
872 | |
/* Rounding byte averages used by the q-pel interpolation below.
 * Arguments are fully parenthesized so expression arguments
 * (e.g. avg2(x<<1, y)) expand with the intended precedence;
 * the previous form ((a+b+1)>>1) mis-associated such operands. */
#define avg2(a,b) (((a)+(b)+1)>>1)
#define avg4(a,b,c,d) (((a)+(b)+(c)+(d)+2)>>2)
875 | |
753 | 876 |
/**
 * One-vector global motion compensation (gmc1): bilinear interpolation
 * of an 8-pixel-wide block at a 1/16-pel position.
 *
 * @param dst     destination block
 * @param src     top-left source sample of the interpolation window
 * @param stride  line size of both dst and src
 * @param h       number of rows to produce
 * @param x16     horizontal fractional position, 0..16
 * @param y16     vertical fractional position, 0..16
 * @param rounder value added before the >>8 normalization
 */
static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder)
{
    /* bilinear weights; A+B+C+D == 256, hence the >>8 below */
    const int A = (16 - x16) * (16 - y16);
    const int B = (     x16) * (16 - y16);
    const int C = (16 - x16) * (     y16);
    const int D = (     x16) * (     y16);
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++) {
            dst[col] = (A * src[col]
                      + B * src[col + 1]
                      + C * src[stride + col]
                      + D * src[stride + col + 1]
                      + rounder) >> 8;
        }
        dst += stride;
        src += stride;
    }
}
899 | |
/**
 * Affine global motion compensation with edge clipping for an 8-wide block.
 *
 * The sample position advances by (dxx,dyx) per output column and by
 * (dxy,dyy) per output row, in 16.16 fixed point; `shift` selects the
 * sub-pel precision (s = 1<<shift taps per pel). Positions outside the
 * (width x height) source are clamped to the nearest edge sample, with
 * the interpolation degenerating to 1-D or nearest-neighbour there.
 *
 * @param r rounding constant applied before the >>(2*shift) normalization
 */
static void gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
                  int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
{
    const int s = 1 << shift;
    int y;

    /* convert to the largest valid coordinate for the clip() calls below */
    width--;
    height--;

    for (y = 0; y < h; y++) {
        int x;
        int vx = ox;
        int vy = oy;

        for (x = 0; x < 8; x++) { //XXX FIXME optimize
            int src_x = vx >> 16;
            int src_y = vy >> 16;
            /* fractional part in units of 1/s, taken before the pel shift */
            const int frac_x = src_x & (s - 1);
            const int frac_y = src_y & (s - 1);
            int index;

            src_x >>= shift;
            src_y >>= shift;

            if ((unsigned)src_x < width) {
                if ((unsigned)src_y < height) {
                    /* fully inside: 2-D bilinear interpolation */
                    index = src_x + src_y * stride;
                    dst[y*stride + x] = ( ( src[index         ] * (s - frac_x)
                                          + src[index       +1] *      frac_x ) * (s - frac_y)
                                        + ( src[index+stride  ] * (s - frac_x)
                                          + src[index+stride+1] *      frac_x ) *      frac_y
                                        + r) >> (shift * 2);
                } else {
                    /* clipped vertically: horizontal-only interpolation */
                    index = src_x + clip(src_y, 0, height) * stride;
                    dst[y*stride + x] = ( ( src[index  ] * (s - frac_x)
                                          + src[index+1] *      frac_x ) * s
                                        + r) >> (shift * 2);
                }
            } else {
                if ((unsigned)src_y < height) {
                    /* clipped horizontally: vertical-only interpolation */
                    index = clip(src_x, 0, width) + src_y * stride;
                    dst[y*stride + x] = ( ( src[index       ] * (s - frac_y)
                                          + src[index+stride] *      frac_y ) * s
                                        + r) >> (shift * 2);
                } else {
                    /* clipped in both directions: nearest edge sample */
                    index = clip(src_x, 0, width) + clip(src_y, 0, height) * stride;
                    dst[y*stride + x] = src[index];
                }
            }

            vx += dxx;
            vy += dyx;
        }
        ox += dxy;
        oy += dyy;
    }
}
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
957 |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
/* Integer-position ("mc00") thirdpel copy: no interpolation needed, so
 * dispatch on the block width to the plain pixel-copy routines. Widths
 * other than 2/4/8/16 are not expected and are silently ignored. */
static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    if (width == 2)
        put_pixels2_c(dst, src, stride, height);
    else if (width == 4)
        put_pixels4_c(dst, src, stride, height);
    else if (width == 8)
        put_pixels8_c(dst, src, stride, height);
    else if (width == 16)
        put_pixels16_c(dst, src, stride, height);
}
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
966 |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
/**
 * Thirdpel MC, horizontal offset 1/3: dst = round((2*a + b)/3) for the two
 * horizontal neighbours a,b.  683/2048 is the fixed-point approximation of 1/3.
 */
static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for(y = 0; y < height; y++){
        for(x = 0; x < width; x++)
            dst[x] = (683*(2*src[x] + src[x+1] + 1)) >> 11;
        dst += stride;
        src += stride;
    }
}
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
977 |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
/**
 * Thirdpel MC, horizontal offset 2/3: dst = round((a + 2*b)/3) for the two
 * horizontal neighbours a,b (mirror of the mc10 case).
 */
static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row = height;
    while(row-- > 0){
        int x;
        for(x = 0; x < width; x++)
            dst[x] = (683*(src[x] + 2*src[x+1] + 1)) >> 11;
        dst += stride;
        src += stride;
    }
}
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
988 |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
/**
 * Thirdpel MC, vertical offset 1/3: dst = round((2*a + b)/3) where a is the
 * current sample and b the one directly below it.
 */
static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for(y = 0; y < height; y++){
        for(x = 0; x < width; x++)
            dst[x] = (683*(2*src[x] + src[x+stride] + 1)) >> 11;
        dst += stride;
        src += stride;
    }
}
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
999 |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
/**
 * Thirdpel MC, offset (1/3,1/3): blend of the 2x2 neighbourhood with
 * weights 4/3/3/2 (sum 12); 2731/32768 approximates 1/12.
 */
static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row = height;
    while(row-- > 0){
        int x;
        for(x = 0; x < width; x++)
            dst[x] = (2731*(4*src[x] + 3*src[x+1] + 3*src[x+stride] + 2*src[x+stride+1] + 6)) >> 15;
        dst += stride;
        src += stride;
    }
}
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1010 |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
/**
 * Thirdpel MC, offset (1/3,2/3): blend of the 2x2 neighbourhood with
 * weights 3/2/4/3 (sum 12); 2731/32768 approximates 1/12.
 */
static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for(y = 0; y < height; y++){
        for(x = 0; x < width; x++)
            dst[x] = (2731*(3*src[x] + 2*src[x+1] + 4*src[x+stride] + 3*src[x+stride+1] + 6)) >> 15;
        dst += stride;
        src += stride;
    }
}
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1021 |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
/**
 * Thirdpel MC, vertical offset 2/3: dst = round((a + 2*b)/3) where a is the
 * current sample and b the one directly below it (mirror of the mc01 case).
 */
static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row = height;
    while(row-- > 0){
        int x;
        for(x = 0; x < width; x++)
            dst[x] = (683*(src[x] + 2*src[x+stride] + 1)) >> 11;
        dst += stride;
        src += stride;
    }
}
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1032 |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
/**
 * Thirdpel MC, offset (2/3,1/3): blend of the 2x2 neighbourhood with
 * weights 3/4/2/3 (sum 12); 2731/32768 approximates 1/12.
 */
static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for(y = 0; y < height; y++){
        for(x = 0; x < width; x++)
            dst[x] = (2731*(3*src[x] + 4*src[x+1] + 2*src[x+stride] + 3*src[x+stride+1] + 6)) >> 15;
        dst += stride;
        src += stride;
    }
}
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1043 |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
/**
 * Thirdpel MC, offset (2/3,2/3): blend of the 2x2 neighbourhood with
 * weights 2/3/3/4 (sum 12); 2731/32768 approximates 1/12.
 */
static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row = height;
    while(row-- > 0){
        int x;
        for(x = 0; x < width; x++)
            dst[x] = (2731*(2*src[x] + 3*src[x+1] + 3*src[x+stride] + 4*src[x+stride+1] + 6)) >> 15;
        dst += stride;
        src += stride;
    }
}
1319 | 1054 |
/**
 * Thirdpel MC with averaging, offset (0,0): rounded average of dst with an
 * unfiltered copy, dispatched to the fixed-width averaging helper.
 * Widths other than 2/4/8/16 are silently ignored, as before.
 */
static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    if     (width ==  2) avg_pixels2_c (dst, src, stride, height);
    else if(width ==  4) avg_pixels4_c (dst, src, stride, height);
    else if(width ==  8) avg_pixels8_c (dst, src, stride, height);
    else if(width == 16) avg_pixels16_c(dst, src, stride, height);
}
1063 | |
/**
 * Thirdpel MC with averaging, horizontal offset 1/3: dst is replaced by the
 * rounded average of dst and round((2*a + b)/3) of the horizontal neighbours.
 */
static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for(y = 0; y < height; y++){
        for(x = 0; x < width; x++){
            const int p = (683*(2*src[x] + src[x+1] + 1)) >> 11;
            dst[x] = (dst[x] + p + 1) >> 1;
        }
        dst += stride;
        src += stride;
    }
}
1074 | |
/**
 * Thirdpel MC with averaging, horizontal offset 2/3: dst is replaced by the
 * rounded average of dst and round((a + 2*b)/3) of the horizontal neighbours.
 */
static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for(y = 0; y < height; y++){
        for(x = 0; x < width; x++){
            const int p = (683*(src[x] + 2*src[x+1] + 1)) >> 11;
            dst[x] = (dst[x] + p + 1) >> 1;
        }
        dst += stride;
        src += stride;
    }
}
1085 | |
/**
 * Thirdpel MC with averaging, vertical offset 1/3: rounded average of dst
 * with round((2*a + b)/3), b being the sample one row below a.
 */
static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for(y = 0; y < height; y++){
        for(x = 0; x < width; x++){
            const int p = (683*(2*src[x] + src[x+stride] + 1)) >> 11;
            dst[x] = (dst[x] + p + 1) >> 1;
        }
        dst += stride;
        src += stride;
    }
}
1096 | |
/**
 * Thirdpel MC with averaging, offset (1/3,1/3): rounded average of dst with
 * the 2x2 blend weighted 4/3/3/2 (2731/32768 ~= 1/12).
 */
static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for(y = 0; y < height; y++){
        for(x = 0; x < width; x++){
            const int p = (2731*(4*src[x] + 3*src[x+1] + 3*src[x+stride] + 2*src[x+stride+1] + 6)) >> 15;
            dst[x] = (dst[x] + p + 1) >> 1;
        }
        dst += stride;
        src += stride;
    }
}
1107 | |
/**
 * Thirdpel MC with averaging, offset (1/3,2/3): rounded average of dst with
 * the 2x2 blend weighted 3/2/4/3 (2731/32768 ~= 1/12).
 */
static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for(y = 0; y < height; y++){
        for(x = 0; x < width; x++){
            const int p = (2731*(3*src[x] + 2*src[x+1] + 4*src[x+stride] + 3*src[x+stride+1] + 6)) >> 15;
            dst[x] = (dst[x] + p + 1) >> 1;
        }
        dst += stride;
        src += stride;
    }
}
1118 | |
/**
 * Thirdpel MC with averaging, vertical offset 2/3: rounded average of dst
 * with round((a + 2*b)/3), b being the sample one row below a.
 */
static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for(y = 0; y < height; y++){
        for(x = 0; x < width; x++){
            const int p = (683*(src[x] + 2*src[x+stride] + 1)) >> 11;
            dst[x] = (dst[x] + p + 1) >> 1;
        }
        dst += stride;
        src += stride;
    }
}
1129 | |
/**
 * Thirdpel MC with averaging, offset (2/3,1/3): rounded average of dst with
 * the 2x2 blend weighted 3/4/2/3 (2731/32768 ~= 1/12).
 */
static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for(y = 0; y < height; y++){
        for(x = 0; x < width; x++){
            const int p = (2731*(3*src[x] + 4*src[x+1] + 2*src[x+stride] + 3*src[x+stride+1] + 6)) >> 15;
            dst[x] = (dst[x] + p + 1) >> 1;
        }
        dst += stride;
        src += stride;
    }
}
1140 | |
/**
 * Thirdpel MC with averaging, offset (2/3,2/3): rounded average of dst with
 * the 2x2 blend weighted 2/3/3/4 (2731/32768 ~= 1/12).
 */
static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for(y = 0; y < height; y++){
        for(x = 0; x < width; x++){
            const int p = (2731*(2*src[x] + 3*src[x+1] + 3*src[x+stride] + 4*src[x+stride+1] + 6)) >> 15;
            dst[x] = (dst[x] + p + 1) >> 1;
        }
        dst += stride;
        src += stride;
    }
}
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
#if 0
/* Disabled: per-width tpel wrapper generator.  Never compiled (note the
   stray 'void' in front of each forwarded call); kept for reference only. */
#define TPEL_WIDTH(width)\
static void put_tpel_pixels ## width ## _mc00_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    void put_tpel_pixels_mc00_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc10_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    void put_tpel_pixels_mc10_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc20_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    void put_tpel_pixels_mc20_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc01_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    void put_tpel_pixels_mc01_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc11_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    void put_tpel_pixels_mc11_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc21_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    void put_tpel_pixels_mc21_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc02_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    void put_tpel_pixels_mc02_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc12_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    void put_tpel_pixels_mc12_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc22_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    void put_tpel_pixels_mc22_c(dst, src, stride, width, height);}
#endif
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1172 |
/**
 * H.264 chroma motion compensation: 2x2 bilinear interpolation.
 * Expands to OPNAME##h264_chroma_mc{2,4,8}_c for widths 2, 4 and 8.
 * x and y are the fractional sample positions in 1/8-pel units (0..7,
 * asserted below); the four weights A..D always sum to 64, and OP is the
 * store macro (see op_put/op_avg below) that applies the (+32)>>6 rounding.
 */
#define H264_CHROMA_MC(OPNAME, OP)\
static void OPNAME ## h264_chroma_mc2_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
    const int A=(8-x)*(8-y);\
    const int B=(  x)*(8-y);\
    const int C=(8-x)*(  y);\
    const int D=(  x)*(  y);\
    int i;\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
    \
    for(i=0; i<h; i++)\
    {\
      OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
      OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
      dst+= stride;\
      src+= stride;\
    }\
}\
\
static void OPNAME ## h264_chroma_mc4_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
    const int A=(8-x)*(8-y);\
    const int B=(  x)*(8-y);\
    const int C=(8-x)*(  y);\
    const int D=(  x)*(  y);\
    int i;\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
    \
    for(i=0; i<h; i++)\
    {\
      OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
      OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
      OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
      OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
      dst+= stride;\
      src+= stride;\
    }\
}\
\
static void OPNAME ## h264_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
    const int A=(8-x)*(8-y);\
    const int B=(  x)*(8-y);\
    const int C=(8-x)*(  y);\
    const int D=(  x)*(  y);\
    int i;\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
    \
    for(i=0; i<h; i++)\
    {\
      OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
      OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
      OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
      OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
      OP(dst[4], (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5]));\
      OP(dst[5], (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6]));\
      OP(dst[6], (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7]));\
      OP(dst[7], (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8]));\
      dst+= stride;\
      src+= stride;\
    }\
}
1235 | |
/* Store macros for H264_CHROMA_MC: the weighted sum b carries a 6-bit
   fraction, so both apply (+32)>>6 rounding; op_avg additionally takes the
   rounded average with the existing dst value. */
#define op_avg(a, b) a = (((a)+(((b) + 32)>>6)+1)>>1)
#define op_put(a, b) a = (((b) + 32)>>6)

H264_CHROMA_MC(put_ , op_put)
H264_CHROMA_MC(avg_ , op_avg)
#undef op_avg
#undef op_put
1243 | |
/* Copy a 4-pixel-wide block: one 32-bit load/store per row for h rows. */
static inline void copy_block4(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
{
    while(h-- > 0)
    {
        ST32(dst, LD32(src));
        dst += dstStride;
        src += srcStride;
    }
}
1254 | |
/* Copy an 8-pixel-wide block: two 32-bit load/stores per row for h rows. */
static inline void copy_block8(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
{
    while(h-- > 0)
    {
        ST32(dst  , LD32(src  ));
        ST32(dst+4, LD32(src+4));
        dst += dstStride;
        src += srcStride;
    }
}
1266 | |
/**
 * Copy an h-row block, 16 bytes per row, between differently-strided
 * buffers.  Each row is moved as four 32-bit LD32/ST32 pairs.
 */
static inline void copy_block16(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
{
    while (h-- > 0) {
        ST32(dst,      LD32(src));
        ST32(dst +  4, LD32(src +  4));
        ST32(dst +  8, LD32(src +  8));
        ST32(dst + 12, LD32(src + 12));
        dst += dstStride;
        src += srcStride;
    }
}
753 | 1280 |
/**
 * Copy an h-row block, 17 bytes per row (16x16 qpel source plus one
 * extra column for the interpolation filter): four 32-bit LD32/ST32
 * pairs per row plus a single trailing byte.
 */
static inline void copy_block17(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
{
    while (h-- > 0) {
        ST32(dst,      LD32(src));
        ST32(dst +  4, LD32(src +  4));
        ST32(dst +  8, LD32(src +  8));
        ST32(dst + 12, LD32(src + 12));
        dst[16] = src[16];
        dst += dstStride;
        src += srcStride;
    }
}
1295 | |
/**
 * Copy an h-row block, 9 bytes per row (8x8 qpel source plus one extra
 * column for the interpolation filter): two 32-bit LD32/ST32 pairs per
 * row plus a single trailing byte.
 */
static inline void copy_block9(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
{
    while (h-- > 0) {
        ST32(dst,     LD32(src));
        ST32(dst + 4, LD32(src + 4));
        dst[8] = src[8];
        dst += dstStride;
        src += srcStride;
    }
}
1308 | |
954 | 1309 |
/**
 * QPEL_MC() - generate one complete set of MPEG-4 quarter-pel
 * motion-compensation C functions.
 *
 * Macro parameters:
 *   r      - rounding constant; unused by these C versions (kept so the
 *            instantiations stay symmetric with optimized variants)
 *   OPNAME - prefix of the generated functions: put_, put_no_rnd_, avg_
 *   RND    - infix selecting the rounding mode of the intermediate
 *            half-pel "put" passes: _ (rounding) or _no_rnd_
 *   OP     - per-pixel store macro; clips the filtered value through the
 *            cropTbl pointer "cm" declared in each lowpass function
 *
 * Naming: OPNAME qpelN_mcXY_c() produces an NxN block interpolated at
 * quarter-pel offset (X/4, Y/4).  Half-pel planes are built with the
 * 8-tap (20,-6,3,-1) lowpass filter, horizontally (halfH), vertically
 * (halfV) or both (halfHV); quarter-pel positions then average two
 * planes with pixelsN_l2().  At the block edges the filter taps are
 * mirrored back into the block (note the deliberately repeated
 * src[8]/src[16] terms in the lowpass bodies) instead of reading past
 * the end -- do not "fix" those indices.
 * The full/copy_blockN buffers add one guard column/row so the vertical
 * filter has its 9th (resp. 17th) input sample available.
 * The non-static ff_*_old_c variants compute diagonal positions as a
 * 4-plane average via pixelsN_l4(); they are not referenced here and are
 * presumably kept for external/comparison use -- verify callers before
 * touching them.
 */
#define QPEL_MC(r, OPNAME, RND, OP) \
static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
    uint8_t *cm = cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<h; i++)\
    {\
        OP(dst[0], (src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]));\
        OP(dst[1], (src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]));\
        OP(dst[2], (src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]));\
        OP(dst[3], (src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]));\
        OP(dst[4], (src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]));\
        OP(dst[5], (src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]));\
        OP(dst[6], (src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]));\
        OP(dst[7], (src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]));\
        dst+=dstStride;\
        src+=srcStride;\
    }\
}\
\
static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int w=8;\
    uint8_t *cm = cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<w; i++)\
    {\
        const int src0= src[0*srcStride];\
        const int src1= src[1*srcStride];\
        const int src2= src[2*srcStride];\
        const int src3= src[3*srcStride];\
        const int src4= src[4*srcStride];\
        const int src5= src[5*srcStride];\
        const int src6= src[6*srcStride];\
        const int src7= src[7*srcStride];\
        const int src8= src[8*srcStride];\
        OP(dst[0*dstStride], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\
        OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\
        OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\
        OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\
        OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\
        OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\
        OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\
        OP(dst[7*dstStride], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\
        dst++;\
        src++;\
    }\
}\
\
static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
    uint8_t *cm = cropTbl + MAX_NEG_CROP;\
    int i;\
    \
    for(i=0; i<h; i++)\
    {\
        OP(dst[ 0], (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]));\
        OP(dst[ 1], (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]));\
        OP(dst[ 2], (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]));\
        OP(dst[ 3], (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]));\
        OP(dst[ 4], (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]));\
        OP(dst[ 5], (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 9]));\
        OP(dst[ 6], (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 9])*3 - (src[ 3]+src[10]));\
        OP(dst[ 7], (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 9])*6 + (src[ 5]+src[10])*3 - (src[ 4]+src[11]));\
        OP(dst[ 8], (src[ 8]+src[ 9])*20 - (src[ 7]+src[10])*6 + (src[ 6]+src[11])*3 - (src[ 5]+src[12]));\
        OP(dst[ 9], (src[ 9]+src[10])*20 - (src[ 8]+src[11])*6 + (src[ 7]+src[12])*3 - (src[ 6]+src[13]));\
        OP(dst[10], (src[10]+src[11])*20 - (src[ 9]+src[12])*6 + (src[ 8]+src[13])*3 - (src[ 7]+src[14]));\
        OP(dst[11], (src[11]+src[12])*20 - (src[10]+src[13])*6 + (src[ 9]+src[14])*3 - (src[ 8]+src[15]));\
        OP(dst[12], (src[12]+src[13])*20 - (src[11]+src[14])*6 + (src[10]+src[15])*3 - (src[ 9]+src[16]));\
        OP(dst[13], (src[13]+src[14])*20 - (src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]));\
        OP(dst[14], (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]));\
        OP(dst[15], (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]));\
        dst+=dstStride;\
        src+=srcStride;\
    }\
}\
\
static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    uint8_t *cm = cropTbl + MAX_NEG_CROP;\
    int i;\
    const int w=16;\
    for(i=0; i<w; i++)\
    {\
        const int src0= src[0*srcStride];\
        const int src1= src[1*srcStride];\
        const int src2= src[2*srcStride];\
        const int src3= src[3*srcStride];\
        const int src4= src[4*srcStride];\
        const int src5= src[5*srcStride];\
        const int src6= src[6*srcStride];\
        const int src7= src[7*srcStride];\
        const int src8= src[8*srcStride];\
        const int src9= src[9*srcStride];\
        const int src10= src[10*srcStride];\
        const int src11= src[11*srcStride];\
        const int src12= src[12*srcStride];\
        const int src13= src[13*srcStride];\
        const int src14= src[14*srcStride];\
        const int src15= src[15*srcStride];\
        const int src16= src[16*srcStride];\
        OP(dst[ 0*dstStride], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\
        OP(dst[ 1*dstStride], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\
        OP(dst[ 2*dstStride], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\
        OP(dst[ 3*dstStride], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\
        OP(dst[ 4*dstStride], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\
        OP(dst[ 5*dstStride], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\
        OP(dst[ 6*dstStride], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\
        OP(dst[ 7*dstStride], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\
        OP(dst[ 8*dstStride], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\
        OP(dst[ 9*dstStride], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\
        OP(dst[10*dstStride], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\
        OP(dst[11*dstStride], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\
        OP(dst[12*dstStride], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\
        OP(dst[13*dstStride], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\
        OP(dst[14*dstStride], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\
        OP(dst[15*dstStride], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\
        dst++;\
        src++;\
    }\
}\
\
static void OPNAME ## qpel8_mc00_c (uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## pixels8_c(dst, src, stride, 8);\
}\
\
static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t half[64];\
    put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
    OPNAME ## pixels8_l2(dst, src, half, stride, stride, 8, 8);\
}\
\
static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\
}\
\
static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t half[64];\
    put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
    OPNAME ## pixels8_l2(dst, src+1, half, stride, stride, 8, 8);\
}\
\
static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t half[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
    OPNAME ## pixels8_l2(dst, full, half, stride, 16, 8, 8);\
}\
\
static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    copy_block9(full, src, 16, stride, 9);\
    OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\
}\
\
static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t half[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
    OPNAME ## pixels8_l2(dst, full+16, half, stride, 16, 8, 8);\
}\
void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfV[64];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l4(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
}\
void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfV[64];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l4(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
}\
void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfV[64];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l4(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
}\
void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfV[64];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full  , 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l4(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t halfH[72];\
    uint8_t halfHV[64];\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t halfH[72];\
    uint8_t halfHV[64];\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
}\
void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfV[64];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
    OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
}\
void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfV[64];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
    OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
}\
static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t halfH[72];\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
    OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
}\
static void OPNAME ## qpel16_mc00_c (uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## pixels16_c(dst, src, stride, 16);\
}\
\
static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t half[256];\
    put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
    OPNAME ## pixels16_l2(dst, src, half, stride, stride, 16, 16);\
}\
\
static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\
}\
\
static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t half[256];\
    put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
    OPNAME ## pixels16_l2(dst, src+1, half, stride, stride, 16, 16);\
}\
\
static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t half[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
    OPNAME ## pixels16_l2(dst, full, half, stride, 24, 16, 16);\
}\
\
static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    copy_block17(full, src, 24, stride, 17);\
    OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\
}\
\
static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t half[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
    OPNAME ## pixels16_l2(dst, full+24, half, stride, 24, 16, 16);\
}\
void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfV[256];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l4(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
}\
void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfV[256];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l4(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
}\
void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfV[256];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l4(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
}\
void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfV[256];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full  , 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l4(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t halfH[272];\
    uint8_t halfHV[256];\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t halfH[272];\
    uint8_t halfHV[256];\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
}\
void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfV[256];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
    OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
}\
void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfV[256];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
    OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
}\
static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t halfH[272];\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
    OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
}
255 | 1792 |
/* Store macros for the qpel lowpass filters: the 8-tap coefficients
 * (20,-6,3,-1 applied to symmetric pairs) sum to 32, hence the >>5 with
 * +16 for rounding or +15 for the no-rounding variants; cm clips to
 * [0,255].  op_avg additionally averages with the destination pixel. */
#define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
#define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1)
#define op_put(a, b) a = cm[((b) + 16)>>5]
#define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5]

/* instantiate put_*, put_no_rnd_* and avg_* qpel function sets */
QPEL_MC(0, put_       , _       , op_put)
QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd)
QPEL_MC(0, avg_       , _       , op_avg)
//QPEL_MC(1, avg_no_rnd , _       , op_avg)
#undef op_avg
#undef op_avg_no_rnd
#undef op_put
#undef op_put_no_rnd
255 | 1806 |
1168 | 1807 #if 1 |
1808 #define H264_LOWPASS(OPNAME, OP, OP2) \ | |
1809 static void OPNAME ## h264_qpel4_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ | |
1810 const int h=4;\ | |
1811 uint8_t *cm = cropTbl + MAX_NEG_CROP;\ | |
1812 int i;\ | |
1813 for(i=0; i<h; i++)\ | |
1814 {\ | |
1815 OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\ | |
1816 OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\ | |
1817 OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]));\ | |
1818 OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]));\ | |
1819 dst+=dstStride;\ | |
1820 src+=srcStride;\ | |
1821 }\ | |
1822 }\ | |
1823 \ | |
1824 static void OPNAME ## h264_qpel4_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ | |
1825 const int w=4;\ | |
1826 uint8_t *cm = cropTbl + MAX_NEG_CROP;\ | |
1827 int i;\ | |
1828 for(i=0; i<w; i++)\ | |
1829 {\ | |
1830 const int srcB= src[-2*srcStride];\ | |
1831 const int srcA= src[-1*srcStride];\ | |
1832 const int src0= src[0 *srcStride];\ | |
1833 const int src1= src[1 *srcStride];\ | |
1834 const int src2= src[2 *srcStride];\ | |
1835 const int src3= src[3 *srcStride];\ | |
1836 const int src4= src[4 *srcStride];\ | |
1837 const int src5= src[5 *srcStride];\ | |
1838 const int src6= src[6 *srcStride];\ | |
1839 OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\ | |
1840 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\ | |
1841 OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\ | |
1842 OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\ | |
1843 dst++;\ | |
1844 src++;\ | |
1845 }\ | |
1846 }\ | |
1847 \ | |
1848 static void OPNAME ## h264_qpel4_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\ | |
1849 const int h=4;\ | |
1850 const int w=4;\ | |
1851 uint8_t *cm = cropTbl + MAX_NEG_CROP;\ | |
1852 int i;\ | |
1853 src -= 2*srcStride;\ | |
1854 for(i=0; i<h+5; i++)\ | |
1855 {\ | |
1856 tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]);\ | |
1857 tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]);\ | |
1858 tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]);\ | |
1859 tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]);\ | |
1860 tmp+=tmpStride;\ | |
1861 src+=srcStride;\ | |
1862 }\ | |
1863 tmp -= tmpStride*(h+5-2);\ | |
1864 for(i=0; i<w; i++)\ | |
1865 {\ | |
1866 const int tmpB= tmp[-2*tmpStride];\ | |
1867 const int tmpA= tmp[-1*tmpStride];\ | |
1868 const int tmp0= tmp[0 *tmpStride];\ | |
1869 const int tmp1= tmp[1 *tmpStride];\ | |
1870 const int tmp2= tmp[2 *tmpStride];\ | |
1871 const int tmp3= tmp[3 *tmpStride];\ | |
1872 const int tmp4= tmp[4 *tmpStride];\ | |
1873 const int tmp5= tmp[5 *tmpStride];\ | |
1874 const int tmp6= tmp[6 *tmpStride];\ | |
1875 OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\ | |
1876 OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\ | |
1877 OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\ | |
1878 OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\ | |
1879 dst++;\ | |
1880 tmp++;\ | |
1881 }\ | |
1882 }\ | |
1883 \ | |
/* Horizontal 6-tap H.264 half-pel filter for an 8-wide block of h rows;
   OP rounds (+16>>5) and clips through the crop table. */\
static void OPNAME ## h264_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int h=8;\
    uint8_t *cm = cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<h; i++)\
    {\
        OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]));\
        OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]));\
        OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]));\
        OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]));\
        OP(dst[4], (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]));\
        OP(dst[5], (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]));\
        OP(dst[6], (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]));\
        OP(dst[7], (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]));\
        dst+=dstStride;\
        src+=srcStride;\
    }\
}\
1902 \ | |
/* Vertical 6-tap H.264 half-pel filter for an 8-row column strip of width w;
   reads 2 rows above and 3 below the block. */\
static void OPNAME ## h264_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int w=8;\
    uint8_t *cm = cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<w; i++)\
    {\
        const int srcB= src[-2*srcStride];\
        const int srcA= src[-1*srcStride];\
        const int src0= src[0 *srcStride];\
        const int src1= src[1 *srcStride];\
        const int src2= src[2 *srcStride];\
        const int src3= src[3 *srcStride];\
        const int src4= src[4 *srcStride];\
        const int src5= src[5 *srcStride];\
        const int src6= src[6 *srcStride];\
        const int src7= src[7 *srcStride];\
        const int src8= src[8 *srcStride];\
        const int src9= src[9 *srcStride];\
        const int src10=src[10*srcStride];\
        OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
        OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
        OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
        OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
        OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*5 + (src2+src7));\
        OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*5 + (src3+src8));\
        OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*5 + (src4+src9));\
        OP(dst[7*dstStride], (src7+src8)*20 - (src6+src9)*5 + (src5+src10));\
        dst++;\
        src++;\
    }\
}\
1934 \ | |
/* 2D (horizontal then vertical) 6-tap filter for an 8x8 block, like the 4x4
   variant above but with 8 columns; tmp holds h+5 intermediate rows. */\
static void OPNAME ## h264_qpel8_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    const int h=8;\
    const int w=8;\
    uint8_t *cm = cropTbl + MAX_NEG_CROP;\
    int i;\
    src -= 2*srcStride; /* vertical pass needs 2 rows above and 3 below */\
    for(i=0; i<h+5; i++)\
    {\
        tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]);\
        tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]);\
        tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]);\
        tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]);\
        tmp[4]= (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]);\
        tmp[5]= (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]);\
        tmp[6]= (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]);\
        tmp[7]= (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]);\
        tmp+=tmpStride;\
        src+=srcStride;\
    }\
    tmp -= tmpStride*(h+5-2); /* rewind so tmp[0] is the first in-block row */\
    for(i=0; i<w; i++)\
    {\
        const int tmpB= tmp[-2*tmpStride];\
        const int tmpA= tmp[-1*tmpStride];\
        const int tmp0= tmp[0 *tmpStride];\
        const int tmp1= tmp[1 *tmpStride];\
        const int tmp2= tmp[2 *tmpStride];\
        const int tmp3= tmp[3 *tmpStride];\
        const int tmp4= tmp[4 *tmpStride];\
        const int tmp5= tmp[5 *tmpStride];\
        const int tmp6= tmp[6 *tmpStride];\
        const int tmp7= tmp[7 *tmpStride];\
        const int tmp8= tmp[8 *tmpStride];\
        const int tmp9= tmp[9 *tmpStride];\
        const int tmp10=tmp[10*tmpStride];\
        OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
        OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
        OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\
        OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\
        OP2(dst[4*dstStride], (tmp4+tmp5)*20 - (tmp3+tmp6)*5 + (tmp2+tmp7));\
        OP2(dst[5*dstStride], (tmp5+tmp6)*20 - (tmp4+tmp7)*5 + (tmp3+tmp8));\
        OP2(dst[6*dstStride], (tmp6+tmp7)*20 - (tmp5+tmp8)*5 + (tmp4+tmp9));\
        OP2(dst[7*dstStride], (tmp7+tmp8)*20 - (tmp6+tmp9)*5 + (tmp5+tmp10));\
        dst++;\
        tmp++;\
    }\
}\
1982 \ | |
/* 16x16 vertical filter built from the four 8x8 quadrants */\
static void OPNAME ## h264_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    OPNAME ## h264_qpel8_v_lowpass(dst  , src  , dstStride, srcStride);\
    OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\
    src += 8*srcStride;\
    dst += 8*dstStride;\
    OPNAME ## h264_qpel8_v_lowpass(dst  , src  , dstStride, srcStride);\
    OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\
}\
1991 \ | |
/* 16x16 horizontal filter built from the four 8x8 quadrants */\
static void OPNAME ## h264_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    OPNAME ## h264_qpel8_h_lowpass(dst  , src  , dstStride, srcStride);\
    OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\
    src += 8*srcStride;\
    dst += 8*dstStride;\
    OPNAME ## h264_qpel8_h_lowpass(dst  , src  , dstStride, srcStride);\
    OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\
}\
2000 \ | |
/* 16x16 2D filter built from the four 8x8 quadrants */\
static void OPNAME ## h264_qpel16_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    OPNAME ## h264_qpel8_hv_lowpass(dst  , tmp  , src  , dstStride, tmpStride, srcStride);\
    OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\
    src += 8*srcStride;\
    tmp += 8*tmpStride;\
    dst += 8*dstStride;\
    OPNAME ## h264_qpel8_hv_lowpass(dst  , tmp  , src  , dstStride, tmpStride, srcStride);\
    OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\
}\
2010 | |
/* H264_MC(OPNAME, SIZE) instantiates the 16 quarter-pel motion compensation
   functions for one block size.  _mcXY_c handles the sub-pel position with
   horizontal phase X and vertical phase Y (quarter pixels): half-pel planes
   come from the *_lowpass 6-tap filters above, and quarter-pel positions are
   the rounded average (via *pixels*_l2) of the two nearest planes.
   `full` is a copy of the source extended by the 5 extra rows the vertical
   filter needs; full_mid points at the first in-block row of it. */
#define H264_MC(OPNAME, SIZE) \
static void OPNAME ## h264_qpel ## SIZE ## _mc00_c (uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## pixels ## SIZE ## _c(dst, src, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc10_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t half[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, src, half, stride, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc20_c(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## h264_qpel ## SIZE ## _h_lowpass(dst, src, stride, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc30_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t half[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, src+1, half, stride, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc01_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t half[SIZE*SIZE];\
    copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, full_mid, half, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc02_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
    OPNAME ## h264_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc03_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t half[SIZE*SIZE];\
    copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, full_mid+SIZE, half, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc11_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
    copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc31_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
    copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc13_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
    copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc33_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
    copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc22_c(uint8_t *dst, uint8_t *src, int stride){\
    int16_t tmp[SIZE*(SIZE+5)];\
    OPNAME ## h264_qpel ## SIZE ## _hv_lowpass(dst, tmp, src, stride, SIZE, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc21_c(uint8_t *dst, uint8_t *src, int stride){\
    int16_t tmp[SIZE*(SIZE+5)];\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfHV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc23_c(uint8_t *dst, uint8_t *src, int stride){\
    int16_t tmp[SIZE*(SIZE+5)];\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfHV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc12_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    int16_t tmp[SIZE*(SIZE+5)];\
    uint8_t halfV[SIZE*SIZE];\
    uint8_t halfHV[SIZE*SIZE];\
    copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc32_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    int16_t tmp[SIZE*(SIZE+5)];\
    uint8_t halfV[SIZE*SIZE];\
    uint8_t halfHV[SIZE*SIZE];\
    copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\
}\
2147 | |
/* rounding/clipping write macros plugged into the templates above:
   op_put/op_avg follow a single 1D filter pass (+16 >> 5), while
   op2_put/op2_avg follow the 2D pass, whose intermediate carries an
   extra factor of 32 (+512 >> 10) */
#define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
//#define op_avg2(a, b) a = (((a)*w1+cm[((b) + 16)>>5]*w2 + o + 64)>>7)
#define op_put(a, b) a = cm[((b) + 16)>>5]
#define op2_avg(a, b) a = (((a)+cm[((b) + 512)>>10]+1)>>1)
#define op2_put(a, b) a = cm[((b) + 512)>>10]

/* instantiate the real C functions for both write modes and all block sizes */
H264_LOWPASS(put_ , op_put, op2_put)
H264_LOWPASS(avg_ , op_avg, op2_avg)
H264_MC(put_, 4)
H264_MC(put_, 8)
H264_MC(put_, 16)
H264_MC(avg_, 4)
H264_MC(avg_, 8)
H264_MC(avg_, 16)

#undef op_avg
#undef op_put
#undef op2_avg
#undef op2_put
#endif /* closes a conditional opened before this chunk (not visible here) */
2168 | |
936 | 2169 static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){ |
2170 uint8_t *cm = cropTbl + MAX_NEG_CROP; | |
2171 int i; | |
2172 | |
2173 for(i=0; i<h; i++){ | |
2174 dst[0]= cm[(9*(src[0] + src[1]) - (src[-1] + src[2]) + 8)>>4]; | |
2175 dst[1]= cm[(9*(src[1] + src[2]) - (src[ 0] + src[3]) + 8)>>4]; | |
2176 dst[2]= cm[(9*(src[2] + src[3]) - (src[ 1] + src[4]) + 8)>>4]; | |
2177 dst[3]= cm[(9*(src[3] + src[4]) - (src[ 2] + src[5]) + 8)>>4]; | |
2178 dst[4]= cm[(9*(src[4] + src[5]) - (src[ 3] + src[6]) + 8)>>4]; | |
2179 dst[5]= cm[(9*(src[5] + src[6]) - (src[ 4] + src[7]) + 8)>>4]; | |
2180 dst[6]= cm[(9*(src[6] + src[7]) - (src[ 5] + src[8]) + 8)>>4]; | |
2181 dst[7]= cm[(9*(src[7] + src[8]) - (src[ 6] + src[9]) + 8)>>4]; | |
2182 dst+=dstStride; | |
2183 src+=srcStride; | |
2184 } | |
2185 } | |
2186 | |
2187 static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){ | |
2188 uint8_t *cm = cropTbl + MAX_NEG_CROP; | |
2189 int i; | |
2190 | |
2191 for(i=0; i<w; i++){ | |
2192 const int src_1= src[ -srcStride]; | |
2193 const int src0 = src[0 ]; | |
2194 const int src1 = src[ srcStride]; | |
2195 const int src2 = src[2*srcStride]; | |
2196 const int src3 = src[3*srcStride]; | |
2197 const int src4 = src[4*srcStride]; | |
2198 const int src5 = src[5*srcStride]; | |
2199 const int src6 = src[6*srcStride]; | |
2200 const int src7 = src[7*srcStride]; | |
2201 const int src8 = src[8*srcStride]; | |
2202 const int src9 = src[9*srcStride]; | |
2203 dst[0*dstStride]= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4]; | |
2204 dst[1*dstStride]= cm[(9*(src1 + src2) - (src0 + src3) + 8)>>4]; | |
2205 dst[2*dstStride]= cm[(9*(src2 + src3) - (src1 + src4) + 8)>>4]; | |
2206 dst[3*dstStride]= cm[(9*(src3 + src4) - (src2 + src5) + 8)>>4]; | |
2207 dst[4*dstStride]= cm[(9*(src4 + src5) - (src3 + src6) + 8)>>4]; | |
2208 dst[5*dstStride]= cm[(9*(src5 + src6) - (src4 + src7) + 8)>>4]; | |
2209 dst[6*dstStride]= cm[(9*(src6 + src7) - (src5 + src8) + 8)>>4]; | |
2210 dst[7*dstStride]= cm[(9*(src7 + src8) - (src6 + src9) + 8)>>4]; | |
2211 src++; | |
2212 dst++; | |
2213 } | |
2214 } | |
2215 | |
/* WMV2 mspel motion compensation wrappers; _mcXY selects the horizontal (X)
   and vertical (Y) sub-pel phase.  Full-pel copy: */
static void put_mspel8_mc00_c (uint8_t *dst, uint8_t *src, int stride){
    put_pixels8_c(dst, src, stride, 8);
}

/* quarter-pel: average of the source and the h-filtered half-pel plane */
static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t half[64];
    wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
    put_pixels8_l2(dst, src, half, stride, stride, 8, 8);
}

/* horizontal half-pel: filter straight into dst */
static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){
    wmv2_mspel8_h_lowpass(dst, src, stride, stride, 8);
}

/* quarter-pel: average of the right neighbour and the half-pel plane */
static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t half[64];
    wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
    put_pixels8_l2(dst, src+1, half, stride, stride, 8, 8);
}

/* vertical half-pel: filter straight into dst */
static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){
    wmv2_mspel8_v_lowpass(dst, src, stride, stride, 8);
}
2239 | |
/* diagonal positions: halfH holds 11 h-filtered rows (one above, two below
   the block) so the vertical filter can run over it; the result is averaged
   with the plain vertically filtered plane */
static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t halfH[88];   /* 8x11 intermediate */
    uint8_t halfV[64];
    uint8_t halfHV[64];
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(halfV, src, 8, stride, 8);
    wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);  /* halfH+8 skips the extra top row */
    put_pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);
}
/* as mc12 but the vertical-only plane is taken one pixel to the right */
static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t halfH[88];
    uint8_t halfV[64];
    uint8_t halfHV[64];
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(halfV, src+1, 8, stride, 8);
    wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
    put_pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);
}
/* centre half-pel: horizontal pass then vertical pass, no averaging */
static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t halfH[88];
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(dst, halfH+8, stride, 8, 8);
}
2263 | |
/**
 * H.263 deblocking filter across a horizontal block edge.
 * src points at the first row BELOW the edge; for each of the 8 columns the
 * two pixels on either side of the edge are adjusted, with strength derived
 * from the quantiser via ff_h263_loop_filter_strength[].
 */
static void h263_v_loop_filter_c(uint8_t *src, int stride, int qscale){
    int x;
    const int strength= ff_h263_loop_filter_strength[qscale];

    for(x=0; x<8; x++){
        int d1, d2, ad1;
        int p0= src[x-2*stride];
        int p1= src[x-1*stride];
        int p2= src[x+0*stride];
        int p3= src[x+1*stride];
        int d = (p0 - p3 + 4*(p2 - p1)) / 8;  /* weighted gradient across the edge */

        /* ramp: pass small deltas through unchanged, fade larger ones back to 0 */
        if     (d<-2*strength) d1= 0;
        else if(d<-  strength) d1=-2*strength - d;
        else if(d<   strength) d1= d;
        else if(d< 2*strength) d1= 2*strength - d;
        else                   d1= 0;

        p1 += d1;
        p2 -= d1;
        /* clamp to 0..255: after the adjustment, bit 8 is set exactly when the
           value left [0,255]; ~(p>>31) is 0 for negatives and -1 (byte 0xFF,
           i.e. 255 once stored) for overflows */
        if(p1&256) p1= ~(p1>>31);
        if(p2&256) p2= ~(p2>>31);

        src[x-1*stride] = p1;
        src[x+0*stride] = p2;

        ad1= ABS(d1)>>1;

        /* weaker secondary correction of the outer pixel pair */
        d2= clip((p0-p3)/4, -ad1, ad1);

        src[x-2*stride] = p0 - d2;
        src[x+ stride] = p3 + d2;
    }
}
2298 | |
/**
 * H.263 deblocking filter across a vertical block edge.
 * Mirror of h263_v_loop_filter_c: src points at the first column RIGHT of
 * the edge and the filter runs over 8 rows.
 */
static void h263_h_loop_filter_c(uint8_t *src, int stride, int qscale){
    int y;
    const int strength= ff_h263_loop_filter_strength[qscale];

    for(y=0; y<8; y++){
        int d1, d2, ad1;
        int p0= src[y*stride-2];
        int p1= src[y*stride-1];
        int p2= src[y*stride+0];
        int p3= src[y*stride+1];
        int d = (p0 - p3 + 4*(p2 - p1)) / 8;  /* weighted gradient across the edge */

        /* ramp: pass small deltas through unchanged, fade larger ones back to 0 */
        if     (d<-2*strength) d1= 0;
        else if(d<-  strength) d1=-2*strength - d;
        else if(d<   strength) d1= d;
        else if(d< 2*strength) d1= 2*strength - d;
        else                   d1= 0;

        p1 += d1;
        p2 -= d1;
        /* branchless clamp to 0..255 (see h263_v_loop_filter_c) */
        if(p1&256) p1= ~(p1>>31);
        if(p2&256) p2= ~(p2>>31);

        src[y*stride-1] = p1;
        src[y*stride+0] = p2;

        ad1= ABS(d1)>>1;

        /* weaker secondary correction of the outer pixel pair */
        d2= clip((p0-p3)/4, -ad1, ad1);

        src[y*stride-2] = p0 - d2;
        src[y*stride+1] = p3 + d2;
    }
}
936 | 2333 |
/* sum of absolute differences over a 16-pixel-wide block of h rows */
static inline int pix_abs16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sum = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 16; col++)
            sum += abs(pix1[col] - pix2[col]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return sum;
}
2361 | |
/* SAD against the horizontally half-pel interpolated reference
   (avg2 of each pixel and its right neighbour; reads pix2[16]) */
static int pix_abs16_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sum = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 16; col++)
            sum += abs(pix1[col] - avg2(pix2[col], pix2[col+1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return sum;
}
2389 | |
/* SAD against the vertically half-pel interpolated reference
   (avg2 of each pixel and the one directly below it) */
static int pix_abs16_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *pix3 = pix2 + line_size;
    int sum = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 16; col++)
            sum += abs(pix1[col] - avg2(pix2[col], pix3[col]));
        pix1 += line_size;
        pix2 += line_size;
        pix3 += line_size;
    }
    return sum;
}
2419 | |
/* SAD against the diagonally half-pel interpolated reference
   (avg4 of the surrounding 2x2 pixels; reads one extra column and row) */
static int pix_abs16_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *pix3 = pix2 + line_size;
    int sum = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 16; col++)
            sum += abs(pix1[col] - avg4(pix2[col], pix2[col+1], pix3[col], pix3[col+1]));
        pix1 += line_size;
        pix2 += line_size;
        pix3 += line_size;
    }
    return sum;
}
2449 | |
/* sum of absolute differences over an 8-pixel-wide block of h rows */
static inline int pix_abs8_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sum = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++)
            sum += abs(pix1[col] - pix2[col]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return sum;
}
2469 | |
/* 8-wide SAD against the horizontally half-pel interpolated reference
   (avg2 of each pixel and its right neighbour; reads pix2[8]) */
static int pix_abs8_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sum = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++)
            sum += abs(pix1[col] - avg2(pix2[col], pix2[col+1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return sum;
}
2489 | |
/* 8-wide SAD against the vertically half-pel interpolated reference */
static int pix_abs8_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *pix3 = pix2 + line_size;
    int sum = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++)
            sum += abs(pix1[col] - avg2(pix2[col], pix3[col]));
        pix1 += line_size;
        pix2 += line_size;
        pix3 += line_size;
    }
    return sum;
}
2511 | |
/* 8-wide SAD against the diagonally half-pel interpolated reference
   (avg4 of the surrounding 2x2 pixels; reads one extra column and row) */
static int pix_abs8_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *pix3 = pix2 + line_size;
    int sum = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++)
            sum += abs(pix1[col] - avg4(pix2[col], pix2[col+1], pix3[col], pix3[col+1]));
        pix1 += line_size;
        pix2 += line_size;
        pix3 += line_size;
    }
    return sum;
}
2533 | |
1100 | 2534 /** |
2535 * permutes an 8x8 block. | |
1101 | 2536 * @param block the block which will be permuted according to the given permutation vector |
1100 | 2537 * @param permutation the permutation vector |
2538 * @param last the last non zero coefficient in scantable order, used to speed the permutation up | |
1101 | 2539 * @param scantable the used scantable, this is only used to speed the permutation up, the block is not |
2540 * (inverse) permutated to scantable order! | |
1100 | 2541 */ |
1064 | 2542 void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last) |
174
ac5075a55488
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
88
diff
changeset
|
2543 { |
764 | 2544 int i; |
945 | 2545 DCTELEM temp[64]; |
764 | 2546 |
2547 if(last<=0) return; | |
882 | 2548 //if(permutation[1]==1) return; //FIXME its ok but not clean and might fail for some perms |
174
ac5075a55488
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
88
diff
changeset
|
2549 |
764 | 2550 for(i=0; i<=last; i++){ |
2551 const int j= scantable[i]; | |
2552 temp[j]= block[j]; | |
2553 block[j]=0; | |
2554 } | |
2555 | |
2556 for(i=0; i<=last; i++){ | |
2557 const int j= scantable[i]; | |
2558 const int perm_j= permutation[j]; | |
2559 block[perm_j]= temp[j]; | |
2560 } | |
174
ac5075a55488
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
88
diff
changeset
|
2561 } |
34 | 2562 |
1101 | 2563 /** |
2564 * memset(blocks, 0, sizeof(DCTELEM)*6*64) | |
2565 */ | |
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
764
diff
changeset
|
2566 static void clear_blocks_c(DCTELEM *blocks) |
296 | 2567 { |
2568 memset(blocks, 0, sizeof(DCTELEM)*6*64); | |
2569 } | |
2570 | |
/* dst[i] += src[i] for 0 <= i < w; byte arithmetic wraps modulo 256 */
static void add_bytes_c(uint8_t *dst, uint8_t *src, int w){
    int i = 0;

    /* bulk: eight additions per iteration */
    for(; i <= w - 8; i += 8){
        dst[i  ] += src[i  ];
        dst[i+1] += src[i+1];
        dst[i+2] += src[i+2];
        dst[i+3] += src[i+3];
        dst[i+4] += src[i+4];
        dst[i+5] += src[i+5];
        dst[i+6] += src[i+6];
        dst[i+7] += src[i+7];
    }
    /* tail: remaining w % 8 bytes */
    for(; i < w; i++)
        dst[i] += src[i];
}
2586 | |
/* dst[i] = src1[i] - src2[i] for 0 <= i < w; byte arithmetic wraps modulo 256 */
static void diff_bytes_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
    int i = 0;

    /* bulk: eight differences per iteration */
    for(; i <= w - 8; i += 8){
        dst[i  ] = src1[i  ] - src2[i  ];
        dst[i+1] = src1[i+1] - src2[i+1];
        dst[i+2] = src1[i+2] - src2[i+2];
        dst[i+3] = src1[i+3] - src2[i+3];
        dst[i+4] = src1[i+4] - src2[i+4];
        dst[i+5] = src1[i+5] - src2[i+5];
        dst[i+6] = src1[i+6] - src2[i+6];
        dst[i+7] = src1[i+7] - src2[i+7];
    }
    /* tail: remaining w % 8 bytes */
    for(; i < w; i++)
        dst[i] = src1[i] - src2[i];
}
2602 | |
/**
 * HuffYUV median-prediction residual: dst[i] = src2[i] - pred, where pred
 * is the median of left, top (src1[i]) and left+top-topleft (mod 256).
 * left/left_top carry the running context in and out across calls.
 */
static void sub_hfyu_median_prediction_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w, int *left, int *left_top){
    int i;
    uint8_t left_val = *left;
    uint8_t topleft  = *left_top;

    for (i = 0; i < w; i++) {
        const int pred = mid_pred(left_val, src1[i], (left_val + src1[i] - topleft) & 0xFF);
        topleft  = src1[i];  /* current top becomes next top-left */
        left_val = src2[i];  /* current sample becomes next left */
        dst[i]   = left_val - pred;
    }

    *left     = left_val;
    *left_top = topleft;
}
2620 | |
/* 2-point butterfly: write sum and difference of i1,i2 into o1,o2 */
#define BUTTERFLY2(o1,o2,i1,i2) \
    o1= (i1)+(i2);\
    o2= (i1)-(i2);

/* in-place 2-point butterfly: (x,y) <- (x+y, x-y) */
#define BUTTERFLY1(x,y) \
{\
    int a,b;\
    a= x;\
    b= y;\
    x= a+b;\
    y= a-b;\
}

/* sum of absolute values of the butterfly outputs of x,y */
#define BUTTERFLYA(x,y) (ABS((x)+(y)) + ABS((x)-(y)))
2635 | |
/**
 * SATD: sum of absolute Hadamard-transform coefficients of the 8x8
 * difference between src and dst.  h must be 8; the first argument is
 * unused and only present for cmp-function signature compatibility.
 */
static int hadamard8_diff8x8_c(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){
    int row, col;
    int temp[64];
    int sum = 0;

    assert(h==8);

    /* horizontal pass: 8-point butterfly network on each row of src-dst */
    for (row = 0; row < 8; row++) {
        //FIXME try pointer walks
        BUTTERFLY2(temp[8*row+0], temp[8*row+1], src[stride*row+0]-dst[stride*row+0], src[stride*row+1]-dst[stride*row+1]);
        BUTTERFLY2(temp[8*row+2], temp[8*row+3], src[stride*row+2]-dst[stride*row+2], src[stride*row+3]-dst[stride*row+3]);
        BUTTERFLY2(temp[8*row+4], temp[8*row+5], src[stride*row+4]-dst[stride*row+4], src[stride*row+5]-dst[stride*row+5]);
        BUTTERFLY2(temp[8*row+6], temp[8*row+7], src[stride*row+6]-dst[stride*row+6], src[stride*row+7]-dst[stride*row+7]);

        BUTTERFLY1(temp[8*row+0], temp[8*row+2]);
        BUTTERFLY1(temp[8*row+1], temp[8*row+3]);
        BUTTERFLY1(temp[8*row+4], temp[8*row+6]);
        BUTTERFLY1(temp[8*row+5], temp[8*row+7]);

        BUTTERFLY1(temp[8*row+0], temp[8*row+4]);
        BUTTERFLY1(temp[8*row+1], temp[8*row+5]);
        BUTTERFLY1(temp[8*row+2], temp[8*row+6]);
        BUTTERFLY1(temp[8*row+3], temp[8*row+7]);
    }

    /* vertical pass on each column, accumulating |coefficient| in the
       final butterfly stage */
    for (col = 0; col < 8; col++) {
        BUTTERFLY1(temp[8*0+col], temp[8*1+col]);
        BUTTERFLY1(temp[8*2+col], temp[8*3+col]);
        BUTTERFLY1(temp[8*4+col], temp[8*5+col]);
        BUTTERFLY1(temp[8*6+col], temp[8*7+col]);

        BUTTERFLY1(temp[8*0+col], temp[8*2+col]);
        BUTTERFLY1(temp[8*1+col], temp[8*3+col]);
        BUTTERFLY1(temp[8*4+col], temp[8*6+col]);
        BUTTERFLY1(temp[8*5+col], temp[8*7+col]);

        sum += BUTTERFLYA(temp[8*0+col], temp[8*4+col])
             + BUTTERFLYA(temp[8*1+col], temp[8*5+col])
             + BUTTERFLYA(temp[8*2+col], temp[8*6+col])
             + BUTTERFLYA(temp[8*3+col], temp[8*7+col]);
    }

    return sum;
}
2687 | |
/**
 * Sum of absolute Hadamard-transform coefficients of one 8x8 block of src
 * after subtracting 'mean' from every sample.
 */
static int hadamard8_abs_c(uint8_t *src, int stride, int mean){
    int row, col;
    int temp[64];
    int sum = 0;

    //FIXME OOOPS ignore 0 term instead of mean mess
    /* horizontal pass: 8-point butterfly network on each mean-removed row */
    for (row = 0; row < 8; row++) {
        //FIXME try pointer walks
        BUTTERFLY2(temp[8*row+0], temp[8*row+1], src[stride*row+0]-mean, src[stride*row+1]-mean);
        BUTTERFLY2(temp[8*row+2], temp[8*row+3], src[stride*row+2]-mean, src[stride*row+3]-mean);
        BUTTERFLY2(temp[8*row+4], temp[8*row+5], src[stride*row+4]-mean, src[stride*row+5]-mean);
        BUTTERFLY2(temp[8*row+6], temp[8*row+7], src[stride*row+6]-mean, src[stride*row+7]-mean);

        BUTTERFLY1(temp[8*row+0], temp[8*row+2]);
        BUTTERFLY1(temp[8*row+1], temp[8*row+3]);
        BUTTERFLY1(temp[8*row+4], temp[8*row+6]);
        BUTTERFLY1(temp[8*row+5], temp[8*row+7]);

        BUTTERFLY1(temp[8*row+0], temp[8*row+4]);
        BUTTERFLY1(temp[8*row+1], temp[8*row+5]);
        BUTTERFLY1(temp[8*row+2], temp[8*row+6]);
        BUTTERFLY1(temp[8*row+3], temp[8*row+7]);
    }

    /* vertical pass on each column, accumulating |coefficient| */
    for (col = 0; col < 8; col++) {
        BUTTERFLY1(temp[8*0+col], temp[8*1+col]);
        BUTTERFLY1(temp[8*2+col], temp[8*3+col]);
        BUTTERFLY1(temp[8*4+col], temp[8*5+col]);
        BUTTERFLY1(temp[8*6+col], temp[8*7+col]);

        BUTTERFLY1(temp[8*0+col], temp[8*2+col]);
        BUTTERFLY1(temp[8*1+col], temp[8*3+col]);
        BUTTERFLY1(temp[8*4+col], temp[8*6+col]);
        BUTTERFLY1(temp[8*5+col], temp[8*7+col]);

        sum += BUTTERFLYA(temp[8*0+col], temp[8*4+col])
             + BUTTERFLYA(temp[8*1+col], temp[8*5+col])
             + BUTTERFLYA(temp[8*2+col], temp[8*6+col])
             + BUTTERFLYA(temp[8*3+col], temp[8*7+col]);
    }

    return sum;
}
2731 | |
1708 | 2732 static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){ |
936 | 2733 MpegEncContext * const s= (MpegEncContext *)c; |
1016 | 2734 uint64_t __align8 aligned_temp[sizeof(DCTELEM)*64/8]; |
2735 DCTELEM * const temp= (DCTELEM*)aligned_temp; | |
936 | 2736 int sum=0, i; |
1708 | 2737 |
2738 assert(h==8); | |
936 | 2739 |
2740 s->dsp.diff_pixels(temp, src1, src2, stride); | |
1092 | 2741 s->dsp.fdct(temp); |
936 | 2742 |
2743 for(i=0; i<64; i++) | |
2744 sum+= ABS(temp[i]); | |
2745 | |
2746 return sum; | |
2747 } | |
2748 | |
1008 | 2749 void simple_idct(DCTELEM *block); //FIXME |
936 | 2750 |
/**
 * Quantization-noise metric: sum of squared differences between the
 * difference block and its quantize -> dequantize -> IDCT round trip.
 * h must be 8.  NOTE: forces s->mb_intra=0 as a side effect and always
 * uses the inter quantizer.
 */
static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    /* one 8-byte aligned scratch area holding two 64-coefficient blocks */
    uint64_t __align8 aligned_temp[sizeof(DCTELEM)*64*2/8];
    DCTELEM * const temp= (DCTELEM*)aligned_temp;
    DCTELEM * const bak = ((DCTELEM*)aligned_temp)+64;  /* pristine copy */
    int sum=0, i;

    assert(h==8);
    s->mb_intra=0;

    s->dsp.diff_pixels(temp, src1, src2, stride);

    memcpy(bak, temp, 64*sizeof(DCTELEM));

    /* quantize, dequantize and transform back; i receives the overflow count
       output parameter of fast_dct_quantize */
    s->block_last_index[0/*FIXME*/]= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
    s->dct_unquantize_inter(s, temp, 0, s->qscale);
    simple_idct(temp); //FIXME

    /* squared error against the original difference block */
    for(i=0; i<64; i++)
        sum+= (temp[i]-bak[i])*(temp[i]-bak[i]);

    return sum;
}
2774 | |
/**
 * Rate-distortion metric for one 8x8 block: quantizes the difference
 * between src1 and src2, counts the VLC bits needed to code it, then
 * reconstructs and measures the SSE against src1.  Returns
 * distortion + lambda*bits with lambda derived from qscale.  h must be 8.
 */
static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    const uint8_t *scantable= s->intra_scantable.permutated;
    uint64_t __align8 aligned_temp[sizeof(DCTELEM)*64/8];
    uint64_t __align8 aligned_bak[stride];  /* VLA: 8 rows of 'stride' bytes */
    DCTELEM * const temp= (DCTELEM*)aligned_temp;
    uint8_t * const bak= (uint8_t*)aligned_bak;
    int i, last, run, bits, level, distoration, start_i;
    const int esc_length= s->ac_esc_length;
    uint8_t * length;
    uint8_t * last_length;

    assert(h==8);

    /* save the prediction block; idct_add will reconstruct into it */
    for(i=0; i<8; i++){
        ((uint32_t*)(bak + i*stride))[0]= ((uint32_t*)(src2 + i*stride))[0];
        ((uint32_t*)(bak + i*stride))[1]= ((uint32_t*)(src2 + i*stride))[1];
    }

    s->dsp.diff_pixels(temp, src1, src2, stride);

    s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);

    bits=0;

    /* pick the VLC tables matching the macroblock type; intra also pays
       for the DC coefficient */
    if (s->mb_intra) {
        start_i = 1;
        length     = s->intra_ac_vlc_length;
        last_length= s->intra_ac_vlc_last_length;
        bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
    } else {
        start_i = 0;
        length     = s->inter_ac_vlc_length;
        last_length= s->inter_ac_vlc_last_length;
    }

    if(last>=start_i){
        /* count run/level VLC bits for all coefficients before the last one;
           level+64 biases into the table index range, |level|>63 escapes */
        run=0;
        for(i=start_i; i<last; i++){
            int j= scantable[i];
            level= temp[j];

            if(level){
                level+=64;
                if((level&(~127)) == 0){
                    bits+= length[UNI_AC_ENC_INDEX(run, level)];
                }else
                    bits+= esc_length;
                run=0;
            }else
                run++;
        }
        /* the final coefficient uses the "last" VLC table */
        i= scantable[last];

        level= temp[i] + 64;

        assert(level - 64);  /* last coefficient must be non-zero */

        if((level&(~127)) == 0){
            bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
        }else
            bits+= esc_length;

    }

    /* reconstruct: dequantize and add the IDCT result onto the saved block */
    if(last>=0){
        if(s->mb_intra)
            s->dct_unquantize_intra(s, temp, 0, s->qscale);
        else
            s->dct_unquantize_inter(s, temp, 0, s->qscale);
    }

    s->dsp.idct_add(bak, stride, temp);

    /* distortion (sic) = 8x8 SSE between reconstruction and source */
    distoration= s->dsp.sse[1](NULL, bak, src1, stride, 8);

    /* rate term scaled by qscale^2; 109/128 ~ presumably a tuned lambda
       constant — TODO confirm against the encoder's RD lambda */
    return distoration + ((bits*s->qscale*s->qscale*109 + 64)>>7);
}
2853 | |
/**
 * Rate-only metric for one 8x8 block: quantizes the difference between
 * src1 and src2 and returns the number of VLC bits needed to code the
 * quantized coefficients.  h must be 8.
 */
static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    const uint8_t *scantable= s->intra_scantable.permutated;
    uint64_t __align8 aligned_temp[sizeof(DCTELEM)*64/8];
    DCTELEM * const temp= (DCTELEM*)aligned_temp;
    int i, last, run, bits, level, start_i;
    const int esc_length= s->ac_esc_length;
    uint8_t * length;
    uint8_t * last_length;

    assert(h==8);

    s->dsp.diff_pixels(temp, src1, src2, stride);

    s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);

    bits=0;

    /* pick the VLC tables matching the macroblock type; intra also pays
       for the DC coefficient */
    if (s->mb_intra) {
        start_i = 1;
        length     = s->intra_ac_vlc_length;
        last_length= s->intra_ac_vlc_last_length;
        bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
    } else {
        start_i = 0;
        length     = s->inter_ac_vlc_length;
        last_length= s->inter_ac_vlc_last_length;
    }

    if(last>=start_i){
        /* count run/level VLC bits for all coefficients before the last one;
           level+64 biases into the table index range, |level|>63 escapes */
        run=0;
        for(i=start_i; i<last; i++){
            int j= scantable[i];
            level= temp[j];

            if(level){
                level+=64;
                if((level&(~127)) == 0){
                    bits+= length[UNI_AC_ENC_INDEX(run, level)];
                }else
                    bits+= esc_length;
                run=0;
            }else
                run++;
        }
        /* the final coefficient uses the "last" VLC table */
        i= scantable[last];

        level= temp[i] + 64;

        assert(level - 64);  /* last coefficient must be non-zero */

        if((level&(~127)) == 0){
            bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
        }else
            bits+= esc_length;
    }

    return bits;
}
2913 | |
/* Build the 16x16 comparison functions from the 8x8 ones via the
   WARPER8_16_SQ wrapper macro (sums the four 8x8 quadrant results). */
WARPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c)
WARPER8_16_SQ(dct_sad8x8_c, dct_sad16_c)
WARPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c)
WARPER8_16_SQ(rd8x8_c, rd16_c)
WARPER8_16_SQ(bit8x8_c, bit16_c)
936 | 2919 |
/* XXX: those functions should be suppressed ASAP when all IDCTs are
   converted */
/* reference (jpeg) IDCT, then store the clamped result into dest */
static void ff_jref_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct (block);
    put_pixels_clamped_c(block, dest, line_size);
}
/* reference (jpeg) IDCT, then add the clamped result onto dest */
static void ff_jref_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct (block);
    add_pixels_clamped_c(block, dest, line_size);
}
2932 | |
1201 | 2933 /* init static data */ |
2934 void dsputil_static_init(void) | |
0 | 2935 { |
751 | 2936 int i; |
0 | 2937 |
1201 | 2938 for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i; |
2939 for(i=0;i<MAX_NEG_CROP;i++) { | |
2940 cropTbl[i] = 0; | |
2941 cropTbl[i + MAX_NEG_CROP + 256] = 255; | |
2942 } | |
2943 | |
2944 for(i=0;i<512;i++) { | |
2945 squareTbl[i] = (i - 256) * (i - 256); | |
2946 } | |
2947 | |
2948 for(i=0; i<64; i++) inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1; | |
2949 } | |
0 | 2950 |
861 | 2951 |
/**
 * Fills a DSPContext with the portable C implementations, then lets the
 * platform-specific initializers override entries where optimized
 * versions exist, and finally builds the IDCT coefficient permutation
 * table matching the selected IDCT.
 * @param c     DSPContext to initialize
 * @param avctx codec context; dct_algo/idct_algo select the (I)DCT
 */
void dsputil_init(DSPContext* c, AVCodecContext *avctx)
{
    int i;

#ifdef CONFIG_ENCODERS
    /* forward DCT selection (encoder only) */
    if(avctx->dct_algo==FF_DCT_FASTINT) {
        c->fdct = fdct_ifast;
        c->fdct248 = fdct_ifast248;
    }
    else if(avctx->dct_algo==FF_DCT_FAAN) {
        c->fdct = ff_faandct;
        c->fdct248 = ff_faandct248;
    }
    else {
        c->fdct = ff_jpeg_fdct_islow; //slow/accurate/default
        c->fdct248 = ff_fdct248_islow;
    }
#endif //CONFIG_ENCODERS

    /* inverse DCT selection; the permutation type records which
       coefficient order the chosen IDCT expects (see switch below) */
    if(avctx->idct_algo==FF_IDCT_INT){
        c->idct_put= ff_jref_idct_put;
        c->idct_add= ff_jref_idct_add;
        c->idct    = j_rev_dct;
        c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
    }else{ //accurate/default
        c->idct_put= simple_idct_put;
        c->idct_add= simple_idct_add;
        c->idct    = simple_idct;
        c->idct_permutation_type= FF_NO_IDCT_PERM;
    }

    /* pixel block helpers */
    c->get_pixels = get_pixels_c;
    c->diff_pixels = diff_pixels_c;
    c->put_pixels_clamped = put_pixels_clamped_c;
    c->add_pixels_clamped = add_pixels_clamped_c;
    c->gmc1 = gmc1_c;
    c->gmc = gmc_c;
    c->clear_blocks = clear_blocks_c;
    c->pix_sum = pix_sum_c;
    c->pix_norm1 = pix_norm1_c;

    /* TODO [0] 16 [1] 8 */
    c->pix_abs[0][0] = pix_abs16_c;
    c->pix_abs[0][1] = pix_abs16_x2_c;
    c->pix_abs[0][2] = pix_abs16_y2_c;
    c->pix_abs[0][3] = pix_abs16_xy2_c;
    c->pix_abs[1][0] = pix_abs8_c;
    c->pix_abs[1][1] = pix_abs8_x2_c;
    c->pix_abs[1][2] = pix_abs8_y2_c;
    c->pix_abs[1][3] = pix_abs8_xy2_c;

    /* halfpel MC tables: [IDX] selects block size, [0..3] the halfpel
       position (full, x half, y half, xy half) */
#define dspfunc(PFX, IDX, NUM) \
    c->PFX ## _pixels_tab[IDX][0] = PFX ## _pixels ## NUM ## _c; \
    c->PFX ## _pixels_tab[IDX][1] = PFX ## _pixels ## NUM ## _x2_c; \
    c->PFX ## _pixels_tab[IDX][2] = PFX ## _pixels ## NUM ## _y2_c; \
    c->PFX ## _pixels_tab[IDX][3] = PFX ## _pixels ## NUM ## _xy2_c

    dspfunc(put, 0, 16);
    dspfunc(put_no_rnd, 0, 16);
    dspfunc(put, 1, 8);
    dspfunc(put_no_rnd, 1, 8);
    dspfunc(put, 2, 4);
    dspfunc(put, 3, 2);

    dspfunc(avg, 0, 16);
    dspfunc(avg_no_rnd, 0, 16);
    dspfunc(avg, 1, 8);
    dspfunc(avg_no_rnd, 1, 8);
    dspfunc(avg, 2, 4);
    dspfunc(avg, 3, 2);
#undef dspfunc

    /* thirdpel MC tables (indices 3,7 and 11+ unused) */
    c->put_tpel_pixels_tab[ 0] = put_tpel_pixels_mc00_c;
    c->put_tpel_pixels_tab[ 1] = put_tpel_pixels_mc10_c;
    c->put_tpel_pixels_tab[ 2] = put_tpel_pixels_mc20_c;
    c->put_tpel_pixels_tab[ 4] = put_tpel_pixels_mc01_c;
    c->put_tpel_pixels_tab[ 5] = put_tpel_pixels_mc11_c;
    c->put_tpel_pixels_tab[ 6] = put_tpel_pixels_mc21_c;
    c->put_tpel_pixels_tab[ 8] = put_tpel_pixels_mc02_c;
    c->put_tpel_pixels_tab[ 9] = put_tpel_pixels_mc12_c;
    c->put_tpel_pixels_tab[10] = put_tpel_pixels_mc22_c;

    c->avg_tpel_pixels_tab[ 0] = avg_tpel_pixels_mc00_c;
    c->avg_tpel_pixels_tab[ 1] = avg_tpel_pixels_mc10_c;
    c->avg_tpel_pixels_tab[ 2] = avg_tpel_pixels_mc20_c;
    c->avg_tpel_pixels_tab[ 4] = avg_tpel_pixels_mc01_c;
    c->avg_tpel_pixels_tab[ 5] = avg_tpel_pixels_mc11_c;
    c->avg_tpel_pixels_tab[ 6] = avg_tpel_pixels_mc21_c;
    c->avg_tpel_pixels_tab[ 8] = avg_tpel_pixels_mc02_c;
    c->avg_tpel_pixels_tab[ 9] = avg_tpel_pixels_mc12_c;
    c->avg_tpel_pixels_tab[10] = avg_tpel_pixels_mc22_c;

    /* quarterpel MC tables: all 16 (x,y) quarterpel positions */
#define dspfunc(PFX, IDX, NUM) \
    c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_c; \
    c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_c; \
    c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_c; \
    c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_c; \
    c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_c; \
    c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_c; \
    c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_c; \
    c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_c; \
    c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_c; \
    c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_c; \
    c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_c; \
    c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_c; \
    c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_c; \
    c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_c; \
    c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_c; \
    c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_c

    dspfunc(put_qpel, 0, 16);
    dspfunc(put_no_rnd_qpel, 0, 16);

    dspfunc(avg_qpel, 0, 16);
    /* dspfunc(avg_no_rnd_qpel, 0, 16); */

    dspfunc(put_qpel, 1, 8);
    dspfunc(put_no_rnd_qpel, 1, 8);

    dspfunc(avg_qpel, 1, 8);
    /* dspfunc(avg_no_rnd_qpel, 1, 8); */

    dspfunc(put_h264_qpel, 0, 16);
    dspfunc(put_h264_qpel, 1, 8);
    dspfunc(put_h264_qpel, 2, 4);
    dspfunc(avg_h264_qpel, 0, 16);
    dspfunc(avg_h264_qpel, 1, 8);
    dspfunc(avg_h264_qpel, 2, 4);

#undef dspfunc
    c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_c;
    c->put_h264_chroma_pixels_tab[1]= put_h264_chroma_mc4_c;
    c->put_h264_chroma_pixels_tab[2]= put_h264_chroma_mc2_c;
    c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_c;
    c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_c;
    c->avg_h264_chroma_pixels_tab[2]= avg_h264_chroma_mc2_c;

    c->put_mspel_pixels_tab[0]= put_mspel8_mc00_c;
    c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c;
    c->put_mspel_pixels_tab[2]= put_mspel8_mc20_c;
    c->put_mspel_pixels_tab[3]= put_mspel8_mc30_c;
    c->put_mspel_pixels_tab[4]= put_mspel8_mc02_c;
    c->put_mspel_pixels_tab[5]= put_mspel8_mc12_c;
    c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c;
    c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c;

    c->hadamard8_abs = hadamard8_abs_c;

    /* comparison functions: [0] is the 16x16 variant, [1] the 8x8 one */
#define SET_CMP_FUNC(name) \
    c->name[0]= name ## 16_c;\
    c->name[1]= name ## 8x8_c;

    SET_CMP_FUNC(hadamard8_diff)
    SET_CMP_FUNC(dct_sad)
    c->sad[0]= pix_abs16_c;
    c->sad[1]= pix_abs8_c;
    c->sse[0]= sse16_c;
    c->sse[1]= sse8_c;
    SET_CMP_FUNC(quant_psnr)
    SET_CMP_FUNC(rd)
    SET_CMP_FUNC(bit)

    c->add_bytes= add_bytes_c;
    c->diff_bytes= diff_bytes_c;
    c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_c;
    c->bswap_buf= bswap_buf;

    c->h263_h_loop_filter= h263_h_loop_filter_c;
    c->h263_v_loop_filter= h263_v_loop_filter_c;

    /* platform-specific initializers may replace any of the C entries
       above and may change idct_permutation_type */
#ifdef HAVE_MMX
    dsputil_init_mmx(c, avctx);
#endif
#ifdef ARCH_ARMV4L
    dsputil_init_armv4l(c, avctx);
#endif
#ifdef HAVE_MLIB
    dsputil_init_mlib(c, avctx);
#endif
#ifdef ARCH_ALPHA
    dsputil_init_alpha(c, avctx);
#endif
#ifdef ARCH_POWERPC
    dsputil_init_ppc(c, avctx);
#endif
#ifdef HAVE_MMI
    dsputil_init_mmi(c, avctx);
#endif
#ifdef ARCH_SH4
    dsputil_init_sh4(c,avctx);
#endif

    /* build the coefficient permutation matching the selected IDCT */
    switch(c->idct_permutation_type){
    case FF_NO_IDCT_PERM:
        for(i=0; i<64; i++)
            c->idct_permutation[i]= i;
        break;
    case FF_LIBMPEG2_IDCT_PERM:
        for(i=0; i<64; i++)
            c->idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
        break;
    case FF_SIMPLE_IDCT_PERM:
        for(i=0; i<64; i++)
            c->idct_permutation[i]= simple_mmx_permutation[i];
        break;
    case FF_TRANSPOSE_IDCT_PERM:
        for(i=0; i<64; i++)
            c->idct_permutation[i]= ((i&7)<<3) | (i>>3);
        break;
    default:
        av_log(avctx, AV_LOG_ERROR, "Internal error, IDCT permutation not set\n");
    }
}
252
ddb1a0e94cf4
- Added PSNR feature to libavcodec and ffmpeg. By now just Y PSNR until I'm
pulento
parents:
220
diff
changeset
|
3165 |