Mercurial > libavcodec.hg
annotate dsputil.c @ 2497:69adfbbdcdeb libavcodec
- samples from mplayer ftp in the "adv" profile seem to have profile=2,
which isn't the advanced one; and indeed, using adv. profile parser fails.
Using normal parser works, and that's what is done
- attempt at taking care of stride for NORM2 bitplane decoding
- duplication of much code from msmpeg4.c; this code isn't yet used, but
goes down as far as the block layer (mainly Transform Type stuff, the
remains are wild editing without checking). Unusable yet, and lacks the AC
decoding (but a step further in bitstream parsing)
patch by anonymous
author | michael |
---|---|
date | Fri, 04 Feb 2005 02:20:38 +0000 |
parents | bfa9192a22ce |
children | e25782262d7d |
rev | line source |
---|---|
0 | 1 /* |
2 * DSP utils | |
429 | 3 * Copyright (c) 2000, 2001 Fabrice Bellard. |
1739
07a484280a82
copyright year update of the files i touched and remembered, things look annoyingly unmaintained otherwise
michael
parents:
1729
diff
changeset
|
4 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> |
0 | 5 * |
429 | 6 * This library is free software; you can redistribute it and/or |
7 * modify it under the terms of the GNU Lesser General Public | |
8 * License as published by the Free Software Foundation; either | |
9 * version 2 of the License, or (at your option) any later version. | |
0 | 10 * |
429 | 11 * This library is distributed in the hope that it will be useful, |
0 | 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
429 | 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 * Lesser General Public License for more details. | |
0 | 15 * |
429 | 16 * You should have received a copy of the GNU Lesser General Public |
17 * License along with this library; if not, write to the Free Software | |
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
256 | 19 * |
385 | 20 * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at> |
0 | 21 */ |
1106 | 22 |
23 /** | |
24 * @file dsputil.c | |
25 * DSP utils | |
26 */ | |
27 | |
0 | 28 #include "avcodec.h" |
29 #include "dsputil.h" | |
936 | 30 #include "mpegvideo.h" |
1092 | 31 #include "simple_idct.h" |
1557 | 32 #include "faandct.h" |
676 | 33 |
2169
db8baace74d8
Minor Patch for shared libs on Mac OSX by (Bill May <wmay at cisco dot com>)
michael
parents:
2066
diff
changeset
|
34 uint8_t cropTbl[256 + 2 * MAX_NEG_CROP] = {0, }; |
db8baace74d8
Minor Patch for shared libs on Mac OSX by (Bill May <wmay at cisco dot com>)
michael
parents:
2066
diff
changeset
|
35 uint32_t squareTbl[512] = {0, }; |
0 | 36 |
1064 | 37 const uint8_t ff_zigzag_direct[64] = { |
706
e65798d228ea
idct permutation cleanup, idct can be selected per context now
michaelni
parents:
689
diff
changeset
|
38 0, 1, 8, 16, 9, 2, 3, 10, |
e65798d228ea
idct permutation cleanup, idct can be selected per context now
michaelni
parents:
689
diff
changeset
|
39 17, 24, 32, 25, 18, 11, 4, 5, |
34 | 40 12, 19, 26, 33, 40, 48, 41, 34, |
706
e65798d228ea
idct permutation cleanup, idct can be selected per context now
michaelni
parents:
689
diff
changeset
|
41 27, 20, 13, 6, 7, 14, 21, 28, |
34 | 42 35, 42, 49, 56, 57, 50, 43, 36, |
43 29, 22, 15, 23, 30, 37, 44, 51, | |
44 58, 59, 52, 45, 38, 31, 39, 46, | |
45 53, 60, 61, 54, 47, 55, 62, 63 | |
46 }; | |
47 | |
1567 | 48 /* Specific zigzag scan for 248 idct. NOTE that unlike the |
49 specification, we interleave the fields */ | |
50 const uint8_t ff_zigzag248_direct[64] = { | |
51 0, 8, 1, 9, 16, 24, 2, 10, | |
52 17, 25, 32, 40, 48, 56, 33, 41, | |
53 18, 26, 3, 11, 4, 12, 19, 27, | |
54 34, 42, 49, 57, 50, 58, 35, 43, | |
55 20, 28, 5, 13, 6, 14, 21, 29, | |
56 36, 44, 51, 59, 52, 60, 37, 45, | |
57 22, 30, 7, 15, 23, 31, 38, 46, | |
58 53, 61, 54, 62, 39, 47, 55, 63, | |
59 }; | |
60 | |
220 | 61 /* not permutated inverse zigzag_direct + 1 for MMX quantizer */ |
2169
db8baace74d8
Minor Patch for shared libs on Mac OSX by (Bill May <wmay at cisco dot com>)
michael
parents:
2066
diff
changeset
|
62 uint16_t __align8 inv_zigzag_direct16[64] = {0, }; |
220 | 63 |
1064 | 64 const uint8_t ff_alternate_horizontal_scan[64] = { |
706
e65798d228ea
idct permutation cleanup, idct can be selected per context now
michaelni
parents:
689
diff
changeset
|
65 0, 1, 2, 3, 8, 9, 16, 17, |
34 | 66 10, 11, 4, 5, 6, 7, 15, 14, |
67 13, 12, 19, 18, 24, 25, 32, 33, | |
68 26, 27, 20, 21, 22, 23, 28, 29, | |
69 30, 31, 34, 35, 40, 41, 48, 49, | |
70 42, 43, 36, 37, 38, 39, 44, 45, | |
71 46, 47, 50, 51, 56, 57, 58, 59, | |
72 52, 53, 54, 55, 60, 61, 62, 63, | |
73 }; | |
74 | |
1064 | 75 const uint8_t ff_alternate_vertical_scan[64] = { |
706
e65798d228ea
idct permutation cleanup, idct can be selected per context now
michaelni
parents:
689
diff
changeset
|
76 0, 8, 16, 24, 1, 9, 2, 10, |
34 | 77 17, 25, 32, 40, 48, 56, 57, 49, |
78 41, 33, 26, 18, 3, 11, 4, 12, | |
79 19, 27, 34, 42, 50, 58, 35, 43, | |
80 51, 59, 20, 28, 5, 13, 6, 14, | |
81 21, 29, 36, 44, 52, 60, 37, 45, | |
82 53, 61, 22, 30, 7, 15, 23, 31, | |
83 38, 46, 54, 62, 39, 47, 55, 63, | |
84 }; | |
85 | |
220 | 86 /* a*inverse[b]>>32 == a/b for all 0<=a<=65536 && 2<=b<=255 */ |
1064 | 87 const uint32_t inverse[256]={ |
220 | 88 0, 4294967295U,2147483648U,1431655766, 1073741824, 858993460, 715827883, 613566757, |
89 536870912, 477218589, 429496730, 390451573, 357913942, 330382100, 306783379, 286331154, | |
90 268435456, 252645136, 238609295, 226050911, 214748365, 204522253, 195225787, 186737709, | |
91 178956971, 171798692, 165191050, 159072863, 153391690, 148102321, 143165577, 138547333, | |
92 134217728, 130150525, 126322568, 122713352, 119304648, 116080198, 113025456, 110127367, | |
93 107374183, 104755300, 102261127, 99882961, 97612894, 95443718, 93368855, 91382283, | |
94 89478486, 87652394, 85899346, 84215046, 82595525, 81037119, 79536432, 78090315, | |
95 76695845, 75350304, 74051161, 72796056, 71582789, 70409300, 69273667, 68174085, | |
96 67108864, 66076420, 65075263, 64103990, 63161284, 62245903, 61356676, 60492498, | |
97 59652324, 58835169, 58040099, 57266231, 56512728, 55778797, 55063684, 54366675, | |
98 53687092, 53024288, 52377650, 51746594, 51130564, 50529028, 49941481, 49367441, | |
99 48806447, 48258060, 47721859, 47197443, 46684428, 46182445, 45691142, 45210183, | |
100 44739243, 44278014, 43826197, 43383509, 42949673, 42524429, 42107523, 41698712, | |
101 41297763, 40904451, 40518560, 40139882, 39768216, 39403370, 39045158, 38693400, | |
102 38347923, 38008561, 37675152, 37347542, 37025581, 36709123, 36398028, 36092163, | |
103 35791395, 35495598, 35204650, 34918434, 34636834, 34359739, 34087043, 33818641, | |
104 33554432, 33294321, 33038210, 32786010, 32537632, 32292988, 32051995, 31814573, | |
105 31580642, 31350127, 31122952, 30899046, 30678338, 30460761, 30246249, 30034737, | |
106 29826162, 29620465, 29417585, 29217465, 29020050, 28825284, 28633116, 28443493, | |
107 28256364, 28071682, 27889399, 27709467, 27531842, 27356480, 27183338, 27012373, | |
108 26843546, 26676816, 26512144, 26349493, 26188825, 26030105, 25873297, 25718368, | |
109 25565282, 25414008, 25264514, 25116768, 24970741, 24826401, 24683721, 24542671, | |
110 24403224, 24265352, 24129030, 23994231, 23860930, 23729102, 23598722, 23469767, | |
111 23342214, 23216040, 23091223, 22967740, 22845571, 22724695, 22605092, 22486740, | |
112 22369622, 22253717, 22139007, 22025474, 21913099, 21801865, 21691755, 21582751, | |
113 21474837, 21367997, 21262215, 21157475, 21053762, 20951060, 20849356, 20748635, | |
114 20648882, 20550083, 20452226, 20355296, 20259280, 20164166, 20069941, 19976593, | |
115 19884108, 19792477, 19701685, 19611723, 19522579, 19434242, 19346700, 19259944, | |
116 19173962, 19088744, 19004281, 18920561, 18837576, 18755316, 18673771, 18592933, | |
117 18512791, 18433337, 18354562, 18276457, 18199014, 18122225, 18046082, 17970575, | |
118 17895698, 17821442, 17747799, 17674763, 17602325, 17530479, 17459217, 17388532, | |
119 17318417, 17248865, 17179870, 17111424, 17043522, 16976156, 16909321, 16843010, | |
120 }; | |
121 | |
1092 | 122 /* Input permutation for the simple_idct_mmx */ |
123 static const uint8_t simple_mmx_permutation[64]={ | |
124 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D, | |
125 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D, | |
126 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D, | |
127 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F, | |
128 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F, | |
129 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D, | |
130 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F, | |
131 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F, | |
132 }; | |
133 | |
1064 | 134 static int pix_sum_c(uint8_t * pix, int line_size) |
612 | 135 { |
136 int s, i, j; | |
137 | |
138 s = 0; | |
139 for (i = 0; i < 16; i++) { | |
140 for (j = 0; j < 16; j += 8) { | |
141 s += pix[0]; | |
142 s += pix[1]; | |
143 s += pix[2]; | |
144 s += pix[3]; | |
145 s += pix[4]; | |
146 s += pix[5]; | |
147 s += pix[6]; | |
148 s += pix[7]; | |
149 pix += 8; | |
150 } | |
151 pix += line_size - 16; | |
152 } | |
153 return s; | |
154 } | |
155 | |
1064 | 156 static int pix_norm1_c(uint8_t * pix, int line_size) |
612 | 157 { |
158 int s, i, j; | |
1064 | 159 uint32_t *sq = squareTbl + 256; |
612 | 160 |
161 s = 0; | |
162 for (i = 0; i < 16; i++) { | |
163 for (j = 0; j < 16; j += 8) { | |
997
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
164 #if 0 |
612 | 165 s += sq[pix[0]]; |
166 s += sq[pix[1]]; | |
167 s += sq[pix[2]]; | |
168 s += sq[pix[3]]; | |
169 s += sq[pix[4]]; | |
170 s += sq[pix[5]]; | |
171 s += sq[pix[6]]; | |
172 s += sq[pix[7]]; | |
997
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
173 #else |
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
174 #if LONG_MAX > 2147483647 |
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
175 register uint64_t x=*(uint64_t*)pix; |
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
176 s += sq[x&0xff]; |
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
177 s += sq[(x>>8)&0xff]; |
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
178 s += sq[(x>>16)&0xff]; |
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
179 s += sq[(x>>24)&0xff]; |
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
180 s += sq[(x>>32)&0xff]; |
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
181 s += sq[(x>>40)&0xff]; |
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
182 s += sq[(x>>48)&0xff]; |
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
183 s += sq[(x>>56)&0xff]; |
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
184 #else |
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
185 register uint32_t x=*(uint32_t*)pix; |
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
186 s += sq[x&0xff]; |
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
187 s += sq[(x>>8)&0xff]; |
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
188 s += sq[(x>>16)&0xff]; |
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
189 s += sq[(x>>24)&0xff]; |
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
190 x=*(uint32_t*)(pix+4); |
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
191 s += sq[x&0xff]; |
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
192 s += sq[(x>>8)&0xff]; |
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
193 s += sq[(x>>16)&0xff]; |
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
194 s += sq[(x>>24)&0xff]; |
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
195 #endif |
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
196 #endif |
612 | 197 pix += 8; |
198 } | |
199 pix += line_size - 16; | |
200 } | |
201 return s; | |
202 } | |
203 | |
1273 | 204 static void bswap_buf(uint32_t *dst, uint32_t *src, int w){ |
205 int i; | |
206 | |
207 for(i=0; i+8<=w; i+=8){ | |
208 dst[i+0]= bswap_32(src[i+0]); | |
209 dst[i+1]= bswap_32(src[i+1]); | |
210 dst[i+2]= bswap_32(src[i+2]); | |
211 dst[i+3]= bswap_32(src[i+3]); | |
212 dst[i+4]= bswap_32(src[i+4]); | |
213 dst[i+5]= bswap_32(src[i+5]); | |
214 dst[i+6]= bswap_32(src[i+6]); | |
215 dst[i+7]= bswap_32(src[i+7]); | |
216 } | |
217 for(;i<w; i++){ | |
218 dst[i+0]= bswap_32(src[i+0]); | |
219 } | |
220 } | |
612 | 221 |
2184 | 222 static int sse4_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h) |
223 { | |
224 int s, i; | |
225 uint32_t *sq = squareTbl + 256; | |
226 | |
227 s = 0; | |
228 for (i = 0; i < h; i++) { | |
229 s += sq[pix1[0] - pix2[0]]; | |
230 s += sq[pix1[1] - pix2[1]]; | |
231 s += sq[pix1[2] - pix2[2]]; | |
232 s += sq[pix1[3] - pix2[3]]; | |
233 pix1 += line_size; | |
234 pix2 += line_size; | |
235 } | |
236 return s; | |
237 } | |
238 | |
1708 | 239 static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h) |
936 | 240 { |
241 int s, i; | |
1064 | 242 uint32_t *sq = squareTbl + 256; |
936 | 243 |
244 s = 0; | |
1708 | 245 for (i = 0; i < h; i++) { |
936 | 246 s += sq[pix1[0] - pix2[0]]; |
247 s += sq[pix1[1] - pix2[1]]; | |
248 s += sq[pix1[2] - pix2[2]]; | |
249 s += sq[pix1[3] - pix2[3]]; | |
250 s += sq[pix1[4] - pix2[4]]; | |
251 s += sq[pix1[5] - pix2[5]]; | |
252 s += sq[pix1[6] - pix2[6]]; | |
253 s += sq[pix1[7] - pix2[7]]; | |
254 pix1 += line_size; | |
255 pix2 += line_size; | |
256 } | |
257 return s; | |
258 } | |
259 | |
1708 | 260 static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) |
884 | 261 { |
1012
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
262 int s, i; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
263 uint32_t *sq = squareTbl + 256; |
884 | 264 |
265 s = 0; | |
1708 | 266 for (i = 0; i < h; i++) { |
1012
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
267 s += sq[pix1[ 0] - pix2[ 0]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
268 s += sq[pix1[ 1] - pix2[ 1]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
269 s += sq[pix1[ 2] - pix2[ 2]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
270 s += sq[pix1[ 3] - pix2[ 3]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
271 s += sq[pix1[ 4] - pix2[ 4]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
272 s += sq[pix1[ 5] - pix2[ 5]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
273 s += sq[pix1[ 6] - pix2[ 6]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
274 s += sq[pix1[ 7] - pix2[ 7]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
275 s += sq[pix1[ 8] - pix2[ 8]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
276 s += sq[pix1[ 9] - pix2[ 9]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
277 s += sq[pix1[10] - pix2[10]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
278 s += sq[pix1[11] - pix2[11]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
279 s += sq[pix1[12] - pix2[12]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
280 s += sq[pix1[13] - pix2[13]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
281 s += sq[pix1[14] - pix2[14]]; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
282 s += sq[pix1[15] - pix2[15]]; |
997
4dfe15ae0078
sse16 & pix_norm1 optimization patch by (Felix von Leitner <felix-ffmpeg at fefe dot de>) (with some modifications)
michaelni
parents:
996
diff
changeset
|
283 |
1012
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
284 pix1 += line_size; |
7a5038ec769b
sse16_c is totally fucked up (unaligned loads, LONG_MAX is undefined,
mellum
parents:
1011
diff
changeset
|
285 pix2 += line_size; |
884 | 286 } |
287 return s; | |
288 } | |
289 | |
2184 | 290 |
291 static inline int w_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int w, int h, int type){ | |
292 int s, i, j; | |
293 const int dec_count= w==8 ? 3 : 4; | |
294 int tmp[16*16]; | |
295 #if 0 | |
296 int level, ori; | |
297 static const int scale[2][2][4][4]={ | |
298 { | |
299 { | |
300 //8x8 dec=3 | |
301 {268, 239, 239, 213}, | |
302 { 0, 224, 224, 152}, | |
303 { 0, 135, 135, 110}, | |
304 },{ | |
305 //16x16 dec=4 | |
306 {344, 310, 310, 280}, | |
307 { 0, 320, 320, 228}, | |
308 { 0, 175, 175, 136}, | |
309 { 0, 129, 129, 102}, | |
310 } | |
311 },{ | |
312 {//FIXME 5/3 | |
313 //8x8 dec=3 | |
314 {275, 245, 245, 218}, | |
315 { 0, 230, 230, 156}, | |
316 { 0, 138, 138, 113}, | |
317 },{ | |
318 //16x16 dec=4 | |
319 {352, 317, 317, 286}, | |
320 { 0, 328, 328, 233}, | |
321 { 0, 180, 180, 140}, | |
322 { 0, 132, 132, 105}, | |
323 } | |
324 } | |
325 }; | |
326 #endif | |
327 | |
328 for (i = 0; i < h; i++) { | |
329 for (j = 0; j < w; j+=4) { | |
330 tmp[16*i+j+0] = (pix1[j+0] - pix2[j+0])<<4; | |
331 tmp[16*i+j+1] = (pix1[j+1] - pix2[j+1])<<4; | |
332 tmp[16*i+j+2] = (pix1[j+2] - pix2[j+2])<<4; | |
333 tmp[16*i+j+3] = (pix1[j+3] - pix2[j+3])<<4; | |
334 } | |
335 pix1 += line_size; | |
336 pix2 += line_size; | |
337 } | |
338 ff_spatial_dwt(tmp, w, h, 16, type, dec_count); | |
339 | |
340 s=0; | |
341 #if 0 | |
342 for(level=0; level<dec_count; level++){ | |
343 for(ori= level ? 1 : 0; ori<4; ori++){ | |
344 int sx= (ori&1) ? 1<<level: 0; | |
345 int stride= 16<<(dec_count-level); | |
346 int sy= (ori&2) ? stride>>1 : 0; | |
347 int size= 1<<level; | |
348 | |
349 for(i=0; i<size; i++){ | |
350 for(j=0; j<size; j++){ | |
351 int v= tmp[sx + sy + i*stride + j] * scale[type][dec_count-3][level][ori]; | |
352 s += ABS(v); | |
353 } | |
354 } | |
355 } | |
356 } | |
357 #endif | |
358 for (i = 0; i < h; i++) { | |
359 for (j = 0; j < w; j+=4) { | |
360 s+= ABS(tmp[16*i+j+0]); | |
361 s+= ABS(tmp[16*i+j+1]); | |
362 s+= ABS(tmp[16*i+j+2]); | |
363 s+= ABS(tmp[16*i+j+3]); | |
364 } | |
365 } | |
366 assert(s>=0); | |
367 | |
368 return s>>2; | |
369 } | |
370 | |
371 static int w53_8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){ | |
372 return w_c(v, pix1, pix2, line_size, 8, h, 1); | |
373 } | |
374 | |
375 static int w97_8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){ | |
376 return w_c(v, pix1, pix2, line_size, 8, h, 0); | |
377 } | |
378 | |
379 static int w53_16_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){ | |
380 return w_c(v, pix1, pix2, line_size, 16, h, 1); | |
381 } | |
382 | |
383 static int w97_16_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){ | |
384 return w_c(v, pix1, pix2, line_size, 16, h, 0); | |
385 } | |
386 | |
1064 | 387 static void get_pixels_c(DCTELEM *restrict block, const uint8_t *pixels, int line_size) |
0 | 388 { |
389 int i; | |
390 | |
391 /* read the pixels */ | |
392 for(i=0;i<8;i++) { | |
516 | 393 block[0] = pixels[0]; |
394 block[1] = pixels[1]; | |
395 block[2] = pixels[2]; | |
396 block[3] = pixels[3]; | |
397 block[4] = pixels[4]; | |
398 block[5] = pixels[5]; | |
399 block[6] = pixels[6]; | |
400 block[7] = pixels[7]; | |
401 pixels += line_size; | |
402 block += 8; | |
0 | 403 } |
404 } | |
405 | |
1064 | 406 static void diff_pixels_c(DCTELEM *restrict block, const uint8_t *s1, |
407 const uint8_t *s2, int stride){ | |
324 | 408 int i; |
409 | |
410 /* read the pixels */ | |
411 for(i=0;i<8;i++) { | |
516 | 412 block[0] = s1[0] - s2[0]; |
413 block[1] = s1[1] - s2[1]; | |
414 block[2] = s1[2] - s2[2]; | |
415 block[3] = s1[3] - s2[3]; | |
416 block[4] = s1[4] - s2[4]; | |
417 block[5] = s1[5] - s2[5]; | |
418 block[6] = s1[6] - s2[6]; | |
419 block[7] = s1[7] - s2[7]; | |
324 | 420 s1 += stride; |
421 s2 += stride; | |
516 | 422 block += 8; |
324 | 423 } |
424 } | |
425 | |
426 | |
1064 | 427 static void put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels, |
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
764
diff
changeset
|
428 int line_size) |
0 | 429 { |
430 int i; | |
1064 | 431 uint8_t *cm = cropTbl + MAX_NEG_CROP; |
0 | 432 |
433 /* read the pixels */ | |
434 for(i=0;i<8;i++) { | |
516 | 435 pixels[0] = cm[block[0]]; |
436 pixels[1] = cm[block[1]]; | |
437 pixels[2] = cm[block[2]]; | |
438 pixels[3] = cm[block[3]]; | |
439 pixels[4] = cm[block[4]]; | |
440 pixels[5] = cm[block[5]]; | |
441 pixels[6] = cm[block[6]]; | |
442 pixels[7] = cm[block[7]]; | |
443 | |
444 pixels += line_size; | |
445 block += 8; | |
0 | 446 } |
447 } | |
448 | |
2256 | 449 static void put_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels, |
450 int line_size) | |
451 { | |
452 int i; | |
453 uint8_t *cm = cropTbl + MAX_NEG_CROP; | |
454 | |
455 /* read the pixels */ | |
456 for(i=0;i<4;i++) { | |
457 pixels[0] = cm[block[0]]; | |
458 pixels[1] = cm[block[1]]; | |
459 pixels[2] = cm[block[2]]; | |
460 pixels[3] = cm[block[3]]; | |
461 | |
462 pixels += line_size; | |
463 block += 8; | |
464 } | |
465 } | |
466 | |
2257 | 467 static void put_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels, |
468 int line_size) | |
469 { | |
470 int i; | |
471 uint8_t *cm = cropTbl + MAX_NEG_CROP; | |
472 | |
473 /* read the pixels */ | |
474 for(i=0;i<2;i++) { | |
475 pixels[0] = cm[block[0]]; | |
476 pixels[1] = cm[block[1]]; | |
477 | |
478 pixels += line_size; | |
479 block += 8; | |
480 } | |
481 } | |
482 | |
1984
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
483 static void put_signed_pixels_clamped_c(const DCTELEM *block, |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
484 uint8_t *restrict pixels, |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
485 int line_size) |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
486 { |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
487 int i, j; |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
488 |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
489 for (i = 0; i < 8; i++) { |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
490 for (j = 0; j < 8; j++) { |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
491 if (*block < -128) |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
492 *pixels = 0; |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
493 else if (*block > 127) |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
494 *pixels = 255; |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
495 else |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
496 *pixels = (uint8_t)(*block + 128); |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
497 block++; |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
498 pixels++; |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
499 } |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
500 pixels += (line_size - 8); |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
501 } |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
502 } |
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
503 |
1064 | 504 static void add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels, |
516 | 505 int line_size) |
0 | 506 { |
507 int i; | |
1064 | 508 uint8_t *cm = cropTbl + MAX_NEG_CROP; |
0 | 509 |
510 /* read the pixels */ | |
511 for(i=0;i<8;i++) { | |
516 | 512 pixels[0] = cm[pixels[0] + block[0]]; |
513 pixels[1] = cm[pixels[1] + block[1]]; | |
514 pixels[2] = cm[pixels[2] + block[2]]; | |
515 pixels[3] = cm[pixels[3] + block[3]]; | |
516 pixels[4] = cm[pixels[4] + block[4]]; | |
517 pixels[5] = cm[pixels[5] + block[5]]; | |
518 pixels[6] = cm[pixels[6] + block[6]]; | |
519 pixels[7] = cm[pixels[7] + block[7]]; | |
520 pixels += line_size; | |
521 block += 8; | |
0 | 522 } |
523 } | |
2256 | 524 |
525 static void add_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels, | |
526 int line_size) | |
527 { | |
528 int i; | |
529 uint8_t *cm = cropTbl + MAX_NEG_CROP; | |
530 | |
531 /* read the pixels */ | |
532 for(i=0;i<4;i++) { | |
533 pixels[0] = cm[pixels[0] + block[0]]; | |
534 pixels[1] = cm[pixels[1] + block[1]]; | |
535 pixels[2] = cm[pixels[2] + block[2]]; | |
536 pixels[3] = cm[pixels[3] + block[3]]; | |
537 pixels += line_size; | |
538 block += 8; | |
539 } | |
540 } | |
2257 | 541 |
542 static void add_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels, | |
543 int line_size) | |
544 { | |
545 int i; | |
546 uint8_t *cm = cropTbl + MAX_NEG_CROP; | |
547 | |
548 /* read the pixels */ | |
549 for(i=0;i<2;i++) { | |
550 pixels[0] = cm[pixels[0] + block[0]]; | |
551 pixels[1] = cm[pixels[1] + block[1]]; | |
552 pixels += line_size; | |
553 block += 8; | |
554 } | |
555 } | |
385 | 556 #if 0 |
557 | |
/*
 * PIXOP2 (64-bit variant): generates the C half-pel motion-compensation
 * primitives for one operation OPNAME (e.g. put/avg).
 *
 * OP(dst, val) stores/accumulates a packed group of 8 bytes into dst
 * (e.g. op_put assigns, op_avg averages with the previous contents).
 * LD64 loads 8 bytes from a possibly unaligned address (defined elsewhere
 * in this file).  All functions process an 8-pixel-wide column, h rows,
 * with line_size bytes between rows.
 *
 * The x2/y2/xy2 variants average horizontally / vertically / in both
 * directions using SWAR arithmetic on 8 packed bytes at once:
 *   rounding avg:    (a|b) - (((a^b) & 0xFE..FE) >> 1)   == (a+b+1)>>1 per byte
 *   truncating avg:  (a&b) + (((a^b) & 0xFE..FE) >> 1)   == (a+b)>>1   per byte
 * The xy2 variants split each byte into a low 2-bit part (l0/l1) and a
 * high 6-bit part (h0/h1) so four pixels can be summed without carry
 * between bytes; the rounder (0x02.. vs 0x01..) selects rnd/no_rnd.
 *
 * NOTE(review): the first function was spelled "OPNAME ## _pixels" in the
 * previous revision, but CALL_2X_PIXELS below pastes "OPNAME ## _pixels_c";
 * renamed to _pixels_c so the generated 16-wide wrappers resolve.
 */
#define PIXOP2(OPNAME, OP) \
static void OPNAME ## _pixels_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    for(i=0; i<h; i++){\
        OP(*((uint64_t*)block), LD64(pixels));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static void OPNAME ## _no_rnd_pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    for(i=0; i<h; i++){\
        const uint64_t a= LD64(pixels  );\
        const uint64_t b= LD64(pixels+1);\
        OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static void OPNAME ## _pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    for(i=0; i<h; i++){\
        const uint64_t a= LD64(pixels  );\
        const uint64_t b= LD64(pixels+1);\
        OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static void OPNAME ## _no_rnd_pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    for(i=0; i<h; i++){\
        const uint64_t a= LD64(pixels          );\
        const uint64_t b= LD64(pixels+line_size);\
        OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static void OPNAME ## _pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    for(i=0; i<h; i++){\
        const uint64_t a= LD64(pixels          );\
        const uint64_t b= LD64(pixels+line_size);\
        OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static void OPNAME ## _pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    const uint64_t a= LD64(pixels  );\
    const uint64_t b= LD64(pixels+1);\
    uint64_t l0=  (a&0x0303030303030303ULL)\
                + (b&0x0303030303030303ULL)\
                + 0x0202020202020202ULL;\
    uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
               + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
    uint64_t l1,h1;\
\
    pixels+=line_size;\
    for(i=0; i<h; i+=2){\
        uint64_t a= LD64(pixels  );\
        uint64_t b= LD64(pixels+1);\
        l1=  (a&0x0303030303030303ULL)\
           + (b&0x0303030303030303ULL);\
        h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
          + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
        OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
        pixels+=line_size;\
        block +=line_size;\
        a= LD64(pixels  );\
        b= LD64(pixels+1);\
        l0=  (a&0x0303030303030303ULL)\
           + (b&0x0303030303030303ULL)\
           + 0x0202020202020202ULL;\
        h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
          + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
        OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static void OPNAME ## _no_rnd_pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    const uint64_t a= LD64(pixels  );\
    const uint64_t b= LD64(pixels+1);\
    uint64_t l0=  (a&0x0303030303030303ULL)\
                + (b&0x0303030303030303ULL)\
                + 0x0101010101010101ULL;\
    uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
               + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
    uint64_t l1,h1;\
\
    pixels+=line_size;\
    for(i=0; i<h; i+=2){\
        uint64_t a= LD64(pixels  );\
        uint64_t b= LD64(pixels+1);\
        l1=  (a&0x0303030303030303ULL)\
           + (b&0x0303030303030303ULL);\
        h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
          + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
        OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
        pixels+=line_size;\
        block +=line_size;\
        a= LD64(pixels  );\
        b= LD64(pixels+1);\
        l0=  (a&0x0303030303030303ULL)\
           + (b&0x0303030303030303ULL)\
           + 0x0101010101010101ULL;\
        h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
          + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
        OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
CALL_2X_PIXELS(OPNAME ## _pixels16_c    , OPNAME ## _pixels_c    , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels_x2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels_y2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels_xy2_c, 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels_x2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels_y2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels_xy2_c, 8)
385 | 696 |
/* SWAR average of 8 packed bytes with rounding up: per byte, (a+b+1)>>1,
 * computed carry-free as (a|b) - (((a^b) & 0xFE..FE) >> 1).  Assigns the
 * result back into the lvalue a (it is the OP() macro for avg_* ops). */
#define op_avg(a, b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEFEFEFEFEULL)>>1) )
698 #else // 64 bit variant | |
699 | |
700 #define PIXOP2(OPNAME, OP) \ | |
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
701 static void OPNAME ## _pixels2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
702 int i;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
703 for(i=0; i<h; i++){\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
704 OP(*((uint16_t*)(block )), LD16(pixels ));\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
705 pixels+=line_size;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
706 block +=line_size;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
707 }\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
708 }\ |
1168 | 709 static void OPNAME ## _pixels4_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
710 int i;\ | |
711 for(i=0; i<h; i++){\ | |
712 OP(*((uint32_t*)(block )), LD32(pixels ));\ | |
713 pixels+=line_size;\ | |
714 block +=line_size;\ | |
715 }\ | |
716 }\ | |
859 | 717 static void OPNAME ## _pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
385 | 718 int i;\ |
719 for(i=0; i<h; i++){\ | |
720 OP(*((uint32_t*)(block )), LD32(pixels ));\ | |
721 OP(*((uint32_t*)(block+4)), LD32(pixels+4));\ | |
722 pixels+=line_size;\ | |
723 block +=line_size;\ | |
724 }\ | |
725 }\ | |
859 | 726 static inline void OPNAME ## _no_rnd_pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
727 OPNAME ## _pixels8_c(block, pixels, line_size, h);\ | |
651 | 728 }\ |
385 | 729 \ |
651 | 730 static inline void OPNAME ## _no_rnd_pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ |
731 int src_stride1, int src_stride2, int h){\ | |
385 | 732 int i;\ |
733 for(i=0; i<h; i++){\ | |
651 | 734 uint32_t a,b;\ |
735 a= LD32(&src1[i*src_stride1 ]);\ | |
736 b= LD32(&src2[i*src_stride2 ]);\ | |
1264 | 737 OP(*((uint32_t*)&dst[i*dst_stride ]), no_rnd_avg32(a, b));\ |
651 | 738 a= LD32(&src1[i*src_stride1+4]);\ |
739 b= LD32(&src2[i*src_stride2+4]);\ | |
1264 | 740 OP(*((uint32_t*)&dst[i*dst_stride+4]), no_rnd_avg32(a, b));\ |
385 | 741 }\ |
742 }\ | |
743 \ | |
651 | 744 static inline void OPNAME ## _pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ |
745 int src_stride1, int src_stride2, int h){\ | |
385 | 746 int i;\ |
747 for(i=0; i<h; i++){\ | |
651 | 748 uint32_t a,b;\ |
749 a= LD32(&src1[i*src_stride1 ]);\ | |
750 b= LD32(&src2[i*src_stride2 ]);\ | |
1264 | 751 OP(*((uint32_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\ |
651 | 752 a= LD32(&src1[i*src_stride1+4]);\ |
753 b= LD32(&src2[i*src_stride2+4]);\ | |
1264 | 754 OP(*((uint32_t*)&dst[i*dst_stride+4]), rnd_avg32(a, b));\ |
385 | 755 }\ |
756 }\ | |
757 \ | |
1168 | 758 static inline void OPNAME ## _pixels4_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ |
759 int src_stride1, int src_stride2, int h){\ | |
760 int i;\ | |
761 for(i=0; i<h; i++){\ | |
762 uint32_t a,b;\ | |
763 a= LD32(&src1[i*src_stride1 ]);\ | |
764 b= LD32(&src2[i*src_stride2 ]);\ | |
1264 | 765 OP(*((uint32_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\ |
1168 | 766 }\ |
767 }\ | |
768 \ | |
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
769 static inline void OPNAME ## _pixels2_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
770 int src_stride1, int src_stride2, int h){\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
771 int i;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
772 for(i=0; i<h; i++){\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
773 uint32_t a,b;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
774 a= LD16(&src1[i*src_stride1 ]);\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
775 b= LD16(&src2[i*src_stride2 ]);\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
776 OP(*((uint16_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
777 }\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
778 }\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
779 \ |
651 | 780 static inline void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ |
781 int src_stride1, int src_stride2, int h){\ | |
782 OPNAME ## _pixels8_l2(dst , src1 , src2 , dst_stride, src_stride1, src_stride2, h);\ | |
783 OPNAME ## _pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\ | |
784 }\ | |
785 \ | |
786 static inline void OPNAME ## _no_rnd_pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ | |
787 int src_stride1, int src_stride2, int h){\ | |
788 OPNAME ## _no_rnd_pixels8_l2(dst , src1 , src2 , dst_stride, src_stride1, src_stride2, h);\ | |
789 OPNAME ## _no_rnd_pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\ | |
790 }\ | |
791 \ | |
859 | 792 static inline void OPNAME ## _no_rnd_pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
651 | 793 OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\ |
794 }\ | |
795 \ | |
859 | 796 static inline void OPNAME ## _pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
651 | 797 OPNAME ## _pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\ |
798 }\ | |
799 \ | |
859 | 800 static inline void OPNAME ## _no_rnd_pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
651 | 801 OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\ |
802 }\ | |
803 \ | |
859 | 804 static inline void OPNAME ## _pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
651 | 805 OPNAME ## _pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\ |
385 | 806 }\ |
807 \ | |
651 | 808 static inline void OPNAME ## _pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\ |
809 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ | |
810 int i;\ | |
811 for(i=0; i<h; i++){\ | |
812 uint32_t a, b, c, d, l0, l1, h0, h1;\ | |
813 a= LD32(&src1[i*src_stride1]);\ | |
814 b= LD32(&src2[i*src_stride2]);\ | |
815 c= LD32(&src3[i*src_stride3]);\ | |
816 d= LD32(&src4[i*src_stride4]);\ | |
817 l0= (a&0x03030303UL)\ | |
818 + (b&0x03030303UL)\ | |
819 + 0x02020202UL;\ | |
820 h0= ((a&0xFCFCFCFCUL)>>2)\ | |
821 + ((b&0xFCFCFCFCUL)>>2);\ | |
822 l1= (c&0x03030303UL)\ | |
823 + (d&0x03030303UL);\ | |
824 h1= ((c&0xFCFCFCFCUL)>>2)\ | |
825 + ((d&0xFCFCFCFCUL)>>2);\ | |
826 OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ | |
827 a= LD32(&src1[i*src_stride1+4]);\ | |
828 b= LD32(&src2[i*src_stride2+4]);\ | |
829 c= LD32(&src3[i*src_stride3+4]);\ | |
830 d= LD32(&src4[i*src_stride4+4]);\ | |
831 l0= (a&0x03030303UL)\ | |
832 + (b&0x03030303UL)\ | |
833 + 0x02020202UL;\ | |
834 h0= ((a&0xFCFCFCFCUL)>>2)\ | |
835 + ((b&0xFCFCFCFCUL)>>2);\ | |
836 l1= (c&0x03030303UL)\ | |
837 + (d&0x03030303UL);\ | |
838 h1= ((c&0xFCFCFCFCUL)>>2)\ | |
839 + ((d&0xFCFCFCFCUL)>>2);\ | |
840 OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ | |
841 }\ | |
842 }\ | |
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
843 \ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
844 static inline void OPNAME ## _pixels4_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
845 OPNAME ## _pixels4_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
846 }\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
847 \ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
848 static inline void OPNAME ## _pixels4_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
849 OPNAME ## _pixels4_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
850 }\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
851 \ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
852 static inline void OPNAME ## _pixels2_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
853 OPNAME ## _pixels2_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
854 }\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
855 \ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
856 static inline void OPNAME ## _pixels2_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
857 OPNAME ## _pixels2_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
858 }\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
859 \ |
651 | 860 static inline void OPNAME ## _no_rnd_pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\ |
861 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ | |
385 | 862 int i;\ |
863 for(i=0; i<h; i++){\ | |
651 | 864 uint32_t a, b, c, d, l0, l1, h0, h1;\ |
865 a= LD32(&src1[i*src_stride1]);\ | |
866 b= LD32(&src2[i*src_stride2]);\ | |
867 c= LD32(&src3[i*src_stride3]);\ | |
868 d= LD32(&src4[i*src_stride4]);\ | |
869 l0= (a&0x03030303UL)\ | |
870 + (b&0x03030303UL)\ | |
871 + 0x01010101UL;\ | |
872 h0= ((a&0xFCFCFCFCUL)>>2)\ | |
873 + ((b&0xFCFCFCFCUL)>>2);\ | |
874 l1= (c&0x03030303UL)\ | |
875 + (d&0x03030303UL);\ | |
876 h1= ((c&0xFCFCFCFCUL)>>2)\ | |
877 + ((d&0xFCFCFCFCUL)>>2);\ | |
878 OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ | |
879 a= LD32(&src1[i*src_stride1+4]);\ | |
880 b= LD32(&src2[i*src_stride2+4]);\ | |
881 c= LD32(&src3[i*src_stride3+4]);\ | |
882 d= LD32(&src4[i*src_stride4+4]);\ | |
883 l0= (a&0x03030303UL)\ | |
884 + (b&0x03030303UL)\ | |
885 + 0x01010101UL;\ | |
886 h0= ((a&0xFCFCFCFCUL)>>2)\ | |
887 + ((b&0xFCFCFCFCUL)>>2);\ | |
888 l1= (c&0x03030303UL)\ | |
889 + (d&0x03030303UL);\ | |
890 h1= ((c&0xFCFCFCFCUL)>>2)\ | |
891 + ((d&0xFCFCFCFCUL)>>2);\ | |
892 OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ | |
385 | 893 }\ |
894 }\ | |
651 | 895 static inline void OPNAME ## _pixels16_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\ |
896 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ | |
897 OPNAME ## _pixels8_l4(dst , src1 , src2 , src3 , src4 , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\ | |
898 OPNAME ## _pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\ | |
899 }\ | |
900 static inline void OPNAME ## _no_rnd_pixels16_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\ | |
901 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ | |
902 OPNAME ## _no_rnd_pixels8_l4(dst , src1 , src2 , src3 , src4 , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\ | |
903 OPNAME ## _no_rnd_pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\ | |
904 }\ | |
385 | 905 \ |
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
906 static inline void OPNAME ## _pixels2_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
907 {\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
908 int i, a0, b0, a1, b1;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
909 a0= pixels[0];\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
910 b0= pixels[1] + 2;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
911 a0 += b0;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
912 b0 += pixels[2];\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
913 \ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
914 pixels+=line_size;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
915 for(i=0; i<h; i+=2){\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
916 a1= pixels[0];\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
917 b1= pixels[1];\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
918 a1 += b1;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
919 b1 += pixels[2];\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
920 \ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
921 block[0]= (a1+a0)>>2; /* FIXME non put */\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
922 block[1]= (b1+b0)>>2;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
923 \ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
924 pixels+=line_size;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
925 block +=line_size;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
926 \ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
927 a0= pixels[0];\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
928 b0= pixels[1] + 2;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
929 a0 += b0;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
930 b0 += pixels[2];\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
931 \ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
932 block[0]= (a1+a0)>>2;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
933 block[1]= (b1+b0)>>2;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
934 pixels+=line_size;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
935 block +=line_size;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
936 }\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
937 }\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
938 \ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
939 static inline void OPNAME ## _pixels4_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
940 {\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
941 int i;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
942 const uint32_t a= LD32(pixels );\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
943 const uint32_t b= LD32(pixels+1);\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
944 uint32_t l0= (a&0x03030303UL)\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
945 + (b&0x03030303UL)\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
946 + 0x02020202UL;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
947 uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
948 + ((b&0xFCFCFCFCUL)>>2);\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
949 uint32_t l1,h1;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
950 \ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
951 pixels+=line_size;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
952 for(i=0; i<h; i+=2){\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
953 uint32_t a= LD32(pixels );\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
954 uint32_t b= LD32(pixels+1);\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
955 l1= (a&0x03030303UL)\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
956 + (b&0x03030303UL);\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
957 h1= ((a&0xFCFCFCFCUL)>>2)\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
958 + ((b&0xFCFCFCFCUL)>>2);\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
959 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
960 pixels+=line_size;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
961 block +=line_size;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
962 a= LD32(pixels );\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
963 b= LD32(pixels+1);\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
964 l0= (a&0x03030303UL)\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
965 + (b&0x03030303UL)\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
966 + 0x02020202UL;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
967 h0= ((a&0xFCFCFCFCUL)>>2)\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
968 + ((b&0xFCFCFCFCUL)>>2);\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
969 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
970 pixels+=line_size;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
971 block +=line_size;\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
972 }\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
973 }\ |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
974 \ |
859 | 975 static inline void OPNAME ## _pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ |
385 | 976 {\ |
977 int j;\ | |
978 for(j=0; j<2; j++){\ | |
979 int i;\ | |
980 const uint32_t a= LD32(pixels );\ | |
981 const uint32_t b= LD32(pixels+1);\ | |
982 uint32_t l0= (a&0x03030303UL)\ | |
983 + (b&0x03030303UL)\ | |
984 + 0x02020202UL;\ | |
985 uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\ | |
986 + ((b&0xFCFCFCFCUL)>>2);\ | |
987 uint32_t l1,h1;\ | |
988 \ | |
989 pixels+=line_size;\ | |
990 for(i=0; i<h; i+=2){\ | |
991 uint32_t a= LD32(pixels );\ | |
992 uint32_t b= LD32(pixels+1);\ | |
993 l1= (a&0x03030303UL)\ | |
994 + (b&0x03030303UL);\ | |
995 h1= ((a&0xFCFCFCFCUL)>>2)\ | |
996 + ((b&0xFCFCFCFCUL)>>2);\ | |
997 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ | |
998 pixels+=line_size;\ | |
999 block +=line_size;\ | |
1000 a= LD32(pixels );\ | |
1001 b= LD32(pixels+1);\ | |
1002 l0= (a&0x03030303UL)\ | |
1003 + (b&0x03030303UL)\ | |
1004 + 0x02020202UL;\ | |
1005 h0= ((a&0xFCFCFCFCUL)>>2)\ | |
1006 + ((b&0xFCFCFCFCUL)>>2);\ | |
1007 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ | |
1008 pixels+=line_size;\ | |
1009 block +=line_size;\ | |
1010 }\ | |
1011 pixels+=4-line_size*(h+1);\ | |
1012 block +=4-line_size*h;\ | |
1013 }\ | |
1014 }\ | |
1015 \ | |
859 | 1016 static inline void OPNAME ## _no_rnd_pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ |
385 | 1017 {\ |
1018 int j;\ | |
1019 for(j=0; j<2; j++){\ | |
1020 int i;\ | |
1021 const uint32_t a= LD32(pixels );\ | |
1022 const uint32_t b= LD32(pixels+1);\ | |
1023 uint32_t l0= (a&0x03030303UL)\ | |
1024 + (b&0x03030303UL)\ | |
1025 + 0x01010101UL;\ | |
1026 uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\ | |
1027 + ((b&0xFCFCFCFCUL)>>2);\ | |
1028 uint32_t l1,h1;\ | |
1029 \ | |
1030 pixels+=line_size;\ | |
1031 for(i=0; i<h; i+=2){\ | |
1032 uint32_t a= LD32(pixels );\ | |
1033 uint32_t b= LD32(pixels+1);\ | |
1034 l1= (a&0x03030303UL)\ | |
1035 + (b&0x03030303UL);\ | |
1036 h1= ((a&0xFCFCFCFCUL)>>2)\ | |
1037 + ((b&0xFCFCFCFCUL)>>2);\ | |
1038 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ | |
1039 pixels+=line_size;\ | |
1040 block +=line_size;\ | |
1041 a= LD32(pixels );\ | |
1042 b= LD32(pixels+1);\ | |
1043 l0= (a&0x03030303UL)\ | |
1044 + (b&0x03030303UL)\ | |
1045 + 0x01010101UL;\ | |
1046 h0= ((a&0xFCFCFCFCUL)>>2)\ | |
1047 + ((b&0xFCFCFCFCUL)>>2);\ | |
1048 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ | |
1049 pixels+=line_size;\ | |
1050 block +=line_size;\ | |
1051 }\ | |
1052 pixels+=4-line_size*(h+1);\ | |
1053 block +=4-line_size*h;\ | |
1054 }\ | |
1055 }\ | |
1056 \ | |
859 | 1057 CALL_2X_PIXELS(OPNAME ## _pixels16_c , OPNAME ## _pixels8_c , 8)\ |
1058 CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels8_x2_c , 8)\ | |
1059 CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels8_y2_c , 8)\ | |
1060 CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels8_xy2_c, 8)\ | |
1061 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_c , OPNAME ## _pixels8_c , 8)\ | |
1062 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels8_x2_c , 8)\ | |
1063 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels8_y2_c , 8)\ | |
1064 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels8_xy2_c, 8)\ | |
651 | 1065 |
1264 | 1066 #define op_avg(a, b) a = rnd_avg32(a, b) |
385 | 1067 #endif |
1068 #define op_put(a, b) a = b | |
1069 | |
1070 PIXOP2(avg, op_avg) | |
1071 PIXOP2(put, op_put) | |
1072 #undef op_avg | |
1073 #undef op_put | |
1074 | |
/* Rounding byte averages: the +1 / +2 biases implement round-to-nearest
 * before the right shift (avg2 over 2 samples, avg4 over 4 samples). */
0 | 1075 #define avg2(a,b) ((a+b+1)>>1)
1076 #define avg4(a,b,c,d) ((a+b+c+d+2)>>2) |
1077 | |
/* Adapters for the two-source no-rounding averagers: the underlying
 * put_no_rnd_pixels*_l2 routines take three independent strides; these
 * wrappers use the single common stride for dst, a and b. */
static void put_no_rnd_pixels16_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h){
    put_no_rnd_pixels16_l2(dst, a, b, stride, stride, stride, h);
}

static void put_no_rnd_pixels8_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h){
    put_no_rnd_pixels8_l2(dst, a, b, stride, stride, stride, h);
}
753 | 1085 |
/* One-warp-point global motion compensation (bilinear, 1/16-pel) for an
 * 8-pixel-wide block of height h.
 * x16/y16 are the fractional positions in sixteenths; the four corner
 * weights A..D sum to 16*16 = 256, so the result is normalised by >>8
 * after adding the caller-supplied rounding bias. */
static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder)
{
    const int A = (16 - x16) * (16 - y16);
    const int B = (     x16) * (16 - y16);
    const int C = (16 - x16) * (     y16);
    const int D = (     x16) * (     y16);
    int i, j;

    for (i = 0; i < h; i++) {
        for (j = 0; j < 8; j++) {
            dst[j] = (A * src[j]              + B * src[j + 1]
                    + C * src[stride + j]     + D * src[stride + j + 1]
                    + rounder) >> 8;
        }
        dst += stride;
        src += stride;
    }
}
1108 | |
/* Affine (6-parameter) global motion compensation for an 8-pixel-wide block
 * of height h.  (ox,oy) is the start position and dxx/dxy/dyx/dyy the warp
 * increments, all in fixed point with 16 fractional bits below the
 * 1/(1<<shift)-pel position (see the vx>>16 / &(s-1) / >>shift extraction).
 * r is the rounding bias for the >>(shift*2) normalisation; width/height are
 * the source dimensions used for edge clamping via clip(). */
static void gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
                  int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
{
    int y, vx, vy;
    const int s = 1 << shift;

    width--;    /* from here on these are the last valid coordinates */
    height--;

    for (y = 0; y < h; y++) {
        int x;

        vx = ox;
        vy = oy;
        for (x = 0; x < 8; x++) { /* XXX FIXME optimize */
            int src_x, src_y, frac_x, frac_y, index;

            src_x   = vx >> 16;
            src_y   = vy >> 16;
            frac_x  = src_x & (s - 1);
            frac_y  = src_y & (s - 1);
            src_x >>= shift;
            src_y >>= shift;

            /* the unsigned compares fold the <0 and >limit tests into one */
            if ((unsigned)src_x < width) {
                if ((unsigned)src_y < height) {
                    /* fully inside: bilinear blend of the 4 neighbours */
                    index = src_x + src_y * stride;
                    dst[y*stride + x] = ((  src[index         ] * (s - frac_x)
                                          + src[index + 1     ] *      frac_x ) * (s - frac_y)
                                        + (  src[index + stride    ] * (s - frac_x)
                                           + src[index + stride + 1] *      frac_x ) * frac_y
                                        + r) >> (shift * 2);
                } else {
                    /* vertically outside: clamp y, interpolate horizontally */
                    index = src_x + clip(src_y, 0, height) * stride;
                    dst[y*stride + x] = ((  src[index    ] * (s - frac_x)
                                          + src[index + 1] *      frac_x ) * s
                                        + r) >> (shift * 2);
                }
            } else {
                if ((unsigned)src_y < height) {
                    /* horizontally outside: clamp x, interpolate vertically */
                    index = clip(src_x, 0, width) + src_y * stride;
                    dst[y*stride + x] = ((  src[index         ] * (s - frac_y)
                                          + src[index + stride] *      frac_y ) * s
                                        + r) >> (shift * 2);
                } else {
                    /* both outside: nearest clamped sample, no filtering */
                    index = clip(src_x, 0, width) + clip(src_y, 0, height) * stride;
                    dst[y*stride + x] = src[index];
                }
            }

            vx += dxx;
            vy += dyx;
        }
        ox += dxy;
        oy += dyy;
    }
}
/* Thirdpel MC, full-pel case (no sub-pel offset): dispatch a plain copy to
 * the width-matched put_pixels routine.  width must be 2, 4, 8 or 16;
 * any other value is silently ignored. */
static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    switch (width) {
    case  2: put_pixels2_c (dst, src, stride, height); break;
    case  4: put_pixels4_c (dst, src, stride, height); break;
    case  8: put_pixels8_c (dst, src, stride, height); break;
    case 16: put_pixels16_c(dst, src, stride, height); break;
    }
}
/* Thirdpel (1/3-pel) interpolators, one per sub-pel position mcXY
 * (X = horizontal thirds, Y = vertical thirds).  Divisions by 3 and 12 are
 * done as multiply+shift: 683 ~= 2^11/3 and 2731 ~= 2^15/12; the +1 / +6
 * constants provide rounding.  Each routine filters a width x height block. */
static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for (y = 0; y < height; y++) {
        /* horizontal 1/3: (2*left + right) / 3 */
        for (x = 0; x < width; x++)
            dst[x] = (683 * (2*src[x] + src[x+1] + 1)) >> 11;
        src += stride;
        dst += stride;
    }
}

static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for (y = 0; y < height; y++) {
        /* horizontal 2/3: (left + 2*right) / 3 */
        for (x = 0; x < width; x++)
            dst[x] = (683 * (src[x] + 2*src[x+1] + 1)) >> 11;
        src += stride;
        dst += stride;
    }
}

static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for (y = 0; y < height; y++) {
        /* vertical 1/3: (2*top + bottom) / 3 */
        for (x = 0; x < width; x++)
            dst[x] = (683 * (2*src[x] + src[x+stride] + 1)) >> 11;
        src += stride;
        dst += stride;
    }
}

static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for (y = 0; y < height; y++) {
        /* diagonal 1/3,1/3: weights 4/3/3/2 over the 2x2 neighbourhood */
        for (x = 0; x < width; x++)
            dst[x] = (2731 * (4*src[x] + 3*src[x+1] + 3*src[x+stride] + 2*src[x+stride+1] + 6)) >> 15;
        src += stride;
        dst += stride;
    }
}

static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for (y = 0; y < height; y++) {
        /* 1/3 horizontal, 2/3 vertical: weights 3/2/4/3 */
        for (x = 0; x < width; x++)
            dst[x] = (2731 * (3*src[x] + 2*src[x+1] + 4*src[x+stride] + 3*src[x+stride+1] + 6)) >> 15;
        src += stride;
        dst += stride;
    }
}

static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for (y = 0; y < height; y++) {
        /* vertical 2/3: (top + 2*bottom) / 3 */
        for (x = 0; x < width; x++)
            dst[x] = (683 * (src[x] + 2*src[x+stride] + 1)) >> 11;
        src += stride;
        dst += stride;
    }
}

static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for (y = 0; y < height; y++) {
        /* 2/3 horizontal, 1/3 vertical: weights 3/4/2/3 */
        for (x = 0; x < width; x++)
            dst[x] = (2731 * (3*src[x] + 4*src[x+1] + 2*src[x+stride] + 3*src[x+stride+1] + 6)) >> 15;
        src += stride;
        dst += stride;
    }
}

static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for (y = 0; y < height; y++) {
        /* diagonal 2/3,2/3: weights 2/3/3/4 */
        for (x = 0; x < width; x++)
            dst[x] = (2731 * (2*src[x] + 3*src[x+1] + 3*src[x+stride] + 4*src[x+stride+1] + 6)) >> 15;
        src += stride;
        dst += stride;
    }
}
/* Thirdpel MC, full-pel averaging case: dispatch to the width-matched
 * avg_pixels routine (dst = rounded average of dst and src).
 * width must be 2, 4, 8 or 16; other values are silently ignored. */
static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    switch (width) {
    case  2: avg_pixels2_c (dst, src, stride, height); break;
    case  4: avg_pixels4_c (dst, src, stride, height); break;
    case  8: avg_pixels8_c (dst, src, stride, height); break;
    case 16: avg_pixels16_c(dst, src, stride, height); break;
    }
}
1272 | |
1273 static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ | |
1274 int i,j; | |
1275 for (i=0; i < height; i++) { | |
1276 for (j=0; j < width; j++) { | |
1277 dst[j] = (dst[j] + ((683*(2*src[j] + src[j+1] + 1)) >> 11) + 1) >> 1; | |
1278 } | |
1279 src += stride; | |
1280 dst += stride; | |
1281 } | |
1282 } | |
1283 | |
1284 static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ | |
1285 int i,j; | |
1286 for (i=0; i < height; i++) { | |
1287 for (j=0; j < width; j++) { | |
1288 dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+1] + 1)) >> 11) + 1) >> 1; | |
1289 } | |
1290 src += stride; | |
1291 dst += stride; | |
1292 } | |
1293 } | |
1294 | |
1295 static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ | |
1296 int i,j; | |
1297 for (i=0; i < height; i++) { | |
1298 for (j=0; j < width; j++) { | |
1299 dst[j] = (dst[j] + ((683*(2*src[j] + src[j+stride] + 1)) >> 11) + 1) >> 1; | |
1300 } | |
1301 src += stride; | |
1302 dst += stride; | |
1303 } | |
1304 } | |
1305 | |
1306 static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ | |
1307 int i,j; | |
1308 for (i=0; i < height; i++) { | |
1309 for (j=0; j < width; j++) { | |
1310 dst[j] = (dst[j] + ((2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15) + 1) >> 1; | |
1311 } | |
1312 src += stride; | |
1313 dst += stride; | |
1314 } | |
1315 } | |
1316 | |
1317 static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ | |
1318 int i,j; | |
1319 for (i=0; i < height; i++) { | |
1320 for (j=0; j < width; j++) { | |
1329 | 1321 dst[j] = (dst[j] + ((2731*(3*src[j] + 2*src[j+1] + 4*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1; |
1319 | 1322 } |
1323 src += stride; | |
1324 dst += stride; | |
1325 } | |
1326 } | |
1327 | |
1328 static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ | |
1329 int i,j; | |
1330 for (i=0; i < height; i++) { | |
1331 for (j=0; j < width; j++) { | |
1332 dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+stride] + 1)) >> 11) + 1) >> 1; | |
1333 } | |
1334 src += stride; | |
1335 dst += stride; | |
1336 } | |
1337 } | |
1338 | |
static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    /* Average dst with a 2D third-pel interpolation of src
       (bilinear weights 3:4:2:3, summing to 12; 2731 ~= 32768/12). */
    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int interp = (2731*(3*src[col] + 4*src[col+1]
                                    + 2*src[col+stride] + 3*src[col+stride+1] + 6)) >> 15;
            dst[col] = (dst[col] + interp + 1) >> 1;
        }
    }
}
1349 | |
static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    /* Average dst with a 2D third-pel interpolation of src
       (bilinear weights 2:3:3:4, summing to 12; 2731 ~= 32768/12). */
    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int interp = (2731*(2*src[col] + 3*src[col+1]
                                    + 3*src[col+stride] + 4*src[col+stride+1] + 6)) >> 15;
            dst[col] = (dst[col] + interp + 1) >> 1;
        }
    }
}
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
#if 0
/* Disabled generator for fixed-width third-pel wrappers around the
   variable-width *_tpel_pixels_mc??_c() functions above.
   NOTE(review): each wrapper body begins with "void", which makes the
   line parse as a (nested) declaration rather than a function call, so
   this macro would not work as written — presumably why the whole
   block is compiled out with #if 0. */
#define TPEL_WIDTH(width)\
static void put_tpel_pixels ## width ## _mc00_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    void put_tpel_pixels_mc00_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc10_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    void put_tpel_pixels_mc10_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc20_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    void put_tpel_pixels_mc20_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc01_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    void put_tpel_pixels_mc01_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc11_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    void put_tpel_pixels_mc11_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc21_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    void put_tpel_pixels_mc21_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc02_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    void put_tpel_pixels_mc02_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc12_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    void put_tpel_pixels_mc12_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc22_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    void put_tpel_pixels_mc22_c(dst, src, stride, width, height);}
#endif
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
1381 |
/**
 * Generator for the C H.264 chroma motion-compensation functions
 * (2-, 4- and 8-pixel-wide blocks; instantiated below for the put
 * and avg variants).
 *
 * (x,y) is the fractional sample position, 0..7 each (asserted).
 * A..D are the four bilinear corner weights; since
 * (8-x)(8-y) + x(8-y) + (8-x)y + xy == 64 they always sum to 64,
 * so every OP() receives the predicted sample scaled by 64.
 */
#define H264_CHROMA_MC(OPNAME, OP)\
static void OPNAME ## h264_chroma_mc2_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
    const int A=(8-x)*(8-y);\
    const int B=( x)*(8-y);\
    const int C=(8-x)*( y);\
    const int D=( x)*( y);\
    int i;\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
    \
    for(i=0; i<h; i++)\
    {\
        OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
        OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
        dst+= stride;\
        src+= stride;\
    }\
}\
\
static void OPNAME ## h264_chroma_mc4_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
    const int A=(8-x)*(8-y);\
    const int B=( x)*(8-y);\
    const int C=(8-x)*( y);\
    const int D=( x)*( y);\
    int i;\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
    \
    for(i=0; i<h; i++)\
    {\
        OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
        OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
        OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
        OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
        dst+= stride;\
        src+= stride;\
    }\
}\
\
static void OPNAME ## h264_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
    const int A=(8-x)*(8-y);\
    const int B=( x)*(8-y);\
    const int C=(8-x)*( y);\
    const int D=( x)*( y);\
    int i;\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
    \
    for(i=0; i<h; i++)\
    {\
        OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
        OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
        OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
        OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
        OP(dst[4], (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5]));\
        OP(dst[5], (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6]));\
        OP(dst[6], (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7]));\
        OP(dst[7], (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8]));\
        dst+= stride;\
        src+= stride;\
    }\
}

/* b is the 64-scaled prediction: op_put rounds it down to a pixel,
   op_avg additionally averages it (rounding up) with the existing dst. */
#define op_avg(a, b) a = (((a)+(((b) + 32)>>6)+1)>>1)
#define op_put(a, b) a = (((b) + 32)>>6)

H264_CHROMA_MC(put_ , op_put)
H264_CHROMA_MC(avg_ , op_avg)
#undef op_avg
#undef op_put
1452 | |
static inline void copy_block4(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
{
    /* Copy h rows of a 4-pixel-wide block, one unaligned 32-bit
       load/store per row (LD32/ST32 are the project's unaligned
       access macros). */
    while (h-- > 0) {
        ST32(dst, LD32(src));
        dst += dstStride;
        src += srcStride;
    }
}
1463 | |
static inline void copy_block8(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
{
    /* Copy h rows of an 8-pixel-wide block as two unaligned
       32-bit chunks per row. */
    while (h-- > 0) {
        ST32(dst,   LD32(src));
        ST32(dst+4, LD32(src+4));
        dst += dstStride;
        src += srcStride;
    }
}
1475 | |
static inline void copy_block16(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
{
    /* Copy h rows of a 16-pixel-wide block as four unaligned
       32-bit chunks per row. */
    while (h-- > 0) {
        ST32(dst,    LD32(src));
        ST32(dst+4,  LD32(src+4));
        ST32(dst+8,  LD32(src+8));
        ST32(dst+12, LD32(src+12));
        dst += dstStride;
        src += srcStride;
    }
}
753 | 1489 |
static inline void copy_block17(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
{
    /* Copy h rows of a 17-pixel-wide block: four unaligned 32-bit
       chunks plus the trailing odd byte (the extra column is the
       filter-support edge used by the 16-wide qpel code below). */
    while (h-- > 0) {
        ST32(dst,    LD32(src));
        ST32(dst+4,  LD32(src+4));
        ST32(dst+8,  LD32(src+8));
        ST32(dst+12, LD32(src+12));
        dst[16] = src[16];
        dst += dstStride;
        src += srcStride;
    }
}
1504 | |
static inline void copy_block9(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
{
    /* Copy h rows of a 9-pixel-wide block: two unaligned 32-bit
       chunks plus the trailing odd byte (the extra column is the
       filter-support edge used by the 8-wide qpel code below). */
    while (h-- > 0) {
        ST32(dst,   LD32(src));
        ST32(dst+4, LD32(src+4));
        dst[8] = src[8];
        dst += dstStride;
        src += srcStride;
    }
}
1517 | |
954 | 1518 |
651 | 1519 #define QPEL_MC(r, OPNAME, RND, OP) \ |
1064 | 1520 static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\ |
1521 uint8_t *cm = cropTbl + MAX_NEG_CROP;\ | |
651 | 1522 int i;\ |
1523 for(i=0; i<h; i++)\ | |
1524 {\ | |
1525 OP(dst[0], (src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]));\ | |
1526 OP(dst[1], (src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]));\ | |
1527 OP(dst[2], (src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]));\ | |
1528 OP(dst[3], (src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]));\ | |
1529 OP(dst[4], (src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]));\ | |
1530 OP(dst[5], (src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]));\ | |
1531 OP(dst[6], (src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]));\ | |
1532 OP(dst[7], (src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]));\ | |
1533 dst+=dstStride;\ | |
1534 src+=srcStride;\ | |
1535 }\ | |
1536 }\ | |
1537 \ | |
1064 | 1538 static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ |
984 | 1539 const int w=8;\ |
1064 | 1540 uint8_t *cm = cropTbl + MAX_NEG_CROP;\ |
651 | 1541 int i;\ |
1542 for(i=0; i<w; i++)\ | |
1543 {\ | |
1544 const int src0= src[0*srcStride];\ | |
1545 const int src1= src[1*srcStride];\ | |
1546 const int src2= src[2*srcStride];\ | |
1547 const int src3= src[3*srcStride];\ | |
1548 const int src4= src[4*srcStride];\ | |
1549 const int src5= src[5*srcStride];\ | |
1550 const int src6= src[6*srcStride];\ | |
1551 const int src7= src[7*srcStride];\ | |
1552 const int src8= src[8*srcStride];\ | |
1553 OP(dst[0*dstStride], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\ | |
1554 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\ | |
1555 OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\ | |
1556 OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\ | |
1557 OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\ | |
1558 OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\ | |
1559 OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\ | |
1560 OP(dst[7*dstStride], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\ | |
1561 dst++;\ | |
1562 src++;\ | |
1563 }\ | |
1564 }\ | |
1565 \ | |
1064 | 1566 static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\ |
1567 uint8_t *cm = cropTbl + MAX_NEG_CROP;\ | |
651 | 1568 int i;\ |
954 | 1569 \ |
651 | 1570 for(i=0; i<h; i++)\ |
1571 {\ | |
1572 OP(dst[ 0], (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]));\ | |
1573 OP(dst[ 1], (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]));\ | |
1574 OP(dst[ 2], (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]));\ | |
1575 OP(dst[ 3], (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]));\ | |
1576 OP(dst[ 4], (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]));\ | |
1577 OP(dst[ 5], (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 9]));\ | |
1578 OP(dst[ 6], (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 9])*3 - (src[ 3]+src[10]));\ | |
1579 OP(dst[ 7], (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 9])*6 + (src[ 5]+src[10])*3 - (src[ 4]+src[11]));\ | |
1580 OP(dst[ 8], (src[ 8]+src[ 9])*20 - (src[ 7]+src[10])*6 + (src[ 6]+src[11])*3 - (src[ 5]+src[12]));\ | |
1581 OP(dst[ 9], (src[ 9]+src[10])*20 - (src[ 8]+src[11])*6 + (src[ 7]+src[12])*3 - (src[ 6]+src[13]));\ | |
1582 OP(dst[10], (src[10]+src[11])*20 - (src[ 9]+src[12])*6 + (src[ 8]+src[13])*3 - (src[ 7]+src[14]));\ | |
1583 OP(dst[11], (src[11]+src[12])*20 - (src[10]+src[13])*6 + (src[ 9]+src[14])*3 - (src[ 8]+src[15]));\ | |
1584 OP(dst[12], (src[12]+src[13])*20 - (src[11]+src[14])*6 + (src[10]+src[15])*3 - (src[ 9]+src[16]));\ | |
1585 OP(dst[13], (src[13]+src[14])*20 - (src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]));\ | |
1586 OP(dst[14], (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]));\ | |
1587 OP(dst[15], (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]));\ | |
1588 dst+=dstStride;\ | |
1589 src+=srcStride;\ | |
1590 }\ | |
255 | 1591 }\ |
1592 \ | |
1064 | 1593 static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ |
1594 uint8_t *cm = cropTbl + MAX_NEG_CROP;\ | |
651 | 1595 int i;\ |
954 | 1596 const int w=16;\ |
651 | 1597 for(i=0; i<w; i++)\ |
1598 {\ | |
1599 const int src0= src[0*srcStride];\ | |
1600 const int src1= src[1*srcStride];\ | |
1601 const int src2= src[2*srcStride];\ | |
1602 const int src3= src[3*srcStride];\ | |
1603 const int src4= src[4*srcStride];\ | |
1604 const int src5= src[5*srcStride];\ | |
1605 const int src6= src[6*srcStride];\ | |
1606 const int src7= src[7*srcStride];\ | |
1607 const int src8= src[8*srcStride];\ | |
1608 const int src9= src[9*srcStride];\ | |
1609 const int src10= src[10*srcStride];\ | |
1610 const int src11= src[11*srcStride];\ | |
1611 const int src12= src[12*srcStride];\ | |
1612 const int src13= src[13*srcStride];\ | |
1613 const int src14= src[14*srcStride];\ | |
1614 const int src15= src[15*srcStride];\ | |
1615 const int src16= src[16*srcStride];\ | |
1616 OP(dst[ 0*dstStride], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\ | |
1617 OP(dst[ 1*dstStride], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\ | |
1618 OP(dst[ 2*dstStride], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\ | |
1619 OP(dst[ 3*dstStride], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\ | |
1620 OP(dst[ 4*dstStride], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\ | |
1621 OP(dst[ 5*dstStride], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\ | |
1622 OP(dst[ 6*dstStride], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\ | |
1623 OP(dst[ 7*dstStride], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\ | |
1624 OP(dst[ 8*dstStride], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\ | |
1625 OP(dst[ 9*dstStride], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\ | |
1626 OP(dst[10*dstStride], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\ | |
1627 OP(dst[11*dstStride], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\ | |
1628 OP(dst[12*dstStride], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\ | |
1629 OP(dst[13*dstStride], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\ | |
1630 OP(dst[14*dstStride], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\ | |
1631 OP(dst[15*dstStride], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\ | |
1632 dst++;\ | |
1633 src++;\ | |
1634 }\ | |
255 | 1635 }\ |
1636 \ | |
1064 | 1637 static void OPNAME ## qpel8_mc00_c (uint8_t *dst, uint8_t *src, int stride){\ |
859 | 1638 OPNAME ## pixels8_c(dst, src, stride, 8);\ |
255 | 1639 }\ |
1640 \ | |
1064 | 1641 static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){\ |
1642 uint8_t half[64];\ | |
651 | 1643 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\ |
1644 OPNAME ## pixels8_l2(dst, src, half, stride, stride, 8, 8);\ | |
1645 }\ | |
1646 \ | |
1064 | 1647 static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){\ |
651 | 1648 OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\ |
255 | 1649 }\ |
1650 \ | |
1064 | 1651 static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){\ |
1652 uint8_t half[64];\ | |
651 | 1653 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\ |
1654 OPNAME ## pixels8_l2(dst, src+1, half, stride, stride, 8, 8);\ | |
1655 }\ | |
1656 \ | |
1064 | 1657 static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride){\ |
1658 uint8_t full[16*9];\ | |
1659 uint8_t half[64];\ | |
651 | 1660 copy_block9(full, src, 16, stride, 9);\ |
984 | 1661 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\ |
651 | 1662 OPNAME ## pixels8_l2(dst, full, half, stride, 16, 8, 8);\ |
1663 }\ | |
1664 \ | |
1064 | 1665 static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){\ |
1666 uint8_t full[16*9];\ | |
651 | 1667 copy_block9(full, src, 16, stride, 9);\ |
984 | 1668 OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\ |
255 | 1669 }\ |
1670 \ | |
1064 | 1671 static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, int stride){\ |
1672 uint8_t full[16*9];\ | |
1673 uint8_t half[64];\ | |
651 | 1674 copy_block9(full, src, 16, stride, 9);\ |
984 | 1675 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\ |
651 | 1676 OPNAME ## pixels8_l2(dst, full+16, half, stride, 16, 8, 8);\ |
1677 }\ | |
1064 | 1678 void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\ |
1679 uint8_t full[16*9];\ | |
1680 uint8_t halfH[72];\ | |
1681 uint8_t halfV[64];\ | |
1682 uint8_t halfHV[64];\ | |
651 | 1683 copy_block9(full, src, 16, stride, 9);\ |
1684 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ | |
984 | 1685 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\ |
1686 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ | |
651 | 1687 OPNAME ## pixels8_l4(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\ |
255 | 1688 }\ |
1064 | 1689 static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride){\ |
1690 uint8_t full[16*9];\ | |
1691 uint8_t halfH[72];\ | |
1692 uint8_t halfHV[64];\ | |
984 | 1693 copy_block9(full, src, 16, stride, 9);\ |
1694 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ | |
1695 put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\ | |
1696 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ | |
1697 OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\ | |
1698 }\ | |
1064 | 1699 void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\ |
1700 uint8_t full[16*9];\ | |
1701 uint8_t halfH[72];\ | |
1702 uint8_t halfV[64];\ | |
1703 uint8_t halfHV[64];\ | |
651 | 1704 copy_block9(full, src, 16, stride, 9);\ |
1705 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ | |
984 | 1706 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\ |
1707 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ | |
651 | 1708 OPNAME ## pixels8_l4(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\ |
255 | 1709 }\ |
1064 | 1710 static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride){\ |
1711 uint8_t full[16*9];\ | |
1712 uint8_t halfH[72];\ | |
1713 uint8_t halfHV[64];\ | |
984 | 1714 copy_block9(full, src, 16, stride, 9);\ |
1715 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ | |
1716 put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\ | |
1717 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ | |
1718 OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\ | |
1719 }\ | |
1064 | 1720 void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\ |
1721 uint8_t full[16*9];\ | |
1722 uint8_t halfH[72];\ | |
1723 uint8_t halfV[64];\ | |
1724 uint8_t halfHV[64];\ | |
651 | 1725 copy_block9(full, src, 16, stride, 9);\ |
1726 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ | |
984 | 1727 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\ |
1728 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ | |
651 | 1729 OPNAME ## pixels8_l4(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\ |
1730 }\ | |
1064 | 1731 static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride){\ |
1732 uint8_t full[16*9];\ | |
1733 uint8_t halfH[72];\ | |
1734 uint8_t halfHV[64];\ | |
984 | 1735 copy_block9(full, src, 16, stride, 9);\ |
1736 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ | |
1737 put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\ | |
1738 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ | |
1739 OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\ | |
1740 }\ | |
1064 | 1741 void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\ |
1742 uint8_t full[16*9];\ | |
1743 uint8_t halfH[72];\ | |
1744 uint8_t halfV[64];\ | |
1745 uint8_t halfHV[64];\ | |
651 | 1746 copy_block9(full, src, 16, stride, 9);\ |
1747 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full , 8, 16, 9);\ | |
984 | 1748 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\ |
1749 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ | |
651 | 1750 OPNAME ## pixels8_l4(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\ |
255 | 1751 }\ |
1064 | 1752 static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){\ |
1753 uint8_t full[16*9];\ | |
1754 uint8_t halfH[72];\ | |
1755 uint8_t halfHV[64];\ | |
984 | 1756 copy_block9(full, src, 16, stride, 9);\ |
1757 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ | |
1758 put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\ | |
1759 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ | |
1760 OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\ | |
1761 }\ | |
1064 | 1762 static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, int stride){\ |
1763 uint8_t halfH[72];\ | |
1764 uint8_t halfHV[64];\ | |
651 | 1765 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\ |
984 | 1766 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ |
651 | 1767 OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\ |
1768 }\ | |
1064 | 1769 static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, int stride){\ |
1770 uint8_t halfH[72];\ | |
1771 uint8_t halfHV[64];\ | |
651 | 1772 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\ |
984 | 1773 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ |
651 | 1774 OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\ |
1775 }\ | |
1064 | 1776 void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\ |
1777 uint8_t full[16*9];\ | |
1778 uint8_t halfH[72];\ | |
1779 uint8_t halfV[64];\ | |
1780 uint8_t halfHV[64];\ | |
651 | 1781 copy_block9(full, src, 16, stride, 9);\ |
1782 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ | |
984 | 1783 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\ |
1784 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ | |
651 | 1785 OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\ |
255 | 1786 }\ |
1064 | 1787 static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){\ |
1788 uint8_t full[16*9];\ | |
1789 uint8_t halfH[72];\ | |
984 | 1790 copy_block9(full, src, 16, stride, 9);\ |
1791 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ | |
1792 put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\ | |
1793 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\ | |
1794 }\ | |
1064 | 1795 void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\ |
1796 uint8_t full[16*9];\ | |
1797 uint8_t halfH[72];\ | |
1798 uint8_t halfV[64];\ | |
1799 uint8_t halfHV[64];\ | |
651 | 1800 copy_block9(full, src, 16, stride, 9);\ |
1801 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ | |
984 | 1802 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\ |
1803 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ | |
651 | 1804 OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\ |
1805 }\ | |
1064 | 1806 static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){\ |
1807 uint8_t full[16*9];\ | |
1808 uint8_t halfH[72];\ | |
984 | 1809 copy_block9(full, src, 16, stride, 9);\ |
1810 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ | |
1811 put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\ | |
1812 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\ | |
1813 }\ | |
1064 | 1814 static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){\ |
1815 uint8_t halfH[72];\ | |
651 | 1816 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\ |
984 | 1817 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\ |
651 | 1818 }\ |
1064 | 1819 static void OPNAME ## qpel16_mc00_c (uint8_t *dst, uint8_t *src, int stride){\ |
859 | 1820 OPNAME ## pixels16_c(dst, src, stride, 16);\ |
255 | 1821 }\ |
651 | 1822 \ |
1064 | 1823 static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, int stride){\ |
1824 uint8_t half[256];\ | |
651 | 1825 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\ |
1826 OPNAME ## pixels16_l2(dst, src, half, stride, stride, 16, 16);\ | |
1827 }\ | |
1828 \ | |
1064 | 1829 static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride){\ |
651 | 1830 OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\ |
1831 }\ | |
1832 \ | |
1064 | 1833 static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, int stride){\ |
1834 uint8_t half[256];\ | |
651 | 1835 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\ |
1836 OPNAME ## pixels16_l2(dst, src+1, half, stride, stride, 16, 16);\ | |
1837 }\ | |
1838 \ | |
1064 | 1839 static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride){\ |
1840 uint8_t full[24*17];\ | |
1841 uint8_t half[256];\ | |
651 | 1842 copy_block17(full, src, 24, stride, 17);\ |
954 | 1843 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\ |
651 | 1844 OPNAME ## pixels16_l2(dst, full, half, stride, 24, 16, 16);\ |
255 | 1845 }\ |
651 | 1846 \ |
1064 | 1847 static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, int stride){\ |
1848 uint8_t full[24*17];\ | |
651 | 1849 copy_block17(full, src, 24, stride, 17);\ |
954 | 1850 OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\ |
651 | 1851 }\ |
1852 \ | |
1064 | 1853 static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, int stride){\ |
1854 uint8_t full[24*17];\ | |
1855 uint8_t half[256];\ | |
651 | 1856 copy_block17(full, src, 24, stride, 17);\ |
954 | 1857 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\ |
651 | 1858 OPNAME ## pixels16_l2(dst, full+24, half, stride, 24, 16, 16);\ |
255 | 1859 }\ |
1064 | 1860 void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\ |
1861 uint8_t full[24*17];\ | |
1862 uint8_t halfH[272];\ | |
1863 uint8_t halfV[256];\ | |
1864 uint8_t halfHV[256];\ | |
651 | 1865 copy_block17(full, src, 24, stride, 17);\ |
1866 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ | |
954 | 1867 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\ |
1868 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ | |
651 | 1869 OPNAME ## pixels16_l4(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\ |
1870 }\ | |
1064 | 1871 static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride){\ |
1872 uint8_t full[24*17];\ | |
1873 uint8_t halfH[272];\ | |
1874 uint8_t halfHV[256];\ | |
984 | 1875 copy_block17(full, src, 24, stride, 17);\ |
1876 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ | |
1877 put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\ | |
1878 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ | |
1879 OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\ | |
1880 }\ | |
1064 | 1881 void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\ |
1882 uint8_t full[24*17];\ | |
1883 uint8_t halfH[272];\ | |
1884 uint8_t halfV[256];\ | |
1885 uint8_t halfHV[256];\ | |
651 | 1886 copy_block17(full, src, 24, stride, 17);\ |
1887 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ | |
954 | 1888 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\ |
1889 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ | |
651 | 1890 OPNAME ## pixels16_l4(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\ |
1891 }\ | |
1064 | 1892 static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride){\ |
1893 uint8_t full[24*17];\ | |
1894 uint8_t halfH[272];\ | |
1895 uint8_t halfHV[256];\ | |
984 | 1896 copy_block17(full, src, 24, stride, 17);\ |
1897 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ | |
1898 put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\ | |
1899 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ | |
1900 OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\ | |
1901 }\ | |
1064 | 1902 void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\ |
1903 uint8_t full[24*17];\ | |
1904 uint8_t halfH[272];\ | |
1905 uint8_t halfV[256];\ | |
1906 uint8_t halfHV[256];\ | |
651 | 1907 copy_block17(full, src, 24, stride, 17);\ |
1908 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ | |
954 | 1909 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\ |
1910 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ | |
651 | 1911 OPNAME ## pixels16_l4(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\ |
255 | 1912 }\ |
1064 | 1913 static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride){\ |
1914 uint8_t full[24*17];\ | |
1915 uint8_t halfH[272];\ | |
1916 uint8_t halfHV[256];\ | |
984 | 1917 copy_block17(full, src, 24, stride, 17);\ |
1918 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ | |
1919 put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\ | |
1920 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ | |
1921 OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\ | |
1922 }\ | |
1064 | 1923 void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\ |
1924 uint8_t full[24*17];\ | |
1925 uint8_t halfH[272];\ | |
1926 uint8_t halfV[256];\ | |
1927 uint8_t halfHV[256];\ | |
651 | 1928 copy_block17(full, src, 24, stride, 17);\ |
1929 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full , 16, 24, 17);\ | |
954 | 1930 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\ |
1931 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ | |
651 | 1932 OPNAME ## pixels16_l4(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\ |
1933 }\ | |
1064 | 1934 static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){\ |
1935 uint8_t full[24*17];\ | |
1936 uint8_t halfH[272];\ | |
1937 uint8_t halfHV[256];\ | |
984 | 1938 copy_block17(full, src, 24, stride, 17);\ |
1939 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ | |
1940 put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\ | |
1941 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ | |
1942 OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\ | |
1943 }\ | |
1064 | 1944 static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, int stride){\ |
1945 uint8_t halfH[272];\ | |
1946 uint8_t halfHV[256];\ | |
651 | 1947 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\ |
954 | 1948 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ |
651 | 1949 OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\ |
255 | 1950 }\ |
1064 | 1951 static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, int stride){\ |
1952 uint8_t halfH[272];\ | |
1953 uint8_t halfHV[256];\ | |
651 | 1954 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\ |
954 | 1955 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ |
651 | 1956 OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\ |
1957 }\ | |
1064 | 1958 void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\ |
1959 uint8_t full[24*17];\ | |
1960 uint8_t halfH[272];\ | |
1961 uint8_t halfV[256];\ | |
1962 uint8_t halfHV[256];\ | |
651 | 1963 copy_block17(full, src, 24, stride, 17);\ |
1964 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ | |
954 | 1965 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\ |
1966 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ | |
651 | 1967 OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\ |
255 | 1968 }\ |
1064 | 1969 static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, int stride){\ |
1970 uint8_t full[24*17];\ | |
1971 uint8_t halfH[272];\ | |
984 | 1972 copy_block17(full, src, 24, stride, 17);\ |
1973 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ | |
1974 put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\ | |
1975 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\ | |
1976 }\ | |
1064 | 1977 void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\ |
1978 uint8_t full[24*17];\ | |
1979 uint8_t halfH[272];\ | |
1980 uint8_t halfV[256];\ | |
1981 uint8_t halfHV[256];\ | |
651 | 1982 copy_block17(full, src, 24, stride, 17);\ |
1983 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ | |
954 | 1984 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\ |
1985 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ | |
651 | 1986 OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\ |
1987 }\ | |
1064 | 1988 static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, int stride){\ |
1989 uint8_t full[24*17];\ | |
1990 uint8_t halfH[272];\ | |
984 | 1991 copy_block17(full, src, 24, stride, 17);\ |
1992 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ | |
1993 put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\ | |
1994 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\ | |
1995 }\ | |
1064 | 1996 static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, int stride){\ |
1997 uint8_t halfH[272];\ | |
651 | 1998 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\ |
954 | 1999 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\ |
859 | 2000 } |
255 | 2001 |
/* Pixel-store operators plugged into QPEL_MC as OP():
 *   put  : normalize the 6-tap filter sum (+16 round, >>5, clip via cm[])
 *          and overwrite the destination.
 *   avg  : same, then average with the existing destination (+1 round).
 *   *_no_rnd: down-rounding variants (+15 instead of +16; avg without +1),
 *          used by the MPEG-4 "no rounding" prediction mode.
 * cm is the clip table pointer in scope inside each expanded function. */
#define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
#define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1)
#define op_put(a, b) a = cm[((b) + 16)>>5]
#define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5]

/* Instantiate the qpel MC function families. */
QPEL_MC(0, put_ , _ , op_put)
QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd)
QPEL_MC(0, avg_ , _ , op_avg)
//QPEL_MC(1, avg_no_rnd , _ , op_avg)
#undef op_avg
#undef op_avg_no_rnd
#undef op_put
#undef op_put_no_rnd
255 | 2015 |
1168 | 2016 #if 1 |
/*
 * H264_LOWPASS: expands the C reference implementations of the H.264
 * 6-tap (1,-5,20,20,-5,1) half-sample interpolation filters for 4x4,
 * 8x8 and 16x16 blocks.
 *   *_h_lowpass : filter horizontally; OP() normalizes/clips/stores.
 *   *_v_lowpass : filter vertically.
 *   *_hv_lowpass: filter horizontally into the int16_t tmp[] plane first
 *                 (unnormalized sums need more than 8 bits), then filter
 *                 that vertically; OP2() does the combined normalization.
 * The 16x16 versions are composed of four 8x8 calls.
 */
#define H264_LOWPASS(OPNAME, OP, OP2) \
static void OPNAME ## h264_qpel4_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int h=4;\
    uint8_t *cm = cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<h; i++)\
    {\
        OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\
        OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\
        OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]));\
        OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]));\
        dst+=dstStride;\
        src+=srcStride;\
    }\
}\
\
static void OPNAME ## h264_qpel4_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int w=4;\
    uint8_t *cm = cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<w; i++)\
    {\
        const int srcB= src[-2*srcStride];\
        const int srcA= src[-1*srcStride];\
        const int src0= src[0 *srcStride];\
        const int src1= src[1 *srcStride];\
        const int src2= src[2 *srcStride];\
        const int src3= src[3 *srcStride];\
        const int src4= src[4 *srcStride];\
        const int src5= src[5 *srcStride];\
        const int src6= src[6 *srcStride];\
        OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
        OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
        OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
        OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
        dst++;\
        src++;\
    }\
}\
\
static void OPNAME ## h264_qpel4_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    const int h=4;\
    const int w=4;\
    uint8_t *cm = cropTbl + MAX_NEG_CROP;\
    int i;\
    src -= 2*srcStride;  /* start 2 rows above: vertical filter context */\
    for(i=0; i<h+5; i++)\
    {\
        tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]);\
        tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]);\
        tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]);\
        tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]);\
        tmp+=tmpStride;\
        src+=srcStride;\
    }\
    tmp -= tmpStride*(h+5-2);  /* rewind to row 2, the first output row */\
    for(i=0; i<w; i++)\
    {\
        const int tmpB= tmp[-2*tmpStride];\
        const int tmpA= tmp[-1*tmpStride];\
        const int tmp0= tmp[0 *tmpStride];\
        const int tmp1= tmp[1 *tmpStride];\
        const int tmp2= tmp[2 *tmpStride];\
        const int tmp3= tmp[3 *tmpStride];\
        const int tmp4= tmp[4 *tmpStride];\
        const int tmp5= tmp[5 *tmpStride];\
        const int tmp6= tmp[6 *tmpStride];\
        OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
        OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
        OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\
        OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\
        dst++;\
        tmp++;\
    }\
}\
\
static void OPNAME ## h264_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int h=8;\
    uint8_t *cm = cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<h; i++)\
    {\
        OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]));\
        OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]));\
        OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]));\
        OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]));\
        OP(dst[4], (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]));\
        OP(dst[5], (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]));\
        OP(dst[6], (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]));\
        OP(dst[7], (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]));\
        dst+=dstStride;\
        src+=srcStride;\
    }\
}\
\
static void OPNAME ## h264_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int w=8;\
    uint8_t *cm = cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<w; i++)\
    {\
        const int srcB= src[-2*srcStride];\
        const int srcA= src[-1*srcStride];\
        const int src0= src[0 *srcStride];\
        const int src1= src[1 *srcStride];\
        const int src2= src[2 *srcStride];\
        const int src3= src[3 *srcStride];\
        const int src4= src[4 *srcStride];\
        const int src5= src[5 *srcStride];\
        const int src6= src[6 *srcStride];\
        const int src7= src[7 *srcStride];\
        const int src8= src[8 *srcStride];\
        const int src9= src[9 *srcStride];\
        const int src10=src[10*srcStride];\
        OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
        OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
        OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
        OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
        OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*5 + (src2+src7));\
        OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*5 + (src3+src8));\
        OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*5 + (src4+src9));\
        OP(dst[7*dstStride], (src7+src8)*20 - (src6+src9)*5 + (src5+src10));\
        dst++;\
        src++;\
    }\
}\
\
static void OPNAME ## h264_qpel8_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    const int h=8;\
    const int w=8;\
    uint8_t *cm = cropTbl + MAX_NEG_CROP;\
    int i;\
    src -= 2*srcStride;  /* start 2 rows above: vertical filter context */\
    for(i=0; i<h+5; i++)\
    {\
        tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]);\
        tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]);\
        tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]);\
        tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]);\
        tmp[4]= (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]);\
        tmp[5]= (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]);\
        tmp[6]= (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]);\
        tmp[7]= (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]);\
        tmp+=tmpStride;\
        src+=srcStride;\
    }\
    tmp -= tmpStride*(h+5-2);  /* rewind to row 2, the first output row */\
    for(i=0; i<w; i++)\
    {\
        const int tmpB= tmp[-2*tmpStride];\
        const int tmpA= tmp[-1*tmpStride];\
        const int tmp0= tmp[0 *tmpStride];\
        const int tmp1= tmp[1 *tmpStride];\
        const int tmp2= tmp[2 *tmpStride];\
        const int tmp3= tmp[3 *tmpStride];\
        const int tmp4= tmp[4 *tmpStride];\
        const int tmp5= tmp[5 *tmpStride];\
        const int tmp6= tmp[6 *tmpStride];\
        const int tmp7= tmp[7 *tmpStride];\
        const int tmp8= tmp[8 *tmpStride];\
        const int tmp9= tmp[9 *tmpStride];\
        const int tmp10=tmp[10*tmpStride];\
        OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
        OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
        OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\
        OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\
        OP2(dst[4*dstStride], (tmp4+tmp5)*20 - (tmp3+tmp6)*5 + (tmp2+tmp7));\
        OP2(dst[5*dstStride], (tmp5+tmp6)*20 - (tmp4+tmp7)*5 + (tmp3+tmp8));\
        OP2(dst[6*dstStride], (tmp6+tmp7)*20 - (tmp5+tmp8)*5 + (tmp4+tmp9));\
        OP2(dst[7*dstStride], (tmp7+tmp8)*20 - (tmp6+tmp9)*5 + (tmp5+tmp10));\
        dst++;\
        tmp++;\
    }\
}\
\
static void OPNAME ## h264_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    OPNAME ## h264_qpel8_v_lowpass(dst  , src  , dstStride, srcStride);\
    OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\
    src += 8*srcStride;\
    dst += 8*dstStride;\
    OPNAME ## h264_qpel8_v_lowpass(dst  , src  , dstStride, srcStride);\
    OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\
}\
\
static void OPNAME ## h264_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    OPNAME ## h264_qpel8_h_lowpass(dst  , src  , dstStride, srcStride);\
    OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\
    src += 8*srcStride;\
    dst += 8*dstStride;\
    OPNAME ## h264_qpel8_h_lowpass(dst  , src  , dstStride, srcStride);\
    OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\
}\
\
static void OPNAME ## h264_qpel16_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    OPNAME ## h264_qpel8_hv_lowpass(dst  , tmp  , src  , dstStride, tmpStride, srcStride);\
    OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\
    src += 8*srcStride;\
    dst += 8*dstStride;\
    OPNAME ## h264_qpel8_hv_lowpass(dst  , tmp  , src  , dstStride, tmpStride, srcStride);\
    OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\
}\
2218 | |
/*
 * H264_MC: expands the 16 quarter-sample motion-compensation functions
 * _mcXY_c (X,Y = quarter-sample offset in 0..3) for one block SIZE.
 * Where the vertical filter is used, the needed full-sample rows
 * (2 above, SIZE inside, 3 below) are first gathered into full[] by
 * copy_block##SIZE; full_mid points at the first row of the block proper.
 * Quarter positions are produced by averaging (pixels##SIZE##_l2) the two
 * nearest full/half-sample planes, per the H.264 interpolation scheme.
 */
#define H264_MC(OPNAME, SIZE) \
static void OPNAME ## h264_qpel ## SIZE ## _mc00_c (uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## pixels ## SIZE ## _c(dst, src, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc10_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t half[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, src, half, stride, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc20_c(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## h264_qpel ## SIZE ## _h_lowpass(dst, src, stride, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc30_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t half[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, src+1, half, stride, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc01_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t half[SIZE*SIZE];\
    copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, full_mid, half, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc02_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
    OPNAME ## h264_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc03_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t half[SIZE*SIZE];\
    copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, full_mid+SIZE, half, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc11_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
    copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc31_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
    copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc13_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
    copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc33_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
    copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc22_c(uint8_t *dst, uint8_t *src, int stride){\
    int16_t tmp[SIZE*(SIZE+5)];\
    OPNAME ## h264_qpel ## SIZE ## _hv_lowpass(dst, tmp, src, stride, SIZE, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc21_c(uint8_t *dst, uint8_t *src, int stride){\
    int16_t tmp[SIZE*(SIZE+5)];\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfHV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc23_c(uint8_t *dst, uint8_t *src, int stride){\
    int16_t tmp[SIZE*(SIZE+5)];\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfHV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc12_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    int16_t tmp[SIZE*(SIZE+5)];\
    uint8_t halfV[SIZE*SIZE];\
    uint8_t halfHV[SIZE*SIZE];\
    copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc32_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    int16_t tmp[SIZE*(SIZE+5)];\
    uint8_t halfV[SIZE*SIZE];\
    uint8_t halfHV[SIZE*SIZE];\
    copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\
}\
2355 | |
/* Store operators for H264_LOWPASS / H264_MC:
 *   OP  normalizes a single 6-tap pass (+16 round, >>5, clip via cm[]);
 *   OP2 normalizes the double (hv) pass   (+512 round, >>10);
 *   the 'avg' forms additionally average with the destination. */
#define op_avg(a, b)  a = (((a)+cm[((b) + 16)>>5]+1)>>1)
//#define op_avg2(a, b) a = (((a)*w1+cm[((b) + 16)>>5]*w2 + o + 64)>>7)
#define op_put(a, b)  a = cm[((b) + 16)>>5]
#define op2_avg(a, b)  a = (((a)+cm[((b) + 512)>>10]+1)>>1)
#define op2_put(a, b)  a = cm[((b) + 512)>>10]

H264_LOWPASS(put_       , op_put, op2_put)
H264_LOWPASS(avg_       , op_avg, op2_avg)
H264_MC(put_, 4)
H264_MC(put_, 8)
H264_MC(put_, 16)
H264_MC(avg_, 4)
H264_MC(avg_, 8)
H264_MC(avg_, 16)

#undef op_avg
#undef op_put
#undef op2_avg
#undef op2_put
#endif
2376 | |
/*
 * H264_WEIGHT: expands the C weighted-prediction routines for a WxH block.
 *
 *   weight_h264_pixels_WxH_c:  in-place unidirectional weighting
 *       block[i] = clip( (block[i]*weight + off) >> log2_denom )
 *     where off pre-folds (offset << log2_denom) plus the rounding term.
 *   biweight_h264_pixels_WxH_c: bidirectional weighting of src into dst
 *       dst[i] = clip( (src[i]*weights + dst[i]*weightd + off) >> (log2_denom+1) )
 *     where off folds the averaged per-reference offsets plus rounding.
 *
 * Fix vs. previous revision: the generated functions declared 'int x, y;'
 * but x was never used (the op_scale macro parameter named x is expanded
 * with literal indices), causing an unused-variable warning per instance.
 */
#define op_scale1(x) block[x] = clip_uint8( (block[x]*weight + offset) >> log2_denom )
#define op_scale2(x) dst[x] = clip_uint8( (src[x]*weights + dst[x]*weightd + offset) >> (log2_denom+1))
#define H264_WEIGHT(W,H) \
static void weight_h264_pixels ## W ## x ## H ## _c(uint8_t *block, int stride, int log2_denom, int weight, int offset){ \
    int y; \
    /* pre-scale the offset and fold in the rounding term once */ \
    offset <<= log2_denom; \
    if(log2_denom) offset += 1<<(log2_denom-1); \
    for(y=0; y<H; y++, block += stride){ \
        op_scale1(0); \
        op_scale1(1); \
        if(W==2) continue; \
        op_scale1(2); \
        op_scale1(3); \
        if(W==4) continue; \
        op_scale1(4); \
        op_scale1(5); \
        op_scale1(6); \
        op_scale1(7); \
        if(W==8) continue; \
        op_scale1(8); \
        op_scale1(9); \
        op_scale1(10); \
        op_scale1(11); \
        op_scale1(12); \
        op_scale1(13); \
        op_scale1(14); \
        op_scale1(15); \
    } \
} \
static void biweight_h264_pixels ## W ## x ## H ## _c(uint8_t *dst, uint8_t *src, int stride, int log2_denom, int weightd, int weights, int offsetd, int offsets){ \
    int y; \
    /* average the two offsets, then pre-shift with +0.5 rounding so the \
       single >>(log2_denom+1) in op_scale2 yields the spec result */ \
    int offset = (offsets + offsetd + 1) >> 1; \
    offset = ((offset << 1) + 1) << log2_denom; \
    for(y=0; y<H; y++, dst += stride, src += stride){ \
        op_scale2(0); \
        op_scale2(1); \
        if(W==2) continue; \
        op_scale2(2); \
        op_scale2(3); \
        if(W==4) continue; \
        op_scale2(4); \
        op_scale2(5); \
        op_scale2(6); \
        op_scale2(7); \
        if(W==8) continue; \
        op_scale2(8); \
        op_scale2(9); \
        op_scale2(10); \
        op_scale2(11); \
        op_scale2(12); \
        op_scale2(13); \
        op_scale2(14); \
        op_scale2(15); \
    } \
}
2432 | |
/* Instantiate weighted prediction for every H.264 partition size. */
H264_WEIGHT(16,16)
H264_WEIGHT(16,8)
H264_WEIGHT(8,16)
H264_WEIGHT(8,8)
H264_WEIGHT(8,4)
H264_WEIGHT(4,8)
H264_WEIGHT(4,4)
H264_WEIGHT(4,2)
H264_WEIGHT(2,4)
H264_WEIGHT(2,2)

#undef op_scale1
#undef op_scale2
#undef H264_WEIGHT
2447 | |
936 | 2448 static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){ |
2449 uint8_t *cm = cropTbl + MAX_NEG_CROP; | |
2450 int i; | |
2451 | |
2452 for(i=0; i<h; i++){ | |
2453 dst[0]= cm[(9*(src[0] + src[1]) - (src[-1] + src[2]) + 8)>>4]; | |
2454 dst[1]= cm[(9*(src[1] + src[2]) - (src[ 0] + src[3]) + 8)>>4]; | |
2455 dst[2]= cm[(9*(src[2] + src[3]) - (src[ 1] + src[4]) + 8)>>4]; | |
2456 dst[3]= cm[(9*(src[3] + src[4]) - (src[ 2] + src[5]) + 8)>>4]; | |
2457 dst[4]= cm[(9*(src[4] + src[5]) - (src[ 3] + src[6]) + 8)>>4]; | |
2458 dst[5]= cm[(9*(src[5] + src[6]) - (src[ 4] + src[7]) + 8)>>4]; | |
2459 dst[6]= cm[(9*(src[6] + src[7]) - (src[ 5] + src[8]) + 8)>>4]; | |
2460 dst[7]= cm[(9*(src[7] + src[8]) - (src[ 6] + src[9]) + 8)>>4]; | |
2461 dst+=dstStride; | |
2462 src+=srcStride; | |
2463 } | |
2464 } | |
2465 | |
2466 static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){ | |
2467 uint8_t *cm = cropTbl + MAX_NEG_CROP; | |
2468 int i; | |
2469 | |
2470 for(i=0; i<w; i++){ | |
2471 const int src_1= src[ -srcStride]; | |
2472 const int src0 = src[0 ]; | |
2473 const int src1 = src[ srcStride]; | |
2474 const int src2 = src[2*srcStride]; | |
2475 const int src3 = src[3*srcStride]; | |
2476 const int src4 = src[4*srcStride]; | |
2477 const int src5 = src[5*srcStride]; | |
2478 const int src6 = src[6*srcStride]; | |
2479 const int src7 = src[7*srcStride]; | |
2480 const int src8 = src[8*srcStride]; | |
2481 const int src9 = src[9*srcStride]; | |
2482 dst[0*dstStride]= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4]; | |
2483 dst[1*dstStride]= cm[(9*(src1 + src2) - (src0 + src3) + 8)>>4]; | |
2484 dst[2*dstStride]= cm[(9*(src2 + src3) - (src1 + src4) + 8)>>4]; | |
2485 dst[3*dstStride]= cm[(9*(src3 + src4) - (src2 + src5) + 8)>>4]; | |
2486 dst[4*dstStride]= cm[(9*(src4 + src5) - (src3 + src6) + 8)>>4]; | |
2487 dst[5*dstStride]= cm[(9*(src5 + src6) - (src4 + src7) + 8)>>4]; | |
2488 dst[6*dstStride]= cm[(9*(src6 + src7) - (src5 + src8) + 8)>>4]; | |
2489 dst[7*dstStride]= cm[(9*(src7 + src8) - (src6 + src9) + 8)>>4]; | |
2490 src++; | |
2491 dst++; | |
2492 } | |
2493 } | |
2494 | |
/* mspel position (0,0): integer-sample prediction, plain 8x8 copy. */
static void put_mspel8_mc00_c (uint8_t *dst, uint8_t *src, int stride){
    put_pixels8_c(dst, src, stride, 8);
}
2498 | |
/* mspel position (1,0): average of the source and the horizontally
 * half-sample-filtered plane. */
static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t half[64];   /* 8x8 horizontal half-sample plane */
    wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
    put_pixels8_l2(dst, src, half, stride, stride, 8, 8);
}
2504 | |
/* mspel position (2,0): horizontal half-sample, filtered directly to dst. */
static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){
    wmv2_mspel8_h_lowpass(dst, src, stride, stride, 8);
}
2508 | |
/* mspel position (3,0): average of the pixel to the right (src+1) and the
 * horizontally half-sample-filtered plane. */
static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t half[64];   /* 8x8 horizontal half-sample plane */
    wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
    put_pixels8_l2(dst, src+1, half, stride, stride, 8, 8);
}
2514 | |
/* mspel position (0,2): vertical half-sample, filtered directly to dst. */
static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){
    wmv2_mspel8_v_lowpass(dst, src, stride, stride, 8);
}
2518 | |
/* mspel position (1,2): average of the vertical half-sample plane and the
 * diagonal (H-then-V filtered) half-sample plane. */
static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t halfH[88];   /* 8x11: H-filtered rows, one above + two below the block */
    uint8_t halfV[64];
    uint8_t halfHV[64];
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(halfV, src, 8, stride, 8);
    /* halfH+8 skips the extra top row so rows line up with the block */
    wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
    put_pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);
}
/* mspel position (3,2): like mc12 but the vertical half-sample plane is
 * taken one pixel to the right (src+1). */
static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t halfH[88];   /* 8x11: H-filtered rows, one above + two below the block */
    uint8_t halfV[64];
    uint8_t halfHV[64];
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(halfV, src+1, 8, stride, 8);
    /* halfH+8 skips the extra top row so rows line up with the block */
    wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
    put_pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);
}
/* mspel position (2,2): diagonal half-sample, H filter then V filter. */
static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t halfH[88];   /* 8x11: H-filtered rows, one above + two below the block */
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(dst, halfH+8, stride, 8, 8);
}
2542 | |
/* H.263 deblocking filter across a horizontal block edge: for each of the
 * 8 columns the four vertically adjacent pixels straddling the edge
 * (p0 p1 | p2 p3) are corrected in place.  'src' points at the first row
 * below the edge; filter strength is looked up from the quantizer. */
static void h263_v_loop_filter_c(uint8_t *src, int stride, int qscale){
    int x;
    const int strength= ff_h263_loop_filter_strength[qscale];

    for(x=0; x<8; x++){
        int d1, d2, ad1;
        int p0= src[x-2*stride];
        int p1= src[x-1*stride];
        int p2= src[x+0*stride];
        int p3= src[x+1*stride];
        int d = (p0 - p3 + 4*(p2 - p1)) / 8;   /* gradient across the edge */

        /* d1 = ramped correction: equals d for small |d|, tapers back to 0
           for large |d| so genuine image edges are left untouched */
        if (d<-2*strength) d1= 0;
        else if(d<- strength) d1=-2*strength - d;
        else if(d< strength) d1= d;
        else if(d< 2*strength) d1= 2*strength - d;
        else d1= 0;

        p1 += d1;
        p2 -= d1;
        /* clip to 0..255: bit 8 flags an out-of-range value, and
           ~(v>>31) yields 0 for negative v, 0xFF.. (255 as a byte) for
           positive overflow */
        if(p1&256) p1= ~(p1>>31);
        if(p2&256) p2= ~(p2>>31);

        src[x-1*stride] = p1;
        src[x+0*stride] = p2;

        ad1= ABS(d1)>>1;

        /* weaker correction of the outer pixels, bounded by |d1|/2 */
        d2= clip((p0-p3)/4, -ad1, ad1);

        src[x-2*stride] = p0 - d2;
        src[x+ stride] = p3 + d2;
    }
}
2577 | |
/* H.263 deblocking filter across a vertical block edge: mirror of
 * h263_v_loop_filter_c, filtering the four horizontally adjacent pixels
 * (p0 p1 | p2 p3) in each of the 8 rows.  'src' points at the first
 * column right of the edge. */
static void h263_h_loop_filter_c(uint8_t *src, int stride, int qscale){
    int y;
    const int strength= ff_h263_loop_filter_strength[qscale];

    for(y=0; y<8; y++){
        int d1, d2, ad1;
        int p0= src[y*stride-2];
        int p1= src[y*stride-1];
        int p2= src[y*stride+0];
        int p3= src[y*stride+1];
        int d = (p0 - p3 + 4*(p2 - p1)) / 8;   /* gradient across the edge */

        /* d1 = ramped correction: equals d for small |d|, tapers back to 0
           for large |d| so genuine image edges are left untouched */
        if (d<-2*strength) d1= 0;
        else if(d<- strength) d1=-2*strength - d;
        else if(d< strength) d1= d;
        else if(d< 2*strength) d1= 2*strength - d;
        else d1= 0;

        p1 += d1;
        p2 -= d1;
        /* clip to 0..255: bit 8 flags an out-of-range value, and
           ~(v>>31) yields 0 for negative v, 0xFF.. (255 as a byte) for
           positive overflow */
        if(p1&256) p1= ~(p1>>31);
        if(p2&256) p2= ~(p2>>31);

        src[y*stride-1] = p1;
        src[y*stride+0] = p2;

        ad1= ABS(d1)>>1;

        /* weaker correction of the outer pixels, bounded by |d1|/2 */
        d2= clip((p0-p3)/4, -ad1, ad1);

        src[y*stride-2] = p0 - d2;
        src[y*stride+1] = p3 + d2;
    }
}
936 | 2612 |
/* H.261 loop filter: in-place separable [1 2 1]/4 smoothing of an 8x8
 * block; the outermost rows and columns are passed through unchanged. */
static void h261_loop_filter_c(uint8_t *src, int stride){
    int tmp[64];
    int x, y;

    /* vertical pass into tmp, values scaled by 4; border rows copied */
    for(x=0; x<8; x++){
        tmp[x    ] = 4*src[x];
        tmp[x+7*8] = 4*src[x+7*stride];
    }
    for(y=1; y<7; y++){
        for(x=0; x<8; x++){
            tmp[y*8+x] = src[(y-1)*stride+x] + 2*src[y*stride+x] + src[(y+1)*stride+x];
        }
    }

    /* horizontal pass, rounding back to 8 bits; border columns copied */
    for(y=0; y<8; y++){
        src[  y*stride] = (tmp[  y*8] + 2)>>2;
        src[7+y*stride] = (tmp[7+y*8] + 2)>>2;
        for(x=1; x<7; x++){
            src[y*stride+x] = (tmp[y*8+x-1] + 2*tmp[y*8+x] + tmp[y*8+x+1] + 8)>>4;
        }
    }
}
b6f2add2511e
h261 decoder by (Maarten Daniels <maarten.daniels at student dot luc dot ac dot be>)
michael
parents:
1984
diff
changeset
|
2639 |
/* Sum of absolute differences over a 16-pixel-wide block of height h. */
static inline int pix_abs16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sum= 0;
    int i, j;

    for(i=0; i<h; i++){
        for(j=0; j<16; j++)
            sum += abs(pix1[j] - pix2[j]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return sum;
}
2667 | |
/* SAD of a 16-wide block against the half-pel-right interpolation
 * (average of each reference pixel and its right neighbour). */
static int pix_abs16_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sum= 0;
    int i, j;

    for(i=0; i<h; i++){
        for(j=0; j<16; j++)
            sum += abs(pix1[j] - avg2(pix2[j], pix2[j+1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return sum;
}
2695 | |
/* SAD of a 16-wide block against the half-pel-down interpolation
 * (average of each reference pixel and the one a line below). */
static int pix_abs16_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *pix3 = pix2 + line_size;
    int sum= 0;
    int i, j;

    for(i=0; i<h; i++){
        for(j=0; j<16; j++)
            sum += abs(pix1[j] - avg2(pix2[j], pix3[j]));
        pix1 += line_size;
        pix2 += line_size;
        pix3 += line_size;
    }
    return sum;
}
2725 | |
/* SAD of a 16-wide block against the half-pel diagonal interpolation
 * (average of the 2x2 neighbourhood in the reference). */
static int pix_abs16_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *pix3 = pix2 + line_size;
    int sum= 0;
    int i, j;

    for(i=0; i<h; i++){
        for(j=0; j<16; j++)
            sum += abs(pix1[j] - avg4(pix2[j], pix2[j+1], pix3[j], pix3[j+1]));
        pix1 += line_size;
        pix2 += line_size;
        pix3 += line_size;
    }
    return sum;
}
2755 | |
/* Sum of absolute differences over an 8-pixel-wide block of height h. */
static inline int pix_abs8_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sum= 0;
    int i, j;

    for(i=0; i<h; i++){
        for(j=0; j<8; j++)
            sum += abs(pix1[j] - pix2[j]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return sum;
}
2775 | |
/* SAD of an 8-wide block against the half-pel-right interpolation. */
static int pix_abs8_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sum= 0;
    int i, j;

    for(i=0; i<h; i++){
        for(j=0; j<8; j++)
            sum += abs(pix1[j] - avg2(pix2[j], pix2[j+1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return sum;
}
2795 | |
/* SAD of an 8-wide block against the half-pel-down interpolation. */
static int pix_abs8_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *pix3 = pix2 + line_size;
    int sum= 0;
    int i, j;

    for(i=0; i<h; i++){
        for(j=0; j<8; j++)
            sum += abs(pix1[j] - avg2(pix2[j], pix3[j]));
        pix1 += line_size;
        pix2 += line_size;
        pix3 += line_size;
    }
    return sum;
}
2817 | |
/* SAD of an 8-wide block against the half-pel diagonal interpolation. */
static int pix_abs8_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *pix3 = pix2 + line_size;
    int sum= 0;
    int i, j;

    for(i=0; i<h; i++){
        for(j=0; j<8; j++)
            sum += abs(pix1[j] - avg4(pix2[j], pix2[j+1], pix3[j], pix3[j+1]));
        pix1 += line_size;
        pix2 += line_size;
        pix3 += line_size;
    }
    return sum;
}
2839 | |
2066 | 2840 static int nsse16_c(MpegEncContext *c, uint8_t *s1, uint8_t *s2, int stride, int h){ |
2065
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
2841 int score1=0; |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
2842 int score2=0; |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
2843 int x,y; |
2066 | 2844 |
2065
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
2845 for(y=0; y<h; y++){ |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
2846 for(x=0; x<16; x++){ |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
2847 score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]); |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
2848 } |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
2849 if(y+1<h){ |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
2850 for(x=0; x<15; x++){ |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
2851 score2+= ABS( s1[x ] - s1[x +stride] |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
2852 - s1[x+1] + s1[x+1+stride]) |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
2853 -ABS( s2[x ] - s2[x +stride] |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
2854 - s2[x+1] + s2[x+1+stride]); |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
2855 } |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
2856 } |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
2857 s1+= stride; |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
2858 s2+= stride; |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
2859 } |
2066 | 2860 |
2861 if(c) return score1 + ABS(score2)*c->avctx->nsse_weight; | |
2862 else return score1 + ABS(score2)*8; | |
2065
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
2863 } |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
2864 |
2066 | 2865 static int nsse8_c(MpegEncContext *c, uint8_t *s1, uint8_t *s2, int stride, int h){ |
2065
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
2866 int score1=0; |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
2867 int score2=0; |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
2868 int x,y; |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
2869 |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
2870 for(y=0; y<h; y++){ |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
2871 for(x=0; x<8; x++){ |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
2872 score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]); |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
2873 } |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
2874 if(y+1<h){ |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
2875 for(x=0; x<7; x++){ |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
2876 score2+= ABS( s1[x ] - s1[x +stride] |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
2877 - s1[x+1] + s1[x+1+stride]) |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
2878 -ABS( s2[x ] - s2[x +stride] |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
2879 - s2[x+1] + s2[x+1+stride]); |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
2880 } |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
2881 } |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
2882 s1+= stride; |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
2883 s2+= stride; |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
2884 } |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
2885 |
2066 | 2886 if(c) return score1 + ABS(score2)*c->avctx->nsse_weight; |
2887 else return score1 + ABS(score2)*8; | |
2065
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
2888 } |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
2889 |
1784 | 2890 static int try_8x8basis_c(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale){ |
2891 int i; | |
2892 unsigned int sum=0; | |
2893 | |
2894 for(i=0; i<8*8; i++){ | |
2895 int b= rem[i] + ((basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT)); | |
2896 int w= weight[i]; | |
2897 b>>= RECON_SHIFT; | |
2898 assert(-512<b && b<512); | |
2899 | |
2900 sum += (w*b)*(w*b)>>4; | |
2901 } | |
2902 return sum>>2; | |
2903 } | |
2904 | |
2905 static void add_8x8basis_c(int16_t rem[64], int16_t basis[64], int scale){ | |
2906 int i; | |
2907 | |
2908 for(i=0; i<8*8; i++){ | |
2909 rem[i] += (basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT); | |
2910 } | |
2911 } | |
2912 | |
1100 | 2913 /** |
2914 * permutes an 8x8 block. | |
1101 | 2915 * @param block the block which will be permuted according to the given permutation vector |
1100 | 2916 * @param permutation the permutation vector |
2917 * @param last the last non zero coefficient in scantable order, used to speed the permutation up | |
1101 | 2918 * @param scantable the used scantable, this is only used to speed the permutation up, the block is not |
2919 * (inverse) permutated to scantable order! | |
1100 | 2920 */ |
1064 | 2921 void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last) |
174
ac5075a55488
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
88
diff
changeset
|
2922 { |
764 | 2923 int i; |
945 | 2924 DCTELEM temp[64]; |
764 | 2925 |
2926 if(last<=0) return; | |
882 | 2927 //if(permutation[1]==1) return; //FIXME its ok but not clean and might fail for some perms |
174
ac5075a55488
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
88
diff
changeset
|
2928 |
764 | 2929 for(i=0; i<=last; i++){ |
2930 const int j= scantable[i]; | |
2931 temp[j]= block[j]; | |
2932 block[j]=0; | |
2933 } | |
2934 | |
2935 for(i=0; i<=last; i++){ | |
2936 const int j= scantable[i]; | |
2937 const int perm_j= permutation[j]; | |
2938 block[perm_j]= temp[j]; | |
2939 } | |
174
ac5075a55488
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
88
diff
changeset
|
2940 } |
34 | 2941 |
/* Comparison function that ignores its inputs and always scores 0. */
static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h){
    return 0;
}
2945 | |
2946 void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type){ | |
2947 int i; | |
2948 | |
2949 memset(cmp, 0, sizeof(void*)*5); | |
2950 | |
2951 for(i=0; i<5; i++){ | |
2952 switch(type&0xFF){ | |
2953 case FF_CMP_SAD: | |
2954 cmp[i]= c->sad[i]; | |
2955 break; | |
2956 case FF_CMP_SATD: | |
2957 cmp[i]= c->hadamard8_diff[i]; | |
2958 break; | |
2959 case FF_CMP_SSE: | |
2960 cmp[i]= c->sse[i]; | |
2961 break; | |
2962 case FF_CMP_DCT: | |
2963 cmp[i]= c->dct_sad[i]; | |
2964 break; | |
2382 | 2965 case FF_CMP_DCTMAX: |
2966 cmp[i]= c->dct_max[i]; | |
2967 break; | |
1729 | 2968 case FF_CMP_PSNR: |
2969 cmp[i]= c->quant_psnr[i]; | |
2970 break; | |
2971 case FF_CMP_BIT: | |
2972 cmp[i]= c->bit[i]; | |
2973 break; | |
2974 case FF_CMP_RD: | |
2975 cmp[i]= c->rd[i]; | |
2976 break; | |
2977 case FF_CMP_VSAD: | |
2978 cmp[i]= c->vsad[i]; | |
2979 break; | |
2980 case FF_CMP_VSSE: | |
2981 cmp[i]= c->vsse[i]; | |
2982 break; | |
2983 case FF_CMP_ZERO: | |
2984 cmp[i]= zero_cmp; | |
2985 break; | |
2065
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
2986 case FF_CMP_NSSE: |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
2987 cmp[i]= c->nsse[i]; |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
2988 break; |
2184 | 2989 case FF_CMP_W53: |
2990 cmp[i]= c->w53[i]; | |
2991 break; | |
2992 case FF_CMP_W97: | |
2993 cmp[i]= c->w97[i]; | |
2994 break; | |
1729 | 2995 default: |
2996 av_log(NULL, AV_LOG_ERROR,"internal error in cmp function selection\n"); | |
2997 } | |
2998 } | |
2999 } | |
3000 | |
1101 | 3001 /** |
3002 * memset(blocks, 0, sizeof(DCTELEM)*6*64) | |
3003 */ | |
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
764
diff
changeset
|
3004 static void clear_blocks_c(DCTELEM *blocks) |
296 | 3005 { |
3006 memset(blocks, 0, sizeof(DCTELEM)*6*64); | |
3007 } | |
3008 | |
/* dst[i] += src[i] for i in [0,w); bulk of the row handled 8 at a time. */
static void add_bytes_c(uint8_t *dst, uint8_t *src, int w){
    int i= 0;

    for(; i+7<w; i+=8){
        int j;
        for(j=0; j<8; j++)
            dst[i+j] += src[i+j];
    }
    for(; i<w; i++)
        dst[i] += src[i];
}
3024 | |
/* dst[i] = src1[i] - src2[i] (mod 256) for i in [0,w). */
static void diff_bytes_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
    int i= 0;

    for(; i+7<w; i+=8){
        int j;
        for(j=0; j<8; j++)
            dst[i+j] = src1[i+j]-src2[i+j];
    }
    for(; i<w; i++)
        dst[i] = src1[i]-src2[i];
}
3040 | |
/* HuffYUV median prediction residual: dst = src2 - median(left, above,
 * left+above-above_left), updating the running left/above-left state. */
static void sub_hfyu_median_prediction_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w, int *left, int *left_top){
    uint8_t l = *left;
    uint8_t lt= *left_top;
    int i;

    for(i=0; i<w; i++){
        const int pred= mid_pred(l, src1[i], (l + src1[i] - lt)&0xFF);
        lt= src1[i];        /* above-left for the next column */
        l = src2[i];        /* left for the next column */
        dst[i]= l - pred;
    }

    *left    = l;
    *left_top= lt;
}
3058 | |
#define BUTTERFLY2(o1,o2,i1,i2) \
    o1= (i1)+(i2);\
    o2= (i1)-(i2);

#define BUTTERFLY1(x,y) \
{\
    int a,b;\
    a= x;\
    b= y;\
    x= a+b;\
    y= a-b;\
}

#define BUTTERFLYA(x,y) (ABS((x)+(y)) + ABS((x)-(y)))

/* SATD: 8x8 Hadamard transform of the difference src-dst, then the sum
 * of the absolute transform coefficients. */
static int hadamard8_diff8x8_c(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){
    int t[64];
    int sum=0;
    int i;

    assert(h==8);

    /* horizontal 8-point Hadamard on each row of differences */
    for(i=0; i<8; i++){
        //FIXME try pointer walks
        BUTTERFLY2(t[8*i+0], t[8*i+1], src[stride*i+0]-dst[stride*i+0], src[stride*i+1]-dst[stride*i+1]);
        BUTTERFLY2(t[8*i+2], t[8*i+3], src[stride*i+2]-dst[stride*i+2], src[stride*i+3]-dst[stride*i+3]);
        BUTTERFLY2(t[8*i+4], t[8*i+5], src[stride*i+4]-dst[stride*i+4], src[stride*i+5]-dst[stride*i+5]);
        BUTTERFLY2(t[8*i+6], t[8*i+7], src[stride*i+6]-dst[stride*i+6], src[stride*i+7]-dst[stride*i+7]);

        BUTTERFLY1(t[8*i+0], t[8*i+2]);
        BUTTERFLY1(t[8*i+1], t[8*i+3]);
        BUTTERFLY1(t[8*i+4], t[8*i+6]);
        BUTTERFLY1(t[8*i+5], t[8*i+7]);

        BUTTERFLY1(t[8*i+0], t[8*i+4]);
        BUTTERFLY1(t[8*i+1], t[8*i+5]);
        BUTTERFLY1(t[8*i+2], t[8*i+6]);
        BUTTERFLY1(t[8*i+3], t[8*i+7]);
    }

    /* vertical 8-point Hadamard down each column, accumulating |coeff| */
    for(i=0; i<8; i++){
        BUTTERFLY1(t[8*0+i], t[8*1+i]);
        BUTTERFLY1(t[8*2+i], t[8*3+i]);
        BUTTERFLY1(t[8*4+i], t[8*5+i]);
        BUTTERFLY1(t[8*6+i], t[8*7+i]);

        BUTTERFLY1(t[8*0+i], t[8*2+i]);
        BUTTERFLY1(t[8*1+i], t[8*3+i]);
        BUTTERFLY1(t[8*4+i], t[8*6+i]);
        BUTTERFLY1(t[8*5+i], t[8*7+i]);

        sum +=  BUTTERFLYA(t[8*0+i], t[8*4+i])
               +BUTTERFLYA(t[8*1+i], t[8*5+i])
               +BUTTERFLYA(t[8*2+i], t[8*6+i])
               +BUTTERFLYA(t[8*3+i], t[8*7+i]);
    }
    return sum;
}
3125 | |
/* Intra SATD: 8x8 Hadamard of the source pixels themselves, sum of
 * absolute coefficients minus the (scaled) DC/mean term. */
static int hadamard8_intra8x8_c(/*MpegEncContext*/ void *s, uint8_t *src, uint8_t *dummy, int stride, int h){
    int t[64];
    int sum=0;
    int i;

    assert(h==8);

    /* horizontal 8-point Hadamard on each source row */
    for(i=0; i<8; i++){
        //FIXME try pointer walks
        BUTTERFLY2(t[8*i+0], t[8*i+1], src[stride*i+0], src[stride*i+1]);
        BUTTERFLY2(t[8*i+2], t[8*i+3], src[stride*i+2], src[stride*i+3]);
        BUTTERFLY2(t[8*i+4], t[8*i+5], src[stride*i+4], src[stride*i+5]);
        BUTTERFLY2(t[8*i+6], t[8*i+7], src[stride*i+6], src[stride*i+7]);

        BUTTERFLY1(t[8*i+0], t[8*i+2]);
        BUTTERFLY1(t[8*i+1], t[8*i+3]);
        BUTTERFLY1(t[8*i+4], t[8*i+6]);
        BUTTERFLY1(t[8*i+5], t[8*i+7]);

        BUTTERFLY1(t[8*i+0], t[8*i+4]);
        BUTTERFLY1(t[8*i+1], t[8*i+5]);
        BUTTERFLY1(t[8*i+2], t[8*i+6]);
        BUTTERFLY1(t[8*i+3], t[8*i+7]);
    }

    /* vertical 8-point Hadamard down each column, accumulating |coeff| */
    for(i=0; i<8; i++){
        BUTTERFLY1(t[8*0+i], t[8*1+i]);
        BUTTERFLY1(t[8*2+i], t[8*3+i]);
        BUTTERFLY1(t[8*4+i], t[8*5+i]);
        BUTTERFLY1(t[8*6+i], t[8*7+i]);

        BUTTERFLY1(t[8*0+i], t[8*2+i]);
        BUTTERFLY1(t[8*1+i], t[8*3+i]);
        BUTTERFLY1(t[8*4+i], t[8*6+i]);
        BUTTERFLY1(t[8*5+i], t[8*7+i]);

        sum +=  BUTTERFLYA(t[8*0+i], t[8*4+i])
               +BUTTERFLYA(t[8*1+i], t[8*5+i])
               +BUTTERFLYA(t[8*2+i], t[8*6+i])
               +BUTTERFLYA(t[8*3+i], t[8*7+i]);
    }

    sum -= ABS(t[8*0] + t[8*4]); // -mean

    return sum;
}
3173 | |
1708 | 3174 static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){ |
936 | 3175 MpegEncContext * const s= (MpegEncContext *)c; |
1016 | 3176 uint64_t __align8 aligned_temp[sizeof(DCTELEM)*64/8]; |
3177 DCTELEM * const temp= (DCTELEM*)aligned_temp; | |
936 | 3178 int sum=0, i; |
1708 | 3179 |
3180 assert(h==8); | |
936 | 3181 |
3182 s->dsp.diff_pixels(temp, src1, src2, stride); | |
1092 | 3183 s->dsp.fdct(temp); |
936 | 3184 |
3185 for(i=0; i<64; i++) | |
3186 sum+= ABS(temp[i]); | |
3187 | |
3188 return sum; | |
3189 } | |
3190 | |
2382 | 3191 static int dct_max8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){ |
3192 MpegEncContext * const s= (MpegEncContext *)c; | |
3193 uint64_t __align8 aligned_temp[sizeof(DCTELEM)*64/8]; | |
3194 DCTELEM * const temp= (DCTELEM*)aligned_temp; | |
3195 int sum=0, i; | |
3196 | |
3197 assert(h==8); | |
3198 | |
3199 s->dsp.diff_pixels(temp, src1, src2, stride); | |
3200 s->dsp.fdct(temp); | |
3201 | |
3202 for(i=0; i<64; i++) | |
3203 sum= FFMAX(sum, ABS(temp[i])); | |
3204 | |
3205 return sum; | |
3206 } | |
3207 | |
1008 | 3208 void simple_idct(DCTELEM *block); //FIXME |
936 | 3209 |
1708 | 3210 static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){ |
936 | 3211 MpegEncContext * const s= (MpegEncContext *)c; |
1016 | 3212 uint64_t __align8 aligned_temp[sizeof(DCTELEM)*64*2/8]; |
3213 DCTELEM * const temp= (DCTELEM*)aligned_temp; | |
3214 DCTELEM * const bak = ((DCTELEM*)aligned_temp)+64; | |
936 | 3215 int sum=0, i; |
3216 | |
1708 | 3217 assert(h==8); |
936 | 3218 s->mb_intra=0; |
3219 | |
3220 s->dsp.diff_pixels(temp, src1, src2, stride); | |
3221 | |
3222 memcpy(bak, temp, 64*sizeof(DCTELEM)); | |
3223 | |
1013 | 3224 s->block_last_index[0/*FIXME*/]= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i); |
1689 | 3225 s->dct_unquantize_inter(s, temp, 0, s->qscale); |
936 | 3226 simple_idct(temp); //FIXME |
3227 | |
3228 for(i=0; i<64; i++) | |
3229 sum+= (temp[i]-bak[i])*(temp[i]-bak[i]); | |
3230 | |
3231 return sum; | |
3232 } | |
3233 | |
1708 | 3234 static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){ |
1007 | 3235 MpegEncContext * const s= (MpegEncContext *)c; |
1064 | 3236 const uint8_t *scantable= s->intra_scantable.permutated; |
1016 | 3237 uint64_t __align8 aligned_temp[sizeof(DCTELEM)*64/8]; |
3238 uint64_t __align8 aligned_bak[stride]; | |
3239 DCTELEM * const temp= (DCTELEM*)aligned_temp; | |
3240 uint8_t * const bak= (uint8_t*)aligned_bak; | |
1007 | 3241 int i, last, run, bits, level, distoration, start_i; |
3242 const int esc_length= s->ac_esc_length; | |
3243 uint8_t * length; | |
3244 uint8_t * last_length; | |
3245 | |
1708 | 3246 assert(h==8); |
3247 | |
1007 | 3248 for(i=0; i<8; i++){ |
3249 ((uint32_t*)(bak + i*stride))[0]= ((uint32_t*)(src2 + i*stride))[0]; | |
3250 ((uint32_t*)(bak + i*stride))[1]= ((uint32_t*)(src2 + i*stride))[1]; | |
3251 } | |
3252 | |
3253 s->dsp.diff_pixels(temp, src1, src2, stride); | |
3254 | |
1013 | 3255 s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i); |
3256 | |
3257 bits=0; | |
1007 | 3258 |
1013 | 3259 if (s->mb_intra) { |
3260 start_i = 1; | |
3261 length = s->intra_ac_vlc_length; | |
3262 last_length= s->intra_ac_vlc_last_length; | |
3263 bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma | |
3264 } else { | |
3265 start_i = 0; | |
3266 length = s->inter_ac_vlc_length; | |
3267 last_length= s->inter_ac_vlc_last_length; | |
3268 } | |
3269 | |
3270 if(last>=start_i){ | |
1007 | 3271 run=0; |
3272 for(i=start_i; i<last; i++){ | |
3273 int j= scantable[i]; | |
3274 level= temp[j]; | |
3275 | |
3276 if(level){ | |
3277 level+=64; | |
3278 if((level&(~127)) == 0){ | |
3279 bits+= length[UNI_AC_ENC_INDEX(run, level)]; | |
3280 }else | |
3281 bits+= esc_length; | |
3282 run=0; | |
3283 }else | |
3284 run++; | |
3285 } | |
3286 i= scantable[last]; | |
1011 | 3287 |
3288 level= temp[i] + 64; | |
3289 | |
3290 assert(level - 64); | |
1007 | 3291 |
3292 if((level&(~127)) == 0){ | |
3293 bits+= last_length[UNI_AC_ENC_INDEX(run, level)]; | |
3294 }else | |
3295 bits+= esc_length; | |
3296 | |
1013 | 3297 } |
3298 | |
3299 if(last>=0){ | |
1689 | 3300 if(s->mb_intra) |
3301 s->dct_unquantize_intra(s, temp, 0, s->qscale); | |
3302 else | |
3303 s->dct_unquantize_inter(s, temp, 0, s->qscale); | |
1007 | 3304 } |
3305 | |
1092 | 3306 s->dsp.idct_add(bak, stride, temp); |
1007 | 3307 |
1708 | 3308 distoration= s->dsp.sse[1](NULL, bak, src1, stride, 8); |
1007 | 3309 |
1013 | 3310 return distoration + ((bits*s->qscale*s->qscale*109 + 64)>>7); |
1007 | 3311 } |
3312 | |
1708 | 3313 static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){ |
1007 | 3314 MpegEncContext * const s= (MpegEncContext *)c; |
1064 | 3315 const uint8_t *scantable= s->intra_scantable.permutated; |
1016 | 3316 uint64_t __align8 aligned_temp[sizeof(DCTELEM)*64/8]; |
3317 DCTELEM * const temp= (DCTELEM*)aligned_temp; | |
1007 | 3318 int i, last, run, bits, level, start_i; |
3319 const int esc_length= s->ac_esc_length; | |
3320 uint8_t * length; | |
3321 uint8_t * last_length; | |
1708 | 3322 |
3323 assert(h==8); | |
1013 | 3324 |
3325 s->dsp.diff_pixels(temp, src1, src2, stride); | |
1007 | 3326 |
1013 | 3327 s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i); |
3328 | |
3329 bits=0; | |
1007 | 3330 |
3331 if (s->mb_intra) { | |
1013 | 3332 start_i = 1; |
1007 | 3333 length = s->intra_ac_vlc_length; |
3334 last_length= s->intra_ac_vlc_last_length; | |
1013 | 3335 bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma |
1007 | 3336 } else { |
3337 start_i = 0; | |
3338 length = s->inter_ac_vlc_length; | |
3339 last_length= s->inter_ac_vlc_last_length; | |
3340 } | |
3341 | |
1013 | 3342 if(last>=start_i){ |
1007 | 3343 run=0; |
3344 for(i=start_i; i<last; i++){ | |
3345 int j= scantable[i]; | |
3346 level= temp[j]; | |
3347 | |
3348 if(level){ | |
3349 level+=64; | |
3350 if((level&(~127)) == 0){ | |
3351 bits+= length[UNI_AC_ENC_INDEX(run, level)]; | |
3352 }else | |
3353 bits+= esc_length; | |
3354 run=0; | |
3355 }else | |
3356 run++; | |
3357 } | |
3358 i= scantable[last]; | |
1013 | 3359 |
3360 level= temp[i] + 64; | |
1007 | 3361 |
1013 | 3362 assert(level - 64); |
1007 | 3363 |
3364 if((level&(~127)) == 0){ | |
3365 bits+= last_length[UNI_AC_ENC_INDEX(run, level)]; | |
3366 }else | |
3367 bits+= esc_length; | |
3368 } | |
3369 | |
3370 return bits; | |
3371 } | |
3372 | |
/**
 * Vertical SAD for intra blocks: sum of absolute differences between each
 * pixel and the pixel directly below it, over a 16-pixel-wide block of
 * h rows.  Measures vertical "activity" without a reference block.
 */
static int vsad_intra16_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){
    int total = 0;
    int row, col;

    for (row = 1; row < h; row++) {
        for (col = 0; col < 16; col++) {
            const int d = s[col] - s[col + stride];
            total += (d >= 0) ? d : -d;
        }
        s += stride;
    }

    return total;
}
3387 | |
/**
 * Vertical SAD of the residual: for each pixel, the difference (s1 - s2) is
 * compared against the same difference one row below; the absolute values
 * of those vertical gradient changes are summed over 16 columns and h rows.
 */
static int vsad16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
    int total = 0;
    int row, col;

    for (row = 1; row < h; row++) {
        for (col = 0; col < 16; col++) {
            const int d = (s1[col] - s2[col]) - (s1[col + stride] - s2[col + stride]);
            total += (d >= 0) ? d : -d;
        }
        s1 += stride;
        s2 += stride;
    }

    return total;
}
3402 | |
#define SQ(a) ((a)*(a))
/**
 * Vertical SSE for intra blocks: sum of squared differences between each
 * pixel and the pixel directly below it, over 16 columns and h rows.
 * Squared-error counterpart of vsad_intra16_c().
 */
static int vsse_intra16_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){
    int total = 0;
    int row, col;

    for (row = 1; row < h; row++) {
        for (col = 0; col < 16; col++) {
            const int d = s[col] - s[col + stride];
            total += d * d;
        }
        s += stride;
    }

    return total;
}
3418 | |
/**
 * Vertical SSE of the residual: squares the change of the (s1 - s2)
 * difference between vertically adjacent rows and sums it over 16 columns
 * and h rows.  Squared-error counterpart of vsad16_c().
 */
static int vsse16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
    int total = 0;
    int row, col;

    for (row = 1; row < h; row++) {
        for (col = 0; col < 16; col++) {
            const int d = (s1[col] - s2[col]) - (s1[col + stride] - s2[col + stride]);
            total += d * d;
        }
        s1 += stride;
        s2 += stride;
    }

    return total;
}
3433 | |
/* Instantiate the 16x16 variants of the comparison metrics from their 8x8
 * implementations above.  NOTE(review): WARPER8_16_SQ is defined earlier in
 * this file (not visible here); presumably it applies the 8x8 function to
 * the sub-blocks of a 16x16 area and combines the results -- confirm
 * against the macro definition. */
WARPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c)
WARPER8_16_SQ(hadamard8_intra8x8_c, hadamard8_intra16_c)
WARPER8_16_SQ(dct_sad8x8_c, dct_sad16_c)
WARPER8_16_SQ(dct_max8x8_c, dct_max16_c)
WARPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c)
WARPER8_16_SQ(rd8x8_c, rd16_c)
WARPER8_16_SQ(bit8x8_c, bit16_c)
936 | 3441 |
/* XXX: those functions should be suppressed ASAP when all IDCTs are
   converted */
/* JPEG reference IDCT, then store the clamped result block into dest. */
static void ff_jref_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct (block);
    put_pixels_clamped_c(block, dest, line_size);
}
/* JPEG reference IDCT, then add the result onto dest with clamping. */
static void ff_jref_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct (block);
    add_pixels_clamped_c(block, dest, line_size);
}
3454 | |
/* Reduced-size (4x4) variant of the reference IDCT + clamped store;
 * installed for lowres==1 decoding in dsputil_init() below. */
static void ff_jref_idct4_put(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct4 (block);
    put_pixels_clamped4_c(block, dest, line_size);
}
/* 4x4 reference IDCT + clamped add onto the destination. */
static void ff_jref_idct4_add(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct4 (block);
    add_pixels_clamped4_c(block, dest, line_size);
}
3465 | |
/* Reduced-size (2x2) variant of the reference IDCT + clamped store;
 * installed for lowres==2 decoding in dsputil_init() below. */
static void ff_jref_idct2_put(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct2 (block);
    put_pixels_clamped2_c(block, dest, line_size);
}
/* 2x2 reference IDCT + clamped add onto the destination. */
static void ff_jref_idct2_add(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct2 (block);
    add_pixels_clamped2_c(block, dest, line_size);
}
3476 | |
/* Degenerate 1x1 "IDCT": only the DC coefficient matters; scale it
 * (round, >>3), clamp via the crop table and store a single pixel.
 * Installed for lowres==3 decoding in dsputil_init() below. */
static void ff_jref_idct1_put(uint8_t *dest, int line_size, DCTELEM *block)
{
    uint8_t *cm = cropTbl + MAX_NEG_CROP; /* clamp-to-[0,255] lookup */

    dest[0] = cm[(block[0] + 4)>>3];
}
/* 1x1 variant that adds the scaled DC onto the existing pixel, clamped. */
static void ff_jref_idct1_add(uint8_t *dest, int line_size, DCTELEM *block)
{
    uint8_t *cm = cropTbl + MAX_NEG_CROP; /* clamp-to-[0,255] lookup */

    dest[0] = cm[dest[0] + ((block[0] + 4)>>3)];
}
3489 | |
1201 | 3490 /* init static data */ |
3491 void dsputil_static_init(void) | |
0 | 3492 { |
751 | 3493 int i; |
0 | 3494 |
1201 | 3495 for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i; |
3496 for(i=0;i<MAX_NEG_CROP;i++) { | |
3497 cropTbl[i] = 0; | |
3498 cropTbl[i + MAX_NEG_CROP + 256] = 255; | |
3499 } | |
3500 | |
3501 for(i=0;i<512;i++) { | |
3502 squareTbl[i] = (i - 256) * (i - 256); | |
3503 } | |
3504 | |
3505 for(i=0; i<64; i++) inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1; | |
3506 } | |
0 | 3507 |
861 | 3508 |
1201 | 3509 void dsputil_init(DSPContext* c, AVCodecContext *avctx) |
3510 { | |
3511 int i; | |
0 | 3512 |
1092 | 3513 #ifdef CONFIG_ENCODERS |
1567 | 3514 if(avctx->dct_algo==FF_DCT_FASTINT) { |
1092 | 3515 c->fdct = fdct_ifast; |
1571
aa4dc16c0f18
* adding integer/floating point AAN implementations for DCT 2-4-8
romansh
parents:
1567
diff
changeset
|
3516 c->fdct248 = fdct_ifast248; |
1567 | 3517 } |
3518 else if(avctx->dct_algo==FF_DCT_FAAN) { | |
1557 | 3519 c->fdct = ff_faandct; |
1571
aa4dc16c0f18
* adding integer/floating point AAN implementations for DCT 2-4-8
romansh
parents:
1567
diff
changeset
|
3520 c->fdct248 = ff_faandct248; |
1567 | 3521 } |
3522 else { | |
1092 | 3523 c->fdct = ff_jpeg_fdct_islow; //slow/accurate/default |
1567 | 3524 c->fdct248 = ff_fdct248_islow; |
3525 } | |
1092 | 3526 #endif //CONFIG_ENCODERS |
3527 | |
2256 | 3528 if(avctx->lowres==1){ |
2272
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
2259
diff
changeset
|
3529 if(avctx->idct_algo==FF_IDCT_INT || avctx->idct_algo==FF_IDCT_AUTO){ |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
2259
diff
changeset
|
3530 c->idct_put= ff_jref_idct4_put; |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
2259
diff
changeset
|
3531 c->idct_add= ff_jref_idct4_add; |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
2259
diff
changeset
|
3532 }else{ |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
2259
diff
changeset
|
3533 c->idct_put= ff_h264_lowres_idct_put_c; |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
2259
diff
changeset
|
3534 c->idct_add= ff_h264_lowres_idct_add_c; |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
2259
diff
changeset
|
3535 } |
2256 | 3536 c->idct = j_rev_dct4; |
1092 | 3537 c->idct_permutation_type= FF_NO_IDCT_PERM; |
2257 | 3538 }else if(avctx->lowres==2){ |
3539 c->idct_put= ff_jref_idct2_put; | |
3540 c->idct_add= ff_jref_idct2_add; | |
3541 c->idct = j_rev_dct2; | |
3542 c->idct_permutation_type= FF_NO_IDCT_PERM; | |
2259 | 3543 }else if(avctx->lowres==3){ |
3544 c->idct_put= ff_jref_idct1_put; | |
3545 c->idct_add= ff_jref_idct1_add; | |
3546 c->idct = j_rev_dct1; | |
3547 c->idct_permutation_type= FF_NO_IDCT_PERM; | |
2256 | 3548 }else{ |
3549 if(avctx->idct_algo==FF_IDCT_INT){ | |
3550 c->idct_put= ff_jref_idct_put; | |
3551 c->idct_add= ff_jref_idct_add; | |
3552 c->idct = j_rev_dct; | |
3553 c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM; | |
3554 }else{ //accurate/default | |
3555 c->idct_put= simple_idct_put; | |
3556 c->idct_add= simple_idct_add; | |
3557 c->idct = simple_idct; | |
3558 c->idct_permutation_type= FF_NO_IDCT_PERM; | |
3559 } | |
1092 | 3560 } |
3561 | |
2272
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
2259
diff
changeset
|
3562 c->h264_idct_add= ff_h264_idct_add_c; |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
2259
diff
changeset
|
3563 |
1866
1755f959ab7f
seperated out the C-based VP3 DSP functions into a different file; also
melanson
parents:
1864
diff
changeset
|
3564 /* VP3 DSP support */ |
1755f959ab7f
seperated out the C-based VP3 DSP functions into a different file; also
melanson
parents:
1864
diff
changeset
|
3565 c->vp3_dsp_init = vp3_dsp_init_c; |
1977 | 3566 c->vp3_idct = vp3_idct_c; |
1866
1755f959ab7f
seperated out the C-based VP3 DSP functions into a different file; also
melanson
parents:
1864
diff
changeset
|
3567 |
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
764
diff
changeset
|
3568 c->get_pixels = get_pixels_c; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
764
diff
changeset
|
3569 c->diff_pixels = diff_pixels_c; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
764
diff
changeset
|
3570 c->put_pixels_clamped = put_pixels_clamped_c; |
1984
ef919e9ef73e
separate out put_signed_pixels_clamped() into its own function and
melanson
parents:
1977
diff
changeset
|
3571 c->put_signed_pixels_clamped = put_signed_pixels_clamped_c; |
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
764
diff
changeset
|
3572 c->add_pixels_clamped = add_pixels_clamped_c; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
764
diff
changeset
|
3573 c->gmc1 = gmc1_c; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
764
diff
changeset
|
3574 c->gmc = gmc_c; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
764
diff
changeset
|
3575 c->clear_blocks = clear_blocks_c; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
764
diff
changeset
|
3576 c->pix_sum = pix_sum_c; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
764
diff
changeset
|
3577 c->pix_norm1 = pix_norm1_c; |
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
764
diff
changeset
|
3578 |
859 | 3579 /* TODO [0] 16 [1] 8 */ |
1708 | 3580 c->pix_abs[0][0] = pix_abs16_c; |
3581 c->pix_abs[0][1] = pix_abs16_x2_c; | |
3582 c->pix_abs[0][2] = pix_abs16_y2_c; | |
3583 c->pix_abs[0][3] = pix_abs16_xy2_c; | |
3584 c->pix_abs[1][0] = pix_abs8_c; | |
3585 c->pix_abs[1][1] = pix_abs8_x2_c; | |
3586 c->pix_abs[1][2] = pix_abs8_y2_c; | |
3587 c->pix_abs[1][3] = pix_abs8_xy2_c; | |
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
764
diff
changeset
|
3588 |
859 | 3589 #define dspfunc(PFX, IDX, NUM) \ |
3590 c->PFX ## _pixels_tab[IDX][0] = PFX ## _pixels ## NUM ## _c; \ | |
3591 c->PFX ## _pixels_tab[IDX][1] = PFX ## _pixels ## NUM ## _x2_c; \ | |
3592 c->PFX ## _pixels_tab[IDX][2] = PFX ## _pixels ## NUM ## _y2_c; \ | |
3593 c->PFX ## _pixels_tab[IDX][3] = PFX ## _pixels ## NUM ## _xy2_c | |
853
eacc2dd8fd9d
* using DSPContext - so each codec could use its local (sub)set of CPU extension
kabi
parents:
764
diff
changeset
|
3594 |
859 | 3595 dspfunc(put, 0, 16); |
3596 dspfunc(put_no_rnd, 0, 16); | |
3597 dspfunc(put, 1, 8); | |
3598 dspfunc(put_no_rnd, 1, 8); | |
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
3599 dspfunc(put, 2, 4); |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
3600 dspfunc(put, 3, 2); |
0 | 3601 |
859 | 3602 dspfunc(avg, 0, 16); |
3603 dspfunc(avg_no_rnd, 0, 16); | |
3604 dspfunc(avg, 1, 8); | |
3605 dspfunc(avg_no_rnd, 1, 8); | |
1319 | 3606 dspfunc(avg, 2, 4); |
3607 dspfunc(avg, 3, 2); | |
859 | 3608 #undef dspfunc |
857 | 3609 |
1864 | 3610 c->put_no_rnd_pixels_l2[0]= put_no_rnd_pixels16_l2_c; |
3611 c->put_no_rnd_pixels_l2[1]= put_no_rnd_pixels8_l2_c; | |
3612 | |
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
3613 c->put_tpel_pixels_tab[ 0] = put_tpel_pixels_mc00_c; |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
3614 c->put_tpel_pixels_tab[ 1] = put_tpel_pixels_mc10_c; |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
3615 c->put_tpel_pixels_tab[ 2] = put_tpel_pixels_mc20_c; |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
3616 c->put_tpel_pixels_tab[ 4] = put_tpel_pixels_mc01_c; |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
3617 c->put_tpel_pixels_tab[ 5] = put_tpel_pixels_mc11_c; |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
3618 c->put_tpel_pixels_tab[ 6] = put_tpel_pixels_mc21_c; |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
3619 c->put_tpel_pixels_tab[ 8] = put_tpel_pixels_mc02_c; |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
3620 c->put_tpel_pixels_tab[ 9] = put_tpel_pixels_mc12_c; |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
3621 c->put_tpel_pixels_tab[10] = put_tpel_pixels_mc22_c; |
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
3622 |
1319 | 3623 c->avg_tpel_pixels_tab[ 0] = avg_tpel_pixels_mc00_c; |
3624 c->avg_tpel_pixels_tab[ 1] = avg_tpel_pixels_mc10_c; | |
3625 c->avg_tpel_pixels_tab[ 2] = avg_tpel_pixels_mc20_c; | |
3626 c->avg_tpel_pixels_tab[ 4] = avg_tpel_pixels_mc01_c; | |
3627 c->avg_tpel_pixels_tab[ 5] = avg_tpel_pixels_mc11_c; | |
3628 c->avg_tpel_pixels_tab[ 6] = avg_tpel_pixels_mc21_c; | |
3629 c->avg_tpel_pixels_tab[ 8] = avg_tpel_pixels_mc02_c; | |
3630 c->avg_tpel_pixels_tab[ 9] = avg_tpel_pixels_mc12_c; | |
3631 c->avg_tpel_pixels_tab[10] = avg_tpel_pixels_mc22_c; | |
3632 | |
859 | 3633 #define dspfunc(PFX, IDX, NUM) \ |
3634 c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_c; \ | |
3635 c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_c; \ | |
3636 c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_c; \ | |
3637 c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_c; \ | |
3638 c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_c; \ | |
3639 c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_c; \ | |
3640 c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_c; \ | |
3641 c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_c; \ | |
3642 c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_c; \ | |
3643 c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_c; \ | |
3644 c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_c; \ | |
3645 c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_c; \ | |
3646 c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_c; \ | |
3647 c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_c; \ | |
3648 c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_c; \ | |
3649 c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_c | |
857 | 3650 |
859 | 3651 dspfunc(put_qpel, 0, 16); |
3652 dspfunc(put_no_rnd_qpel, 0, 16); | |
3653 | |
3654 dspfunc(avg_qpel, 0, 16); | |
3655 /* dspfunc(avg_no_rnd_qpel, 0, 16); */ | |
857 | 3656 |
859 | 3657 dspfunc(put_qpel, 1, 8); |
3658 dspfunc(put_no_rnd_qpel, 1, 8); | |
3659 | |
3660 dspfunc(avg_qpel, 1, 8); | |
3661 /* dspfunc(avg_no_rnd_qpel, 1, 8); */ | |
1168 | 3662 |
3663 dspfunc(put_h264_qpel, 0, 16); | |
3664 dspfunc(put_h264_qpel, 1, 8); | |
3665 dspfunc(put_h264_qpel, 2, 4); | |
3666 dspfunc(avg_h264_qpel, 0, 16); | |
3667 dspfunc(avg_h264_qpel, 1, 8); | |
3668 dspfunc(avg_h264_qpel, 2, 4); | |
3669 | |
859 | 3670 #undef dspfunc |
1168 | 3671 c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_c; |
3672 c->put_h264_chroma_pixels_tab[1]= put_h264_chroma_mc4_c; | |
3673 c->put_h264_chroma_pixels_tab[2]= put_h264_chroma_mc2_c; | |
3674 c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_c; | |
3675 c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_c; | |
3676 c->avg_h264_chroma_pixels_tab[2]= avg_h264_chroma_mc2_c; | |
857 | 3677 |
2415 | 3678 c->weight_h264_pixels_tab[0]= weight_h264_pixels16x16_c; |
3679 c->weight_h264_pixels_tab[1]= weight_h264_pixels16x8_c; | |
3680 c->weight_h264_pixels_tab[2]= weight_h264_pixels8x16_c; | |
3681 c->weight_h264_pixels_tab[3]= weight_h264_pixels8x8_c; | |
3682 c->weight_h264_pixels_tab[4]= weight_h264_pixels8x4_c; | |
3683 c->weight_h264_pixels_tab[5]= weight_h264_pixels4x8_c; | |
3684 c->weight_h264_pixels_tab[6]= weight_h264_pixels4x4_c; | |
3685 c->weight_h264_pixels_tab[7]= weight_h264_pixels4x2_c; | |
3686 c->weight_h264_pixels_tab[8]= weight_h264_pixels2x4_c; | |
3687 c->weight_h264_pixels_tab[9]= weight_h264_pixels2x2_c; | |
3688 c->biweight_h264_pixels_tab[0]= biweight_h264_pixels16x16_c; | |
3689 c->biweight_h264_pixels_tab[1]= biweight_h264_pixels16x8_c; | |
3690 c->biweight_h264_pixels_tab[2]= biweight_h264_pixels8x16_c; | |
3691 c->biweight_h264_pixels_tab[3]= biweight_h264_pixels8x8_c; | |
3692 c->biweight_h264_pixels_tab[4]= biweight_h264_pixels8x4_c; | |
3693 c->biweight_h264_pixels_tab[5]= biweight_h264_pixels4x8_c; | |
3694 c->biweight_h264_pixels_tab[6]= biweight_h264_pixels4x4_c; | |
3695 c->biweight_h264_pixels_tab[7]= biweight_h264_pixels4x2_c; | |
3696 c->biweight_h264_pixels_tab[8]= biweight_h264_pixels2x4_c; | |
3697 c->biweight_h264_pixels_tab[9]= biweight_h264_pixels2x2_c; | |
3698 | |
936 | 3699 c->put_mspel_pixels_tab[0]= put_mspel8_mc00_c; |
3700 c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c; | |
3701 c->put_mspel_pixels_tab[2]= put_mspel8_mc20_c; | |
3702 c->put_mspel_pixels_tab[3]= put_mspel8_mc30_c; | |
3703 c->put_mspel_pixels_tab[4]= put_mspel8_mc02_c; | |
3704 c->put_mspel_pixels_tab[5]= put_mspel8_mc12_c; | |
3705 c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c; | |
3706 c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c; | |
1267
85b71f9f7450
moving the svq3 motion compensation stuff to dsputil (this also means that existing optimized halfpel code is used now ...)
michaelni
parents:
1264
diff
changeset
|
3707 |
1708 | 3708 #define SET_CMP_FUNC(name) \ |
3709 c->name[0]= name ## 16_c;\ | |
3710 c->name[1]= name ## 8x8_c; | |
936 | 3711 |
1708 | 3712 SET_CMP_FUNC(hadamard8_diff) |
1729 | 3713 c->hadamard8_diff[4]= hadamard8_intra16_c; |
1708 | 3714 SET_CMP_FUNC(dct_sad) |
2382 | 3715 SET_CMP_FUNC(dct_max) |
1708 | 3716 c->sad[0]= pix_abs16_c; |
3717 c->sad[1]= pix_abs8_c; | |
3718 c->sse[0]= sse16_c; | |
3719 c->sse[1]= sse8_c; | |
2184 | 3720 c->sse[2]= sse4_c; |
1708 | 3721 SET_CMP_FUNC(quant_psnr) |
3722 SET_CMP_FUNC(rd) | |
3723 SET_CMP_FUNC(bit) | |
1729 | 3724 c->vsad[0]= vsad16_c; |
3725 c->vsad[4]= vsad_intra16_c; | |
3726 c->vsse[0]= vsse16_c; | |
3727 c->vsse[4]= vsse_intra16_c; | |
2065
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3728 c->nsse[0]= nsse16_c; |
9e4bebc39ade
noise preserving sum of squares comparission function
michael
parents:
2045
diff
changeset
|
3729 c->nsse[1]= nsse8_c; |
2184 | 3730 c->w53[0]= w53_16_c; |
3731 c->w53[1]= w53_8_c; | |
3732 c->w97[0]= w97_16_c; | |
3733 c->w97[1]= w97_8_c; | |
3734 | |
866 | 3735 c->add_bytes= add_bytes_c; |
3736 c->diff_bytes= diff_bytes_c; | |
1527 | 3737 c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_c; |
1273 | 3738 c->bswap_buf= bswap_buf; |
1644 | 3739 |
3740 c->h263_h_loop_filter= h263_h_loop_filter_c; | |
3741 c->h263_v_loop_filter= h263_v_loop_filter_c; | |
1784 | 3742 |
2045 | 3743 c->h261_loop_filter= h261_loop_filter_c; |
2044
b6f2add2511e
h261 decoder by (Maarten Daniels <maarten.daniels at student dot luc dot ac dot be>)
michael
parents:
1984
diff
changeset
|
3744 |
1784 | 3745 c->try_8x8basis= try_8x8basis_c; |
3746 c->add_8x8basis= add_8x8basis_c; | |
866 | 3747 |
2 | 3748 #ifdef HAVE_MMX |
1092 | 3749 dsputil_init_mmx(c, avctx); |
0 | 3750 #endif |
62 | 3751 #ifdef ARCH_ARMV4L |
1092 | 3752 dsputil_init_armv4l(c, avctx); |
62 | 3753 #endif |
88 | 3754 #ifdef HAVE_MLIB |
1092 | 3755 dsputil_init_mlib(c, avctx); |
88 | 3756 #endif |
1959
55b7435c59b8
VIS optimized motion compensation code. by (David S. Miller <davem at redhat dot com>)
michael
parents:
1866
diff
changeset
|
3757 #ifdef ARCH_SPARC |
55b7435c59b8
VIS optimized motion compensation code. by (David S. Miller <davem at redhat dot com>)
michael
parents:
1866
diff
changeset
|
3758 dsputil_init_vis(c,avctx); |
55b7435c59b8
VIS optimized motion compensation code. by (David S. Miller <davem at redhat dot com>)
michael
parents:
1866
diff
changeset
|
3759 #endif |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
209
diff
changeset
|
3760 #ifdef ARCH_ALPHA |
1092 | 3761 dsputil_init_alpha(c, avctx); |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
209
diff
changeset
|
3762 #endif |
623
92e99e506920
first cut at altivec support on darwin patch by (Brian Foley <bfoley at compsoc dot nuigalway dot ie>)
michaelni
parents:
612
diff
changeset
|
3763 #ifdef ARCH_POWERPC |
1092 | 3764 dsputil_init_ppc(c, avctx); |
626
23a093d6e450
patch by Heliodoro Tammaro <helio at interactives dot org>
michaelni
parents:
625
diff
changeset
|
3765 #endif |
689
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
676
diff
changeset
|
3766 #ifdef HAVE_MMI |
1092 | 3767 dsputil_init_mmi(c, avctx); |
689
efcbfbd18864
ps2 idct patch by (Leon van Stuivenberg <leonvs at iae dot nl>)
michaelni
parents:
676
diff
changeset
|
3768 #endif |
1259
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
1201
diff
changeset
|
3769 #ifdef ARCH_SH4 |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
1201
diff
changeset
|
3770 dsputil_init_sh4(c,avctx); |
e8c3884f2c7e
sh4 optimized idct & bswap patch by (BERO <bero at geocities dot co dot jp>)
michaelni
parents:
1201
diff
changeset
|
3771 #endif |
1092 | 3772 |
3773 switch(c->idct_permutation_type){ | |
3774 case FF_NO_IDCT_PERM: | |
3775 for(i=0; i<64; i++) | |
3776 c->idct_permutation[i]= i; | |
3777 break; | |
3778 case FF_LIBMPEG2_IDCT_PERM: | |
3779 for(i=0; i<64; i++) | |
3780 c->idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2); | |
3781 break; | |
3782 case FF_SIMPLE_IDCT_PERM: | |
3783 for(i=0; i<64; i++) | |
3784 c->idct_permutation[i]= simple_mmx_permutation[i]; | |
3785 break; | |
3786 case FF_TRANSPOSE_IDCT_PERM: | |
3787 for(i=0; i<64; i++) | |
3788 c->idct_permutation[i]= ((i&7)<<3) | (i>>3); | |
3789 break; | |
3790 default: | |
1598
932d306bf1dc
av_log() patch by (Michel Bardiaux <mbardiaux at peaktime dot be>)
michael
parents:
1571
diff
changeset
|
3791 av_log(avctx, AV_LOG_ERROR, "Internal error, IDCT permutation not set\n"); |
1092 | 3792 } |
0 | 3793 } |
252
ddb1a0e94cf4
- Added PSNR feature to libavcodec and ffmpeg. By now just Y PSNR until I'm
pulento
parents:
220
diff
changeset
|
3794 |