comparison dsputil.c @ 997:4dfe15ae0078 libavcodec

sse16 & pix_norm1 optimization patch by Felix von Leitner <felix-ffmpeg at fefe dot de>, with some modifications
author michaelni
date Sat, 11 Jan 2003 22:32:56 +0000
parents ad44196ea483
children b2cf2a1d9a51
comparison
legend: equal | deleted | inserted | replaced
996:ad44196ea483 997:4dfe15ae0078
127 UINT32 *sq = squareTbl + 256; 127 UINT32 *sq = squareTbl + 256;
128 128
129 s = 0; 129 s = 0;
130 for (i = 0; i < 16; i++) { 130 for (i = 0; i < 16; i++) {
131 for (j = 0; j < 16; j += 8) { 131 for (j = 0; j < 16; j += 8) {
132 #if 0
132 s += sq[pix[0]]; 133 s += sq[pix[0]];
133 s += sq[pix[1]]; 134 s += sq[pix[1]];
134 s += sq[pix[2]]; 135 s += sq[pix[2]];
135 s += sq[pix[3]]; 136 s += sq[pix[3]];
136 s += sq[pix[4]]; 137 s += sq[pix[4]];
137 s += sq[pix[5]]; 138 s += sq[pix[5]];
138 s += sq[pix[6]]; 139 s += sq[pix[6]];
139 s += sq[pix[7]]; 140 s += sq[pix[7]];
141 #else
142 #if LONG_MAX > 2147483647
143 register uint64_t x=*(uint64_t*)pix;
144 s += sq[x&0xff];
145 s += sq[(x>>8)&0xff];
146 s += sq[(x>>16)&0xff];
147 s += sq[(x>>24)&0xff];
148 s += sq[(x>>32)&0xff];
149 s += sq[(x>>40)&0xff];
150 s += sq[(x>>48)&0xff];
151 s += sq[(x>>56)&0xff];
152 #else
153 register uint32_t x=*(uint32_t*)pix;
154 s += sq[x&0xff];
155 s += sq[(x>>8)&0xff];
156 s += sq[(x>>16)&0xff];
157 s += sq[(x>>24)&0xff];
158 x=*(uint32_t*)(pix+4);
159 s += sq[x&0xff];
160 s += sq[(x>>8)&0xff];
161 s += sq[(x>>16)&0xff];
162 s += sq[(x>>24)&0xff];
163 #endif
164 #endif
140 pix += 8; 165 pix += 8;
141 } 166 }
142 pix += line_size - 16; 167 pix += line_size - 16;
143 } 168 }
144 return s; 169 return s;
172 UINT32 *sq = squareTbl + 256; 197 UINT32 *sq = squareTbl + 256;
173 198
174 s = 0; 199 s = 0;
175 for (i = 0; i < 16; i++) { 200 for (i = 0; i < 16; i++) {
176 for (j = 0; j < 16; j += 8) { 201 for (j = 0; j < 16; j += 8) {
202 #if 1
203 #if LONG_MAX > 2147483647
204 uint64_t x,y;
205 x=*(uint64_t*)pix1;
206 y=*(uint64_t*)pix2;
207
208 s += sq[(x&0xff) - (y&0xff)];
209 s += sq[((x>>8)&0xff) - ((y>>8)&0xff)];
210 s += sq[((x>>16)&0xff) - ((y>>16)&0xff)];
211 s += sq[((x>>24)&0xff) - ((y>>24)&0xff)];
212 s += sq[((x>>32)&0xff) - ((y>>32)&0xff)];
213 s += sq[((x>>40)&0xff) - ((y>>40)&0xff)];
214 s += sq[((x>>48)&0xff) - ((y>>48)&0xff)];
215 s += sq[((x>>56)&0xff) - ((y>>56)&0xff)];
216 #else
217 uint32_t x,y;
218 x=*(uint32_t*)pix1;
219 y=*(uint32_t*)pix2;
220
221 s += sq[(x&0xff) - (y&0xff)];
222 s += sq[((x>>8)&0xff) - ((y>>8)&0xff)];
223 s += sq[((x>>16)&0xff) - ((y>>16)&0xff)];
224 s += sq[((x>>24)&0xff) - ((y>>24)&0xff)];
225
226 x=*(uint32_t*)(pix1+4);
227 y=*(uint32_t*)(pix2+4);
228 s += sq[(x&0xff) - (y&0xff)];
229 s += sq[((x>>8)&0xff) - ((y>>8)&0xff)];
230 s += sq[((x>>16)&0xff) - ((y>>16)&0xff)];
231 s += sq[((x>>24)&0xff) - ((y>>24)&0xff)];
232 #endif
233 #else
177 s += sq[pix1[0] - pix2[0]]; 234 s += sq[pix1[0] - pix2[0]];
178 s += sq[pix1[1] - pix2[1]]; 235 s += sq[pix1[1] - pix2[1]];
179 s += sq[pix1[2] - pix2[2]]; 236 s += sq[pix1[2] - pix2[2]];
180 s += sq[pix1[3] - pix2[3]]; 237 s += sq[pix1[3] - pix2[3]];
181 s += sq[pix1[4] - pix2[4]]; 238 s += sq[pix1[4] - pix2[4]];
182 s += sq[pix1[5] - pix2[5]]; 239 s += sq[pix1[5] - pix2[5]];
183 s += sq[pix1[6] - pix2[6]]; 240 s += sq[pix1[6] - pix2[6]];
184 s += sq[pix1[7] - pix2[7]]; 241 s += sq[pix1[7] - pix2[7]];
242 #endif
185 pix1 += 8; 243 pix1 += 8;
186 pix2 += 8; 244 pix2 += 8;
187 } 245 }
188 pix1 += line_size - 16; 246 pix1 += line_size - 16;
189 pix2 += line_size - 16; 247 pix2 += line_size - 16;