comparison i386/fdct_mmx.c @ 1565:1a9a63f59849 libavcodec

minor mmx2 optimization of the dct
author michael
date Thu, 23 Oct 2003 13:22:20 +0000
parents b6b7d080f1a1
children 9a9c14e87ebf
comparison
equal deleted inserted replaced
1564:b6b7d080f1a1 1565:1a9a63f59849
208 psubsw_r2r(mm1, mm3); 208 psubsw_r2r(mm1, mm3);
209 movq_r2m(mm5, *(out + offset + 5 * 8)); 209 movq_r2m(mm5, *(out + offset + 5 * 8));
210 movq_r2m(mm3, *(out + offset + 7 * 8)); 210 movq_r2m(mm3, *(out + offset + 7 * 8));
211 } 211 }
212 212
213 static always_inline void fdct_row(const int16_t *in, int16_t *out, const int16_t *table) 213 static always_inline void fdct_row(const int16_t *in, int16_t *out, const int16_t *table, int mmx2)
214 { 214 {
215 if(mmx2){
216 pshufw_m2r(*(in + 4), mm5, 0x1B);
217 movq_m2r(*(in + 0), mm0);
218 }else{
215 movd_m2r(*(in + 6), mm5); 219 movd_m2r(*(in + 6), mm5);
216 punpcklwd_m2r(*(in + 4), mm5); 220 punpcklwd_m2r(*(in + 4), mm5);
217 movq_r2r(mm5, mm2); 221 movq_r2r(mm5, mm2);
218 psrlq_i2r(0x20, mm5); 222 psrlq_i2r(0x20, mm5);
219 movq_m2r(*(in + 0), mm0); 223 movq_m2r(*(in + 0), mm0);
220 punpcklwd_r2r(mm2, mm5); 224 punpcklwd_r2r(mm2, mm5);
225 }
221 movq_r2r(mm0, mm1); 226 movq_r2r(mm0, mm1);
222 paddsw_r2r(mm5, mm0); 227 paddsw_r2r(mm5, mm0);
223 psubsw_r2r(mm5, mm1); 228 psubsw_r2r(mm5, mm1);
224 movq_r2r(mm0, mm2); 229 movq_r2r(mm0, mm2);
225 punpcklwd_r2r(mm1, mm0); 230 punpcklwd_r2r(mm1, mm0);
281 286
282 block1 = block_tmp; 287 block1 = block_tmp;
283 table = tab_frw_01234567; 288 table = tab_frw_01234567;
284 out = block; 289 out = block;
285 for(i=8;i>0;i--) { 290 for(i=8;i>0;i--) {
286 fdct_row(block1, out, table); 291 fdct_row(block1, out, table, 0);
287 block1 += 8; 292 block1 += 8;
288 table += 32; 293 table += 32;
289 out += 8; 294 out += 8;
290 } 295 }
291 } 296 }
297
298 void ff_fdct_mmx2(int16_t *block)
299 {
300 int64_t align_tmp[16] ATTR_ALIGN(8);
301 int16_t * const block_tmp= (int16_t*)align_tmp;
302 int16_t *block1, *out;
303 const int16_t *table;
304 int i;
305
306 block1 = block_tmp;
307 fdct_col(block, block1, 0);
308 fdct_col(block, block1, 4);
309
310 block1 = block_tmp;
311 table = tab_frw_01234567;
312 out = block;
313 for(i=8;i>0;i--) {
314 fdct_row(block1, out, table, 1);
315 block1 += 8;
316 table += 32;
317 out += 8;
318 }
319 }