Mercurial > libavcodec.hg
comparison i386/fdct_mmx.c @ 1565:1a9a63f59849 libavcodec
minor mmx2 optimization of the dct
author | michael |
---|---|
date | Thu, 23 Oct 2003 13:22:20 +0000 |
parents | b6b7d080f1a1 |
children | 9a9c14e87ebf |
comparison
equal
deleted
inserted
replaced
1564:b6b7d080f1a1 | 1565:1a9a63f59849 |
---|---|
208 psubsw_r2r(mm1, mm3); | 208 psubsw_r2r(mm1, mm3); |
209 movq_r2m(mm5, *(out + offset + 5 * 8)); | 209 movq_r2m(mm5, *(out + offset + 5 * 8)); |
210 movq_r2m(mm3, *(out + offset + 7 * 8)); | 210 movq_r2m(mm3, *(out + offset + 7 * 8)); |
211 } | 211 } |
212 | 212 |
213 static always_inline void fdct_row(const int16_t *in, int16_t *out, const int16_t *table) | 213 static always_inline void fdct_row(const int16_t *in, int16_t *out, const int16_t *table, int mmx2) |
214 { | 214 { |
215 if(mmx2){ | |
216 pshufw_m2r(*(in + 4), mm5, 0x1B); | |
217 movq_m2r(*(in + 0), mm0); | |
218 }else{ | |
215 movd_m2r(*(in + 6), mm5); | 219 movd_m2r(*(in + 6), mm5); |
216 punpcklwd_m2r(*(in + 4), mm5); | 220 punpcklwd_m2r(*(in + 4), mm5); |
217 movq_r2r(mm5, mm2); | 221 movq_r2r(mm5, mm2); |
218 psrlq_i2r(0x20, mm5); | 222 psrlq_i2r(0x20, mm5); |
219 movq_m2r(*(in + 0), mm0); | 223 movq_m2r(*(in + 0), mm0); |
220 punpcklwd_r2r(mm2, mm5); | 224 punpcklwd_r2r(mm2, mm5); |
225 } | |
221 movq_r2r(mm0, mm1); | 226 movq_r2r(mm0, mm1); |
222 paddsw_r2r(mm5, mm0); | 227 paddsw_r2r(mm5, mm0); |
223 psubsw_r2r(mm5, mm1); | 228 psubsw_r2r(mm5, mm1); |
224 movq_r2r(mm0, mm2); | 229 movq_r2r(mm0, mm2); |
225 punpcklwd_r2r(mm1, mm0); | 230 punpcklwd_r2r(mm1, mm0); |
281 | 286 |
282 block1 = block_tmp; | 287 block1 = block_tmp; |
283 table = tab_frw_01234567; | 288 table = tab_frw_01234567; |
284 out = block; | 289 out = block; |
285 for(i=8;i>0;i--) { | 290 for(i=8;i>0;i--) { |
286 fdct_row(block1, out, table); | 291 fdct_row(block1, out, table, 0); |
287 block1 += 8; | 292 block1 += 8; |
288 table += 32; | 293 table += 32; |
289 out += 8; | 294 out += 8; |
290 } | 295 } |
291 } | 296 } |
297 | |
298 void ff_fdct_mmx2(int16_t *block) | |
299 { | |
300 int64_t align_tmp[16] ATTR_ALIGN(8); | |
301 int16_t * const block_tmp= (int16_t*)align_tmp; | |
302 int16_t *block1, *out; | |
303 const int16_t *table; | |
304 int i; | |
305 | |
306 block1 = block_tmp; | |
307 fdct_col(block, block1, 0); | |
308 fdct_col(block, block1, 4); | |
309 | |
310 block1 = block_tmp; | |
311 table = tab_frw_01234567; | |
312 out = block; | |
313 for(i=8;i>0;i--) { | |
314 fdct_row(block1, out, table, 1); | |
315 block1 += 8; | |
316 table += 32; | |
317 out += 8; | |
318 } | |
319 } |