comparison x86/vp8dsp.asm @ 12340:2d15f62f4f8a libavcodec

VP8: move zeroing of luma DC block into the WHT. Lets us do the zeroing in asm instead of C. Also makes it consistent with the way the regular iDCT code does it.
author darkshikari
date Mon, 02 Aug 2010 20:18:09 +0000
parents 435319d67bd8
children 4f13b2ded34d
comparison
equal deleted inserted replaced
12339:57fc7f2d7b28 12340:2d15f62f4f8a
1184 SUMSUB_BADC m%2, m%1, m%4, m%3 1184 SUMSUB_BADC m%2, m%1, m%4, m%3
1185 SUMSUB_BADC m%4, m%2, m%3, m%1 1185 SUMSUB_BADC m%4, m%2, m%3, m%1
1186 SWAP %1, %4, %3 1186 SWAP %1, %4, %3
1187 %endmacro 1187 %endmacro
1188 1188
1189 INIT_MMX 1189 %macro VP8_DC_WHT 1
1190 cglobal vp8_luma_dc_wht_mmx, 2,3 1190 cglobal vp8_luma_dc_wht_%1, 2,3
1191 movq m0, [r1] 1191 movq m0, [r1]
1192 movq m1, [r1+8] 1192 movq m1, [r1+8]
1193 movq m2, [r1+16] 1193 movq m2, [r1+16]
1194 movq m3, [r1+24] 1194 movq m3, [r1+24]
1195 %ifidn %1, sse
1196 xorps xmm0, xmm0
1197 movaps [r1+ 0], xmm0
1198 movaps [r1+16], xmm0
1199 %else
1200 pxor m4, m4
1201 movq [r1+ 0], m4
1202 movq [r1+ 8], m4
1203 movq [r1+16], m4
1204 movq [r1+24], m4
1205 %endif
1195 HADAMARD4_1D 0, 1, 2, 3 1206 HADAMARD4_1D 0, 1, 2, 3
1196 TRANSPOSE4x4W 0, 1, 2, 3, 4 1207 TRANSPOSE4x4W 0, 1, 2, 3, 4
1197 paddw m0, [pw_3] 1208 paddw m0, [pw_3]
1198 HADAMARD4_1D 0, 1, 2, 3 1209 HADAMARD4_1D 0, 1, 2, 3
1199 psraw m0, 3 1210 psraw m0, 3
1200 psraw m1, 3 1211 psraw m1, 3
1201 psraw m2, 3 1212 psraw m2, 3
1202 psraw m3, 3 1213 psraw m3, 3
1203 SCATTER_WHT 0, 1, 0 1214 SCATTER_WHT 0, 1, 0
1204 SCATTER_WHT 2, 3, 2 1215 SCATTER_WHT 2, 3, 2
1205 RET 1216 RET
1217 %endmacro
1218
1219 INIT_MMX
1220 VP8_DC_WHT mmx
1221 VP8_DC_WHT sse
1206 1222
1207 ;----------------------------------------------------------------------------- 1223 ;-----------------------------------------------------------------------------
1208 ; void vp8_h/v_loop_filter_simple_<opt>(uint8_t *dst, int stride, int flim); 1224 ; void vp8_h/v_loop_filter_simple_<opt>(uint8_t *dst, int stride, int flim);
1209 ;----------------------------------------------------------------------------- 1225 ;-----------------------------------------------------------------------------
1210 1226