Mercurial > libavcodec.hg
comparison x86/vp8dsp.asm @ 12340:2d15f62f4f8a libavcodec
VP8: move zeroing of luma DC block into the WHT
Lets us do the zeroing in asm instead of C.
Also makes it consistent with the way the regular iDCT code does it.
author | darkshikari |
---|---|
date | Mon, 02 Aug 2010 20:18:09 +0000 |
parents | 435319d67bd8 |
children | 4f13b2ded34d |
comparison
equal
deleted
inserted
replaced
12339:57fc7f2d7b28 | 12340:2d15f62f4f8a |
---|---|
1184 SUMSUB_BADC m%2, m%1, m%4, m%3 | 1184 SUMSUB_BADC m%2, m%1, m%4, m%3 |
1185 SUMSUB_BADC m%4, m%2, m%3, m%1 | 1185 SUMSUB_BADC m%4, m%2, m%3, m%1 |
1186 SWAP %1, %4, %3 | 1186 SWAP %1, %4, %3 |
1187 %endmacro | 1187 %endmacro |
1188 | 1188 |
1189 INIT_MMX | 1189 %macro VP8_DC_WHT 1 |
1190 cglobal vp8_luma_dc_wht_mmx, 2,3 | 1190 cglobal vp8_luma_dc_wht_%1, 2,3 |
1191 movq m0, [r1] | 1191 movq m0, [r1] |
1192 movq m1, [r1+8] | 1192 movq m1, [r1+8] |
1193 movq m2, [r1+16] | 1193 movq m2, [r1+16] |
1194 movq m3, [r1+24] | 1194 movq m3, [r1+24] |
| | 1195 %ifidn %1, sse |
| | 1196 xorps xmm0, xmm0 |
| | 1197 movaps [r1+ 0], xmm0 |
| | 1198 movaps [r1+16], xmm0 |
| | 1199 %else |
| | 1200 pxor m4, m4 |
| | 1201 movq [r1+ 0], m4 |
| | 1202 movq [r1+ 8], m4 |
| | 1203 movq [r1+16], m4 |
| | 1204 movq [r1+24], m4 |
| | 1205 %endif |
1195 HADAMARD4_1D 0, 1, 2, 3 | 1206 HADAMARD4_1D 0, 1, 2, 3 |
1196 TRANSPOSE4x4W 0, 1, 2, 3, 4 | 1207 TRANSPOSE4x4W 0, 1, 2, 3, 4 |
1197 paddw m0, [pw_3] | 1208 paddw m0, [pw_3] |
1198 HADAMARD4_1D 0, 1, 2, 3 | 1209 HADAMARD4_1D 0, 1, 2, 3 |
1199 psraw m0, 3 | 1210 psraw m0, 3 |
1200 psraw m1, 3 | 1211 psraw m1, 3 |
1201 psraw m2, 3 | 1212 psraw m2, 3 |
1202 psraw m3, 3 | 1213 psraw m3, 3 |
1203 SCATTER_WHT 0, 1, 0 | 1214 SCATTER_WHT 0, 1, 0 |
1204 SCATTER_WHT 2, 3, 2 | 1215 SCATTER_WHT 2, 3, 2 |
1205 RET | 1216 RET |
| | 1217 %endmacro |
| | 1218 |
| | 1219 INIT_MMX |
| | 1220 VP8_DC_WHT mmx |
| | 1221 VP8_DC_WHT sse |
1206 | 1222 |
1207 ;----------------------------------------------------------------------------- | 1223 ;----------------------------------------------------------------------------- |
1208 ; void vp8_h/v_loop_filter_simple_<opt>(uint8_t *dst, int stride, int flim); | 1224 ; void vp8_h/v_loop_filter_simple_<opt>(uint8_t *dst, int stride, int flim); |
1209 ;----------------------------------------------------------------------------- | 1225 ;----------------------------------------------------------------------------- |
1210 | 1226 |