diff x86/vp8dsp.asm @ 12006:d584c7373a64 libavcodec

Add mmxext version of VP8 DC Hadamard transform
author darkshikari
date Tue, 29 Jun 2010 01:41:59 +0000
parents a717c1a93036
children 2ae70e2c31a4
line wrap: on
line diff
--- a/x86/vp8dsp.asm	Tue Jun 29 00:40:12 2010 +0000
+++ b/x86/vp8dsp.asm	Tue Jun 29 01:41:59 2010 +0000
@@ -21,6 +21,7 @@
 ;******************************************************************************
 
 %include "x86inc.asm"
+%include "x86util.asm"
 
 SECTION_RODATA
 
@@ -141,6 +142,7 @@
 filter_h6_shuf2: db 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6,  7, 7,  8,  8,  9
 filter_h6_shuf3: db 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8,  9, 9, 10, 10, 11
 
+cextern pw_3
 cextern pw_4
 cextern pw_64
 
@@ -920,3 +922,47 @@
     pextrd     [r1], xmm2, 2
     pextrd  [r1+r2], xmm2, 3
     RET
+
+;-----------------------------------------------------------------------------
+; void vp8_luma_dc_wht_mmxext(DCTELEM block[4][4][16], DCTELEM dc[16])
+;-----------------------------------------------------------------------------
+
+%macro SCATTER_WHT 1                 ; %1 = immediate word index; writes word %1 of m0..m3 to four slots 2*16 bytes apart from r0
+    pextrw r1d, m0, %1               ; r1d = word %1 of m0 (r1 is plain scratch here)
+    pextrw r2d, m1, %1               ; r2d = word %1 of m1
+    mov [r0+2*16*0], r1w             ; 16-bit store to the first slot, at r0
+    mov [r0+2*16*1], r2w             ; second slot, 32 bytes on (16 elements if they are 2 bytes wide, as the 16-bit stores suggest)
+    pextrw r1d, m2, %1               ; r1d = word %1 of m2
+    pextrw r2d, m3, %1               ; r2d = word %1 of m3
+    mov [r0+2*16*2], r1w             ; third slot, r0 + 64
+    mov [r0+2*16*3], r2w             ; fourth slot, r0 + 96; r0 itself is left unchanged
+%endmacro
+
+%macro HADAMARD4_1D 4                ; %1-%4 = register numbers of the four vectors, processed in place
+    SUMSUB_BADC m%2, m%1, m%4, m%3   ; first butterfly stage on pairs (%1,%2) and (%3,%4); SUMSUB_BADC is not defined in this file -- presumably sum/difference pairs, confirm in x86util.asm
+    SUMSUB_BADC m%4, m%2, m%3, m%1   ; second stage, combining the stage-one results across the two pairs
+    SWAP %1, %4, %3                  ; NOTE(review): presumably an assembly-time register renaming that puts the results back in %1..%4 order -- confirm against SWAP in x86inc.asm
+%endmacro
+
+INIT_MMX                             ; NOTE(review): x86inc mode switch; presumably makes m0..m7 name the 64-bit MMX registers -- confirm
+cglobal vp8_luma_dc_wht_mmxext, 2,3  ; presumably 2 arguments / 3 GPRs: r0 = block, r1 = dc (per the prototype comment above), r2 = scratch
+    movq          m0, [r1]           ; load 32 bytes from dc, 8 bytes (four words) per register
+    movq          m1, [r1+8]
+    movq          m2, [r1+16]
+    movq          m3, [r1+24]        ; last read through r1; SCATTER_WHT reuses r1 as scratch afterwards
+    HADAMARD4_1D  0, 1, 2, 3         ; first 1-D pass over m0..m3
+    TRANSPOSE4x4W 0, 1, 2, 3, 4      ; macro from outside this file; by its name transposes the 4x4 word matrix, presumably using m4 as temporary -- confirm
+    paddw         m0, [pw_3]         ; add the external constant pw_3 to every word of m0 before the >>3 below; presumably a rounding bias that reaches all outputs through the butterflies -- confirm
+    HADAMARD4_1D  0, 1, 2, 3         ; second 1-D pass, on the transposed data
+    psraw         m0, 3              ; arithmetic right shift by 3 of every word in each result register
+    psraw         m1, 3
+    psraw         m2, 3
+    psraw         m3, 3
+    SCATTER_WHT   0                  ; store word 0 of m0..m3 at r0 + {0, 32, 64, 96}
+    add           r0, 2*16*4         ; advance r0 by 128 bytes, past the four slots just written
+    SCATTER_WHT   1                  ; word 1 of m0..m3 into the next four slots
+    add           r0, 2*16*4
+    SCATTER_WHT   2                  ; word 2
+    add           r0, 2*16*4
+    SCATTER_WHT   3                  ; word 3; all 16 results are now stored
+    RET                              ; x86inc return macro; no explicit emms appears here -- NOTE(review): confirm callers clear MMX state