comparison x86/vp8dsp.asm @ 12006:d584c7373a64 libavcodec

Add mmxext version of VP8 DC Hadamard transform
author darkshikari
date Tue, 29 Jun 2010 01:41:59 +0000
parents a717c1a93036
children 2ae70e2c31a4
comparison
equal deleted inserted replaced
12005:88563eada57f 12006:d584c7373a64
19 ;* License along with FFmpeg; if not, write to the Free Software 19 ;* License along with FFmpeg; if not, write to the Free Software
20 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 20 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 ;****************************************************************************** 21 ;******************************************************************************
22 22
23 %include "x86inc.asm" 23 %include "x86inc.asm"
24 %include "x86util.asm"
24 25
25 SECTION_RODATA 26 SECTION_RODATA
26 27
27 fourtap_filter_hw_m: times 4 dw -6, 123 28 fourtap_filter_hw_m: times 4 dw -6, 123
28 times 4 dw 12, -1 29 times 4 dw 12, -1
139 140
140 filter_h6_shuf1: db 0, 5, 1, 6, 2, 7, 3, 8, 4, 9, 5, 10, 6, 11, 7, 12 141 filter_h6_shuf1: db 0, 5, 1, 6, 2, 7, 3, 8, 4, 9, 5, 10, 6, 11, 7, 12
141 filter_h6_shuf2: db 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9 142 filter_h6_shuf2: db 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9
142 filter_h6_shuf3: db 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11 143 filter_h6_shuf3: db 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11
143 144
145 cextern pw_3
144 cextern pw_4 146 cextern pw_4
145 cextern pw_64 147 cextern pw_64
146 148
147 SECTION .text 149 SECTION .text
148 150
918 movd [r0], xmm2 920 movd [r0], xmm2
919 pextrd [r0+r2], xmm2, 1 921 pextrd [r0+r2], xmm2, 1
920 pextrd [r1], xmm2, 2 922 pextrd [r1], xmm2, 2
921 pextrd [r1+r2], xmm2, 3 923 pextrd [r1+r2], xmm2, 3
922 RET 924 RET
925
926 ;-----------------------------------------------------------------------------
927 ; void vp8_luma_dc_wht_mmxext(DCTELEM block[4][4][16], DCTELEM dc[16])
928 ;-----------------------------------------------------------------------------
929
; SCATTER_WHT lane: store word number `lane` of m0, m1, m2 and m3 to four
; 16-bit destinations spaced 2*16 bytes apart, starting at [r0].
; In:    r0 = destination base address, m0-m3 = source words
; Clobb: r1, r2 (used as scratch for the extracted words)
; NOTE(review): the 2*16-byte stride presumably equals one 16-coefficient block
; of 2-byte DCTELEMs, so each store would land on element 0 of a block -- confirm
; against the C prototype / caller.
930 %macro SCATTER_WHT 1
931 pextrw r1d, m0, %1 ; r1d = word %1 of m0
932 pextrw r2d, m1, %1 ; r2d = word %1 of m1
933 mov [r0+2*16*0], r1w ; 16-bit store at byte offset 0
934 mov [r0+2*16*1], r2w ; 16-bit store at byte offset 32
935 pextrw r1d, m2, %1 ; r1d = word %1 of m2
936 pextrw r2d, m3, %1 ; r2d = word %1 of m3
937 mov [r0+2*16*2], r1w ; 16-bit store at byte offset 64
938 mov [r0+2*16*3], r2w ; 16-bit store at byte offset 96
939 %endmacro
940
; HADAMARD4_1D a, b, c, d: one 1-D pass of the 4-point transform over the four
; registers m<a>, m<b>, m<c>, m<d>, built from two SUMSUB_BADC stages followed
; by a SWAP of the register numbering.
; NOTE(review): SUMSUB_BADC and SWAP are defined outside this view (presumably in
; x86util.asm / x86inc.asm); presumably each SUMSUB_BADC forms sum/difference
; pairs in place and SWAP only renames registers at assembly time -- confirm there.
941 %macro HADAMARD4_1D 4
942 SUMSUB_BADC m%2, m%1, m%4, m%3 ; first stage on the pairs (%2,%1) and (%4,%3)
943 SUMSUB_BADC m%4, m%2, m%3, m%1 ; second stage on the pairs (%4,%2) and (%3,%1)
944 SWAP %1, %4, %3 ; permute register names; presumably restores natural output order -- TODO confirm
945 %endmacro
946
; Loads 16 words from the second argument, applies HADAMARD4_1D twice with a
; 4x4 word transpose in between, shifts every result right by 3 (arithmetic),
; and scatters the 16 results to the first argument at a 2*16-byte stride.
; Per the prototype comment above: r0 = block, r1 = dc.
947 INIT_MMX
948 cglobal vp8_luma_dc_wht_mmxext, 2,3 ; presumably x86inc's "2 args, 3 GPRs" form (r0, r1 args; r2 scratch) -- confirm in x86inc.asm
949 movq m0, [r1] ; words 0-3 of the input
950 movq m1, [r1+8] ; words 4-7
951 movq m2, [r1+16] ; words 8-11
952 movq m3, [r1+24] ; words 12-15; last read through r1, which SCATTER_WHT later reuses as scratch
953 HADAMARD4_1D 0, 1, 2, 3 ; first 1-D pass
954 TRANSPOSE4x4W 0, 1, 2, 3, 4 ; presumably transposes the 4x4 word matrix in m0-m3 using m4 as temporary (macro defined elsewhere)
955 paddw m0, [pw_3] ; add external constant pw_3 to m0; presumably the rounding bias for the >>3 below -- TODO confirm it reaches all outputs
956 HADAMARD4_1D 0, 1, 2, 3 ; second 1-D pass
957 psraw m0, 3 ; arithmetic shift right by 3 on each word of the results
958 psraw m1, 3
959 psraw m2, 3
960 psraw m3, 3
961 SCATTER_WHT 0 ; word 0 of m0..m3 -> [r0 + 0/32/64/96]
962 add r0, 2*16*4 ; advance destination by 128 bytes (four strides)
963 SCATTER_WHT 1 ; word 1 of m0..m3
964 add r0, 2*16*4
965 SCATTER_WHT 2 ; word 2 of m0..m3
966 add r0, 2*16*4
967 SCATTER_WHT 3 ; word 3 of m0..m3
968 RET ; NOTE(review): no emms is visible in this function; presumably MMX state is cleared by the caller -- confirm