Mercurial > libavcodec.hg
comparison x86/vp8dsp.asm @ 12006:d584c7373a64 libavcodec
Add mmxext version of VP8 DC Hadamard transform
author | darkshikari |
---|---|
date | Tue, 29 Jun 2010 01:41:59 +0000 |
parents | a717c1a93036 |
children | 2ae70e2c31a4 |
comparison
equal
deleted
inserted
replaced
12005:88563eada57f | 12006:d584c7373a64 |
---|---|
19 ;* License along with FFmpeg; if not, write to the Free Software | 19 ;* License along with FFmpeg; if not, write to the Free Software |
20 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | 20 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
21 ;****************************************************************************** | 21 ;****************************************************************************** |
22 | 22 |
23 %include "x86inc.asm" | 23 %include "x86inc.asm" |
24 %include "x86util.asm" | |
24 | 25 |
25 SECTION_RODATA | 26 SECTION_RODATA |
26 | 27 |
27 fourtap_filter_hw_m: times 4 dw -6, 123 | 28 fourtap_filter_hw_m: times 4 dw -6, 123 |
28 times 4 dw 12, -1 | 29 times 4 dw 12, -1 |
139 | 140 |
140 filter_h6_shuf1: db 0, 5, 1, 6, 2, 7, 3, 8, 4, 9, 5, 10, 6, 11, 7, 12 | 141 filter_h6_shuf1: db 0, 5, 1, 6, 2, 7, 3, 8, 4, 9, 5, 10, 6, 11, 7, 12 |
141 filter_h6_shuf2: db 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9 | 142 filter_h6_shuf2: db 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9 |
142 filter_h6_shuf3: db 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11 | 143 filter_h6_shuf3: db 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11 |
143 | 144 |
145 cextern pw_3 | |
144 cextern pw_4 | 146 cextern pw_4 |
145 cextern pw_64 | 147 cextern pw_64 |
146 | 148 |
147 SECTION .text | 149 SECTION .text |
148 | 150 |
918 movd [r0], xmm2 | 920 movd [r0], xmm2 |
919 pextrd [r0+r2], xmm2, 1 | 921 pextrd [r0+r2], xmm2, 1 |
920 pextrd [r1], xmm2, 2 | 922 pextrd [r1], xmm2, 2 |
921 pextrd [r1+r2], xmm2, 3 | 923 pextrd [r1+r2], xmm2, 3 |
922 RET | 924 RET |
925 | |
926 ;----------------------------------------------------------------------------- | |
927 ; void vp8_luma_dc_wht_mmxext(DCTELEM block[4][4][16], DCTELEM dc[16]) | |
928 ;----------------------------------------------------------------------------- | |
929 | |
930 %macro SCATTER_WHT 1 | |
931 pextrw r1d, m0, %1 | |
932 pextrw r2d, m1, %1 | |
933 mov [r0+2*16*0], r1w | |
934 mov [r0+2*16*1], r2w | |
935 pextrw r1d, m2, %1 | |
936 pextrw r2d, m3, %1 | |
937 mov [r0+2*16*2], r1w | |
938 mov [r0+2*16*3], r2w | |
939 %endmacro | |
940 | |
941 %macro HADAMARD4_1D 4 | |
942 SUMSUB_BADC m%2, m%1, m%4, m%3 | |
943 SUMSUB_BADC m%4, m%2, m%3, m%1 | |
944 SWAP %1, %4, %3 | |
945 %endmacro | |
946 | |
947 INIT_MMX | |
948 cglobal vp8_luma_dc_wht_mmxext, 2,3 | |
949 movq m0, [r1] | |
950 movq m1, [r1+8] | |
951 movq m2, [r1+16] | |
952 movq m3, [r1+24] | |
953 HADAMARD4_1D 0, 1, 2, 3 | |
954 TRANSPOSE4x4W 0, 1, 2, 3, 4 | |
955 paddw m0, [pw_3] | |
956 HADAMARD4_1D 0, 1, 2, 3 | |
957 psraw m0, 3 | |
958 psraw m1, 3 | |
959 psraw m2, 3 | |
960 psraw m3, 3 | |
961 SCATTER_WHT 0 | |
962 add r0, 2*16*4 | |
963 SCATTER_WHT 1 | |
964 add r0, 2*16*4 | |
965 SCATTER_WHT 2 | |
966 add r0, 2*16*4 | |
967 SCATTER_WHT 3 | |
968 RET |