comparison x86/fft_mmx.asm @ 12432:f61e22f8cf28 libavcodec

cosmetics in imdct_sse
author lorenm
date Sat, 28 Aug 2010 21:03:13 +0000
parents 020540442072
children 67e7e49058c2
comparison
equal deleted inserted replaced
12431:9f8d9abd7984 12432:f61e22f8cf28
530 movaps xmm0, xmm1 530 movaps xmm0, xmm1
531 unpcklps xmm1, xmm2 531 unpcklps xmm1, xmm2
532 unpckhps xmm0, xmm2 532 unpckhps xmm0, xmm2
533 %endmacro 533 %endmacro
534 534
535 %macro PREROTATEW 3 ;addr1, addr2, xmm
536 movlps %1, %3
537 movhps %2, %3
538 %endmacro
539
540 %macro CMUL 6 ;j, xmm0, xmm1, 3, 4, 5 535 %macro CMUL 6 ;j, xmm0, xmm1, 3, 4, 5
541 movaps xmm6, [%4+%1*2] 536 movaps xmm6, [%4+%1*2]
542 movaps %2, [%4+%1*2+0x10] 537 movaps %2, [%4+%1*2+0x10]
543 movaps %3, xmm6 538 movaps %3, xmm6
544 movaps xmm7, %2 539 movaps xmm7, %2
545 mulps xmm6, [%5+%1*1] 540 mulps xmm6, [%5+%1]
546 mulps %2, [%6+%1*1] 541 mulps %2, [%6+%1]
547 mulps %3, [%6+%1*1] 542 mulps %3, [%6+%1]
548 mulps xmm7, [%5+%1*1] 543 mulps xmm7, [%5+%1]
549 subps %2, xmm6 544 subps %2, xmm6
550 addps %3, xmm7 545 addps %3, xmm7
551 %endmacro 546 %endmacro
552 547
553 %macro POSROTATESHUF 5 ;j, k, z+n8, tcos+n8, tsin+n8 548 %macro POSROTATESHUF 5 ;j, k, z+n8, tcos+n8, tsin+n8
574 cglobal imdct_half_sse, 3,7,8; FFTContext *s, FFTSample *output, const FFTSample *input 569 cglobal imdct_half_sse, 3,7,8; FFTContext *s, FFTSample *output, const FFTSample *input
575 %ifdef ARCH_X86_64 570 %ifdef ARCH_X86_64
576 %define rrevtab r10 571 %define rrevtab r10
577 %define rtcos r11 572 %define rtcos r11
578 %define rtsin r12 573 %define rtsin r12
579 push r10
580 push r11
581 push r12 574 push r12
582 push r13 575 push r13
583 push r14 576 push r14
584 %else 577 %else
585 %define rrevtab r6 578 %define rrevtab r6
618 mov rtcos, [esp+8] 611 mov rtcos, [esp+8]
619 %endif 612 %endif
620 613
621 PREROTATER r4, r3, r2, rtcos, rtsin 614 PREROTATER r4, r3, r2, rtcos, rtsin
622 %ifdef ARCH_X86_64 615 %ifdef ARCH_X86_64
623 movzx r5, word [rrevtab+r4*1-4] 616 movzx r5, word [rrevtab+r4-4]
624 movzx r6, word [rrevtab+r4*1-2] 617 movzx r6, word [rrevtab+r4-2]
625 movzx r13, word [rrevtab+r3*1] 618 movzx r13, word [rrevtab+r3]
626 movzx r14, word [rrevtab+r3*1+2] 619 movzx r14, word [rrevtab+r3+2]
627 PREROTATEW [r1+r5 *8], [r1+r6 *8], xmm0 620 movlps [r1+r5 *8], xmm0
628 PREROTATEW [r1+r13*8], [r1+r14*8], xmm1 621 movhps [r1+r6 *8], xmm0
622 movlps [r1+r13*8], xmm1
623 movhps [r1+r14*8], xmm1
629 add r4, 4 624 add r4, 4
630 %else 625 %else
631 mov r6, [esp] 626 mov r6, [esp]
632 movzx r5, word [r6+r4*1-4] 627 movzx r5, word [r6+r4-4]
633 movzx r4, word [r6+r4*1-2] 628 movzx r4, word [r6+r4-2]
634 PREROTATEW [r1+r5*8], [r1+r4*8], xmm0 629 movlps [r1+r5*8], xmm0
635 movzx r5, word [r6+r3*1] 630 movhps [r1+r4*8], xmm0
636 movzx r4, word [r6+r3*1+2] 631 movzx r5, word [r6+r3]
637 PREROTATEW [r1+r5*8], [r1+r4*8], xmm1 632 movzx r4, word [r6+r3+2]
633 movlps [r1+r5*8], xmm1
634 movhps [r1+r4*8], xmm1
638 %endif 635 %endif
639 sub r3, 4 636 sub r3, 4
640 jns .pre 637 jns .pre
641 638
642 mov r5, r0 639 mov r5, r0
661 POSROTATESHUF r0, r1, r6, rtcos, rtsin 658 POSROTATESHUF r0, r1, r6, rtcos, rtsin
662 %ifdef ARCH_X86_64 659 %ifdef ARCH_X86_64
663 pop r14 660 pop r14
664 pop r13 661 pop r13
665 pop r12 662 pop r12
666 pop r11
667 pop r10
668 %else 663 %else
669 add esp, 12 664 add esp, 12
670 %endif 665 %endif
671 RET 666 RET