Mercurial > libavcodec.hg
comparison x86/fft_mmx.asm @ 12432:f61e22f8cf28 libavcodec
cosmetics in imdct_sse
field | value |
---|---|
author | lorenm |
date | Sat, 28 Aug 2010 21:03:13 +0000 |
parents | 020540442072 |
children | 67e7e49058c2 |
comparison
Legend: equal, deleted, inserted, replaced
12431:9f8d9abd7984 | 12432:f61e22f8cf28 |
---|---|
530 movaps xmm0, xmm1 | 530 movaps xmm0, xmm1 |
531 unpcklps xmm1, xmm2 | 531 unpcklps xmm1, xmm2 |
532 unpckhps xmm0, xmm2 | 532 unpckhps xmm0, xmm2 |
533 %endmacro | 533 %endmacro |
534 | 534 |
535 %macro PREROTATEW 3 ;addr1, addr2, xmm | |
536 movlps %1, %3 | |
537 movhps %2, %3 | |
538 %endmacro | |
539 | |
540 %macro CMUL 6 ;j, xmm0, xmm1, 3, 4, 5 | 535 %macro CMUL 6 ;j, xmm0, xmm1, 3, 4, 5 |
541 movaps xmm6, [%4+%1*2] | 536 movaps xmm6, [%4+%1*2] |
542 movaps %2, [%4+%1*2+0x10] | 537 movaps %2, [%4+%1*2+0x10] |
543 movaps %3, xmm6 | 538 movaps %3, xmm6 |
544 movaps xmm7, %2 | 539 movaps xmm7, %2 |
545 mulps xmm6, [%5+%1*1] | 540 mulps xmm6, [%5+%1] |
546 mulps %2, [%6+%1*1] | 541 mulps %2, [%6+%1] |
547 mulps %3, [%6+%1*1] | 542 mulps %3, [%6+%1] |
548 mulps xmm7, [%5+%1*1] | 543 mulps xmm7, [%5+%1] |
549 subps %2, xmm6 | 544 subps %2, xmm6 |
550 addps %3, xmm7 | 545 addps %3, xmm7 |
551 %endmacro | 546 %endmacro |
552 | 547 |
553 %macro POSROTATESHUF 5 ;j, k, z+n8, tcos+n8, tsin+n8 | 548 %macro POSROTATESHUF 5 ;j, k, z+n8, tcos+n8, tsin+n8 |
574 cglobal imdct_half_sse, 3,7,8; FFTContext *s, FFTSample *output, const FFTSample *input | 569 cglobal imdct_half_sse, 3,7,8; FFTContext *s, FFTSample *output, const FFTSample *input |
575 %ifdef ARCH_X86_64 | 570 %ifdef ARCH_X86_64 |
576 %define rrevtab r10 | 571 %define rrevtab r10 |
577 %define rtcos r11 | 572 %define rtcos r11 |
578 %define rtsin r12 | 573 %define rtsin r12 |
579 push r10 | |
580 push r11 | |
581 push r12 | 574 push r12 |
582 push r13 | 575 push r13 |
583 push r14 | 576 push r14 |
584 %else | 577 %else |
585 %define rrevtab r6 | 578 %define rrevtab r6 |
618 mov rtcos, [esp+8] | 611 mov rtcos, [esp+8] |
619 %endif | 612 %endif |
620 | 613 |
621 PREROTATER r4, r3, r2, rtcos, rtsin | 614 PREROTATER r4, r3, r2, rtcos, rtsin |
622 %ifdef ARCH_X86_64 | 615 %ifdef ARCH_X86_64 |
623 movzx r5, word [rrevtab+r4*1-4] | 616 movzx r5, word [rrevtab+r4-4] |
624 movzx r6, word [rrevtab+r4*1-2] | 617 movzx r6, word [rrevtab+r4-2] |
625 movzx r13, word [rrevtab+r3*1] | 618 movzx r13, word [rrevtab+r3] |
626 movzx r14, word [rrevtab+r3*1+2] | 619 movzx r14, word [rrevtab+r3+2] |
627 PREROTATEW [r1+r5 *8], [r1+r6 *8], xmm0 | 620 movlps [r1+r5 *8], xmm0 |
628 PREROTATEW [r1+r13*8], [r1+r14*8], xmm1 | 621 movhps [r1+r6 *8], xmm0 |
622 movlps [r1+r13*8], xmm1 | |
623 movhps [r1+r14*8], xmm1 | |
629 add r4, 4 | 624 add r4, 4 |
630 %else | 625 %else |
631 mov r6, [esp] | 626 mov r6, [esp] |
632 movzx r5, word [r6+r4*1-4] | 627 movzx r5, word [r6+r4-4] |
633 movzx r4, word [r6+r4*1-2] | 628 movzx r4, word [r6+r4-2] |
634 PREROTATEW [r1+r5*8], [r1+r4*8], xmm0 | 629 movlps [r1+r5*8], xmm0 |
635 movzx r5, word [r6+r3*1] | 630 movhps [r1+r4*8], xmm0 |
636 movzx r4, word [r6+r3*1+2] | 631 movzx r5, word [r6+r3] |
637 PREROTATEW [r1+r5*8], [r1+r4*8], xmm1 | 632 movzx r4, word [r6+r3+2] |
633 movlps [r1+r5*8], xmm1 | |
634 movhps [r1+r4*8], xmm1 | |
638 %endif | 635 %endif |
639 sub r3, 4 | 636 sub r3, 4 |
640 jns .pre | 637 jns .pre |
641 | 638 |
642 mov r5, r0 | 639 mov r5, r0 |
661 POSROTATESHUF r0, r1, r6, rtcos, rtsin | 658 POSROTATESHUF r0, r1, r6, rtcos, rtsin |
662 %ifdef ARCH_X86_64 | 659 %ifdef ARCH_X86_64 |
663 pop r14 | 660 pop r14 |
664 pop r13 | 661 pop r13 |
665 pop r12 | 662 pop r12 |
666 pop r11 | |
667 pop r10 | |
668 %else | 663 %else |
669 add esp, 12 | 664 add esp, 12 |
670 %endif | 665 %endif |
671 RET | 666 RET |