Mercurial > libavcodec.hg
comparison x86/h264_intrapred.asm @ 12003:3b761226ea35 libavcodec
Add mmx/mmxext/ssse3 4x4 TM intra pred functions for vp8
author | darkshikari |
---|---|
date | Mon, 28 Jun 2010 23:53:07 +0000 |
parents | 9c8584f68754 |
children | 2002ea7c06f6 |
comparison
equal
deleted
inserted
replaced
12002:9c8584f68754 | 12003:3b761226ea35 |
---|---|
492 mov [r4+r2*0], r3d | 492 mov [r4+r2*0], r3d |
493 mov [r0+r2*0], r3d | 493 mov [r0+r2*0], r3d |
494 mov [r0+r2*1], r3d | 494 mov [r0+r2*1], r3d |
495 mov [r0+r2*2], r3d | 495 mov [r0+r2*2], r3d |
496 RET | 496 RET |
497 | |
498 ;----------------------------------------------------------------------------- | |
499 ; void pred4x4_tm_vp8_<mmx/mmxext>(uint8_t *src, const uint8_t *topright, int stride) | |
500 ;----------------------------------------------------------------------------- | |
501 | |
; PRED4x4_TM_MMX <cpu-suffix>
; Emits pred4x4_tm_vp8_<cpu-suffix>: fills a 4x4 block of bytes where each
; output pixel is  top[x] + left[y] - topleft, saturated to an unsigned byte.
;   r0 = src (first row of the block), r2 = stride   (register mapping assumed
;        from the cglobal argument order in the header comment -- confirm)
;   r1 = topright argument; never read, its register is reused as scratch.
;   r3, r4, r5 = scratch (left pixel, top-left pixel, loop counter).
; The only difference between the two variants is how the 16-bit
; (left - topleft) value is broadcast to all four words of an MMX register.
; NOTE(review): stride is declared int; on 64-bit targets confirm the upper
;               half of r2 is valid before it is used for addressing.
; NOTE(review): no emms is issued here; presumably the caller clears MMX
;               state -- confirm.
502 %macro PRED4x4_TM_MMX 1 | |
; presumably x264asm cglobal: 3 arguments, 6 general-purpose registers used
503 cglobal pred4x4_tm_vp8_%1, 3,6 | |
; step r0 back one row so that [r0] is the row directly above the block
504 sub r0, r2 | |
; mm7 = 0, used to widen bytes to words
505 pxor mm7, mm7 | |
; mm0 = the four pixels above the block, zero-extended to 16-bit words
506 movd mm0, [r0] | |
507 punpcklbw mm0, mm7 | |
; r4d = pixel above-left of the block
508 movzx r4d, byte [r0-1] | |
; two iterations, each producing two rows
509 mov r5d, 2 | |
510 .loop: | |
; left neighbours of the next two rows
511 movzx r1d, byte [r0+r2*1-1] | |
512 movzx r3d, byte [r0+r2*2-1] | |
; left - topleft (may be negative; only the low 16 bits are used below)
513 sub r1d, r4d | |
514 sub r3d, r4d | |
515 movd mm2, r1d | |
516 movd mm4, r3d | |
; broadcast the low word of mm2/mm4 to all four word lanes
517 %ifidn %1, mmx | |
; plain MMX: two unpack steps (word -> dword -> qword)
518 punpcklwd mm2, mm2 | |
519 punpcklwd mm4, mm4 | |
520 punpckldq mm2, mm2 | |
521 punpckldq mm4, mm4 | |
522 %else | |
; pshufw with selector 0 copies word 0 into every lane
523 pshufw mm2, mm2, 0 | |
524 pshufw mm4, mm4, 0 | |
525 %endif | |
; add the top row: top[x] + left[y] - topleft, as 16-bit words
526 paddw mm2, mm0 | |
527 paddw mm4, mm0 | |
; signed words -> unsigned bytes with saturation to 0..255
528 packuswb mm2, mm2 | |
529 packuswb mm4, mm4 | |
; store four predicted pixels for each of the two rows
530 movd [r0+r2*1], mm2 | |
531 movd [r0+r2*2], mm4 | |
; advance two rows
532 lea r0, [r0+r2*2] | |
; loop while the counter is still positive after the decrement
533 dec r5d | |
534 jg .loop | |
; REP_RET is an external macro; presumably the return form intended for use
; directly after a conditional branch -- confirm against x86inc
535 REP_RET | |
536 %endmacro | |
537 | |
; instantiate the plain-MMX and the MMXEXT (pshufw) variants
538 PRED4x4_TM_MMX mmx | |
539 PRED4x4_TM_MMX mmxext | |
540 | |
; pred4x4_tm_vp8_ssse3: same result as the MMX/MMXEXT variants
; (top[x] + left[y] - topleft, saturated to unsigned bytes, for a 4x4 block),
; but fully unrolled and using pshufb on MMX registers for the broadcasts.
;   r0 = src, r2 = stride (mapping assumed from the cglobal argument order --
;        confirm); r1 (topright argument) is never read and is reused as a
;        row pointer.
; Each neighbour pixel is fetched with a 4-byte load ending at column -1, so
; bytes at columns -4..-2 are read as well and then discarded by the shuffle.
; NOTE(review): tm_shuf is defined outside this view; from its use here it
;               presumably replicates byte 3 of the loaded dword into the low
;               byte of every word lane with the high byte zeroed -- confirm.
; NOTE(review): stride is declared int; on 64-bit targets confirm the upper
;               half of r2 is valid before it is used for addressing.
; NOTE(review): no emms is issued here; presumably the caller clears MMX
;               state -- confirm.
; presumably x264asm cglobal: 3 arguments, 3 general-purpose registers used
541 cglobal pred4x4_tm_vp8_ssse3, 3,3 | |
; step r0 back one row so that [r0] is the row directly above the block
542 sub r0, r2 | |
; mm6 = shuffle control used for every broadcast below
543 movq mm6, [tm_shuf] | |
; mm0 = the four pixels above the block, zero-extended to 16-bit words
544 pxor mm1, mm1 | |
545 movd mm0, [r0] | |
546 punpcklbw mm0, mm1 | |
; mm7 = above-left pixel (last byte of the dword at [r0-4]) after the shuffle
547 movd mm7, [r0-4] | |
548 pshufb mm7, mm6 | |
; r1 = r0 + 2*stride, so four rows are reachable with scale factors 1 and 2
549 lea r1, [r0+r2*2] | |
; load the dwords ending at the left neighbour of block rows 0..3
550 movd mm2, [r0+r2*1-4] | |
551 movd mm3, [r0+r2*2-4] | |
552 movd mm4, [r1+r2*1-4] | |
553 movd mm5, [r1+r2*2-4] | |
; broadcast each row's left pixel across the register
554 pshufb mm2, mm6 | |
555 pshufb mm3, mm6 | |
556 pshufb mm4, mm6 | |
557 pshufb mm5, mm6 | |
; left[y] - topleft
558 psubw mm2, mm7 | |
559 psubw mm3, mm7 | |
560 psubw mm4, mm7 | |
561 psubw mm5, mm7 | |
; + top[x]
562 paddw mm2, mm0 | |
563 paddw mm3, mm0 | |
564 paddw mm4, mm0 | |
565 paddw mm5, mm0 | |
; signed words -> unsigned bytes with saturation to 0..255
566 packuswb mm2, mm2 | |
567 packuswb mm3, mm3 | |
568 packuswb mm4, mm4 | |
569 packuswb mm5, mm5 | |
; store four predicted pixels into each of the four block rows
570 movd [r0+r2*1], mm2 | |
571 movd [r0+r2*2], mm3 | |
572 movd [r1+r2*1], mm4 | |
573 movd [r1+r2*2], mm5 | |
574 RET |