comparison x86/h264_intrapred.asm @ 12003:3b761226ea35 libavcodec

Add mmx/mmxext/ssse3 4x4 TM intra pred functions for vp8
author darkshikari
date Mon, 28 Jun 2010 23:53:07 +0000
parents 9c8584f68754
children 2002ea7c06f6
comparison
equal deleted inserted replaced
12002:9c8584f68754 12003:3b761226ea35
492 mov [r4+r2*0], r3d 492 mov [r4+r2*0], r3d
493 mov [r0+r2*0], r3d 493 mov [r0+r2*0], r3d
494 mov [r0+r2*1], r3d 494 mov [r0+r2*1], r3d
495 mov [r0+r2*2], r3d 495 mov [r0+r2*2], r3d
496 RET 496 RET
497
498 ;-----------------------------------------------------------------------------
499 ; void pred4x4_tm_vp8_{mmx,mmxext,ssse3}(uint8_t *src, const uint8_t *topright, int stride)
500 ;-----------------------------------------------------------------------------
501
; PRED4x4_TM_MMX <suffix>: expands to pred4x4_tm_vp8_<suffix>, a 4x4 intra
; predictor in which every output pixel is
;     saturate_u8(top[x] + left[y] - topleft)
; %1 is the function-name suffix; it also selects how a word is broadcast
; (two unpacks when %1 is "mmx", a single pshufw otherwise).
;
; Register use (assuming cglobal maps the three prototype arguments to
; r0..r2 -- cglobal is defined outside this view, TODO confirm):
;   r0  = src; moved up one row on entry, advanced two rows per iteration
;   r1  = topright on entry; never read, reused as scratch
;   r2  = stride
;   r1d/r3d = left-neighbour pixels of the two rows being produced
;   r4d = top-left pixel, r5d = iteration counter
;   mm0 = the four top pixels widened to 16-bit words, mm7 = zero
;
; NOTE(review): the prototype comment declares stride as int, yet r2 is used
; at full register width in the address expressions -- confirm the upper half
; is valid on 64-bit targets.
; NOTE(review): no emms is issued here; presumably the caller is responsible
; for MMX state -- confirm.
502 %macro PRED4x4_TM_MMX 1
503 cglobal pred4x4_tm_vp8_%1, 3,6
504 sub r0, r2 ; r0 -> row directly above the block
505 pxor mm7, mm7 ; mm7 = 0, used to zero-extend bytes
506 movd mm0, [r0] ; load the four top pixels
507 punpcklbw mm0, mm7 ; mm0 = top pixels as four 16-bit words
508 movzx r4d, byte [r0-1] ; r4d = top-left pixel
509 mov r5d, 2 ; two iterations, two rows each = four rows
510 .loop:
511 movzx r1d, byte [r0+r2*1-1] ; left pixel of the first row of this pair
512 movzx r3d, byte [r0+r2*2-1] ; left pixel of the second row of this pair
513 sub r1d, r4d ; left - topleft (may be negative)
514 sub r3d, r4d
515 movd mm2, r1d ; difference lands in the low word of mm2
516 movd mm4, r3d
; Broadcast the low 16-bit word of mm2/mm4 to all four word lanes.
517 %ifidn %1, mmx
518 punpcklwd mm2, mm2
519 punpcklwd mm4, mm4
520 punpckldq mm2, mm2
521 punpckldq mm4, mm4
522 %else
523 pshufw mm2, mm2, 0
524 pshufw mm4, mm4, 0
525 %endif
526 paddw mm2, mm0 ; top[x] + (left - topleft), as signed words
527 paddw mm4, mm0
528 packuswb mm2, mm2 ; saturate each word to an unsigned byte
529 packuswb mm4, mm4
530 movd [r0+r2*1], mm2 ; store four predicted pixels for the first row
531 movd [r0+r2*2], mm4 ; store four predicted pixels for the second row
532 lea r0, [r0+r2*2] ; advance two rows; lea leaves the flags alone
533 dec r5d
534 jg .loop ; repeat while the counter is still positive
535 REP_RET
536 %endmacro
537
; Instantiate the macro twice: pred4x4_tm_vp8_mmx (unpack-based broadcast)
; and pred4x4_tm_vp8_mmxext (pshufw-based broadcast).
538 PRED4x4_TM_MMX mmx
539 PRED4x4_TM_MMX mmxext
540
; pred4x4_tm_vp8_ssse3: fully unrolled variant of the 4x4 predictor above;
; each output pixel is saturate_u8(top[x] + left[y] - topleft).
; It uses pshufb on the 64-bit mm registers and has no loop.
;
; Register use (assuming cglobal maps the three prototype arguments to
; r0..r2 -- cglobal is defined outside this view, TODO confirm):
;   r0 = src, moved up one row on entry
;   r1 = topright on entry; never read, reused as r0 + 2*stride
;   r2 = stride
;   mm0 = top pixels as words, mm1 = zero, mm6 = shuffle mask,
;   mm7 = top-left term, mm2..mm5 = one output row each
;
; NOTE(review): tm_shuf is defined outside this view; the arithmetic below
; only works if it replicates byte 3 of the loaded dword (the pixel at
; offset -1) into the low byte of every word lane and zeroes the high
; bytes -- confirm against its definition.
; NOTE(review): no emms is issued here; presumably the caller is responsible
; for MMX state -- confirm.
541 cglobal pred4x4_tm_vp8_ssse3, 3,3
542 sub r0, r2 ; r0 -> row directly above the block
543 movq mm6, [tm_shuf] ; shuffle mask, see note in the header
544 pxor mm1, mm1 ; mm1 = 0, used to zero-extend bytes
545 movd mm0, [r0] ; load the four top pixels
546 punpcklbw mm0, mm1 ; mm0 = top pixels as four 16-bit words
547 movd mm7, [r0-4] ; four bytes ending at the top-left pixel
548 pshufb mm7, mm6 ; mm7 = top-left term in every word lane (per tm_shuf)
549 lea r1, [r0+r2*2] ; r1 = r0 + 2*stride, base for the last two rows
550 movd mm2, [r0+r2*1-4] ; four bytes ending at the left pixel of row 0
551 movd mm3, [r0+r2*2-4] ; ... of row 1
552 movd mm4, [r1+r2*1-4] ; ... of row 2
553 movd mm5, [r1+r2*2-4] ; ... of row 3
554 pshufb mm2, mm6 ; spread each left pixel across the word lanes
555 pshufb mm3, mm6
556 pshufb mm4, mm6
557 pshufb mm5, mm6
558 psubw mm2, mm7 ; left - topleft
559 psubw mm3, mm7
560 psubw mm4, mm7
561 psubw mm5, mm7
562 paddw mm2, mm0 ; + top[x]
563 paddw mm3, mm0
564 paddw mm4, mm0
565 paddw mm5, mm0
566 packuswb mm2, mm2 ; saturate each word to an unsigned byte
567 packuswb mm3, mm3
568 packuswb mm4, mm4
569 packuswb mm5, mm5
570 movd [r0+r2*1], mm2 ; store the four predicted rows, four bytes each
571 movd [r0+r2*2], mm3
572 movd [r1+r2*1], mm4
573 movd [r1+r2*2], mm5
574 RET