comparison x86/vp8dsp.asm @ 12400:4f13b2ded34d libavcodec

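Fix segfaults in VP8 SIMD code on Win64 (and FATE/win64 failures): the row counter in each loop is now updated through the 32-bit register r4d (dec r4d / sub r4d, 2) instead of the full 64-bit r4, so the following jg no longer depends on the upper 32 bits of that register.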
author rbultje
date Mon, 23 Aug 2010 02:41:22 +0000
parents 2d15f62f4f8a
children e6e4059ea421
comparison
equal deleted inserted replaced
12399:020540442072 12400:4f13b2ded34d
209 movh [r0], m0 ; store 209 movh [r0], m0 ; store
210 210
211 ; go to next line 211 ; go to next line
212 add r0, r1 212 add r0, r1
213 add r2, r3 213 add r2, r3
214 dec r4 ; next row 214 dec r4d ; next row
215 jg .nextrow 215 jg .nextrow
216 REP_RET 216 REP_RET
217 217
218 cglobal put_vp8_epel%1_h4_ssse3, 6, 6, %3 218 cglobal put_vp8_epel%1_h4_ssse3, 6, 6, %3
219 shl r5d, 4 219 shl r5d, 4
240 movh [r0], m0 ; store 240 movh [r0], m0 ; store
241 241
242 ; go to next line 242 ; go to next line
243 add r0, r1 243 add r0, r1
244 add r2, r3 244 add r2, r3
245 dec r4 ; next row 245 dec r4d ; next row
246 jg .nextrow 246 jg .nextrow
247 REP_RET 247 REP_RET
248 248
249 cglobal put_vp8_epel%1_v4_ssse3, 7, 7, %2 249 cglobal put_vp8_epel%1_v4_ssse3, 7, 7, %2
250 shl r6d, 4 250 shl r6d, 4
279 movh [r0], m4 279 movh [r0], m4
280 280
281 ; go to next line 281 ; go to next line
282 add r0, r1 282 add r0, r1
283 add r2, r3 283 add r2, r3
284 dec r4 ; next row 284 dec r4d ; next row
285 jg .nextrow 285 jg .nextrow
286 REP_RET 286 REP_RET
287 287
288 cglobal put_vp8_epel%1_v6_ssse3, 7, 7, %2 288 cglobal put_vp8_epel%1_v6_ssse3, 7, 7, %2
289 lea r6d, [r6*3] 289 lea r6d, [r6*3]
326 movh [r0], m6 326 movh [r0], m6
327 327
328 ; go to next line 328 ; go to next line
329 add r0, r1 329 add r0, r1
330 add r2, r3 330 add r2, r3
331 dec r4 ; next row 331 dec r4d ; next row
332 jg .nextrow 332 jg .nextrow
333 REP_RET 333 REP_RET
334 %endmacro 334 %endmacro
335 335
336 INIT_MMX 336 INIT_MMX
379 movd [r0], mm3 ; store 379 movd [r0], mm3 ; store
380 380
381 ; go to next line 381 ; go to next line
382 add r0, r1 382 add r0, r1
383 add r2, r3 383 add r2, r3
384 dec r4 ; next row 384 dec r4d ; next row
385 jg .nextrow 385 jg .nextrow
386 REP_RET 386 REP_RET
387 387
388 ; 4x4 block, H-only 6-tap filter 388 ; 4x4 block, H-only 6-tap filter
389 cglobal put_vp8_epel4_h6_mmxext, 6, 6 389 cglobal put_vp8_epel4_h6_mmxext, 6, 6
436 movd [r0], mm1 ; store 436 movd [r0], mm1 ; store
437 437
438 ; go to next line 438 ; go to next line
439 add r0, r1 439 add r0, r1
440 add r2, r3 440 add r2, r3
441 dec r4 ; next row 441 dec r4d ; next row
442 jg .nextrow 442 jg .nextrow
443 REP_RET 443 REP_RET
444 444
445 INIT_XMM 445 INIT_XMM
446 cglobal put_vp8_epel8_h4_sse2, 6, 6, 10 446 cglobal put_vp8_epel8_h4_sse2, 6, 6, 10
484 movh [r0], m0 ; store 484 movh [r0], m0 ; store
485 485
486 ; go to next line 486 ; go to next line
487 add r0, r1 487 add r0, r1
488 add r2, r3 488 add r2, r3
489 dec r4 ; next row 489 dec r4d ; next row
490 jg .nextrow 490 jg .nextrow
491 REP_RET 491 REP_RET
492 492
493 cglobal put_vp8_epel8_h6_sse2, 6, 6, 14 493 cglobal put_vp8_epel8_h6_sse2, 6, 6, 14
494 lea r5d, [r5*3] 494 lea r5d, [r5*3]
546 movh [r0], m0 ; store 546 movh [r0], m0 ; store
547 547
548 ; go to next line 548 ; go to next line
549 add r0, r1 549 add r0, r1
550 add r2, r3 550 add r2, r3
551 dec r4 ; next row 551 dec r4d ; next row
552 jg .nextrow 552 jg .nextrow
553 REP_RET 553 REP_RET
554 554
555 %macro FILTER_V 3 555 %macro FILTER_V 3
556 ; 4x4 block, V-only 4-tap filter 556 ; 4x4 block, V-only 4-tap filter
599 movh [r0], m4 599 movh [r0], m4
600 600
601 ; go to next line 601 ; go to next line
602 add r0, r1 602 add r0, r1
603 add r2, r3 603 add r2, r3
604 dec r4 ; next row 604 dec r4d ; next row
605 jg .nextrow 605 jg .nextrow
606 REP_RET 606 REP_RET
607 607
608 608
609 ; 4x4 block, V-only 6-tap filter 609 ; 4x4 block, V-only 6-tap filter
664 movh [r0], m6 664 movh [r0], m6
665 665
666 ; go to next line 666 ; go to next line
667 add r0, r1 667 add r0, r1
668 add r2, r3 668 add r2, r3
669 dec r4 ; next row 669 dec r4d ; next row
670 jg .nextrow 670 jg .nextrow
671 REP_RET 671 REP_RET
672 %endmacro 672 %endmacro
673 673
674 INIT_MMX 674 INIT_MMX
716 movhps [r0+r1*1], m0 716 movhps [r0+r1*1], m0
717 %endif 717 %endif
718 718
719 lea r0, [r0+r1*2] 719 lea r0, [r0+r1*2]
720 lea r2, [r2+r3*2] 720 lea r2, [r2+r3*2]
721 sub r4, 2 721 sub r4d, 2
722 jg .nextrow 722 jg .nextrow
723 REP_RET 723 REP_RET
724 724
725 cglobal put_vp8_bilinear%2_h_%1, 7,7,%3 725 cglobal put_vp8_bilinear%2_h_%1, 7,7,%3
726 mov r6d, 8*16 726 mov r6d, 8*16
762 movhps [r0+r1*1], m0 762 movhps [r0+r1*1], m0
763 %endif 763 %endif
764 764
765 lea r0, [r0+r1*2] 765 lea r0, [r0+r1*2]
766 lea r2, [r2+r3*2] 766 lea r2, [r2+r3*2]
767 sub r4, 2 767 sub r4d, 2
768 jg .nextrow 768 jg .nextrow
769 REP_RET 769 REP_RET
770 %endmacro 770 %endmacro
771 771
772 INIT_MMX 772 INIT_MMX
805 movhps [r0+r1*1], m0 805 movhps [r0+r1*1], m0
806 %endif 806 %endif
807 807
808 lea r0, [r0+r1*2] 808 lea r0, [r0+r1*2]
809 lea r2, [r2+r3*2] 809 lea r2, [r2+r3*2]
810 sub r4, 2 810 sub r4d, 2
811 jg .nextrow 811 jg .nextrow
812 REP_RET 812 REP_RET
813 813
814 cglobal put_vp8_bilinear%1_h_ssse3, 7,7 814 cglobal put_vp8_bilinear%1_h_ssse3, 7,7
815 shl r5d, 4 815 shl r5d, 4
841 movhps [r0+r1*1], m0 841 movhps [r0+r1*1], m0
842 %endif 842 %endif
843 843
844 lea r0, [r0+r1*2] 844 lea r0, [r0+r1*2]
845 lea r2, [r2+r3*2] 845 lea r2, [r2+r3*2]
846 sub r4, 2 846 sub r4d, 2
847 jg .nextrow 847 jg .nextrow
848 REP_RET 848 REP_RET
849 %endmacro 849 %endmacro
850 850
851 INIT_MMX 851 INIT_MMX