Mercurial > libavcodec.hg
comparison x86/vp8dsp.asm @ 12400:4f13b2ded34d libavcodec
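Fix segfaults in VP8 SIMD code on Win64 (and FATE/win64 failures). The change makes every row-counter update in the loops shown below (`dec r4`, `sub r4, 2`) operate on the 32-bit register `r4d` instead of the full 64-bit `r4` — presumably because the counter is a 32-bit argument whose upper 32 bits are not guaranteed to be zero on Win64.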
author | rbultje |
---|---|
date | Mon, 23 Aug 2010 02:41:22 +0000 |
parents | 2d15f62f4f8a |
children | e6e4059ea421 |
comparison
equal
deleted
inserted
replaced
12399:020540442072 | 12400:4f13b2ded34d |
---|---|
209 movh [r0], m0 ; store | 209 movh [r0], m0 ; store |
210 | 210 |
211 ; go to next line | 211 ; go to next line |
212 add r0, r1 | 212 add r0, r1 |
213 add r2, r3 | 213 add r2, r3 |
214 dec r4 ; next row | 214 dec r4d ; next row |
215 jg .nextrow | 215 jg .nextrow |
216 REP_RET | 216 REP_RET |
217 | 217 |
218 cglobal put_vp8_epel%1_h4_ssse3, 6, 6, %3 | 218 cglobal put_vp8_epel%1_h4_ssse3, 6, 6, %3 |
219 shl r5d, 4 | 219 shl r5d, 4 |
240 movh [r0], m0 ; store | 240 movh [r0], m0 ; store |
241 | 241 |
242 ; go to next line | 242 ; go to next line |
243 add r0, r1 | 243 add r0, r1 |
244 add r2, r3 | 244 add r2, r3 |
245 dec r4 ; next row | 245 dec r4d ; next row |
246 jg .nextrow | 246 jg .nextrow |
247 REP_RET | 247 REP_RET |
248 | 248 |
249 cglobal put_vp8_epel%1_v4_ssse3, 7, 7, %2 | 249 cglobal put_vp8_epel%1_v4_ssse3, 7, 7, %2 |
250 shl r6d, 4 | 250 shl r6d, 4 |
279 movh [r0], m4 | 279 movh [r0], m4 |
280 | 280 |
281 ; go to next line | 281 ; go to next line |
282 add r0, r1 | 282 add r0, r1 |
283 add r2, r3 | 283 add r2, r3 |
284 dec r4 ; next row | 284 dec r4d ; next row |
285 jg .nextrow | 285 jg .nextrow |
286 REP_RET | 286 REP_RET |
287 | 287 |
288 cglobal put_vp8_epel%1_v6_ssse3, 7, 7, %2 | 288 cglobal put_vp8_epel%1_v6_ssse3, 7, 7, %2 |
289 lea r6d, [r6*3] | 289 lea r6d, [r6*3] |
326 movh [r0], m6 | 326 movh [r0], m6 |
327 | 327 |
328 ; go to next line | 328 ; go to next line |
329 add r0, r1 | 329 add r0, r1 |
330 add r2, r3 | 330 add r2, r3 |
331 dec r4 ; next row | 331 dec r4d ; next row |
332 jg .nextrow | 332 jg .nextrow |
333 REP_RET | 333 REP_RET |
334 %endmacro | 334 %endmacro |
335 | 335 |
336 INIT_MMX | 336 INIT_MMX |
379 movd [r0], mm3 ; store | 379 movd [r0], mm3 ; store |
380 | 380 |
381 ; go to next line | 381 ; go to next line |
382 add r0, r1 | 382 add r0, r1 |
383 add r2, r3 | 383 add r2, r3 |
384 dec r4 ; next row | 384 dec r4d ; next row |
385 jg .nextrow | 385 jg .nextrow |
386 REP_RET | 386 REP_RET |
387 | 387 |
388 ; 4x4 block, H-only 6-tap filter | 388 ; 4x4 block, H-only 6-tap filter |
389 cglobal put_vp8_epel4_h6_mmxext, 6, 6 | 389 cglobal put_vp8_epel4_h6_mmxext, 6, 6 |
436 movd [r0], mm1 ; store | 436 movd [r0], mm1 ; store |
437 | 437 |
438 ; go to next line | 438 ; go to next line |
439 add r0, r1 | 439 add r0, r1 |
440 add r2, r3 | 440 add r2, r3 |
441 dec r4 ; next row | 441 dec r4d ; next row |
442 jg .nextrow | 442 jg .nextrow |
443 REP_RET | 443 REP_RET |
444 | 444 |
445 INIT_XMM | 445 INIT_XMM |
446 cglobal put_vp8_epel8_h4_sse2, 6, 6, 10 | 446 cglobal put_vp8_epel8_h4_sse2, 6, 6, 10 |
484 movh [r0], m0 ; store | 484 movh [r0], m0 ; store |
485 | 485 |
486 ; go to next line | 486 ; go to next line |
487 add r0, r1 | 487 add r0, r1 |
488 add r2, r3 | 488 add r2, r3 |
489 dec r4 ; next row | 489 dec r4d ; next row |
490 jg .nextrow | 490 jg .nextrow |
491 REP_RET | 491 REP_RET |
492 | 492 |
493 cglobal put_vp8_epel8_h6_sse2, 6, 6, 14 | 493 cglobal put_vp8_epel8_h6_sse2, 6, 6, 14 |
494 lea r5d, [r5*3] | 494 lea r5d, [r5*3] |
546 movh [r0], m0 ; store | 546 movh [r0], m0 ; store |
547 | 547 |
548 ; go to next line | 548 ; go to next line |
549 add r0, r1 | 549 add r0, r1 |
550 add r2, r3 | 550 add r2, r3 |
551 dec r4 ; next row | 551 dec r4d ; next row |
552 jg .nextrow | 552 jg .nextrow |
553 REP_RET | 553 REP_RET |
554 | 554 |
555 %macro FILTER_V 3 | 555 %macro FILTER_V 3 |
556 ; 4x4 block, V-only 4-tap filter | 556 ; 4x4 block, V-only 4-tap filter |
599 movh [r0], m4 | 599 movh [r0], m4 |
600 | 600 |
601 ; go to next line | 601 ; go to next line |
602 add r0, r1 | 602 add r0, r1 |
603 add r2, r3 | 603 add r2, r3 |
604 dec r4 ; next row | 604 dec r4d ; next row |
605 jg .nextrow | 605 jg .nextrow |
606 REP_RET | 606 REP_RET |
607 | 607 |
608 | 608 |
609 ; 4x4 block, V-only 6-tap filter | 609 ; 4x4 block, V-only 6-tap filter |
664 movh [r0], m6 | 664 movh [r0], m6 |
665 | 665 |
666 ; go to next line | 666 ; go to next line |
667 add r0, r1 | 667 add r0, r1 |
668 add r2, r3 | 668 add r2, r3 |
669 dec r4 ; next row | 669 dec r4d ; next row |
670 jg .nextrow | 670 jg .nextrow |
671 REP_RET | 671 REP_RET |
672 %endmacro | 672 %endmacro |
673 | 673 |
674 INIT_MMX | 674 INIT_MMX |
716 movhps [r0+r1*1], m0 | 716 movhps [r0+r1*1], m0 |
717 %endif | 717 %endif |
718 | 718 |
719 lea r0, [r0+r1*2] | 719 lea r0, [r0+r1*2] |
720 lea r2, [r2+r3*2] | 720 lea r2, [r2+r3*2] |
721 sub r4, 2 | 721 sub r4d, 2 |
722 jg .nextrow | 722 jg .nextrow |
723 REP_RET | 723 REP_RET |
724 | 724 |
725 cglobal put_vp8_bilinear%2_h_%1, 7,7,%3 | 725 cglobal put_vp8_bilinear%2_h_%1, 7,7,%3 |
726 mov r6d, 8*16 | 726 mov r6d, 8*16 |
762 movhps [r0+r1*1], m0 | 762 movhps [r0+r1*1], m0 |
763 %endif | 763 %endif |
764 | 764 |
765 lea r0, [r0+r1*2] | 765 lea r0, [r0+r1*2] |
766 lea r2, [r2+r3*2] | 766 lea r2, [r2+r3*2] |
767 sub r4, 2 | 767 sub r4d, 2 |
768 jg .nextrow | 768 jg .nextrow |
769 REP_RET | 769 REP_RET |
770 %endmacro | 770 %endmacro |
771 | 771 |
772 INIT_MMX | 772 INIT_MMX |
805 movhps [r0+r1*1], m0 | 805 movhps [r0+r1*1], m0 |
806 %endif | 806 %endif |
807 | 807 |
808 lea r0, [r0+r1*2] | 808 lea r0, [r0+r1*2] |
809 lea r2, [r2+r3*2] | 809 lea r2, [r2+r3*2] |
810 sub r4, 2 | 810 sub r4d, 2 |
811 jg .nextrow | 811 jg .nextrow |
812 REP_RET | 812 REP_RET |
813 | 813 |
814 cglobal put_vp8_bilinear%1_h_ssse3, 7,7 | 814 cglobal put_vp8_bilinear%1_h_ssse3, 7,7 |
815 shl r5d, 4 | 815 shl r5d, 4 |
841 movhps [r0+r1*1], m0 | 841 movhps [r0+r1*1], m0 |
842 %endif | 842 %endif |
843 | 843 |
844 lea r0, [r0+r1*2] | 844 lea r0, [r0+r1*2] |
845 lea r2, [r2+r3*2] | 845 lea r2, [r2+r3*2] |
846 sub r4, 2 | 846 sub r4d, 2 |
847 jg .nextrow | 847 jg .nextrow |
848 REP_RET | 848 REP_RET |
849 %endmacro | 849 %endmacro |
850 | 850 |
851 INIT_MMX | 851 INIT_MMX |