comparison i386/snowdsp_mmx.c @ 5592:fadffa1e5aef libavcodec

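Prevent an overflow in the first vertical lifting step: halve the intermediate value (arithmetic shift right by 1) instead of doubling it before accumulating, and reduce the rounding constant (0x8000>>13 becomes 0x8000>>14) and the final shift (3 becomes 2) to match.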
author michael
date Sat, 25 Aug 2007 16:28:45 +0000
parents 642588a60570
children bd015f9ea964
comparison
Legend: equal, deleted, inserted, replaced
5591:642588a60570 5592:fadffa1e5aef
446 "mov %4, %%"REG_S" \n\t" 446 "mov %4, %%"REG_S" \n\t"
447 447
448 snow_vertical_compose_sse2_load(REG_S,"xmm0","xmm2","xmm4","xmm6") 448 snow_vertical_compose_sse2_load(REG_S,"xmm0","xmm2","xmm4","xmm6")
449 snow_vertical_compose_sse2_add(REG_a,"xmm0","xmm2","xmm4","xmm6") 449 snow_vertical_compose_sse2_add(REG_a,"xmm0","xmm2","xmm4","xmm6")
450 snow_vertical_compose_sse2_move("xmm0","xmm2","xmm4","xmm6","xmm1","xmm3","xmm5","xmm7") 450 snow_vertical_compose_sse2_move("xmm0","xmm2","xmm4","xmm6","xmm1","xmm3","xmm5","xmm7")
451 snow_vertical_compose_sse2_r2r_add("xmm0","xmm2","xmm4","xmm6","xmm0","xmm2","xmm4","xmm6") 451 snow_vertical_compose_sse2_sra("1","xmm0","xmm2","xmm4","xmm6")
452 snow_vertical_compose_sse2_r2r_add("xmm1","xmm3","xmm5","xmm7","xmm0","xmm2","xmm4","xmm6") 452 snow_vertical_compose_sse2_r2r_add("xmm1","xmm3","xmm5","xmm7","xmm0","xmm2","xmm4","xmm6")
453 453
454 "pcmpeqd %%xmm1, %%xmm1 \n\t" 454 "pcmpeqd %%xmm1, %%xmm1 \n\t"
455 "psllw $15, %%xmm1 \n\t" 455 "psllw $15, %%xmm1 \n\t"
456 "psrlw $13, %%xmm1 \n\t" 456 "psrlw $14, %%xmm1 \n\t"
457 "mov %5, %%"REG_a" \n\t" 457 "mov %5, %%"REG_a" \n\t"
458 458
459 snow_vertical_compose_sse2_r2r_add("xmm1","xmm1","xmm1","xmm1","xmm0","xmm2","xmm4","xmm6") 459 snow_vertical_compose_sse2_r2r_add("xmm1","xmm1","xmm1","xmm1","xmm0","xmm2","xmm4","xmm6")
460 snow_vertical_compose_sse2_sra("3","xmm0","xmm2","xmm4","xmm6") 460 snow_vertical_compose_sse2_sra("2","xmm0","xmm2","xmm4","xmm6")
461 snow_vertical_compose_sse2_load(REG_a,"xmm1","xmm3","xmm5","xmm7") 461 snow_vertical_compose_sse2_load(REG_a,"xmm1","xmm3","xmm5","xmm7")
462 snow_vertical_compose_sse2_sub("xmm0","xmm2","xmm4","xmm6","xmm1","xmm3","xmm5","xmm7") 462 snow_vertical_compose_sse2_sub("xmm0","xmm2","xmm4","xmm6","xmm1","xmm3","xmm5","xmm7")
463 snow_vertical_compose_sse2_store(REG_a,"xmm1","xmm3","xmm5","xmm7") 463 snow_vertical_compose_sse2_store(REG_a,"xmm1","xmm3","xmm5","xmm7")
464 "mov %3, %%"REG_c" \n\t" 464 "mov %3, %%"REG_c" \n\t"
465 snow_vertical_compose_sse2_load(REG_S,"xmm0","xmm2","xmm4","xmm6") 465 snow_vertical_compose_sse2_load(REG_S,"xmm0","xmm2","xmm4","xmm6")
548 "mov %4, %%"REG_S" \n\t" 548 "mov %4, %%"REG_S" \n\t"
549 549
550 snow_vertical_compose_mmx_load(REG_S,"mm0","mm2","mm4","mm6") 550 snow_vertical_compose_mmx_load(REG_S,"mm0","mm2","mm4","mm6")
551 snow_vertical_compose_mmx_add(REG_a,"mm0","mm2","mm4","mm6") 551 snow_vertical_compose_mmx_add(REG_a,"mm0","mm2","mm4","mm6")
552 snow_vertical_compose_mmx_move("mm0","mm2","mm4","mm6","mm1","mm3","mm5","mm7") 552 snow_vertical_compose_mmx_move("mm0","mm2","mm4","mm6","mm1","mm3","mm5","mm7")
553 snow_vertical_compose_mmx_r2r_add("mm0","mm2","mm4","mm6","mm0","mm2","mm4","mm6") 553 snow_vertical_compose_mmx_sra("1","mm0","mm2","mm4","mm6")
554 snow_vertical_compose_mmx_r2r_add("mm1","mm3","mm5","mm7","mm0","mm2","mm4","mm6") 554 snow_vertical_compose_mmx_r2r_add("mm1","mm3","mm5","mm7","mm0","mm2","mm4","mm6")
555 555
556 "pcmpeqw %%mm1, %%mm1 \n\t" 556 "pcmpeqw %%mm1, %%mm1 \n\t"
557 "psllw $15, %%mm1 \n\t" 557 "psllw $15, %%mm1 \n\t"
558 "psrlw $13, %%mm1 \n\t" 558 "psrlw $14, %%mm1 \n\t"
559 "mov %5, %%"REG_a" \n\t" 559 "mov %5, %%"REG_a" \n\t"
560 560
561 snow_vertical_compose_mmx_r2r_add("mm1","mm1","mm1","mm1","mm0","mm2","mm4","mm6") 561 snow_vertical_compose_mmx_r2r_add("mm1","mm1","mm1","mm1","mm0","mm2","mm4","mm6")
562 snow_vertical_compose_mmx_sra("3","mm0","mm2","mm4","mm6") 562 snow_vertical_compose_mmx_sra("2","mm0","mm2","mm4","mm6")
563 snow_vertical_compose_mmx_load(REG_a,"mm1","mm3","mm5","mm7") 563 snow_vertical_compose_mmx_load(REG_a,"mm1","mm3","mm5","mm7")
564 snow_vertical_compose_mmx_sub("mm0","mm2","mm4","mm6","mm1","mm3","mm5","mm7") 564 snow_vertical_compose_mmx_sub("mm0","mm2","mm4","mm6","mm1","mm3","mm5","mm7")
565 snow_vertical_compose_mmx_store(REG_a,"mm1","mm3","mm5","mm7") 565 snow_vertical_compose_mmx_store(REG_a,"mm1","mm3","mm5","mm7")
566 "mov %3, %%"REG_c" \n\t" 566 "mov %3, %%"REG_c" \n\t"
567 snow_vertical_compose_mmx_load(REG_S,"mm0","mm2","mm4","mm6") 567 snow_vertical_compose_mmx_load(REG_S,"mm0","mm2","mm4","mm6")