libavcodec: comparison of x86/h264dsp_mmx.c @ 9739:96e6cab7470a

H264: Fix out of bounds reads in SSSE3 MC

Reading above src[-2] isn't safe, so move loads and palignr ahead 3 pixels
to load starting at the first pixel actually used.

Fixes issue941.
author astrange
date Sat, 30 May 2009 22:19:14 +0000
parents daee921fb6bb
children 54456267c77c
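
The macros changed below vectorize H.264's six-tap (1, -5, 20, 20, -5, 1) horizontal half-pel interpolation, so the leftmost byte any output pixel needs is src[x-2]. As a reference point for the hunks that follow, here is a minimal scalar sketch of that filter, written from the H.264 definition rather than taken from this file (the function name is invented):

#include <stdint.h>

/* Scalar model of the horizontal lowpass the SSSE3 macros implement:
 * out[x] = clip((src[x-2] + src[x+3] - 5*(src[x-1] + src[x+2])
 *                + 20*(src[x] + src[x+1]) + 16) >> 5, 0, 255) */
static uint8_t h264_hpel_h_ref(const uint8_t *src, int x)
{
    int v = src[x-2] + src[x+3]
          - 5  * (src[x-1] + src[x+2])
          + 20 * (src[x]   + src[x+1])
          + 16;
    v >>= 5;
    return v < 0 ? 0 : v > 255 ? 255 : (uint8_t)v;
}

Since src[x-2] is the first pixel actually used, the old lddqu at -5(%0) touched src[-5..-3], bytes the filter never reads and which the commit message says are not safe to access; the new code loads at -2(%0) instead (and at 6(%0) for the upper half of the 16-wide path), starting at that first pixel.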
comparing 9738:d5929e456b07 with 9739:96e6cab7470a
@@ -1495,51 +1495,51 @@
     __asm__ volatile(\
     "pxor %%xmm15, %%xmm15 \n\t"\
     "movdqa %6, %%xmm14 \n\t"\
     "movdqa %7, %%xmm13 \n\t"\
     "1: \n\t"\
-    "lddqu 3(%0), %%xmm1 \n\t"\
-    "lddqu -5(%0), %%xmm7 \n\t"\
+    "lddqu 6(%0), %%xmm1 \n\t"\
+    "lddqu -2(%0), %%xmm7 \n\t"\
     "movdqa %%xmm1, %%xmm0 \n\t"\
     "punpckhbw %%xmm15, %%xmm1 \n\t"\
     "punpcklbw %%xmm15, %%xmm0 \n\t"\
     "punpcklbw %%xmm15, %%xmm7 \n\t"\
     "movdqa %%xmm1, %%xmm2 \n\t"\
     "movdqa %%xmm0, %%xmm6 \n\t"\
     "movdqa %%xmm1, %%xmm3 \n\t"\
     "movdqa %%xmm0, %%xmm8 \n\t"\
     "movdqa %%xmm1, %%xmm4 \n\t"\
     "movdqa %%xmm0, %%xmm9 \n\t"\
-    "movdqa %%xmm1, %%xmm5 \n\t"\
-    "movdqa %%xmm0, %%xmm10 \n\t"\
-    "palignr $6, %%xmm0, %%xmm5 \n\t"\
-    "palignr $6, %%xmm7, %%xmm10\n\t"\
-    "palignr $8, %%xmm0, %%xmm4 \n\t"\
-    "palignr $8, %%xmm7, %%xmm9 \n\t"\
-    "palignr $10,%%xmm0, %%xmm3 \n\t"\
-    "palignr $10,%%xmm7, %%xmm8 \n\t"\
-    "paddw %%xmm1, %%xmm5 \n\t"\
-    "paddw %%xmm0, %%xmm10 \n\t"\
-    "palignr $12,%%xmm0, %%xmm2 \n\t"\
-    "palignr $12,%%xmm7, %%xmm6 \n\t"\
-    "palignr $14,%%xmm0, %%xmm1 \n\t"\
-    "palignr $14,%%xmm7, %%xmm0 \n\t"\
+    "movdqa %%xmm0, %%xmm12 \n\t"\
+    "movdqa %%xmm1, %%xmm11 \n\t"\
+    "palignr $10,%%xmm0, %%xmm11\n\t"\
+    "palignr $10,%%xmm7, %%xmm12\n\t"\
+    "palignr $2, %%xmm0, %%xmm4 \n\t"\
+    "palignr $2, %%xmm7, %%xmm9 \n\t"\
+    "palignr $4, %%xmm0, %%xmm3 \n\t"\
+    "palignr $4, %%xmm7, %%xmm8 \n\t"\
+    "palignr $6, %%xmm0, %%xmm2 \n\t"\
+    "palignr $6, %%xmm7, %%xmm6 \n\t"\
+    "paddw %%xmm0 ,%%xmm11 \n\t"\
+    "palignr $8, %%xmm0, %%xmm1 \n\t"\
+    "palignr $8, %%xmm7, %%xmm0 \n\t"\
+    "paddw %%xmm12,%%xmm7 \n\t"\
     "paddw %%xmm3, %%xmm2 \n\t"\
     "paddw %%xmm8, %%xmm6 \n\t"\
     "paddw %%xmm4, %%xmm1 \n\t"\
     "paddw %%xmm9, %%xmm0 \n\t"\
     "psllw $2, %%xmm2 \n\t"\
     "psllw $2, %%xmm6 \n\t"\
     "psubw %%xmm1, %%xmm2 \n\t"\
     "psubw %%xmm0, %%xmm6 \n\t"\
-    "paddw %%xmm13,%%xmm5 \n\t"\
-    "paddw %%xmm13,%%xmm10 \n\t"\
+    "paddw %%xmm13,%%xmm11 \n\t"\
+    "paddw %%xmm13,%%xmm7 \n\t"\
     "pmullw %%xmm14,%%xmm2 \n\t"\
     "pmullw %%xmm14,%%xmm6 \n\t"\
     "lddqu (%2), %%xmm3 \n\t"\
-    "paddw %%xmm5, %%xmm2 \n\t"\
-    "paddw %%xmm10,%%xmm6 \n\t"\
+    "paddw %%xmm11,%%xmm2 \n\t"\
+    "paddw %%xmm7, %%xmm6 \n\t"\
     "psraw $5, %%xmm2 \n\t"\
     "psraw $5, %%xmm6 \n\t"\
     "packuswb %%xmm2,%%xmm6 \n\t"\
     "pavgb %%xmm3, %%xmm6 \n\t"\
     OP(%%xmm6, (%1), %%xmm4, dqa)\
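
To see why the palignr immediates move together with the load offsets in the hunk above, here is a rough scalar model of how the low half of the 16-wide path gathers its tap windows after the change. It is an illustration only: the helper names are invented and the byte layout is inferred from the lddqu/punpcklbw sequence.

#include <stdint.h>
#include <string.h>

/* Emulates PALIGNR on word data: concatenate hi:lo (hi in the upper 16
 * bytes), shift the 32-byte composite right by 'bytes', keep the low
 * 16 bytes. */
static void palignr_words(int16_t dst[8], const int16_t hi[8],
                          const int16_t lo[8], int bytes)
{
    uint8_t tmp[32];
    memcpy(tmp,      lo, 16);
    memcpy(tmp + 16, hi, 16);
    memcpy(dst, tmp + bytes, 16);
}

/* After lddqu -2(%0)/6(%0) and punpcklbw, one register pair holds the
 * words of src[-2..5] and src[6..13].  palignr by 2, 4, ..., 10 bytes
 * (1..5 words) then yields the windows src[x-1], src[x], ..., src[x+3]
 * for x = 0..7, i.e. the remaining taps of the six-tap filter. */
static void gather_low_half_taps(const uint8_t *src, int16_t win[6][8])
{
    int16_t lo[8], hi[8];
    for (int i = 0; i < 8; i++) {
        lo[i] = src[i - 2];   /* words of src[-2..5]  */
        hi[i] = src[i + 6];   /* words of src[6..13]  */
    }
    memcpy(win[0], lo, 16);                    /* src[x-2]   */
    for (int k = 1; k <= 5; k++)
        palignr_words(win[k], hi, lo, 2 * k);  /* src[x-2+k] */
}

The old code reached the same windows from loads at -5(%0) and 3(%0) with palignr immediates of $6 through $14; the extracted taps were identical, but the raw 16-byte load at src-5 also touched the three bytes below src[-2] that this commit removes.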
1575 "movdqa %0, %%xmm6 \n\t"\ 1575 "movdqa %0, %%xmm6 \n\t"\
1576 :: "m"(ff_pw_5)\ 1576 :: "m"(ff_pw_5)\
1577 );\ 1577 );\
1578 do{\ 1578 do{\
1579 __asm__ volatile(\ 1579 __asm__ volatile(\
1580 "lddqu -5(%0), %%xmm1 \n\t"\ 1580 "lddqu -2(%0), %%xmm1 \n\t"\
1581 "movdqa %%xmm1, %%xmm0 \n\t"\ 1581 "movdqa %%xmm1, %%xmm0 \n\t"\
1582 "punpckhbw %%xmm7, %%xmm1 \n\t"\ 1582 "punpckhbw %%xmm7, %%xmm1 \n\t"\
1583 "punpcklbw %%xmm7, %%xmm0 \n\t"\ 1583 "punpcklbw %%xmm7, %%xmm0 \n\t"\
1584 "movdqa %%xmm1, %%xmm2 \n\t"\ 1584 "movdqa %%xmm1, %%xmm2 \n\t"\
1585 "movdqa %%xmm1, %%xmm3 \n\t"\ 1585 "movdqa %%xmm1, %%xmm3 \n\t"\
1586 "movdqa %%xmm1, %%xmm4 \n\t"\ 1586 "movdqa %%xmm1, %%xmm4 \n\t"\
1587 "movdqa %%xmm1, %%xmm5 \n\t"\ 1587 "movdqa %%xmm1, %%xmm5 \n\t"\
1588 "palignr $6, %%xmm0, %%xmm5 \n\t"\ 1588 "palignr $2, %%xmm0, %%xmm4 \n\t"\
1589 "palignr $8, %%xmm0, %%xmm4 \n\t"\ 1589 "palignr $4, %%xmm0, %%xmm3 \n\t"\
1590 "palignr $10,%%xmm0, %%xmm3 \n\t"\ 1590 "palignr $6, %%xmm0, %%xmm2 \n\t"\
1591 "paddw %%xmm1, %%xmm5 \n\t"\ 1591 "palignr $8, %%xmm0, %%xmm1 \n\t"\
1592 "palignr $12,%%xmm0, %%xmm2 \n\t"\ 1592 "palignr $10,%%xmm0, %%xmm5 \n\t"\
1593 "palignr $14,%%xmm0, %%xmm1 \n\t"\ 1593 "paddw %%xmm5, %%xmm0 \n\t"\
1594 "paddw %%xmm3, %%xmm2 \n\t"\ 1594 "paddw %%xmm3, %%xmm2 \n\t"\
1595 "paddw %%xmm4, %%xmm1 \n\t"\ 1595 "paddw %%xmm4, %%xmm1 \n\t"\
1596 "psllw $2, %%xmm2 \n\t"\ 1596 "psllw $2, %%xmm2 \n\t"\
1597 "movq (%2), %%xmm3 \n\t"\ 1597 "movq (%2), %%xmm3 \n\t"\
1598 "psubw %%xmm1, %%xmm2 \n\t"\ 1598 "psubw %%xmm1, %%xmm2 \n\t"\
1599 "paddw %5, %%xmm5 \n\t"\ 1599 "paddw %5, %%xmm0 \n\t"\
1600 "pmullw %%xmm6, %%xmm2 \n\t"\ 1600 "pmullw %%xmm6, %%xmm2 \n\t"\
1601 "paddw %%xmm5, %%xmm2 \n\t"\ 1601 "paddw %%xmm0, %%xmm2 \n\t"\
1602 "psraw $5, %%xmm2 \n\t"\ 1602 "psraw $5, %%xmm2 \n\t"\
1603 "packuswb %%xmm2, %%xmm2 \n\t"\ 1603 "packuswb %%xmm2, %%xmm2 \n\t"\
1604 "pavgb %%xmm3, %%xmm2 \n\t"\ 1604 "pavgb %%xmm3, %%xmm2 \n\t"\
1605 OP(%%xmm2, (%1), %%xmm4, q)\ 1605 OP(%%xmm2, (%1), %%xmm4, q)\
1606 "add %4, %0 \n\t"\ 1606 "add %4, %0 \n\t"\
@@ -1619,31 +1619,31 @@
     int h=8;\
     __asm__ volatile(\
     "pxor %%xmm7, %%xmm7 \n\t"\
     "movdqa %5, %%xmm6 \n\t"\
     "1: \n\t"\
-    "lddqu -5(%0), %%xmm1 \n\t"\
+    "lddqu -2(%0), %%xmm1 \n\t"\
     "movdqa %%xmm1, %%xmm0 \n\t"\
     "punpckhbw %%xmm7, %%xmm1 \n\t"\
     "punpcklbw %%xmm7, %%xmm0 \n\t"\
     "movdqa %%xmm1, %%xmm2 \n\t"\
     "movdqa %%xmm1, %%xmm3 \n\t"\
     "movdqa %%xmm1, %%xmm4 \n\t"\
     "movdqa %%xmm1, %%xmm5 \n\t"\
-    "palignr $6, %%xmm0, %%xmm5 \n\t"\
-    "palignr $8, %%xmm0, %%xmm4 \n\t"\
-    "palignr $10,%%xmm0, %%xmm3 \n\t"\
-    "paddw %%xmm1, %%xmm5 \n\t"\
-    "palignr $12,%%xmm0, %%xmm2 \n\t"\
-    "palignr $14,%%xmm0, %%xmm1 \n\t"\
+    "palignr $2, %%xmm0, %%xmm4 \n\t"\
+    "palignr $4, %%xmm0, %%xmm3 \n\t"\
+    "palignr $6, %%xmm0, %%xmm2 \n\t"\
+    "palignr $8, %%xmm0, %%xmm1 \n\t"\
+    "palignr $10,%%xmm0, %%xmm5 \n\t"\
+    "paddw %%xmm5, %%xmm0 \n\t"\
     "paddw %%xmm3, %%xmm2 \n\t"\
     "paddw %%xmm4, %%xmm1 \n\t"\
     "psllw $2, %%xmm2 \n\t"\
     "psubw %%xmm1, %%xmm2 \n\t"\
-    "paddw %6, %%xmm5 \n\t"\
+    "paddw %6, %%xmm0 \n\t"\
     "pmullw %%xmm6, %%xmm2 \n\t"\
-    "paddw %%xmm5, %%xmm2 \n\t"\
+    "paddw %%xmm0, %%xmm2 \n\t"\
     "psraw $5, %%xmm2 \n\t"\
     "packuswb %%xmm2, %%xmm2 \n\t"\
     OP(%%xmm2, (%1), %%xmm4, q)\
     "add %3, %0 \n\t"\
     "add %4, %1 \n\t"\