Mercurial > mplayer.hg
comparison postproc/swscale_template.c @ 16739:e91f944f6ed9
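Change unsigned->signed and int->long; this fits the asm code better on 64-bit
systems.
Also fixes several crashes, because (long)-i is incorrect if i is unsigned: the negation is performed in unsigned arithmetic before the cast, so on 64-bit systems the result is a large positive value instead of the intended negative offset.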
author | reimar |
---|---|
date | Wed, 12 Oct 2005 12:11:27 +0000 |
parents | e4360060b79a |
children | 401b440a6d76 |
comparison
equal
deleted
inserted
replaced
16738:482b7842476e | 16739:e91f944f6ed9 |
---|---|
755 #define WRITEYUY2(dst, dstw, index) REAL_WRITEYUY2(dst, dstw, index) | 755 #define WRITEYUY2(dst, dstw, index) REAL_WRITEYUY2(dst, dstw, index) |
756 | 756 |
757 | 757 |
758 static inline void RENAME(yuv2yuvX)(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize, | 758 static inline void RENAME(yuv2yuvX)(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize, |
759 int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize, | 759 int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize, |
760 uint8_t *dest, uint8_t *uDest, uint8_t *vDest, int dstW, int chrDstW) | 760 uint8_t *dest, uint8_t *uDest, uint8_t *vDest, long dstW, long chrDstW) |
761 { | 761 { |
762 #ifdef HAVE_MMX | 762 #ifdef HAVE_MMX |
763 if(uDest != NULL) | 763 if(uDest != NULL) |
764 { | 764 { |
765 asm volatile( | 765 asm volatile( |
766 YSCALEYUV2YV12X(0, CHR_MMX_FILTER_OFFSET) | 766 YSCALEYUV2YV12X(0, CHR_MMX_FILTER_OFFSET) |
767 :: "r" (&c->redDither), | 767 :: "r" (&c->redDither), |
768 "r" (uDest), "p" ((long)chrDstW) | 768 "r" (uDest), "p" (chrDstW) |
769 : "%"REG_a, "%"REG_d, "%"REG_S | 769 : "%"REG_a, "%"REG_d, "%"REG_S |
770 ); | 770 ); |
771 | 771 |
772 asm volatile( | 772 asm volatile( |
773 YSCALEYUV2YV12X(4096, CHR_MMX_FILTER_OFFSET) | 773 YSCALEYUV2YV12X(4096, CHR_MMX_FILTER_OFFSET) |
774 :: "r" (&c->redDither), | 774 :: "r" (&c->redDither), |
775 "r" (vDest), "p" ((long)chrDstW) | 775 "r" (vDest), "p" (chrDstW) |
776 : "%"REG_a, "%"REG_d, "%"REG_S | 776 : "%"REG_a, "%"REG_d, "%"REG_S |
777 ); | 777 ); |
778 } | 778 } |
779 | 779 |
780 asm volatile( | 780 asm volatile( |
781 YSCALEYUV2YV12X(0, LUM_MMX_FILTER_OFFSET) | 781 YSCALEYUV2YV12X(0, LUM_MMX_FILTER_OFFSET) |
782 :: "r" (&c->redDither), | 782 :: "r" (&c->redDither), |
783 "r" (dest), "p" ((long)dstW) | 783 "r" (dest), "p" (dstW) |
784 : "%"REG_a, "%"REG_d, "%"REG_S | 784 : "%"REG_a, "%"REG_d, "%"REG_S |
785 ); | 785 ); |
786 #else | 786 #else |
787 #ifdef HAVE_ALTIVEC | 787 #ifdef HAVE_ALTIVEC |
788 yuv2yuvX_altivec_real(lumFilter, lumSrc, lumFilterSize, | 788 yuv2yuvX_altivec_real(lumFilter, lumSrc, lumFilterSize, |
804 chrFilter, chrSrc, chrFilterSize, | 804 chrFilter, chrSrc, chrFilterSize, |
805 dest, uDest, dstW, chrDstW, dstFormat); | 805 dest, uDest, dstW, chrDstW, dstFormat); |
806 } | 806 } |
807 | 807 |
808 static inline void RENAME(yuv2yuv1)(int16_t *lumSrc, int16_t *chrSrc, | 808 static inline void RENAME(yuv2yuv1)(int16_t *lumSrc, int16_t *chrSrc, |
809 uint8_t *dest, uint8_t *uDest, uint8_t *vDest, int dstW, int chrDstW) | 809 uint8_t *dest, uint8_t *uDest, uint8_t *vDest, long dstW, long chrDstW) |
810 { | 810 { |
811 #ifdef HAVE_MMX | 811 #ifdef HAVE_MMX |
812 if(uDest != NULL) | 812 if(uDest != NULL) |
813 { | 813 { |
814 asm volatile( | 814 asm volatile( |
815 YSCALEYUV2YV121 | 815 YSCALEYUV2YV121 |
816 :: "r" (chrSrc + chrDstW), "r" (uDest + chrDstW), | 816 :: "r" (chrSrc + chrDstW), "r" (uDest + chrDstW), |
817 "g" ((long)-chrDstW) | 817 "g" (-chrDstW) |
818 : "%"REG_a | 818 : "%"REG_a |
819 ); | 819 ); |
820 | 820 |
821 asm volatile( | 821 asm volatile( |
822 YSCALEYUV2YV121 | 822 YSCALEYUV2YV121 |
823 :: "r" (chrSrc + 2048 + chrDstW), "r" (vDest + chrDstW), | 823 :: "r" (chrSrc + 2048 + chrDstW), "r" (vDest + chrDstW), |
824 "g" ((long)-chrDstW) | 824 "g" (-chrDstW) |
825 : "%"REG_a | 825 : "%"REG_a |
826 ); | 826 ); |
827 } | 827 } |
828 | 828 |
829 asm volatile( | 829 asm volatile( |
830 YSCALEYUV2YV121 | 830 YSCALEYUV2YV121 |
831 :: "r" (lumSrc + dstW), "r" (dest + dstW), | 831 :: "r" (lumSrc + dstW), "r" (dest + dstW), |
832 "g" ((long)-dstW) | 832 "g" (-dstW) |
833 : "%"REG_a | 833 : "%"REG_a |
834 ); | 834 ); |
835 #else | 835 #else |
836 int i; | 836 int i; |
837 for(i=0; i<dstW; i++) | 837 for(i=0; i<dstW; i++) |
1487 } | 1487 } |
1488 } | 1488 } |
1489 | 1489 |
1490 //FIXME yuy2* can read upto 7 samples to much | 1490 //FIXME yuy2* can read upto 7 samples to much |
1491 | 1491 |
1492 static inline void RENAME(yuy2ToY)(uint8_t *dst, uint8_t *src, int width) | 1492 static inline void RENAME(yuy2ToY)(uint8_t *dst, uint8_t *src, long width) |
1493 { | 1493 { |
1494 #ifdef HAVE_MMX | 1494 #ifdef HAVE_MMX |
1495 asm volatile( | 1495 asm volatile( |
1496 "movq "MANGLE(bm01010101)", %%mm2\n\t" | 1496 "movq "MANGLE(bm01010101)", %%mm2\n\t" |
1497 "mov %0, %%"REG_a" \n\t" | 1497 "mov %0, %%"REG_a" \n\t" |
1502 "pand %%mm2, %%mm1 \n\t" | 1502 "pand %%mm2, %%mm1 \n\t" |
1503 "packuswb %%mm1, %%mm0 \n\t" | 1503 "packuswb %%mm1, %%mm0 \n\t" |
1504 "movq %%mm0, (%2, %%"REG_a") \n\t" | 1504 "movq %%mm0, (%2, %%"REG_a") \n\t" |
1505 "add $8, %%"REG_a" \n\t" | 1505 "add $8, %%"REG_a" \n\t" |
1506 " js 1b \n\t" | 1506 " js 1b \n\t" |
1507 : : "g" ((long)-width), "r" (src+width*2), "r" (dst+width) | 1507 : : "g" (-width), "r" (src+width*2), "r" (dst+width) |
1508 : "%"REG_a | 1508 : "%"REG_a |
1509 ); | 1509 ); |
1510 #else | 1510 #else |
1511 int i; | 1511 int i; |
1512 for(i=0; i<width; i++) | 1512 for(i=0; i<width; i++) |
1513 dst[i]= src[2*i]; | 1513 dst[i]= src[2*i]; |
1514 #endif | 1514 #endif |
1515 } | 1515 } |
1516 | 1516 |
1517 static inline void RENAME(yuy2ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width) | 1517 static inline void RENAME(yuy2ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width) |
1518 { | 1518 { |
1519 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) | 1519 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) |
1520 asm volatile( | 1520 asm volatile( |
1521 "movq "MANGLE(bm01010101)", %%mm4\n\t" | 1521 "movq "MANGLE(bm01010101)", %%mm4\n\t" |
1522 "mov %0, %%"REG_a" \n\t" | 1522 "mov %0, %%"REG_a" \n\t" |
1537 "packuswb %%mm1, %%mm1 \n\t" | 1537 "packuswb %%mm1, %%mm1 \n\t" |
1538 "movd %%mm0, (%4, %%"REG_a") \n\t" | 1538 "movd %%mm0, (%4, %%"REG_a") \n\t" |
1539 "movd %%mm1, (%3, %%"REG_a") \n\t" | 1539 "movd %%mm1, (%3, %%"REG_a") \n\t" |
1540 "add $4, %%"REG_a" \n\t" | 1540 "add $4, %%"REG_a" \n\t" |
1541 " js 1b \n\t" | 1541 " js 1b \n\t" |
1542 : : "g" ((long)-width), "r" (src1+width*4), "r" (src2+width*4), "r" (dstU+width), "r" (dstV+width) | 1542 : : "g" (-width), "r" (src1+width*4), "r" (src2+width*4), "r" (dstU+width), "r" (dstV+width) |
1543 : "%"REG_a | 1543 : "%"REG_a |
1544 ); | 1544 ); |
1545 #else | 1545 #else |
1546 int i; | 1546 int i; |
1547 for(i=0; i<width; i++) | 1547 for(i=0; i<width; i++) |
1551 } | 1551 } |
1552 #endif | 1552 #endif |
1553 } | 1553 } |
1554 | 1554 |
1555 //this is allmost identical to the previous, end exists only cuz yuy2ToY/UV)(dst, src+1, ...) would have 100% unaligned accesses | 1555 //this is allmost identical to the previous, end exists only cuz yuy2ToY/UV)(dst, src+1, ...) would have 100% unaligned accesses |
1556 static inline void RENAME(uyvyToY)(uint8_t *dst, uint8_t *src, int width) | 1556 static inline void RENAME(uyvyToY)(uint8_t *dst, uint8_t *src, long width) |
1557 { | 1557 { |
1558 #ifdef HAVE_MMX | 1558 #ifdef HAVE_MMX |
1559 asm volatile( | 1559 asm volatile( |
1560 "mov %0, %%"REG_a" \n\t" | 1560 "mov %0, %%"REG_a" \n\t" |
1561 "1: \n\t" | 1561 "1: \n\t" |
1565 "psrlw $8, %%mm1 \n\t" | 1565 "psrlw $8, %%mm1 \n\t" |
1566 "packuswb %%mm1, %%mm0 \n\t" | 1566 "packuswb %%mm1, %%mm0 \n\t" |
1567 "movq %%mm0, (%2, %%"REG_a") \n\t" | 1567 "movq %%mm0, (%2, %%"REG_a") \n\t" |
1568 "add $8, %%"REG_a" \n\t" | 1568 "add $8, %%"REG_a" \n\t" |
1569 " js 1b \n\t" | 1569 " js 1b \n\t" |
1570 : : "g" ((long)-width), "r" (src+width*2), "r" (dst+width) | 1570 : : "g" (-width), "r" (src+width*2), "r" (dst+width) |
1571 : "%"REG_a | 1571 : "%"REG_a |
1572 ); | 1572 ); |
1573 #else | 1573 #else |
1574 int i; | 1574 int i; |
1575 for(i=0; i<width; i++) | 1575 for(i=0; i<width; i++) |
1576 dst[i]= src[2*i+1]; | 1576 dst[i]= src[2*i+1]; |
1577 #endif | 1577 #endif |
1578 } | 1578 } |
1579 | 1579 |
1580 static inline void RENAME(uyvyToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width) | 1580 static inline void RENAME(uyvyToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width) |
1581 { | 1581 { |
1582 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) | 1582 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) |
1583 asm volatile( | 1583 asm volatile( |
1584 "movq "MANGLE(bm01010101)", %%mm4\n\t" | 1584 "movq "MANGLE(bm01010101)", %%mm4\n\t" |
1585 "mov %0, %%"REG_a" \n\t" | 1585 "mov %0, %%"REG_a" \n\t" |
1600 "packuswb %%mm1, %%mm1 \n\t" | 1600 "packuswb %%mm1, %%mm1 \n\t" |
1601 "movd %%mm0, (%4, %%"REG_a") \n\t" | 1601 "movd %%mm0, (%4, %%"REG_a") \n\t" |
1602 "movd %%mm1, (%3, %%"REG_a") \n\t" | 1602 "movd %%mm1, (%3, %%"REG_a") \n\t" |
1603 "add $4, %%"REG_a" \n\t" | 1603 "add $4, %%"REG_a" \n\t" |
1604 " js 1b \n\t" | 1604 " js 1b \n\t" |
1605 : : "g" ((long)-width), "r" (src1+width*4), "r" (src2+width*4), "r" (dstU+width), "r" (dstV+width) | 1605 : : "g" (-width), "r" (src1+width*4), "r" (src2+width*4), "r" (dstU+width), "r" (dstV+width) |
1606 : "%"REG_a | 1606 : "%"REG_a |
1607 ); | 1607 ); |
1608 #else | 1608 #else |
1609 int i; | 1609 int i; |
1610 for(i=0; i<width; i++) | 1610 for(i=0; i<width; i++) |
1646 dstU[i]= ((RU*r + GU*g + BU*b)>>(RGB2YUV_SHIFT+2)) + 128; | 1646 dstU[i]= ((RU*r + GU*g + BU*b)>>(RGB2YUV_SHIFT+2)) + 128; |
1647 dstV[i]= ((RV*r + GV*g + BV*b)>>(RGB2YUV_SHIFT+2)) + 128; | 1647 dstV[i]= ((RV*r + GV*g + BV*b)>>(RGB2YUV_SHIFT+2)) + 128; |
1648 } | 1648 } |
1649 } | 1649 } |
1650 | 1650 |
1651 static inline void RENAME(bgr24ToY)(uint8_t *dst, uint8_t *src, int width) | 1651 static inline void RENAME(bgr24ToY)(uint8_t *dst, uint8_t *src, long width) |
1652 { | 1652 { |
1653 #ifdef HAVE_MMX | 1653 #ifdef HAVE_MMX |
1654 asm volatile( | 1654 asm volatile( |
1655 "mov %2, %%"REG_a" \n\t" | 1655 "mov %2, %%"REG_a" \n\t" |
1656 "movq "MANGLE(bgr2YCoeff)", %%mm6 \n\t" | 1656 "movq "MANGLE(bgr2YCoeff)", %%mm6 \n\t" |
1715 "paddusb "MANGLE(bgr2YOffset)", %%mm0 \n\t" | 1715 "paddusb "MANGLE(bgr2YOffset)", %%mm0 \n\t" |
1716 | 1716 |
1717 "movq %%mm0, (%1, %%"REG_a") \n\t" | 1717 "movq %%mm0, (%1, %%"REG_a") \n\t" |
1718 "add $8, %%"REG_a" \n\t" | 1718 "add $8, %%"REG_a" \n\t" |
1719 " js 1b \n\t" | 1719 " js 1b \n\t" |
1720 : : "r" (src+width*3), "r" (dst+width), "g" ((long)-width) | 1720 : : "r" (src+width*3), "r" (dst+width), "g" (-width) |
1721 : "%"REG_a, "%"REG_b | 1721 : "%"REG_a, "%"REG_b |
1722 ); | 1722 ); |
1723 #else | 1723 #else |
1724 int i; | 1724 int i; |
1725 for(i=0; i<width; i++) | 1725 for(i=0; i<width; i++) |
1731 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)) )>>RGB2YUV_SHIFT); | 1731 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)) )>>RGB2YUV_SHIFT); |
1732 } | 1732 } |
1733 #endif | 1733 #endif |
1734 } | 1734 } |
1735 | 1735 |
1736 static inline void RENAME(bgr24ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width) | 1736 static inline void RENAME(bgr24ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width) |
1737 { | 1737 { |
1738 #ifdef HAVE_MMX | 1738 #ifdef HAVE_MMX |
1739 asm volatile( | 1739 asm volatile( |
1740 "mov %4, %%"REG_a" \n\t" | 1740 "mov %4, %%"REG_a" \n\t" |
1741 "movq "MANGLE(w1111)", %%mm5 \n\t" | 1741 "movq "MANGLE(w1111)", %%mm5 \n\t" |
1880 "movd %%mm0, (%2, %%"REG_a") \n\t" | 1880 "movd %%mm0, (%2, %%"REG_a") \n\t" |
1881 "punpckhdq %%mm0, %%mm0 \n\t" | 1881 "punpckhdq %%mm0, %%mm0 \n\t" |
1882 "movd %%mm0, (%3, %%"REG_a") \n\t" | 1882 "movd %%mm0, (%3, %%"REG_a") \n\t" |
1883 "add $4, %%"REG_a" \n\t" | 1883 "add $4, %%"REG_a" \n\t" |
1884 " js 1b \n\t" | 1884 " js 1b \n\t" |
1885 : : "r" (src1+width*6), "r" (src2+width*6), "r" (dstU+width), "r" (dstV+width), "g" ((long)-width) | 1885 : : "r" (src1+width*6), "r" (src2+width*6), "r" (dstU+width), "r" (dstV+width), "g" (-width) |
1886 : "%"REG_a, "%"REG_b | 1886 : "%"REG_a, "%"REG_b |
1887 ); | 1887 ); |
1888 #else | 1888 #else |
1889 int i; | 1889 int i; |
1890 for(i=0; i<width; i++) | 1890 for(i=0; i<width; i++) |
2033 } | 2033 } |
2034 | 2034 |
2035 | 2035 |
2036 // Bilinear / Bicubic scaling | 2036 // Bilinear / Bicubic scaling |
2037 static inline void RENAME(hScale)(int16_t *dst, int dstW, uint8_t *src, int srcW, int xInc, | 2037 static inline void RENAME(hScale)(int16_t *dst, int dstW, uint8_t *src, int srcW, int xInc, |
2038 int16_t *filter, int16_t *filterPos, int filterSize) | 2038 int16_t *filter, int16_t *filterPos, long filterSize) |
2039 { | 2039 { |
2040 #ifdef HAVE_MMX | 2040 #ifdef HAVE_MMX |
2041 assert(filterSize % 4 == 0 && filterSize>0); | 2041 assert(filterSize % 4 == 0 && filterSize>0); |
2042 if(filterSize==4) // allways true for upscaling, sometimes for down too | 2042 if(filterSize==4) // allways true for upscaling, sometimes for down too |
2043 { | 2043 { |
2171 "add $4, %0 \n\t" | 2171 "add $4, %0 \n\t" |
2172 " jnc 1b \n\t" | 2172 " jnc 1b \n\t" |
2173 | 2173 |
2174 : "+r" (counter), "+r" (filter) | 2174 : "+r" (counter), "+r" (filter) |
2175 : "m" (filterPos), "m" (dst), "m"(offset), | 2175 : "m" (filterPos), "m" (dst), "m"(offset), |
2176 "m" (src), "r" ((long)filterSize*2) | 2176 "m" (src), "r" (filterSize*2) |
2177 : "%"REG_b, "%"REG_a, "%"REG_c | 2177 : "%"REG_b, "%"REG_a, "%"REG_c |
2178 ); | 2178 ); |
2179 } | 2179 } |
2180 #else | 2180 #else |
2181 #ifdef HAVE_ALTIVEC | 2181 #ifdef HAVE_ALTIVEC |
2371 } | 2371 } |
2372 #endif | 2372 #endif |
2373 } | 2373 } |
2374 } | 2374 } |
2375 | 2375 |
2376 inline static void RENAME(hcscale)(uint16_t *dst, int dstWidth, uint8_t *src1, uint8_t *src2, | 2376 inline static void RENAME(hcscale)(uint16_t *dst, long dstWidth, uint8_t *src1, uint8_t *src2, |
2377 int srcW, int xInc, int flags, int canMMX2BeUsed, int16_t *hChrFilter, | 2377 int srcW, int xInc, int flags, int canMMX2BeUsed, int16_t *hChrFilter, |
2378 int16_t *hChrFilterPos, int hChrFilterSize, void *funnyUVCode, | 2378 int16_t *hChrFilterPos, int hChrFilterSize, void *funnyUVCode, |
2379 int srcFormat, uint8_t *formatConvBuffer, int16_t *mmx2Filter, | 2379 int srcFormat, uint8_t *formatConvBuffer, int16_t *mmx2Filter, |
2380 int32_t *mmx2FilterPos) | 2380 int32_t *mmx2FilterPos) |
2381 { | 2381 { |