comparison postproc/swscale_template.c @ 16739:e91f944f6ed9

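Change unsigned->signed and int->long; this fits the asm code better on 64-bit systems. Also fixes several crashes: when i is an unsigned int, -i wraps around to a large 32-bit unsigned value, so (long)-i is a large positive number on 64-bit systems instead of the intended negative offset.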
author reimar
date Wed, 12 Oct 2005 12:11:27 +0000
parents e4360060b79a
children 401b440a6d76
comparison
equal deleted inserted replaced
16738:482b7842476e 16739:e91f944f6ed9
755 #define WRITEYUY2(dst, dstw, index) REAL_WRITEYUY2(dst, dstw, index) 755 #define WRITEYUY2(dst, dstw, index) REAL_WRITEYUY2(dst, dstw, index)
756 756
757 757
758 static inline void RENAME(yuv2yuvX)(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize, 758 static inline void RENAME(yuv2yuvX)(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
759 int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize, 759 int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
760 uint8_t *dest, uint8_t *uDest, uint8_t *vDest, int dstW, int chrDstW) 760 uint8_t *dest, uint8_t *uDest, uint8_t *vDest, long dstW, long chrDstW)
761 { 761 {
762 #ifdef HAVE_MMX 762 #ifdef HAVE_MMX
763 if(uDest != NULL) 763 if(uDest != NULL)
764 { 764 {
765 asm volatile( 765 asm volatile(
766 YSCALEYUV2YV12X(0, CHR_MMX_FILTER_OFFSET) 766 YSCALEYUV2YV12X(0, CHR_MMX_FILTER_OFFSET)
767 :: "r" (&c->redDither), 767 :: "r" (&c->redDither),
768 "r" (uDest), "p" ((long)chrDstW) 768 "r" (uDest), "p" (chrDstW)
769 : "%"REG_a, "%"REG_d, "%"REG_S 769 : "%"REG_a, "%"REG_d, "%"REG_S
770 ); 770 );
771 771
772 asm volatile( 772 asm volatile(
773 YSCALEYUV2YV12X(4096, CHR_MMX_FILTER_OFFSET) 773 YSCALEYUV2YV12X(4096, CHR_MMX_FILTER_OFFSET)
774 :: "r" (&c->redDither), 774 :: "r" (&c->redDither),
775 "r" (vDest), "p" ((long)chrDstW) 775 "r" (vDest), "p" (chrDstW)
776 : "%"REG_a, "%"REG_d, "%"REG_S 776 : "%"REG_a, "%"REG_d, "%"REG_S
777 ); 777 );
778 } 778 }
779 779
780 asm volatile( 780 asm volatile(
781 YSCALEYUV2YV12X(0, LUM_MMX_FILTER_OFFSET) 781 YSCALEYUV2YV12X(0, LUM_MMX_FILTER_OFFSET)
782 :: "r" (&c->redDither), 782 :: "r" (&c->redDither),
783 "r" (dest), "p" ((long)dstW) 783 "r" (dest), "p" (dstW)
784 : "%"REG_a, "%"REG_d, "%"REG_S 784 : "%"REG_a, "%"REG_d, "%"REG_S
785 ); 785 );
786 #else 786 #else
787 #ifdef HAVE_ALTIVEC 787 #ifdef HAVE_ALTIVEC
788 yuv2yuvX_altivec_real(lumFilter, lumSrc, lumFilterSize, 788 yuv2yuvX_altivec_real(lumFilter, lumSrc, lumFilterSize,
804 chrFilter, chrSrc, chrFilterSize, 804 chrFilter, chrSrc, chrFilterSize,
805 dest, uDest, dstW, chrDstW, dstFormat); 805 dest, uDest, dstW, chrDstW, dstFormat);
806 } 806 }
807 807
808 static inline void RENAME(yuv2yuv1)(int16_t *lumSrc, int16_t *chrSrc, 808 static inline void RENAME(yuv2yuv1)(int16_t *lumSrc, int16_t *chrSrc,
809 uint8_t *dest, uint8_t *uDest, uint8_t *vDest, int dstW, int chrDstW) 809 uint8_t *dest, uint8_t *uDest, uint8_t *vDest, long dstW, long chrDstW)
810 { 810 {
811 #ifdef HAVE_MMX 811 #ifdef HAVE_MMX
812 if(uDest != NULL) 812 if(uDest != NULL)
813 { 813 {
814 asm volatile( 814 asm volatile(
815 YSCALEYUV2YV121 815 YSCALEYUV2YV121
816 :: "r" (chrSrc + chrDstW), "r" (uDest + chrDstW), 816 :: "r" (chrSrc + chrDstW), "r" (uDest + chrDstW),
817 "g" ((long)-chrDstW) 817 "g" (-chrDstW)
818 : "%"REG_a 818 : "%"REG_a
819 ); 819 );
820 820
821 asm volatile( 821 asm volatile(
822 YSCALEYUV2YV121 822 YSCALEYUV2YV121
823 :: "r" (chrSrc + 2048 + chrDstW), "r" (vDest + chrDstW), 823 :: "r" (chrSrc + 2048 + chrDstW), "r" (vDest + chrDstW),
824 "g" ((long)-chrDstW) 824 "g" (-chrDstW)
825 : "%"REG_a 825 : "%"REG_a
826 ); 826 );
827 } 827 }
828 828
829 asm volatile( 829 asm volatile(
830 YSCALEYUV2YV121 830 YSCALEYUV2YV121
831 :: "r" (lumSrc + dstW), "r" (dest + dstW), 831 :: "r" (lumSrc + dstW), "r" (dest + dstW),
832 "g" ((long)-dstW) 832 "g" (-dstW)
833 : "%"REG_a 833 : "%"REG_a
834 ); 834 );
835 #else 835 #else
836 int i; 836 int i;
837 for(i=0; i<dstW; i++) 837 for(i=0; i<dstW; i++)
1487 } 1487 }
1488 } 1488 }
1489 1489
1490 //FIXME yuy2* can read upto 7 samples to much 1490 //FIXME yuy2* can read upto 7 samples to much
1491 1491
1492 static inline void RENAME(yuy2ToY)(uint8_t *dst, uint8_t *src, int width) 1492 static inline void RENAME(yuy2ToY)(uint8_t *dst, uint8_t *src, long width)
1493 { 1493 {
1494 #ifdef HAVE_MMX 1494 #ifdef HAVE_MMX
1495 asm volatile( 1495 asm volatile(
1496 "movq "MANGLE(bm01010101)", %%mm2\n\t" 1496 "movq "MANGLE(bm01010101)", %%mm2\n\t"
1497 "mov %0, %%"REG_a" \n\t" 1497 "mov %0, %%"REG_a" \n\t"
1502 "pand %%mm2, %%mm1 \n\t" 1502 "pand %%mm2, %%mm1 \n\t"
1503 "packuswb %%mm1, %%mm0 \n\t" 1503 "packuswb %%mm1, %%mm0 \n\t"
1504 "movq %%mm0, (%2, %%"REG_a") \n\t" 1504 "movq %%mm0, (%2, %%"REG_a") \n\t"
1505 "add $8, %%"REG_a" \n\t" 1505 "add $8, %%"REG_a" \n\t"
1506 " js 1b \n\t" 1506 " js 1b \n\t"
1507 : : "g" ((long)-width), "r" (src+width*2), "r" (dst+width) 1507 : : "g" (-width), "r" (src+width*2), "r" (dst+width)
1508 : "%"REG_a 1508 : "%"REG_a
1509 ); 1509 );
1510 #else 1510 #else
1511 int i; 1511 int i;
1512 for(i=0; i<width; i++) 1512 for(i=0; i<width; i++)
1513 dst[i]= src[2*i]; 1513 dst[i]= src[2*i];
1514 #endif 1514 #endif
1515 } 1515 }
1516 1516
1517 static inline void RENAME(yuy2ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width) 1517 static inline void RENAME(yuy2ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width)
1518 { 1518 {
1519 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) 1519 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
1520 asm volatile( 1520 asm volatile(
1521 "movq "MANGLE(bm01010101)", %%mm4\n\t" 1521 "movq "MANGLE(bm01010101)", %%mm4\n\t"
1522 "mov %0, %%"REG_a" \n\t" 1522 "mov %0, %%"REG_a" \n\t"
1537 "packuswb %%mm1, %%mm1 \n\t" 1537 "packuswb %%mm1, %%mm1 \n\t"
1538 "movd %%mm0, (%4, %%"REG_a") \n\t" 1538 "movd %%mm0, (%4, %%"REG_a") \n\t"
1539 "movd %%mm1, (%3, %%"REG_a") \n\t" 1539 "movd %%mm1, (%3, %%"REG_a") \n\t"
1540 "add $4, %%"REG_a" \n\t" 1540 "add $4, %%"REG_a" \n\t"
1541 " js 1b \n\t" 1541 " js 1b \n\t"
1542 : : "g" ((long)-width), "r" (src1+width*4), "r" (src2+width*4), "r" (dstU+width), "r" (dstV+width) 1542 : : "g" (-width), "r" (src1+width*4), "r" (src2+width*4), "r" (dstU+width), "r" (dstV+width)
1543 : "%"REG_a 1543 : "%"REG_a
1544 ); 1544 );
1545 #else 1545 #else
1546 int i; 1546 int i;
1547 for(i=0; i<width; i++) 1547 for(i=0; i<width; i++)
1551 } 1551 }
1552 #endif 1552 #endif
1553 } 1553 }
1554 1554
1555 //this is allmost identical to the previous, end exists only cuz yuy2ToY/UV)(dst, src+1, ...) would have 100% unaligned accesses 1555 //this is allmost identical to the previous, end exists only cuz yuy2ToY/UV)(dst, src+1, ...) would have 100% unaligned accesses
1556 static inline void RENAME(uyvyToY)(uint8_t *dst, uint8_t *src, int width) 1556 static inline void RENAME(uyvyToY)(uint8_t *dst, uint8_t *src, long width)
1557 { 1557 {
1558 #ifdef HAVE_MMX 1558 #ifdef HAVE_MMX
1559 asm volatile( 1559 asm volatile(
1560 "mov %0, %%"REG_a" \n\t" 1560 "mov %0, %%"REG_a" \n\t"
1561 "1: \n\t" 1561 "1: \n\t"
1565 "psrlw $8, %%mm1 \n\t" 1565 "psrlw $8, %%mm1 \n\t"
1566 "packuswb %%mm1, %%mm0 \n\t" 1566 "packuswb %%mm1, %%mm0 \n\t"
1567 "movq %%mm0, (%2, %%"REG_a") \n\t" 1567 "movq %%mm0, (%2, %%"REG_a") \n\t"
1568 "add $8, %%"REG_a" \n\t" 1568 "add $8, %%"REG_a" \n\t"
1569 " js 1b \n\t" 1569 " js 1b \n\t"
1570 : : "g" ((long)-width), "r" (src+width*2), "r" (dst+width) 1570 : : "g" (-width), "r" (src+width*2), "r" (dst+width)
1571 : "%"REG_a 1571 : "%"REG_a
1572 ); 1572 );
1573 #else 1573 #else
1574 int i; 1574 int i;
1575 for(i=0; i<width; i++) 1575 for(i=0; i<width; i++)
1576 dst[i]= src[2*i+1]; 1576 dst[i]= src[2*i+1];
1577 #endif 1577 #endif
1578 } 1578 }
1579 1579
1580 static inline void RENAME(uyvyToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width) 1580 static inline void RENAME(uyvyToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width)
1581 { 1581 {
1582 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) 1582 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
1583 asm volatile( 1583 asm volatile(
1584 "movq "MANGLE(bm01010101)", %%mm4\n\t" 1584 "movq "MANGLE(bm01010101)", %%mm4\n\t"
1585 "mov %0, %%"REG_a" \n\t" 1585 "mov %0, %%"REG_a" \n\t"
1600 "packuswb %%mm1, %%mm1 \n\t" 1600 "packuswb %%mm1, %%mm1 \n\t"
1601 "movd %%mm0, (%4, %%"REG_a") \n\t" 1601 "movd %%mm0, (%4, %%"REG_a") \n\t"
1602 "movd %%mm1, (%3, %%"REG_a") \n\t" 1602 "movd %%mm1, (%3, %%"REG_a") \n\t"
1603 "add $4, %%"REG_a" \n\t" 1603 "add $4, %%"REG_a" \n\t"
1604 " js 1b \n\t" 1604 " js 1b \n\t"
1605 : : "g" ((long)-width), "r" (src1+width*4), "r" (src2+width*4), "r" (dstU+width), "r" (dstV+width) 1605 : : "g" (-width), "r" (src1+width*4), "r" (src2+width*4), "r" (dstU+width), "r" (dstV+width)
1606 : "%"REG_a 1606 : "%"REG_a
1607 ); 1607 );
1608 #else 1608 #else
1609 int i; 1609 int i;
1610 for(i=0; i<width; i++) 1610 for(i=0; i<width; i++)
1646 dstU[i]= ((RU*r + GU*g + BU*b)>>(RGB2YUV_SHIFT+2)) + 128; 1646 dstU[i]= ((RU*r + GU*g + BU*b)>>(RGB2YUV_SHIFT+2)) + 128;
1647 dstV[i]= ((RV*r + GV*g + BV*b)>>(RGB2YUV_SHIFT+2)) + 128; 1647 dstV[i]= ((RV*r + GV*g + BV*b)>>(RGB2YUV_SHIFT+2)) + 128;
1648 } 1648 }
1649 } 1649 }
1650 1650
1651 static inline void RENAME(bgr24ToY)(uint8_t *dst, uint8_t *src, int width) 1651 static inline void RENAME(bgr24ToY)(uint8_t *dst, uint8_t *src, long width)
1652 { 1652 {
1653 #ifdef HAVE_MMX 1653 #ifdef HAVE_MMX
1654 asm volatile( 1654 asm volatile(
1655 "mov %2, %%"REG_a" \n\t" 1655 "mov %2, %%"REG_a" \n\t"
1656 "movq "MANGLE(bgr2YCoeff)", %%mm6 \n\t" 1656 "movq "MANGLE(bgr2YCoeff)", %%mm6 \n\t"
1715 "paddusb "MANGLE(bgr2YOffset)", %%mm0 \n\t" 1715 "paddusb "MANGLE(bgr2YOffset)", %%mm0 \n\t"
1716 1716
1717 "movq %%mm0, (%1, %%"REG_a") \n\t" 1717 "movq %%mm0, (%1, %%"REG_a") \n\t"
1718 "add $8, %%"REG_a" \n\t" 1718 "add $8, %%"REG_a" \n\t"
1719 " js 1b \n\t" 1719 " js 1b \n\t"
1720 : : "r" (src+width*3), "r" (dst+width), "g" ((long)-width) 1720 : : "r" (src+width*3), "r" (dst+width), "g" (-width)
1721 : "%"REG_a, "%"REG_b 1721 : "%"REG_a, "%"REG_b
1722 ); 1722 );
1723 #else 1723 #else
1724 int i; 1724 int i;
1725 for(i=0; i<width; i++) 1725 for(i=0; i<width; i++)
1731 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)) )>>RGB2YUV_SHIFT); 1731 dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)) )>>RGB2YUV_SHIFT);
1732 } 1732 }
1733 #endif 1733 #endif
1734 } 1734 }
1735 1735
1736 static inline void RENAME(bgr24ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width) 1736 static inline void RENAME(bgr24ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width)
1737 { 1737 {
1738 #ifdef HAVE_MMX 1738 #ifdef HAVE_MMX
1739 asm volatile( 1739 asm volatile(
1740 "mov %4, %%"REG_a" \n\t" 1740 "mov %4, %%"REG_a" \n\t"
1741 "movq "MANGLE(w1111)", %%mm5 \n\t" 1741 "movq "MANGLE(w1111)", %%mm5 \n\t"
1880 "movd %%mm0, (%2, %%"REG_a") \n\t" 1880 "movd %%mm0, (%2, %%"REG_a") \n\t"
1881 "punpckhdq %%mm0, %%mm0 \n\t" 1881 "punpckhdq %%mm0, %%mm0 \n\t"
1882 "movd %%mm0, (%3, %%"REG_a") \n\t" 1882 "movd %%mm0, (%3, %%"REG_a") \n\t"
1883 "add $4, %%"REG_a" \n\t" 1883 "add $4, %%"REG_a" \n\t"
1884 " js 1b \n\t" 1884 " js 1b \n\t"
1885 : : "r" (src1+width*6), "r" (src2+width*6), "r" (dstU+width), "r" (dstV+width), "g" ((long)-width) 1885 : : "r" (src1+width*6), "r" (src2+width*6), "r" (dstU+width), "r" (dstV+width), "g" (-width)
1886 : "%"REG_a, "%"REG_b 1886 : "%"REG_a, "%"REG_b
1887 ); 1887 );
1888 #else 1888 #else
1889 int i; 1889 int i;
1890 for(i=0; i<width; i++) 1890 for(i=0; i<width; i++)
2033 } 2033 }
2034 2034
2035 2035
2036 // Bilinear / Bicubic scaling 2036 // Bilinear / Bicubic scaling
2037 static inline void RENAME(hScale)(int16_t *dst, int dstW, uint8_t *src, int srcW, int xInc, 2037 static inline void RENAME(hScale)(int16_t *dst, int dstW, uint8_t *src, int srcW, int xInc,
2038 int16_t *filter, int16_t *filterPos, int filterSize) 2038 int16_t *filter, int16_t *filterPos, long filterSize)
2039 { 2039 {
2040 #ifdef HAVE_MMX 2040 #ifdef HAVE_MMX
2041 assert(filterSize % 4 == 0 && filterSize>0); 2041 assert(filterSize % 4 == 0 && filterSize>0);
2042 if(filterSize==4) // allways true for upscaling, sometimes for down too 2042 if(filterSize==4) // allways true for upscaling, sometimes for down too
2043 { 2043 {
2171 "add $4, %0 \n\t" 2171 "add $4, %0 \n\t"
2172 " jnc 1b \n\t" 2172 " jnc 1b \n\t"
2173 2173
2174 : "+r" (counter), "+r" (filter) 2174 : "+r" (counter), "+r" (filter)
2175 : "m" (filterPos), "m" (dst), "m"(offset), 2175 : "m" (filterPos), "m" (dst), "m"(offset),
2176 "m" (src), "r" ((long)filterSize*2) 2176 "m" (src), "r" (filterSize*2)
2177 : "%"REG_b, "%"REG_a, "%"REG_c 2177 : "%"REG_b, "%"REG_a, "%"REG_c
2178 ); 2178 );
2179 } 2179 }
2180 #else 2180 #else
2181 #ifdef HAVE_ALTIVEC 2181 #ifdef HAVE_ALTIVEC
2371 } 2371 }
2372 #endif 2372 #endif
2373 } 2373 }
2374 } 2374 }
2375 2375
2376 inline static void RENAME(hcscale)(uint16_t *dst, int dstWidth, uint8_t *src1, uint8_t *src2, 2376 inline static void RENAME(hcscale)(uint16_t *dst, long dstWidth, uint8_t *src1, uint8_t *src2,
2377 int srcW, int xInc, int flags, int canMMX2BeUsed, int16_t *hChrFilter, 2377 int srcW, int xInc, int flags, int canMMX2BeUsed, int16_t *hChrFilter,
2378 int16_t *hChrFilterPos, int hChrFilterSize, void *funnyUVCode, 2378 int16_t *hChrFilterPos, int hChrFilterSize, void *funnyUVCode,
2379 int srcFormat, uint8_t *formatConvBuffer, int16_t *mmx2Filter, 2379 int srcFormat, uint8_t *formatConvBuffer, int16_t *mmx2Filter,
2380 int32_t *mmx2FilterPos) 2380 int32_t *mmx2FilterPos)
2381 { 2381 {