Mercurial > libavcodec.hg
comparison libpostproc/postprocess_template.c @ 106:389391a6d0bf libavcodec
rewrote the horizontal lowpass filter to fix a bug which caused a blocky look
added deinterlace filters (linear interpolate, linear blend, median)
minor cleanups (removed some outcommented stuff)
author | michael |
---|---|
date | Mon, 15 Oct 2001 03:01:08 +0000 |
parents | a2f94bfb5793 |
children | bd163e13a0fb |
comparison
equal
deleted
inserted
replaced
105:a2f94bfb5793 | 106:389391a6d0bf |
---|---|
15 along with this program; if not, write to the Free Software | 15 along with this program; if not, write to the Free Software |
16 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | 16 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
17 */ | 17 */ |
18 | 18 |
19 /* | 19 /* |
20 C MMX MMX2 3DNow* | 20 C MMX MMX2 3DNow |
21 isVertDC Ec Ec | 21 isVertDC Ec Ec |
22 isVertMinMaxOk Ec Ec | 22 isVertMinMaxOk Ec Ec |
23 doVertLowPass E e e* | 23 doVertLowPass E e e |
24 doVertDefFilter Ec Ec Ec | 24 doVertDefFilter Ec Ec Ec |
25 isHorizDC Ec Ec | 25 isHorizDC Ec Ec |
26 isHorizMinMaxOk a | 26 isHorizMinMaxOk a |
27 doHorizLowPass E a a* | 27 doHorizLowPass E a a |
28 doHorizDefFilter E ac ac | 28 doHorizDefFilter E ac ac |
29 deRing | 29 deRing |
30 Vertical RKAlgo1 E a a* | 30 Vertical RKAlgo1 E a a |
31 Vertical X1 a E E* | 31 Vertical X1 a E E |
32 Horizontal X1 a E E* | 32 Horizontal X1 a E E |
33 LinIpolDeinterlace a E E* | |
34 LinBlendDeinterlace a E E* | |
35 MedianDeinterlace a E | |
33 | 36 |
34 | 37 |
35 * i dont have a 3dnow CPU -> its untested | 38 * i dont have a 3dnow CPU -> its untested |
36 E = Exact implementation | 39 E = Exact implementation |
37 e = almost exact implementation | 40 e = almost exact implementation |
53 make the mainloop more flexible (variable number of blocks at once | 56 make the mainloop more flexible (variable number of blocks at once |
54 (the if/else stuff per block is slowing things down) | 57 (the if/else stuff per block is slowing things down) |
55 compare the quality & speed of all filters | 58 compare the quality & speed of all filters |
56 implement a few simple deinterlacing filters | 59 implement a few simple deinterlacing filters |
57 split this huge file | 60 split this huge file |
61 fix warnings (unused vars, ...) | |
58 ... | 62 ... |
59 | 63 |
60 Notes: | 64 Notes: |
61 | 65 |
62 */ | 66 */ |
63 | 67 |
64 /* | 68 /* |
65 Changelog: use the CVS log | 69 Changelog: use the CVS log |
70 rewrote the horizontal lowpass filter to fix a bug which caused a blocky look | |
71 added deinterlace filters (linear interpolate, linear blend, median) | |
72 minor cleanups (removed some outcommented stuff) | |
66 0.1.3 | 73 0.1.3 |
67 bugfixes: last 3 lines not brightness/contrast corrected | 74 bugfixes: last 3 lines not brightness/contrast corrected |
68 brightness statistics messed up with initial black pic | 75 brightness statistics messed up with initial black pic |
69 changed initial values of the brightness statistics | 76 changed initial values of the brightness statistics |
70 C++ -> C conversion | 77 C++ -> C conversion |
192 //FIXME? |255-0| = 1 (shouldnt be a problem ...) | 199 //FIXME? |255-0| = 1 (shouldnt be a problem ...) |
193 /** | 200 /** |
194 * Check if the middle 8x8 Block in the given 8x10 block is flat | 201 * Check if the middle 8x8 Block in the given 8x10 block is flat |
195 */ | 202 */ |
196 static inline int isVertDC(uint8_t src[], int stride){ | 203 static inline int isVertDC(uint8_t src[], int stride){ |
197 // return true; | |
198 int numEq= 0; | 204 int numEq= 0; |
199 int y; | 205 int y; |
200 src+= stride; // src points to begin of the 8x8 Block | 206 src+= stride; // src points to begin of the 8x8 Block |
201 #ifdef HAVE_MMX | 207 #ifdef HAVE_MMX |
202 asm volatile( | 208 asm volatile( |
203 // "int $3 \n\t" | |
204 "pushl %1\n\t" | 209 "pushl %1\n\t" |
205 "movq b7E, %%mm7 \n\t" // mm7 = 0x7F | 210 "movq b7E, %%mm7 \n\t" // mm7 = 0x7F |
206 "movq b7C, %%mm6 \n\t" // mm6 = 0x7D | 211 "movq b7C, %%mm6 \n\t" // mm6 = 0x7D |
207 "movq (%1), %%mm0 \n\t" | 212 "movq (%1), %%mm0 \n\t" |
208 "addl %2, %1 \n\t" | 213 "addl %2, %1 \n\t" |
1575 } | 1580 } |
1576 #endif | 1581 #endif |
1577 } | 1582 } |
1578 | 1583 |
1579 /** | 1584 /** |
1580 * Do a horizontal low pass filter on the 8x8 block | 1585 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block) |
1581 * useing the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version) | 1586 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version) |
1582 * useing approximately the 7-Tap Filter (1,2,3,4,3,2,1)/16 (MMX2/3DNOW version) | 1587 * using the 7-Tap Filter (2,2,2,4,2,2,2)/16 (MMX2/3DNOW version) |
1583 */ | 1588 */ |
1584 static inline void doHorizLowPassAndCopyBack(uint8_t dst[], int stride, int QP) | 1589 static inline void doHorizLowPassAndCopyBack(uint8_t dst[], int stride, int QP) |
1585 { | 1590 { |
1586 //return; | 1591 //return; |
1587 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) | 1592 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) |
1633 "punpcklbw %%mm2, %%mm2 \n\t"\ | 1638 "punpcklbw %%mm2, %%mm2 \n\t"\ |
1634 "movq %%mm2, (%0) \n\t"\ | 1639 "movq %%mm2, (%0) \n\t"\ |
1635 */ | 1640 */ |
1636 // approximately a 7-Tap Filter with Vector (1,2,3,4,3,2,1)/16 | 1641 // approximately a 7-Tap Filter with Vector (1,2,3,4,3,2,1)/16 |
1637 /* | 1642 /* |
1638 31 | |
1639 121 | |
1640 121 | |
1641 121 | |
1642 121 | |
1643 121 | |
1644 121 | |
1645 13 | |
1646 Implemented Exact 7-Tap | 1643 Implemented Exact 7-Tap |
1647 9421 A321 | 1644 9421 A321 |
1648 36421 64321 | 1645 36421 64321 |
1649 334321 = | 1646 334321 = |
1650 1234321 = | 1647 1234321 = |
1652 123433 = | 1649 123433 = |
1653 12463 12346 | 1650 12463 12346 |
1654 1249 123A | 1651 1249 123A |
1655 | 1652 |
1656 */ | 1653 */ |
1654 | |
1657 #ifdef HAVE_MMX2 | 1655 #ifdef HAVE_MMX2 |
1658 #define HLP3(i) "movq " #i "(%%eax), %%mm0 \n\t"\ | 1656 #define HLP3(i) "movq " #i "(%%eax), %%mm0 \n\t"\ |
1659 "movq %%mm0, %%mm1 \n\t"\ | 1657 "movq %%mm0, %%mm1 \n\t"\ |
1660 "movq %%mm0, %%mm2 \n\t"\ | 1658 "movq %%mm0, %%mm2 \n\t"\ |
1661 "movq %%mm0, %%mm3 \n\t"\ | 1659 "movq %%mm0, %%mm3 \n\t"\ |
1678 "movd %%mm0, 4(%0) \n\t" | 1676 "movd %%mm0, 4(%0) \n\t" |
1679 #else | 1677 #else |
1680 #define HLP3(i) "movq " #i "(%%eax), %%mm0 \n\t"\ | 1678 #define HLP3(i) "movq " #i "(%%eax), %%mm0 \n\t"\ |
1681 "movq %%mm0, %%mm1 \n\t"\ | 1679 "movq %%mm0, %%mm1 \n\t"\ |
1682 "movq %%mm0, %%mm2 \n\t"\ | 1680 "movq %%mm0, %%mm2 \n\t"\ |
1683 "movq %%mm0, %%mm3 \n\t"\ | 1681 "movd -4(%0), %%mm3 \n\t" /*0001000*/\ |
1684 "movq %%mm0, %%mm4 \n\t"\ | 1682 "movd 8(%0), %%mm4 \n\t" /*0001000*/\ |
1685 "psllq $8, %%mm1 \n\t"\ | 1683 "psllq $8, %%mm1 \n\t"\ |
1686 "psrlq $8, %%mm2 \n\t"\ | 1684 "psrlq $8, %%mm2 \n\t"\ |
1687 "pand bm00000001, %%mm3 \n\t"\ | 1685 "psrlq $24, %%mm3 \n\t"\ |
1688 "pand bm10000000, %%mm4 \n\t"\ | 1686 "psllq $56, %%mm4 \n\t"\ |
1689 "por %%mm3, %%mm1 \n\t"\ | 1687 "por %%mm3, %%mm1 \n\t"\ |
1690 "por %%mm4, %%mm2 \n\t"\ | 1688 "por %%mm4, %%mm2 \n\t"\ |
1691 PAVGB(%%mm2, %%mm1)\ | 1689 PAVGB(%%mm2, %%mm1)\ |
1692 PAVGB(%%mm1, %%mm0)\ | 1690 PAVGB(%%mm1, %%mm0)\ |
1693 \ | 1691 \ |
1706 "movd %%mm0, (%0) \n\t"\ | 1704 "movd %%mm0, (%0) \n\t"\ |
1707 "psrlq $32, %%mm0 \n\t"\ | 1705 "psrlq $32, %%mm0 \n\t"\ |
1708 "movd %%mm0, 4(%0) \n\t" | 1706 "movd %%mm0, 4(%0) \n\t" |
1709 #endif | 1707 #endif |
1710 | 1708 |
1711 #define HLP(i) HLP3(i) | 1709 /* uses the 7-Tap Filter: 1112111 */ |
1710 #define NEW_HLP(i)\ | |
1711 "movq " #i "(%%eax), %%mm0 \n\t"\ | |
1712 "movq %%mm0, %%mm1 \n\t"\ | |
1713 "movq %%mm0, %%mm2 \n\t"\ | |
1714 "movd -4(%0), %%mm3 \n\t" /*0001000*/\ | |
1715 "movd 8(%0), %%mm4 \n\t" /*0001000*/\ | |
1716 "psllq $8, %%mm1 \n\t"\ | |
1717 "psrlq $8, %%mm2 \n\t"\ | |
1718 "psrlq $24, %%mm3 \n\t"\ | |
1719 "psllq $56, %%mm4 \n\t"\ | |
1720 "por %%mm3, %%mm1 \n\t"\ | |
1721 "por %%mm4, %%mm2 \n\t"\ | |
1722 "movq %%mm1, %%mm5 \n\t"\ | |
1723 PAVGB(%%mm2, %%mm1)\ | |
1724 PAVGB(%%mm1, %%mm0)\ | |
1725 "psllq $8, %%mm5 \n\t"\ | |
1726 "psrlq $8, %%mm2 \n\t"\ | |
1727 "por %%mm3, %%mm5 \n\t"\ | |
1728 "por %%mm4, %%mm2 \n\t"\ | |
1729 "movq %%mm5, %%mm1 \n\t"\ | |
1730 PAVGB(%%mm2, %%mm5)\ | |
1731 "psllq $8, %%mm1 \n\t"\ | |
1732 "psrlq $8, %%mm2 \n\t"\ | |
1733 "por %%mm3, %%mm1 \n\t"\ | |
1734 "por %%mm4, %%mm2 \n\t"\ | |
1735 PAVGB(%%mm2, %%mm1)\ | |
1736 PAVGB(%%mm1, %%mm5)\ | |
1737 PAVGB(%%mm5, %%mm0)\ | |
1738 "movd %%mm0, (%0) \n\t"\ | |
1739 "psrlq $32, %%mm0 \n\t"\ | |
1740 "movd %%mm0, 4(%0) \n\t" | |
1741 | |
1742 /* uses the 9-Tap Filter: 112242211 */ | |
1743 #define NEW_HLP2(i)\ | |
1744 "movq " #i "(%%eax), %%mm0 \n\t" /*0001000*/\ | |
1745 "movq %%mm0, %%mm1 \n\t" /*0001000*/\ | |
1746 "movq %%mm0, %%mm2 \n\t" /*0001000*/\ | |
1747 "movd -4(%0), %%mm3 \n\t" /*0001000*/\ | |
1748 "movd 8(%0), %%mm4 \n\t" /*0001000*/\ | |
1749 "psllq $8, %%mm1 \n\t"\ | |
1750 "psrlq $8, %%mm2 \n\t"\ | |
1751 "psrlq $24, %%mm3 \n\t"\ | |
1752 "psllq $56, %%mm4 \n\t"\ | |
1753 "por %%mm3, %%mm1 \n\t" /*0010000*/\ | |
1754 "por %%mm4, %%mm2 \n\t" /*0000100*/\ | |
1755 "movq %%mm1, %%mm5 \n\t" /*0010000*/\ | |
1756 PAVGB(%%mm2, %%mm1) /*0010100*/\ | |
1757 PAVGB(%%mm1, %%mm0) /*0012100*/\ | |
1758 "psllq $8, %%mm5 \n\t"\ | |
1759 "psrlq $8, %%mm2 \n\t"\ | |
1760 "por %%mm3, %%mm5 \n\t" /*0100000*/\ | |
1761 "por %%mm4, %%mm2 \n\t" /*0000010*/\ | |
1762 "movq %%mm5, %%mm1 \n\t" /*0100000*/\ | |
1763 PAVGB(%%mm2, %%mm5) /*0100010*/\ | |
1764 "psllq $8, %%mm1 \n\t"\ | |
1765 "psrlq $8, %%mm2 \n\t"\ | |
1766 "por %%mm3, %%mm1 \n\t" /*1000000*/\ | |
1767 "por %%mm4, %%mm2 \n\t" /*0000001*/\ | |
1768 "movq %%mm1, %%mm6 \n\t" /*1000000*/\ | |
1769 PAVGB(%%mm2, %%mm1) /*1000001*/\ | |
1770 "psllq $8, %%mm6 \n\t"\ | |
1771 "psrlq $8, %%mm2 \n\t"\ | |
1772 "por %%mm3, %%mm6 \n\t"/*100000000*/\ | |
1773 "por %%mm4, %%mm2 \n\t"/*000000001*/\ | |
1774 PAVGB(%%mm2, %%mm6) /*100000001*/\ | |
1775 PAVGB(%%mm6, %%mm1) /*110000011*/\ | |
1776 PAVGB(%%mm1, %%mm5) /*112000211*/\ | |
1777 PAVGB(%%mm5, %%mm0) /*112242211*/\ | |
1778 "movd %%mm0, (%0) \n\t"\ | |
1779 "psrlq $32, %%mm0 \n\t"\ | |
1780 "movd %%mm0, 4(%0) \n\t" | |
1781 | |
1782 #define HLP(i) NEW_HLP(i) | |
1712 | 1783 |
1713 HLP(0) | 1784 HLP(0) |
1714 "addl %1, %0 \n\t" | 1785 "addl %1, %0 \n\t" |
1715 HLP(8) | 1786 HLP(8) |
1716 "addl %1, %0 \n\t" | 1787 "addl %1, %0 \n\t" |
1826 | 1897 |
1827 //FIXME | 1898 //FIXME |
1828 #endif | 1899 #endif |
1829 } | 1900 } |
1830 | 1901 |
/**
 * Deinterlaces the given block
 * will be called for every 8x8 block, except the last row, and can read & write into an 8x16 block
 *
 * Rebuilds the odd lines (1,3,5,7) by linear interpolation: each becomes the
 * average of the even lines directly above and below it (line 7 uses line 8
 * from the block below).  Even lines are left untouched.
 * NOTE(review): PAVGB is defined elsewhere; presumably the packed-byte average
 * (pavgb/pavgusb), which rounds up, while the C fallback truncates with >>1 —
 * results may differ by 1 LSB between the two paths; confirm this is intended.
 */
static inline void deInterlaceInterpolateLinear(uint8_t src[], int stride)
{
#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
	asm volatile(
		"leal (%0, %1), %%eax				\n\t"
		"leal (%%eax, %1, 4), %%ebx			\n\t"
//	0	1	2	3	4	5	6	7	8	9
//	%0	eax	eax+%1	eax+2%1	%0+4%1	ebx	ebx+%1	ebx+2%1	%0+8%1	ebx+4%1

		"movq (%0), %%mm0				\n\t" // line 0
		"movq (%%eax, %1), %%mm1			\n\t" // line 2
		PAVGB(%%mm1, %%mm0)\
		"movq %%mm0, (%%eax)				\n\t" // line 1 = avg(line 0, line 2)
		"movq (%0, %1, 4), %%mm0			\n\t" // line 4
		PAVGB(%%mm0, %%mm1)\
		"movq %%mm1, (%%eax, %1, 2)			\n\t" // line 3 = avg(line 2, line 4)
		"movq (%%ebx, %1), %%mm1			\n\t" // line 6
		PAVGB(%%mm1, %%mm0)\
		"movq %%mm0, (%%ebx)				\n\t" // line 5 = avg(line 4, line 6)
		"movq (%0, %1, 8), %%mm0			\n\t" // line 8
		PAVGB(%%mm0, %%mm1)\
		"movq %%mm1, (%%ebx, %1, 2)			\n\t" // line 7 = avg(line 6, line 8)

		: : "r" (src), "r" (stride)
		: "%eax", "%ebx"
	);
#else
	// C fallback: truncating average of the two neighbouring even lines
	int x;
	for(x=0; x<8; x++)
	{
		src[stride] = (src[0] + src[stride*2])>>1;
		src[stride*3] = (src[stride*2] + src[stride*4])>>1;
		src[stride*5] = (src[stride*4] + src[stride*6])>>1;
		src[stride*7] = (src[stride*6] + src[stride*8])>>1;
		src++;	// next column
	}
#endif
}
1944 | |
/**
 * Deinterlaces the given block
 * will be called for every 8x8 block, in the last row, and can read & write into an 8x8 block
 *
 * Same linear interpolation as deInterlaceInterpolateLinear, but since there
 * is no line 8 below this block, line 7 is simply copied from line 6.
 */
static inline void deInterlaceInterpolateLinearLastRow(uint8_t src[], int stride)
{
#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
	asm volatile(
		"leal (%0, %1), %%eax				\n\t"
		"leal (%%eax, %1, 4), %%ebx			\n\t"
//	0	1	2	3	4	5	6	7	8	9
//	%0	eax	eax+%1	eax+2%1	%0+4%1	ebx	ebx+%1	ebx+2%1	%0+8%1	ebx+4%1

		"movq (%0), %%mm0				\n\t" // line 0
		"movq (%%eax, %1), %%mm1			\n\t" // line 2
		PAVGB(%%mm1, %%mm0)\
		"movq %%mm0, (%%eax)				\n\t" // line 1 = avg(line 0, line 2)
		"movq (%0, %1, 4), %%mm0			\n\t" // line 4
		PAVGB(%%mm0, %%mm1)\
		"movq %%mm1, (%%eax, %1, 2)			\n\t" // line 3 = avg(line 2, line 4)
		"movq (%%ebx, %1), %%mm1			\n\t" // line 6
		PAVGB(%%mm1, %%mm0)\
		"movq %%mm0, (%%ebx)				\n\t" // line 5 = avg(line 4, line 6)
		"movq %%mm1, (%%ebx, %1, 2)			\n\t" // line 7 = line 6 (no line below)


		: : "r" (src), "r" (stride)
		: "%eax", "%ebx"
	);
#else
	// C fallback: truncating averages; last line duplicated from line 6
	int x;
	for(x=0; x<8; x++)
	{
		src[stride] = (src[0] + src[stride*2])>>1;
		src[stride*3] = (src[stride*2] + src[stride*4])>>1;
		src[stride*5] = (src[stride*4] + src[stride*6])>>1;
		src[stride*7] = src[stride*6];
		src++;	// next column
	}
#endif
}
1986 | |
/**
 * Deinterlaces the given block
 * will be called for every 8x8 block, except the last row, and can read & write into an 8x16 block
 * will shift the image up by 1 line (FIXME if this is a problem)
 *
 * Each output line r becomes (L[r] + 2*L[r+1] + L[r+2]) / 4 — a (1,2,1)
 * vertical blend, realised in the MMX path with cascaded PAVGB averages.
 * Because every line is rebuilt from itself and the two lines BELOW it, the
 * whole image is shifted up by one line (hence the FIXME above).
 */
static inline void deInterlaceBlendLinear(uint8_t src[], int stride)
{
#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
	asm volatile(
		"leal (%0, %1), %%eax				\n\t"
		"leal (%%eax, %1, 4), %%ebx			\n\t"
//	0	1	2	3	4	5	6	7	8	9
//	%0	eax	eax+%1	eax+2%1	%0+4%1	ebx	ebx+%1	ebx+2%1	%0+8%1	ebx+4%1

		"movq (%0), %%mm0				\n\t" // L0
		"movq (%%eax, %1), %%mm1			\n\t" // L2
		PAVGB(%%mm1, %%mm0)				// L0+L2
		"movq (%%eax), %%mm2				\n\t" // L1
		PAVGB(%%mm2, %%mm0)
		"movq %%mm0, (%0)				\n\t"
		"movq (%%eax, %1, 2), %%mm0			\n\t" // L3
		PAVGB(%%mm0, %%mm2)				// L1+L3
		PAVGB(%%mm1, %%mm2)				// 2L2 + L1 + L3
		"movq %%mm2, (%%eax)				\n\t"
		"movq (%0, %1, 4), %%mm2			\n\t" // L4
		PAVGB(%%mm2, %%mm1)				// L2+L4
		PAVGB(%%mm0, %%mm1)				// 2L3 + L2 + L4
		"movq %%mm1, (%%eax, %1)			\n\t"
		"movq (%%ebx), %%mm1				\n\t" // L5
		PAVGB(%%mm1, %%mm0)				// L3+L5
		PAVGB(%%mm2, %%mm0)				// 2L4 + L3 + L5
		"movq %%mm0, (%%eax, %1, 2)			\n\t"
		"movq (%%ebx, %1), %%mm0			\n\t" // L6
		PAVGB(%%mm0, %%mm2)				// L4+L6
		PAVGB(%%mm1, %%mm2)				// 2L5 + L4 + L6
		"movq %%mm2, (%0, %1, 4)			\n\t"
		"movq (%%ebx, %1, 2), %%mm2			\n\t" // L7
		PAVGB(%%mm2, %%mm1)				// L5+L7
		PAVGB(%%mm0, %%mm1)				// 2L6 + L5 + L7
		"movq %%mm1, (%%ebx)				\n\t"
		"movq (%0, %1, 8), %%mm1			\n\t" // L8
		PAVGB(%%mm1, %%mm0)				// L6+L8
		PAVGB(%%mm2, %%mm0)				// 2L7 + L6 + L8
		"movq %%mm0, (%%ebx, %1)			\n\t"
		"movq (%%ebx, %1, 4), %%mm0			\n\t" // L9
		PAVGB(%%mm0, %%mm2)				// L7+L9
		PAVGB(%%mm1, %%mm2)				// 2L8 + L7 + L9
		"movq %%mm2, (%%ebx, %1, 2)			\n\t"


		: : "r" (src), "r" (stride)
		: "%eax", "%ebx"
	);
#else
	// C fallback: (1,2,1)/4 blend.  Each assignment reads only lines that
	// have not been overwritten yet (lines below the one being written),
	// so the sequential updates are safe.
	int x;
	for(x=0; x<8; x++)
	{
		src[0       ] = (src[0       ] + 2*src[stride  ] + src[stride*2])>>2;
		src[stride  ] = (src[stride  ] + 2*src[stride*2] + src[stride*3])>>2;
		src[stride*2] = (src[stride*2] + 2*src[stride*3] + src[stride*4])>>2;
		src[stride*3] = (src[stride*3] + 2*src[stride*4] + src[stride*5])>>2;
		src[stride*4] = (src[stride*4] + 2*src[stride*5] + src[stride*6])>>2;
		src[stride*5] = (src[stride*5] + 2*src[stride*6] + src[stride*7])>>2;
		src[stride*6] = (src[stride*6] + 2*src[stride*7] + src[stride*8])>>2;
		src[stride*7] = (src[stride*7] + 2*src[stride*8] + src[stride*9])>>2;
		src++;	// next column
	}
#endif
}
2056 | |
/**
 * Deinterlaces the given block
 * will be called for every 8x8 block, in the last row, and can read & write into an 8x8 block
 * will shift the image up by 1 line (FIXME if this is a problem)
 *
 * Last-row variant of deInterlaceBlendLinear: lines 0-5 get the (1,2,1)/4
 * vertical blend, but since no lines exist below this block, line 6 becomes
 * the average of lines 6 and 7, and line 7 duplicates the new line 6.
 */
static inline void deInterlaceBlendLinearLastRow(uint8_t src[], int stride)
{
/* bugfix: was "HAVE_MMSX2" (typo), which silently disabled the MMX2 path */
#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
	asm volatile(
		"leal (%0, %1), %%eax				\n\t"
		"leal (%%eax, %1, 4), %%ebx			\n\t"
//	0	1	2	3	4	5	6	7	8	9
//	%0	eax	eax+%1	eax+2%1	%0+4%1	ebx	ebx+%1	ebx+2%1	%0+8%1	ebx+4%1

		"movq (%0), %%mm0				\n\t" // L0
		"movq (%%eax, %1), %%mm1			\n\t" // L2
		PAVGB(%%mm1, %%mm0)				// L0+L2
		"movq (%%eax), %%mm2				\n\t" // L1
		PAVGB(%%mm2, %%mm0)
		"movq %%mm0, (%0)				\n\t"
		"movq (%%eax, %1, 2), %%mm0			\n\t" // L3
		PAVGB(%%mm0, %%mm2)				// L1+L3
		PAVGB(%%mm1, %%mm2)				// 2L2 + L1 + L3
		"movq %%mm2, (%%eax)				\n\t"
		"movq (%0, %1, 4), %%mm2			\n\t" // L4
		PAVGB(%%mm2, %%mm1)				// L2+L4
		PAVGB(%%mm0, %%mm1)				// 2L3 + L2 + L4
		"movq %%mm1, (%%eax, %1)			\n\t"
		"movq (%%ebx), %%mm1				\n\t" // L5
		PAVGB(%%mm1, %%mm0)				// L3+L5
		PAVGB(%%mm2, %%mm0)				// 2L4 + L3 + L5
		"movq %%mm0, (%%eax, %1, 2)			\n\t"
		"movq (%%ebx, %1), %%mm0			\n\t" // L6
		PAVGB(%%mm0, %%mm2)				// L4+L6
		PAVGB(%%mm1, %%mm2)				// 2L5 + L4 + L6
		"movq %%mm2, (%0, %1, 4)			\n\t"
		"movq (%%ebx, %1, 2), %%mm2			\n\t" // L7
		PAVGB(%%mm2, %%mm1)				// L5+L7
		PAVGB(%%mm0, %%mm1)				// 2L6 + L5 + L7
		"movq %%mm1, (%%ebx)				\n\t"
		PAVGB(%%mm2, %%mm0)				// (L6 + L7)/2
		"movq %%mm0, (%%ebx, %1)			\n\t" // L6 = avg(L6, L7)
		"movq %%mm0, (%%ebx, %1, 2)			\n\t" // L7 = new L6

		: : "r" (src), "r" (stride)
		: "%eax", "%ebx"
	);
#else
	// C fallback: same blend, with the two bottom lines handled specially
	int x;
	for(x=0; x<8; x++)
	{
		src[0       ] = (src[0       ] + 2*src[stride  ] + src[stride*2])>>2;
		src[stride  ] = (src[stride  ] + 2*src[stride*2] + src[stride*3])>>2;
		src[stride*2] = (src[stride*2] + 2*src[stride*3] + src[stride*4])>>2;
		src[stride*3] = (src[stride*3] + 2*src[stride*4] + src[stride*5])>>2;
		src[stride*4] = (src[stride*4] + 2*src[stride*5] + src[stride*6])>>2;
		src[stride*5] = (src[stride*5] + 2*src[stride*6] + src[stride*7])>>2;
		src[stride*6] = (src[stride*6] + src[stride*7])>>1;
		src[stride*7] = src[stride*6];
		src++;	// next column
	}
#endif
}
2120 | |
/* median of three bytes: min( max(c, min(a,b)), max(a,b) ) —
   the same min/max network the MMX2 path below uses */
static inline uint8_t deInterlaceMedian_med3(uint8_t a, uint8_t b, uint8_t c)
{
	uint8_t lo = a < b ? a : b;
	uint8_t hi = a < b ? b : a;
	uint8_t t  = c > lo ? c : lo;
	return t < hi ? t : hi;
}

/**
 * Deinterlaces the given block
 * will be called for every 8x8 block, except the last row, and can read & write into an 8x16 block
 *
 * Median deinterlacer: each odd line (1,3,5,7) is replaced by the per-pixel
 * median of itself and the even lines directly above and below it.  Even
 * lines are left untouched.
 * bugfix: the C fallback previously performed a (1,2,1)/4 blend (marked
 * //FIXME) instead of the median that the MMX2 path computes; it now matches.
 */
static inline void deInterlaceMedian(uint8_t src[], int stride)
{
#if defined (HAVE_MMX2)
	asm volatile(
		"leal (%0, %1), %%eax				\n\t"
		"leal (%%eax, %1, 4), %%ebx			\n\t"
//	0	1	2	3	4	5	6	7	8	9
//	%0	eax	eax+%1	eax+2%1	%0+4%1	ebx	ebx+%1	ebx+2%1	%0+8%1	ebx+4%1

		"movq (%0), %%mm0				\n\t" // L0
		"movq (%%eax, %1), %%mm2			\n\t" // L2
		"movq (%%eax), %%mm1				\n\t" // L1
		"movq %%mm0, %%mm3				\n\t"
		"pmaxub %%mm1, %%mm0				\n\t" // max(L0, L1)
		"pminub %%mm3, %%mm1				\n\t" // min(L0, L1)
		"pmaxub %%mm2, %%mm1				\n\t" // max(L2, min(L0,L1))
		"pminub %%mm1, %%mm0				\n\t" // median(L0, L1, L2)
		"movq %%mm0, (%%eax)				\n\t"

		"movq (%0, %1, 4), %%mm0			\n\t" // L4
		"movq (%%eax, %1, 2), %%mm1			\n\t" // L3
		"movq %%mm2, %%mm3				\n\t"
		"pmaxub %%mm1, %%mm2				\n\t" // max(L2, L3)
		"pminub %%mm3, %%mm1				\n\t" // min(L2, L3)
		"pmaxub %%mm0, %%mm1				\n\t" // max(L4, min(L2,L3))
		"pminub %%mm1, %%mm2				\n\t" // median(L2, L3, L4)
		"movq %%mm2, (%%eax, %1, 2)			\n\t"

		"movq (%%ebx), %%mm2				\n\t" // L5
		"movq (%%ebx, %1), %%mm1			\n\t" // L6
		"movq %%mm2, %%mm3				\n\t"
		"pmaxub %%mm0, %%mm2				\n\t" // max(L4, L5)
		"pminub %%mm3, %%mm0				\n\t" // min(L4, L5)
		"pmaxub %%mm1, %%mm0				\n\t" // max(L6, min(L4,L5))
		"pminub %%mm0, %%mm2				\n\t" // median(L4, L5, L6)
		"movq %%mm2, (%%ebx)				\n\t"

		"movq (%%ebx, %1, 2), %%mm2			\n\t" // L7
		"movq (%0, %1, 8), %%mm0			\n\t" // L8
		"movq %%mm2, %%mm3				\n\t"
		"pmaxub %%mm0, %%mm2				\n\t" // max(L8, L7)
		"pminub %%mm3, %%mm0				\n\t" // min(L8, L7)
		"pmaxub %%mm1, %%mm0				\n\t" // max(L6, min(L7,L8))
		"pminub %%mm0, %%mm2				\n\t" // median(L6, L7, L8)
		"movq %%mm2, (%%ebx, %1, 2)			\n\t"


		: : "r" (src), "r" (stride)
		: "%eax", "%ebx"
	);
#else
	// C fallback: true 3-tap median, matching the MMX2 path above.
	// Only odd lines are written; every read touches lines that are not
	// modified (even lines) or the line's own not-yet-written value.
	int x;
	for(x=0; x<8; x++)
	{
		src[stride  ] = deInterlaceMedian_med3(src[0       ], src[stride  ], src[stride*2]);
		src[stride*3] = deInterlaceMedian_med3(src[stride*2], src[stride*3], src[stride*4]);
		src[stride*5] = deInterlaceMedian_med3(src[stride*4], src[stride*5], src[stride*6]);
		src[stride*7] = deInterlaceMedian_med3(src[stride*6], src[stride*7], src[stride*8]);
		src++;	// next column
	}
#endif
}
2192 | |
/* median of three bytes: min( max(c, min(a,b)), max(a,b) ) —
   same network as the MMX2 path below (name kept distinct from the
   non-last-row variant's helper to avoid a clash) */
static inline uint8_t deInterlaceMedianLastRow_med3(uint8_t a, uint8_t b, uint8_t c)
{
	uint8_t lo = a < b ? a : b;
	uint8_t hi = a < b ? b : a;
	uint8_t t  = c > lo ? c : lo;
	return t < hi ? t : hi;
}

/**
 * Deinterlaces the given block
 * will be called for every 8x8 block, in the last row, and can read & write into an 8x8 block
 * will shift the image up by 1 line (FIXME if this is a problem)
 *
 * Last-row median deinterlacer: lines 1, 3 and 5 become the per-pixel median
 * of themselves and the even lines above/below; line 7 is copied from line 6
 * because no line exists below this block.
 * bugfix: the C fallback previously performed a (1,2,1)/4 blend (marked
 * //FIXME) instead of the median that the MMX2 path computes; it now matches.
 */
static inline void deInterlaceMedianLastRow(uint8_t src[], int stride)
{
#if defined (HAVE_MMX2)
	asm volatile(
		"leal (%0, %1), %%eax				\n\t"
		"leal (%%eax, %1, 4), %%ebx			\n\t"
//	0	1	2	3	4	5	6	7	8	9
//	%0	eax	eax+%1	eax+2%1	%0+4%1	ebx	ebx+%1	ebx+2%1	%0+8%1	ebx+4%1

		"movq (%0), %%mm0				\n\t" // L0
		"movq (%%eax, %1), %%mm2			\n\t" // L2
		"movq (%%eax), %%mm1				\n\t" // L1
		"movq %%mm0, %%mm3				\n\t"
		"pmaxub %%mm1, %%mm0				\n\t" // max(L0, L1)
		"pminub %%mm3, %%mm1				\n\t" // min(L0, L1)
		"pmaxub %%mm2, %%mm1				\n\t" // max(L2, min(L0,L1))
		"pminub %%mm1, %%mm0				\n\t" // median(L0, L1, L2)
		"movq %%mm0, (%%eax)				\n\t"

		"movq (%0, %1, 4), %%mm0			\n\t" // L4
		"movq (%%eax, %1, 2), %%mm1			\n\t" // L3
		"movq %%mm2, %%mm3				\n\t"
		"pmaxub %%mm1, %%mm2				\n\t" // max(L2, L3)
		"pminub %%mm3, %%mm1				\n\t" // min(L2, L3)
		"pmaxub %%mm0, %%mm1				\n\t" // max(L4, min(L2,L3))
		"pminub %%mm1, %%mm2				\n\t" // median(L2, L3, L4)
		"movq %%mm2, (%%eax, %1, 2)			\n\t"

		"movq (%%ebx), %%mm2				\n\t" // L5
		"movq (%%ebx, %1), %%mm1			\n\t" // L6
		"movq %%mm2, %%mm3				\n\t"
		"pmaxub %%mm0, %%mm2				\n\t" // max(L4, L5)
		"pminub %%mm3, %%mm0				\n\t" // min(L4, L5)
		"pmaxub %%mm1, %%mm0				\n\t" // max(L6, min(L4,L5))
		"pminub %%mm0, %%mm2				\n\t" // median(L4, L5, L6)
		"movq %%mm2, (%%ebx)				\n\t"

		"movq %%mm1, (%%ebx, %1, 2)			\n\t" // L7 = L6 (no line below)

		: : "r" (src), "r" (stride)
		: "%eax", "%ebx"
	);
#else
	// C fallback: true 3-tap median for lines 1/3/5, line 7 copied from
	// line 6 — matching the MMX2 path above.
	int x;
	for(x=0; x<8; x++)
	{
		src[stride  ] = deInterlaceMedianLastRow_med3(src[0       ], src[stride  ], src[stride*2]);
		src[stride*3] = deInterlaceMedianLastRow_med3(src[stride*2], src[stride*3], src[stride*4]);
		src[stride*5] = deInterlaceMedianLastRow_med3(src[stride*4], src[stride*5], src[stride*6]);
		src[stride*7] = src[stride*6];
		src++;	// next column
	}
#endif
}
2257 | |
2258 | |
1831 #ifdef HAVE_ODIVX_POSTPROCESS | 2259 #ifdef HAVE_ODIVX_POSTPROCESS |
1832 #include "../opendivx/postprocess.h" | 2260 #include "../opendivx/postprocess.h" |
1833 int use_old_pp=0; | 2261 int use_old_pp=0; |
1834 #endif | 2262 #endif |
1835 | 2263 |
1839 /** | 2267 /** |
1840 * ... | 2268 * ... |
1841 * the mode value is interpreted as a quality value if its negative, its range is then (-1 ... -63) | 2269 * the mode value is interpreted as a quality value if its negative, its range is then (-1 ... -63) |
1842 * -63 is best quality -1 is worst | 2270 * -63 is best quality -1 is worst |
1843 */ | 2271 */ |
1844 //extern "C"{ | |
1845 void postprocess(unsigned char * src[], int src_stride, | 2272 void postprocess(unsigned char * src[], int src_stride, |
1846 unsigned char * dst[], int dst_stride, | 2273 unsigned char * dst[], int dst_stride, |
1847 int horizontal_size, int vertical_size, | 2274 int horizontal_size, int vertical_size, |
1848 QP_STORE_T *QP_store, int QP_stride, | 2275 QP_STORE_T *QP_store, int QP_stride, |
1849 int mode) | 2276 int mode) |
2194 if(!isColor) yHistogram[ srcBlock[0] ]++; | 2621 if(!isColor) yHistogram[ srcBlock[0] ]++; |
2195 | 2622 |
2196 blockCopy(vertBlock + dstStride*2, dstStride, | 2623 blockCopy(vertBlock + dstStride*2, dstStride, |
2197 vertSrcBlock + srcStride*2, srcStride, 8, mode & LEVEL_FIX); | 2624 vertSrcBlock + srcStride*2, srcStride, 8, mode & LEVEL_FIX); |
2198 | 2625 |
2626 if(mode & LINEAR_IPOL_DEINT_FILTER) | |
2627 deInterlaceInterpolateLinear(dstBlock, dstStride); | |
2628 else if(mode & LINEAR_BLEND_DEINT_FILTER) | |
2629 deInterlaceBlendLinear(dstBlock, dstStride); | |
2630 else if(mode & MEDIAN_DEINT_FILTER) | |
2631 deInterlaceMedian(dstBlock, dstStride); | |
2632 /* else if(mode & CUBIC_IPOL_DEINT_FILTER) | |
2633 deInterlaceInterpolateCubic(dstBlock, dstStride); | |
2634 else if(mode & CUBIC_BLEND_DEINT_FILTER) | |
2635 deInterlaceBlendCubic(dstBlock, dstStride); | |
2636 */ | |
2199 | 2637 |
2200 #ifdef MORE_TIMEING | 2638 #ifdef MORE_TIMEING |
2201 T1= rdtsc(); | 2639 T1= rdtsc(); |
2202 memcpyTime+= T1-T0; | 2640 memcpyTime+= T1-T0; |
2203 T0=T1; | 2641 T0=T1; |
2224 vertTime+= T1-T0; | 2662 vertTime+= T1-T0; |
2225 T0=T1; | 2663 T0=T1; |
2226 #endif | 2664 #endif |
2227 } | 2665 } |
2228 else | 2666 else |
2667 { | |
2229 blockCopy(vertBlock + dstStride*1, dstStride, | 2668 blockCopy(vertBlock + dstStride*1, dstStride, |
2230 vertSrcBlock + srcStride*1, srcStride, 4, mode & LEVEL_FIX); | 2669 vertSrcBlock + srcStride*1, srcStride, 4, mode & LEVEL_FIX); |
2231 | 2670 |
2671 if(mode & LINEAR_IPOL_DEINT_FILTER) | |
2672 deInterlaceInterpolateLinearLastRow(dstBlock, dstStride); | |
2673 else if(mode & LINEAR_BLEND_DEINT_FILTER) | |
2674 deInterlaceBlendLinearLastRow(dstBlock, dstStride); | |
2675 else if(mode & MEDIAN_DEINT_FILTER) | |
2676 deInterlaceMedianLastRow(dstBlock, dstStride); | |
2677 /* else if(mode & CUBIC_IPOL_DEINT_FILTER) | |
2678 deInterlaceInterpolateCubicLastRow(dstBlock, dstStride); | |
2679 else if(mode & CUBIC_BLEND_DEINT_FILTER) | |
2680 deInterlaceBlendCubicLastRow(dstBlock, dstStride); | |
2681 */ | |
2682 } | |
2232 | 2683 |
2233 if(x - 8 >= 0 && x<width) | 2684 if(x - 8 >= 0 && x<width) |
2234 { | 2685 { |
2235 #ifdef MORE_TIMEING | 2686 #ifdef MORE_TIMEING |
2236 T0= rdtsc(); | 2687 T0= rdtsc(); |