comparison libpostproc/postprocess_template.c @ 106:389391a6d0bf libavcodec

rewrote the horizontal lowpass filter to fix a bug which caused a blocky look; added deinterlace filters (linear interpolate, linear blend, median); minor cleanups (removed some commented-out stuff)
author michael
date Mon, 15 Oct 2001 03:01:08 +0000
parents a2f94bfb5793
children bd163e13a0fb
comparison
equal deleted inserted replaced
105:a2f94bfb5793 106:389391a6d0bf
15 along with this program; if not, write to the Free Software 15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 16 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */ 17 */
18 18
19 /* 19 /*
20 C MMX MMX2 3DNow* 20 C MMX MMX2 3DNow
21 isVertDC Ec Ec 21 isVertDC Ec Ec
22 isVertMinMaxOk Ec Ec 22 isVertMinMaxOk Ec Ec
23 doVertLowPass E e e* 23 doVertLowPass E e e
24 doVertDefFilter Ec Ec Ec 24 doVertDefFilter Ec Ec Ec
25 isHorizDC Ec Ec 25 isHorizDC Ec Ec
26 isHorizMinMaxOk a 26 isHorizMinMaxOk a
27 doHorizLowPass E a a* 27 doHorizLowPass E a a
28 doHorizDefFilter E ac ac 28 doHorizDefFilter E ac ac
29 deRing 29 deRing
30 Vertical RKAlgo1 E a a* 30 Vertical RKAlgo1 E a a
31 Vertical X1 a E E* 31 Vertical X1 a E E
32 Horizontal X1 a E E* 32 Horizontal X1 a E E
33 LinIpolDeinterlace a E E*
34 LinBlendDeinterlace a E E*
35 MedianDeinterlace a E
33 36
34 37
35 * i dont have a 3dnow CPU -> its untested 38 * i dont have a 3dnow CPU -> its untested
36 E = Exact implementation 39 E = Exact implementation
37 e = almost exact implementation 40 e = almost exact implementation
53 make the mainloop more flexible (variable number of blocks at once 56 make the mainloop more flexible (variable number of blocks at once
54 (the if/else stuff per block is slowing things down) 57 (the if/else stuff per block is slowing things down)
55 compare the quality & speed of all filters 58 compare the quality & speed of all filters
56 implement a few simple deinterlacing filters 59 implement a few simple deinterlacing filters
57 split this huge file 60 split this huge file
61 fix warnings (unused vars, ...)
58 ... 62 ...
59 63
60 Notes: 64 Notes:
61 65
62 */ 66 */
63 67
64 /* 68 /*
65 Changelog: use the CVS log 69 Changelog: use the CVS log
70 rewrote the horizontal lowpass filter to fix a bug which caused a blocky look
71 added deinterlace filters (linear interpolate, linear blend, median)
72 minor cleanups (removed some commented-out stuff)
66 0.1.3 73 0.1.3
67 bugfixes: last 3 lines not brightness/contrast corrected 74 bugfixes: last 3 lines not brightness/contrast corrected
68 brightness statistics messed up with initial black pic 75 brightness statistics messed up with initial black pic
69 changed initial values of the brightness statistics 76 changed initial values of the brightness statistics
70 C++ -> C conversion 77 C++ -> C conversion
192 //FIXME? |255-0| = 1 (shouldnt be a problem ...) 199 //FIXME? |255-0| = 1 (shouldnt be a problem ...)
193 /** 200 /**
194 * Check if the middle 8x8 Block in the given 8x10 block is flat 201 * Check if the middle 8x8 Block in the given 8x10 block is flat
195 */ 202 */
196 static inline int isVertDC(uint8_t src[], int stride){ 203 static inline int isVertDC(uint8_t src[], int stride){
197 // return true;
198 int numEq= 0; 204 int numEq= 0;
199 int y; 205 int y;
200 src+= stride; // src points to begin of the 8x8 Block 206 src+= stride; // src points to begin of the 8x8 Block
201 #ifdef HAVE_MMX 207 #ifdef HAVE_MMX
202 asm volatile( 208 asm volatile(
203 // "int $3 \n\t"
204 "pushl %1\n\t" 209 "pushl %1\n\t"
205 "movq b7E, %%mm7 \n\t" // mm7 = 0x7F 210 "movq b7E, %%mm7 \n\t" // mm7 = 0x7F
206 "movq b7C, %%mm6 \n\t" // mm6 = 0x7D 211 "movq b7C, %%mm6 \n\t" // mm6 = 0x7D
207 "movq (%1), %%mm0 \n\t" 212 "movq (%1), %%mm0 \n\t"
208 "addl %2, %1 \n\t" 213 "addl %2, %1 \n\t"
1575 } 1580 }
1576 #endif 1581 #endif
1577 } 1582 }
1578 1583
1579 /** 1584 /**
1580 * Do a horizontal low pass filter on the 8x8 block 1585 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
1581 * useing the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version) 1586 * useing the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
1582 * useing approximately the 7-Tap Filter (1,2,3,4,3,2,1)/16 (MMX2/3DNOW version) 1587 * useing the 7-Tap Filter (2,2,2,4,2,2,2)/16 (MMX2/3DNOW version)
1583 */ 1588 */
1584 static inline void doHorizLowPassAndCopyBack(uint8_t dst[], int stride, int QP) 1589 static inline void doHorizLowPassAndCopyBack(uint8_t dst[], int stride, int QP)
1585 { 1590 {
1586 //return; 1591 //return;
1587 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) 1592 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
1633 "punpcklbw %%mm2, %%mm2 \n\t"\ 1638 "punpcklbw %%mm2, %%mm2 \n\t"\
1634 "movq %%mm2, (%0) \n\t"\ 1639 "movq %%mm2, (%0) \n\t"\
1635 */ 1640 */
1636 // approximately a 7-Tap Filter with Vector (1,2,3,4,3,2,1)/16 1641 // approximately a 7-Tap Filter with Vector (1,2,3,4,3,2,1)/16
1637 /* 1642 /*
1638 31
1639 121
1640 121
1641 121
1642 121
1643 121
1644 121
1645 13
1646 Implemented Exact 7-Tap 1643 Implemented Exact 7-Tap
1647 9421 A321 1644 9421 A321
1648 36421 64321 1645 36421 64321
1649 334321 = 1646 334321 =
1650 1234321 = 1647 1234321 =
1652 123433 = 1649 123433 =
1653 12463 12346 1650 12463 12346
1654 1249 123A 1651 1249 123A
1655 1652
1656 */ 1653 */
1654
1657 #ifdef HAVE_MMX2 1655 #ifdef HAVE_MMX2
1658 #define HLP3(i) "movq " #i "(%%eax), %%mm0 \n\t"\ 1656 #define HLP3(i) "movq " #i "(%%eax), %%mm0 \n\t"\
1659 "movq %%mm0, %%mm1 \n\t"\ 1657 "movq %%mm0, %%mm1 \n\t"\
1660 "movq %%mm0, %%mm2 \n\t"\ 1658 "movq %%mm0, %%mm2 \n\t"\
1661 "movq %%mm0, %%mm3 \n\t"\ 1659 "movq %%mm0, %%mm3 \n\t"\
1678 "movd %%mm0, 4(%0) \n\t" 1676 "movd %%mm0, 4(%0) \n\t"
1679 #else 1677 #else
1680 #define HLP3(i) "movq " #i "(%%eax), %%mm0 \n\t"\ 1678 #define HLP3(i) "movq " #i "(%%eax), %%mm0 \n\t"\
1681 "movq %%mm0, %%mm1 \n\t"\ 1679 "movq %%mm0, %%mm1 \n\t"\
1682 "movq %%mm0, %%mm2 \n\t"\ 1680 "movq %%mm0, %%mm2 \n\t"\
1683 "movq %%mm0, %%mm3 \n\t"\ 1681 "movd -4(%0), %%mm3 \n\t" /*0001000*/\
1684 "movq %%mm0, %%mm4 \n\t"\ 1682 "movd 8(%0), %%mm4 \n\t" /*0001000*/\
1685 "psllq $8, %%mm1 \n\t"\ 1683 "psllq $8, %%mm1 \n\t"\
1686 "psrlq $8, %%mm2 \n\t"\ 1684 "psrlq $8, %%mm2 \n\t"\
1687 "pand bm00000001, %%mm3 \n\t"\ 1685 "psrlq $24, %%mm3 \n\t"\
1688 "pand bm10000000, %%mm4 \n\t"\ 1686 "psllq $56, %%mm4 \n\t"\
1689 "por %%mm3, %%mm1 \n\t"\ 1687 "por %%mm3, %%mm1 \n\t"\
1690 "por %%mm4, %%mm2 \n\t"\ 1688 "por %%mm4, %%mm2 \n\t"\
1691 PAVGB(%%mm2, %%mm1)\ 1689 PAVGB(%%mm2, %%mm1)\
1692 PAVGB(%%mm1, %%mm0)\ 1690 PAVGB(%%mm1, %%mm0)\
1693 \ 1691 \
1706 "movd %%mm0, (%0) \n\t"\ 1704 "movd %%mm0, (%0) \n\t"\
1707 "psrlq $32, %%mm0 \n\t"\ 1705 "psrlq $32, %%mm0 \n\t"\
1708 "movd %%mm0, 4(%0) \n\t" 1706 "movd %%mm0, 4(%0) \n\t"
1709 #endif 1707 #endif
1710 1708
1711 #define HLP(i) HLP3(i) 1709 /* uses the 7-Tap Filter: 1112111 */
1710 #define NEW_HLP(i)\
1711 "movq " #i "(%%eax), %%mm0 \n\t"\
1712 "movq %%mm0, %%mm1 \n\t"\
1713 "movq %%mm0, %%mm2 \n\t"\
1714 "movd -4(%0), %%mm3 \n\t" /*0001000*/\
1715 "movd 8(%0), %%mm4 \n\t" /*0001000*/\
1716 "psllq $8, %%mm1 \n\t"\
1717 "psrlq $8, %%mm2 \n\t"\
1718 "psrlq $24, %%mm3 \n\t"\
1719 "psllq $56, %%mm4 \n\t"\
1720 "por %%mm3, %%mm1 \n\t"\
1721 "por %%mm4, %%mm2 \n\t"\
1722 "movq %%mm1, %%mm5 \n\t"\
1723 PAVGB(%%mm2, %%mm1)\
1724 PAVGB(%%mm1, %%mm0)\
1725 "psllq $8, %%mm5 \n\t"\
1726 "psrlq $8, %%mm2 \n\t"\
1727 "por %%mm3, %%mm5 \n\t"\
1728 "por %%mm4, %%mm2 \n\t"\
1729 "movq %%mm5, %%mm1 \n\t"\
1730 PAVGB(%%mm2, %%mm5)\
1731 "psllq $8, %%mm1 \n\t"\
1732 "psrlq $8, %%mm2 \n\t"\
1733 "por %%mm3, %%mm1 \n\t"\
1734 "por %%mm4, %%mm2 \n\t"\
1735 PAVGB(%%mm2, %%mm1)\
1736 PAVGB(%%mm1, %%mm5)\
1737 PAVGB(%%mm5, %%mm0)\
1738 "movd %%mm0, (%0) \n\t"\
1739 "psrlq $32, %%mm0 \n\t"\
1740 "movd %%mm0, 4(%0) \n\t"
1741
1742 /* uses the 9-Tap Filter: 112242211 */
1743 #define NEW_HLP2(i)\
1744 "movq " #i "(%%eax), %%mm0 \n\t" /*0001000*/\
1745 "movq %%mm0, %%mm1 \n\t" /*0001000*/\
1746 "movq %%mm0, %%mm2 \n\t" /*0001000*/\
1747 "movd -4(%0), %%mm3 \n\t" /*0001000*/\
1748 "movd 8(%0), %%mm4 \n\t" /*0001000*/\
1749 "psllq $8, %%mm1 \n\t"\
1750 "psrlq $8, %%mm2 \n\t"\
1751 "psrlq $24, %%mm3 \n\t"\
1752 "psllq $56, %%mm4 \n\t"\
1753 "por %%mm3, %%mm1 \n\t" /*0010000*/\
1754 "por %%mm4, %%mm2 \n\t" /*0000100*/\
1755 "movq %%mm1, %%mm5 \n\t" /*0010000*/\
1756 PAVGB(%%mm2, %%mm1) /*0010100*/\
1757 PAVGB(%%mm1, %%mm0) /*0012100*/\
1758 "psllq $8, %%mm5 \n\t"\
1759 "psrlq $8, %%mm2 \n\t"\
1760 "por %%mm3, %%mm5 \n\t" /*0100000*/\
1761 "por %%mm4, %%mm2 \n\t" /*0000010*/\
1762 "movq %%mm5, %%mm1 \n\t" /*0100000*/\
1763 PAVGB(%%mm2, %%mm5) /*0100010*/\
1764 "psllq $8, %%mm1 \n\t"\
1765 "psrlq $8, %%mm2 \n\t"\
1766 "por %%mm3, %%mm1 \n\t" /*1000000*/\
1767 "por %%mm4, %%mm2 \n\t" /*0000001*/\
1768 "movq %%mm1, %%mm6 \n\t" /*1000000*/\
1769 PAVGB(%%mm2, %%mm1) /*1000001*/\
1770 "psllq $8, %%mm6 \n\t"\
1771 "psrlq $8, %%mm2 \n\t"\
1772 "por %%mm3, %%mm6 \n\t"/*100000000*/\
1773 "por %%mm4, %%mm2 \n\t"/*000000001*/\
1774 PAVGB(%%mm2, %%mm6) /*100000001*/\
1775 PAVGB(%%mm6, %%mm1) /*110000011*/\
1776 PAVGB(%%mm1, %%mm5) /*112000211*/\
1777 PAVGB(%%mm5, %%mm0) /*112242211*/\
1778 "movd %%mm0, (%0) \n\t"\
1779 "psrlq $32, %%mm0 \n\t"\
1780 "movd %%mm0, 4(%0) \n\t"
1781
1782 #define HLP(i) NEW_HLP(i)
1712 1783
1713 HLP(0) 1784 HLP(0)
1714 "addl %1, %0 \n\t" 1785 "addl %1, %0 \n\t"
1715 HLP(8) 1786 HLP(8)
1716 "addl %1, %0 \n\t" 1787 "addl %1, %0 \n\t"
1826 1897
1827 //FIXME 1898 //FIXME
1828 #endif 1899 #endif
1829 } 1900 }
1830 1901
/**
 * Deinterlaces the given block by linear interpolation.
 * Lines 1, 3, 5 and 7 are each replaced by the average of the line directly above
 * and the line directly below; lines 0, 2, 4, 6 and 8 are only read.
 * will be called for every 8x8 block, except the last row, and can read & write into an 8x16 block
 * (line 8, the first line below the 8x8 block, is read for the last average)
 * The C version truncates the average ((a+b)>>1).
 *
 * @param src    pointer to the top left pixel of the block (8 pixels are processed per line)
 * @param stride distance in bytes between two consecutive lines
 */
static inline void deInterlaceInterpolateLinear(uint8_t src[], int stride)
{
#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
	asm volatile(
		"leal (%0, %1), %%eax \n\t"
		"leal (%%eax, %1, 4), %%ebx \n\t"
//	line:	0	1	2	3	4	5	6	7	8	9
//	addr:	%0	eax	eax+%1	eax+2%1	%0+4%1	ebx	ebx+%1	ebx+2%1	%0+8%1	ebx+4%1

		"movq (%0), %%mm0 \n\t" // L0
		"movq (%%eax, %1), %%mm1 \n\t" // L2
		PAVGB(%%mm1, %%mm0) // L0+L2
		"movq %%mm0, (%%eax) \n\t" // -> line 1
		"movq (%0, %1, 4), %%mm0 \n\t" // L4
		PAVGB(%%mm0, %%mm1) // L2+L4
		"movq %%mm1, (%%eax, %1, 2) \n\t" // -> line 3
		"movq (%%ebx, %1), %%mm1 \n\t" // L6
		PAVGB(%%mm1, %%mm0) // L4+L6
		"movq %%mm0, (%%ebx) \n\t" // -> line 5
		"movq (%0, %1, 8), %%mm0 \n\t" // L8
		PAVGB(%%mm0, %%mm1) // L6+L8
		"movq %%mm1, (%%ebx, %1, 2) \n\t" // -> line 7

		: : "r" (src), "r" (stride)
		// "memory": the asm stores through src, which is only passed as an input operand
		: "%eax", "%ebx", "memory"
	);
#else
	int x;
	for(x=0; x<8; x++)
	{
		// odd line = truncated mean of its two even neighbours
		src[stride  ] = (src[0       ] + src[stride*2])>>1;
		src[stride*3] = (src[stride*2] + src[stride*4])>>1;
		src[stride*5] = (src[stride*4] + src[stride*6])>>1;
		src[stride*7] = (src[stride*6] + src[stride*8])>>1;
		src++;
	}
#endif
}
1944
/**
 * Deinterlaces the given block by linear interpolation (variant for the last block row).
 * Lines 1, 3 and 5 are replaced by the average of the lines directly above and below;
 * line 7 has no line below it inside the block and is replaced by a copy of line 6.
 * will be called for every 8x8 block, in the last row, and can read & write into an 8x8 block
 * The C version truncates the average ((a+b)>>1).
 *
 * @param src    pointer to the top left pixel of the block (8 pixels are processed per line)
 * @param stride distance in bytes between two consecutive lines
 */
static inline void deInterlaceInterpolateLinearLastRow(uint8_t src[], int stride)
{
#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
	asm volatile(
		"leal (%0, %1), %%eax \n\t"
		"leal (%%eax, %1, 4), %%ebx \n\t"
//	line:	0	1	2	3	4	5	6	7	8	9
//	addr:	%0	eax	eax+%1	eax+2%1	%0+4%1	ebx	ebx+%1	ebx+2%1	%0+8%1	ebx+4%1

		"movq (%0), %%mm0 \n\t" // L0
		"movq (%%eax, %1), %%mm1 \n\t" // L2
		PAVGB(%%mm1, %%mm0) // L0+L2
		"movq %%mm0, (%%eax) \n\t" // -> line 1
		"movq (%0, %1, 4), %%mm0 \n\t" // L4
		PAVGB(%%mm0, %%mm1) // L2+L4
		"movq %%mm1, (%%eax, %1, 2) \n\t" // -> line 3
		"movq (%%ebx, %1), %%mm1 \n\t" // L6
		PAVGB(%%mm1, %%mm0) // L4+L6
		"movq %%mm0, (%%ebx) \n\t" // -> line 5
		"movq %%mm1, (%%ebx, %1, 2) \n\t" // L6 -> line 7 (no line 8 available)

		: : "r" (src), "r" (stride)
		// "memory": the asm stores through src, which is only passed as an input operand
		: "%eax", "%ebx", "memory"
	);
#else
	int x;
	for(x=0; x<8; x++)
	{
		src[stride  ] = (src[0       ] + src[stride*2])>>1;
		src[stride*3] = (src[stride*2] + src[stride*4])>>1;
		src[stride*5] = (src[stride*4] + src[stride*6])>>1;
		// nothing below line 7 inside the block: duplicate line 6
		src[stride*7] = src[stride*6];
		src++;
	}
#endif
}
1986
/**
 * Deinterlaces the given block with a vertical (1,2,1)/4 blend.
 * Line k (k = 0..7) is replaced by a blend of the original lines k, k+1 and k+2
 * with weights 1, 2, 1, so lines 8 and 9 below the 8x8 block are read.
 * will be called for every 8x8 block, except the last row, and can read & write into an 8x16 block
 * will shift the image up by 1 line (FIXME if this is a problem)
 * The C version truncates ((a + 2*b + c)>>2).
 *
 * @param src    pointer to the top left pixel of the block (8 pixels are processed per line)
 * @param stride distance in bytes between two consecutive lines
 */
static inline void deInterlaceBlendLinear(uint8_t src[], int stride)
{
#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
	asm volatile(
		"leal (%0, %1), %%eax \n\t"
		"leal (%%eax, %1, 4), %%ebx \n\t"
//	line:	0	1	2	3	4	5	6	7	8	9
//	addr:	%0	eax	eax+%1	eax+2%1	%0+4%1	ebx	ebx+%1	ebx+2%1	%0+8%1	ebx+4%1

		"movq (%0), %%mm0 \n\t" // L0
		"movq (%%eax, %1), %%mm1 \n\t" // L2
		PAVGB(%%mm1, %%mm0) // L0+L2
		"movq (%%eax), %%mm2 \n\t" // L1
		PAVGB(%%mm2, %%mm0) // 2L1 + L0 + L2
		"movq %%mm0, (%0) \n\t"
		"movq (%%eax, %1, 2), %%mm0 \n\t" // L3
		PAVGB(%%mm0, %%mm2) // L1+L3
		PAVGB(%%mm1, %%mm2) // 2L2 + L1 + L3
		"movq %%mm2, (%%eax) \n\t"
		"movq (%0, %1, 4), %%mm2 \n\t" // L4
		PAVGB(%%mm2, %%mm1) // L2+L4
		PAVGB(%%mm0, %%mm1) // 2L3 + L2 + L4
		"movq %%mm1, (%%eax, %1) \n\t"
		"movq (%%ebx), %%mm1 \n\t" // L5
		PAVGB(%%mm1, %%mm0) // L3+L5
		PAVGB(%%mm2, %%mm0) // 2L4 + L3 + L5
		"movq %%mm0, (%%eax, %1, 2) \n\t"
		"movq (%%ebx, %1), %%mm0 \n\t" // L6
		PAVGB(%%mm0, %%mm2) // L4+L6
		PAVGB(%%mm1, %%mm2) // 2L5 + L4 + L6
		"movq %%mm2, (%0, %1, 4) \n\t"
		"movq (%%ebx, %1, 2), %%mm2 \n\t" // L7
		PAVGB(%%mm2, %%mm1) // L5+L7
		PAVGB(%%mm0, %%mm1) // 2L6 + L5 + L7
		"movq %%mm1, (%%ebx) \n\t"
		"movq (%0, %1, 8), %%mm1 \n\t" // L8
		PAVGB(%%mm1, %%mm0) // L6+L8
		PAVGB(%%mm2, %%mm0) // 2L7 + L6 + L8
		"movq %%mm0, (%%ebx, %1) \n\t"
		"movq (%%ebx, %1, 4), %%mm0 \n\t" // L9
		PAVGB(%%mm0, %%mm2) // L7+L9
		PAVGB(%%mm1, %%mm2) // 2L8 + L7 + L9
		"movq %%mm2, (%%ebx, %1, 2) \n\t"

		: : "r" (src), "r" (stride)
		// "memory": the asm stores through src, which is only passed as an input operand
		: "%eax", "%ebx", "memory"
	);
#else
	int x;
	for(x=0; x<8; x++)
	{
		// each line only depends on itself and the two lines below it, which have
		// not been overwritten yet, so the filter can safely run in place top-down
		src[0       ] = (src[0       ] + 2*src[stride  ] + src[stride*2])>>2;
		src[stride  ] = (src[stride  ] + 2*src[stride*2] + src[stride*3])>>2;
		src[stride*2] = (src[stride*2] + 2*src[stride*3] + src[stride*4])>>2;
		src[stride*3] = (src[stride*3] + 2*src[stride*4] + src[stride*5])>>2;
		src[stride*4] = (src[stride*4] + 2*src[stride*5] + src[stride*6])>>2;
		src[stride*5] = (src[stride*5] + 2*src[stride*6] + src[stride*7])>>2;
		src[stride*6] = (src[stride*6] + 2*src[stride*7] + src[stride*8])>>2;
		src[stride*7] = (src[stride*7] + 2*src[stride*8] + src[stride*9])>>2;
		src++;
	}
#endif
}
2056
/**
 * Deinterlaces the given block with a vertical (1,2,1)/4 blend (variant for the last block row).
 * Lines 0..5 are replaced by a blend of the original lines k, k+1 and k+2 with
 * weights 1, 2, 1; lines 6 and 7 are both replaced by the average of the original
 * lines 6 and 7, because no lines below the block are available.
 * will be called for every 8x8 block, in the last row, and can read & write into an 8x8 block
 * will shift the image up by 1 line (FIXME if this is a problem)
 * The C version truncates its averages.
 *
 * @param src    pointer to the top left pixel of the block (8 pixels are processed per line)
 * @param stride distance in bytes between two consecutive lines
 */
static inline void deInterlaceBlendLinearLastRow(uint8_t src[], int stride)
{
#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
	asm volatile(
		"leal (%0, %1), %%eax \n\t"
		"leal (%%eax, %1, 4), %%ebx \n\t"
//	line:	0	1	2	3	4	5	6	7	8	9
//	addr:	%0	eax	eax+%1	eax+2%1	%0+4%1	ebx	ebx+%1	ebx+2%1	%0+8%1	ebx+4%1

		"movq (%0), %%mm0 \n\t" // L0
		"movq (%%eax, %1), %%mm1 \n\t" // L2
		PAVGB(%%mm1, %%mm0) // L0+L2
		"movq (%%eax), %%mm2 \n\t" // L1
		PAVGB(%%mm2, %%mm0) // 2L1 + L0 + L2
		"movq %%mm0, (%0) \n\t"
		"movq (%%eax, %1, 2), %%mm0 \n\t" // L3
		PAVGB(%%mm0, %%mm2) // L1+L3
		PAVGB(%%mm1, %%mm2) // 2L2 + L1 + L3
		"movq %%mm2, (%%eax) \n\t"
		"movq (%0, %1, 4), %%mm2 \n\t" // L4
		PAVGB(%%mm2, %%mm1) // L2+L4
		PAVGB(%%mm0, %%mm1) // 2L3 + L2 + L4
		"movq %%mm1, (%%eax, %1) \n\t"
		"movq (%%ebx), %%mm1 \n\t" // L5
		PAVGB(%%mm1, %%mm0) // L3+L5
		PAVGB(%%mm2, %%mm0) // 2L4 + L3 + L5
		"movq %%mm0, (%%eax, %1, 2) \n\t"
		"movq (%%ebx, %1), %%mm0 \n\t" // L6
		PAVGB(%%mm0, %%mm2) // L4+L6
		PAVGB(%%mm1, %%mm2) // 2L5 + L4 + L6
		"movq %%mm2, (%0, %1, 4) \n\t"
		"movq (%%ebx, %1, 2), %%mm2 \n\t" // L7
		PAVGB(%%mm2, %%mm1) // L5+L7
		PAVGB(%%mm0, %%mm1) // 2L6 + L5 + L7
		"movq %%mm1, (%%ebx) \n\t"
		PAVGB(%%mm2, %%mm0) // L6 + L7
		"movq %%mm0, (%%ebx, %1) \n\t" // -> line 6
		"movq %%mm0, (%%ebx, %1, 2) \n\t" // -> line 7

		: : "r" (src), "r" (stride)
		// "memory": the asm stores through src, which is only passed as an input operand
		: "%eax", "%ebx", "memory"
	);
#else
	int x;
	for(x=0; x<8; x++)
	{
		src[0       ] = (src[0       ] + 2*src[stride  ] + src[stride*2])>>2;
		src[stride  ] = (src[stride  ] + 2*src[stride*2] + src[stride*3])>>2;
		src[stride*2] = (src[stride*2] + 2*src[stride*3] + src[stride*4])>>2;
		src[stride*3] = (src[stride*3] + 2*src[stride*4] + src[stride*5])>>2;
		src[stride*4] = (src[stride*4] + 2*src[stride*5] + src[stride*6])>>2;
		src[stride*5] = (src[stride*5] + 2*src[stride*6] + src[stride*7])>>2;
		// no lines 8/9 in the last row: fall back to a 2-tap average and duplicate it
		src[stride*6] = (src[stride*6] + src[stride*7])>>1;
		src[stride*7] = src[stride*6];
		src++;
	}
#endif
}
2120
/**
 * Deinterlaces the given block with a vertical 3-tap median filter.
 * Lines 1, 3, 5 and 7 are each replaced by the median of the line above, the line
 * itself and the line below; lines 0, 2, 4, 6 and 8 are only read.
 * will be called for every 8x8 block, except the last row, and can read & write into an 8x16 block
 * (line 8, the first line below the 8x8 block, is read for the last median)
 *
 * @param src    pointer to the top left pixel of the block (8 pixels are processed per line)
 * @param stride distance in bytes between two consecutive lines
 */
static inline void deInterlaceMedian(uint8_t src[], int stride)
{
#if defined (HAVE_MMX2)
	asm volatile(
		"leal (%0, %1), %%eax \n\t"
		"leal (%%eax, %1, 4), %%ebx \n\t"
//	line:	0	1	2	3	4	5	6	7	8	9
//	addr:	%0	eax	eax+%1	eax+2%1	%0+4%1	ebx	ebx+%1	ebx+2%1	%0+8%1	ebx+4%1

		// median(a,b,c) = min( max(a,b), max( min(a,b), c ) )
		"movq (%0), %%mm0 \n\t" // L0
		"movq (%%eax, %1), %%mm2 \n\t" // L2
		"movq (%%eax), %%mm1 \n\t" // L1
		"movq %%mm0, %%mm3 \n\t"
		"pmaxub %%mm1, %%mm0 \n\t" // max(L0, L1)
		"pminub %%mm3, %%mm1 \n\t" // min(L0, L1)
		"pmaxub %%mm2, %%mm1 \n\t" // max(min(L0, L1), L2)
		"pminub %%mm1, %%mm0 \n\t"
		"movq %%mm0, (%%eax) \n\t" // -> line 1

		"movq (%0, %1, 4), %%mm0 \n\t" // L4
		"movq (%%eax, %1, 2), %%mm1 \n\t" // L3
		"movq %%mm2, %%mm3 \n\t"
		"pmaxub %%mm1, %%mm2 \n\t" // max(L2, L3)
		"pminub %%mm3, %%mm1 \n\t" // min(L2, L3)
		"pmaxub %%mm0, %%mm1 \n\t" // max(min(L2, L3), L4)
		"pminub %%mm1, %%mm2 \n\t"
		"movq %%mm2, (%%eax, %1, 2) \n\t" // -> line 3

		"movq (%%ebx), %%mm2 \n\t" // L5
		"movq (%%ebx, %1), %%mm1 \n\t" // L6
		"movq %%mm2, %%mm3 \n\t"
		"pmaxub %%mm0, %%mm2 \n\t" // max(L4, L5)
		"pminub %%mm3, %%mm0 \n\t" // min(L4, L5)
		"pmaxub %%mm1, %%mm0 \n\t" // max(min(L4, L5), L6)
		"pminub %%mm0, %%mm2 \n\t"
		"movq %%mm2, (%%ebx) \n\t" // -> line 5

		"movq (%%ebx, %1, 2), %%mm2 \n\t" // L7
		"movq (%0, %1, 8), %%mm0 \n\t" // L8
		"movq %%mm2, %%mm3 \n\t"
		"pmaxub %%mm0, %%mm2 \n\t" // max(L7, L8)
		"pminub %%mm3, %%mm0 \n\t" // min(L7, L8)
		"pmaxub %%mm1, %%mm0 \n\t" // max(min(L7, L8), L6)
		"pminub %%mm0, %%mm2 \n\t"
		"movq %%mm2, (%%ebx, %1, 2) \n\t" // -> line 7

		: : "r" (src), "r" (stride)
		// "memory": the asm stores through src, which is only passed as an input operand
		: "%eax", "%ebx", "memory"
	);
#else
	// C version of the same filter as the MMX2 code above
	// (this used to be a copy of the linear blend filter)
	int x, y;
	for(x=0; x<8; x++)
	{
		uint8_t *line= src + x; // always points to an even line of column x
		for(y=0; y<4; y++)
		{
			const int above = line[0];
			const int middle= line[stride];
			const int below = line[stride*2];
			const int lo= above < middle ? above : middle;
			const int hi= above < middle ? middle : above;
			const int t = lo > below ? lo : below; // max(min(above, middle), below)
			// only odd lines are written, so the even lines read here are still original
			line[stride]= (uint8_t)(hi < t ? hi : t);
			line+= stride*2;
		}
	}
#endif
}
2192
/**
 * Deinterlaces the given block with a vertical 3-tap median filter (variant for the last block row).
 * Lines 1, 3 and 5 are each replaced by the median of the line above, the line itself
 * and the line below; line 7 has no line below it inside the block and is replaced
 * by a copy of line 6. Lines 0, 2, 4 and 6 are only read.
 * will be called for every 8x8 block, in the last row, and can read & write into an 8x8 block
 *
 * @param src    pointer to the top left pixel of the block (8 pixels are processed per line)
 * @param stride distance in bytes between two consecutive lines
 */
static inline void deInterlaceMedianLastRow(uint8_t src[], int stride)
{
#if defined (HAVE_MMX2)
	asm volatile(
		"leal (%0, %1), %%eax \n\t"
		"leal (%%eax, %1, 4), %%ebx \n\t"
//	line:	0	1	2	3	4	5	6	7	8	9
//	addr:	%0	eax	eax+%1	eax+2%1	%0+4%1	ebx	ebx+%1	ebx+2%1	%0+8%1	ebx+4%1

		// median(a,b,c) = min( max(a,b), max( min(a,b), c ) )
		"movq (%0), %%mm0 \n\t" // L0
		"movq (%%eax, %1), %%mm2 \n\t" // L2
		"movq (%%eax), %%mm1 \n\t" // L1
		"movq %%mm0, %%mm3 \n\t"
		"pmaxub %%mm1, %%mm0 \n\t" // max(L0, L1)
		"pminub %%mm3, %%mm1 \n\t" // min(L0, L1)
		"pmaxub %%mm2, %%mm1 \n\t" // max(min(L0, L1), L2)
		"pminub %%mm1, %%mm0 \n\t"
		"movq %%mm0, (%%eax) \n\t" // -> line 1

		"movq (%0, %1, 4), %%mm0 \n\t" // L4
		"movq (%%eax, %1, 2), %%mm1 \n\t" // L3
		"movq %%mm2, %%mm3 \n\t"
		"pmaxub %%mm1, %%mm2 \n\t" // max(L2, L3)
		"pminub %%mm3, %%mm1 \n\t" // min(L2, L3)
		"pmaxub %%mm0, %%mm1 \n\t" // max(min(L2, L3), L4)
		"pminub %%mm1, %%mm2 \n\t"
		"movq %%mm2, (%%eax, %1, 2) \n\t" // -> line 3

		"movq (%%ebx), %%mm2 \n\t" // L5
		"movq (%%ebx, %1), %%mm1 \n\t" // L6
		"movq %%mm2, %%mm3 \n\t"
		"pmaxub %%mm0, %%mm2 \n\t" // max(L4, L5)
		"pminub %%mm3, %%mm0 \n\t" // min(L4, L5)
		"pmaxub %%mm1, %%mm0 \n\t" // max(min(L4, L5), L6)
		"pminub %%mm0, %%mm2 \n\t"
		"movq %%mm2, (%%ebx) \n\t" // -> line 5

		"movq %%mm1, (%%ebx, %1, 2) \n\t" // L6 -> line 7 (no line 8 available)

		: : "r" (src), "r" (stride)
		// "memory": the asm stores through src, which is only passed as an input operand
		: "%eax", "%ebx", "memory"
	);
#else
	// C version of the same filter as the MMX2 code above
	// (this used to be a copy of the linear blend filter)
	int x, y;
	for(x=0; x<8; x++)
	{
		uint8_t *line= src + x; // always points to an even line of column x
		for(y=0; y<3; y++)
		{
			const int above = line[0];
			const int middle= line[stride];
			const int below = line[stride*2];
			const int lo= above < middle ? above : middle;
			const int hi= above < middle ? middle : above;
			const int t = lo > below ? lo : below; // max(min(above, middle), below)
			// only odd lines are written, so the even lines read here are still original
			line[stride]= (uint8_t)(hi < t ? hi : t);
			line+= stride*2;
		}
		// line now points to line 6; nothing below line 7 inside the block: duplicate line 6
		line[stride]= line[0];
	}
#endif
}
2257
2258
1831 #ifdef HAVE_ODIVX_POSTPROCESS 2259 #ifdef HAVE_ODIVX_POSTPROCESS
1832 #include "../opendivx/postprocess.h" 2260 #include "../opendivx/postprocess.h"
1833 int use_old_pp=0; 2261 int use_old_pp=0;
1834 #endif 2262 #endif
1835 2263
1839 /** 2267 /**
1840 * ... 2268 * ...
1841 * the mode value is interpreted as a quality value if its negative, its range is then (-1 ... -63) 2269 * the mode value is interpreted as a quality value if its negative, its range is then (-1 ... -63)
1842 * -63 is best quality -1 is worst 2270 * -63 is best quality -1 is worst
1843 */ 2271 */
1844 //extern "C"{
1845 void postprocess(unsigned char * src[], int src_stride, 2272 void postprocess(unsigned char * src[], int src_stride,
1846 unsigned char * dst[], int dst_stride, 2273 unsigned char * dst[], int dst_stride,
1847 int horizontal_size, int vertical_size, 2274 int horizontal_size, int vertical_size,
1848 QP_STORE_T *QP_store, int QP_stride, 2275 QP_STORE_T *QP_store, int QP_stride,
1849 int mode) 2276 int mode)
2194 if(!isColor) yHistogram[ srcBlock[0] ]++; 2621 if(!isColor) yHistogram[ srcBlock[0] ]++;
2195 2622
2196 blockCopy(vertBlock + dstStride*2, dstStride, 2623 blockCopy(vertBlock + dstStride*2, dstStride,
2197 vertSrcBlock + srcStride*2, srcStride, 8, mode & LEVEL_FIX); 2624 vertSrcBlock + srcStride*2, srcStride, 8, mode & LEVEL_FIX);
2198 2625
2626 if(mode & LINEAR_IPOL_DEINT_FILTER)
2627 deInterlaceInterpolateLinear(dstBlock, dstStride);
2628 else if(mode & LINEAR_BLEND_DEINT_FILTER)
2629 deInterlaceBlendLinear(dstBlock, dstStride);
2630 else if(mode & MEDIAN_DEINT_FILTER)
2631 deInterlaceMedian(dstBlock, dstStride);
2632 /* else if(mode & CUBIC_IPOL_DEINT_FILTER)
2633 deInterlaceInterpolateCubic(dstBlock, dstStride);
2634 else if(mode & CUBIC_BLEND_DEINT_FILTER)
2635 deInterlaceBlendCubic(dstBlock, dstStride);
2636 */
2199 2637
2200 #ifdef MORE_TIMEING 2638 #ifdef MORE_TIMEING
2201 T1= rdtsc(); 2639 T1= rdtsc();
2202 memcpyTime+= T1-T0; 2640 memcpyTime+= T1-T0;
2203 T0=T1; 2641 T0=T1;
2224 vertTime+= T1-T0; 2662 vertTime+= T1-T0;
2225 T0=T1; 2663 T0=T1;
2226 #endif 2664 #endif
2227 } 2665 }
2228 else 2666 else
2667 {
2229 blockCopy(vertBlock + dstStride*1, dstStride, 2668 blockCopy(vertBlock + dstStride*1, dstStride,
2230 vertSrcBlock + srcStride*1, srcStride, 4, mode & LEVEL_FIX); 2669 vertSrcBlock + srcStride*1, srcStride, 4, mode & LEVEL_FIX);
2231 2670
2671 if(mode & LINEAR_IPOL_DEINT_FILTER)
2672 deInterlaceInterpolateLinearLastRow(dstBlock, dstStride);
2673 else if(mode & LINEAR_BLEND_DEINT_FILTER)
2674 deInterlaceBlendLinearLastRow(dstBlock, dstStride);
2675 else if(mode & MEDIAN_DEINT_FILTER)
2676 deInterlaceMedianLastRow(dstBlock, dstStride);
2677 /* else if(mode & CUBIC_IPOL_DEINT_FILTER)
2678 deInterlaceInterpolateCubicLastRow(dstBlock, dstStride);
2679 else if(mode & CUBIC_BLEND_DEINT_FILTER)
2680 deInterlaceBlendCubicLastRow(dstBlock, dstStride);
2681 */
2682 }
2232 2683
2233 if(x - 8 >= 0 && x<width) 2684 if(x - 8 >= 0 && x<width)
2234 { 2685 {
2235 #ifdef MORE_TIMEING 2686 #ifdef MORE_TIMEING
2236 T0= rdtsc(); 2687 T0= rdtsc();