Mercurial > mplayer.hg
comparison postproc/postprocess.c @ 2570:af43a83122fc
minor speedup
cleanup
author | michael |
---|---|
date | Tue, 30 Oct 2001 21:14:02 +0000 |
parents | 42d5846eeb51 |
children | 3b05a6b4d870 |
Comparison legend: equal, deleted, inserted, replaced
2569:30b736e7feef | 2570:af43a83122fc |
---|---|
60 compare the quality & speed of all filters | 60 compare the quality & speed of all filters |
61 split this huge file | 61 split this huge file |
62 fix warnings (unused vars, ...) | 62 fix warnings (unused vars, ...) |
63 noise reduction filters | 63 noise reduction filters |
64 border remover | 64 border remover |
65 optimize c versions | |
65 ... | 66 ... |
66 | 67 |
67 Notes: | 68 Notes: |
68 */ | 69 */ |
69 | 70 |
415 static inline void doVertLowPass(uint8_t *src, int stride, int QP) | 416 static inline void doVertLowPass(uint8_t *src, int stride, int QP) |
416 { | 417 { |
417 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) | 418 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) |
418 src+= stride*3; | 419 src+= stride*3; |
419 asm volatile( //"movv %0 %1 %2\n\t" | 420 asm volatile( //"movv %0 %1 %2\n\t" |
420 "pushl %0 \n\t" | |
421 "movq pQPb, %%mm0 \n\t" // QP,..., QP | 421 "movq pQPb, %%mm0 \n\t" // QP,..., QP |
422 | 422 |
423 "movq (%0), %%mm6 \n\t" | 423 "movq (%0), %%mm6 \n\t" |
424 "movq (%0, %1), %%mm5 \n\t" | 424 "movq (%0, %1), %%mm5 \n\t" |
425 "movq %%mm5, %%mm1 \n\t" | 425 "movq %%mm5, %%mm1 \n\t" |
533 PAVGB(%%mm7, %%mm5) // 11 6 /8 | 533 PAVGB(%%mm7, %%mm5) // 11 6 /8 |
534 | 534 |
535 PAVGB(%%mm3, %%mm0) // 112 /4 | 535 PAVGB(%%mm3, %%mm0) // 112 /4 |
536 PAVGB(%%mm0, %%mm5) // 112246 /16 | 536 PAVGB(%%mm0, %%mm5) // 112246 /16 |
537 "movq %%mm5, (%%eax, %1, 4) \n\t" // X | 537 "movq %%mm5, (%%eax, %1, 4) \n\t" // X |
538 "popl %0\n\t" | 538 "subl %1, %0 \n\t" |
539 | 539 |
540 : | 540 : |
541 : "r" (src), "r" (stride) | 541 : "r" (src), "r" (stride) |
542 : "%eax", "%ebx" | 542 : "%eax", "%ebx" |
543 ); | 543 ); |
1165 "psubw %%mm2, %%mm0 \n\t" // 2L4 - 5L5 + 5L6 - 2L7 | 1165 "psubw %%mm2, %%mm0 \n\t" // 2L4 - 5L5 + 5L6 - 2L7 |
1166 "psubw %%mm3, %%mm1 \n\t" // 2H4 - 5H5 + 5H6 - 2H7 | 1166 "psubw %%mm3, %%mm1 \n\t" // 2H4 - 5H5 + 5H6 - 2H7 |
1167 | 1167 |
1168 "movq temp0, %%mm2 \n\t" // 2L0 - 5L1 + 5L2 - 2L3 | 1168 "movq temp0, %%mm2 \n\t" // 2L0 - 5L1 + 5L2 - 2L3 |
1169 "movq temp1, %%mm3 \n\t" // 2H0 - 5H1 + 5H2 - 2H3 | 1169 "movq temp1, %%mm3 \n\t" // 2H0 - 5H1 + 5H2 - 2H3 |
1170 //FIXME pxor, psubw, pmax for abs | 1170 |
1171 #ifdef HAVE_MMX2 | |
1172 "movq %%mm7, %%mm6 \n\t" // 0 | |
1173 "psubw %%mm0, %%mm6 \n\t" | |
1174 "pmaxsw %%mm6, %%mm0 \n\t" // |2L4 - 5L5 + 5L6 - 2L7| | |
1175 "movq %%mm7, %%mm6 \n\t" // 0 | |
1176 "psubw %%mm1, %%mm6 \n\t" | |
1177 "pmaxsw %%mm6, %%mm1 \n\t" // |2H4 - 5H5 + 5H6 - 2H7| | |
1178 "movq %%mm7, %%mm6 \n\t" // 0 | |
1179 "psubw %%mm2, %%mm6 \n\t" | |
1180 "pmaxsw %%mm6, %%mm2 \n\t" // |2L0 - 5L1 + 5L2 - 2L3| | |
1181 "movq %%mm7, %%mm6 \n\t" // 0 | |
1182 "psubw %%mm3, %%mm6 \n\t" | |
1183 "pmaxsw %%mm6, %%mm3 \n\t" // |2H0 - 5H1 + 5H2 - 2H3| | |
1184 #else | |
1171 "movq %%mm7, %%mm6 \n\t" // 0 | 1185 "movq %%mm7, %%mm6 \n\t" // 0 |
1172 "pcmpgtw %%mm0, %%mm6 \n\t" | 1186 "pcmpgtw %%mm0, %%mm6 \n\t" |
1173 "pxor %%mm6, %%mm0 \n\t" | 1187 "pxor %%mm6, %%mm0 \n\t" |
1174 "psubw %%mm6, %%mm0 \n\t" // |2L4 - 5L5 + 5L6 - 2L7| | 1188 "psubw %%mm6, %%mm0 \n\t" // |2L4 - 5L5 + 5L6 - 2L7| |
1175 "movq %%mm7, %%mm6 \n\t" // 0 | 1189 "movq %%mm7, %%mm6 \n\t" // 0 |
1176 "pcmpgtw %%mm1, %%mm6 \n\t" | 1190 "pcmpgtw %%mm1, %%mm6 \n\t" |
1177 "pxor %%mm6, %%mm1 \n\t" | 1191 "pxor %%mm6, %%mm1 \n\t" |
1178 "psubw %%mm6, %%mm1 \n\t" // |2H4 - 5H5 + 5H6 - 2H7| | 1192 "psubw %%mm6, %%mm1 \n\t" // |2H4 - 5H5 + 5H6 - 2H7| |
1179 | |
1180 "movq %%mm7, %%mm6 \n\t" // 0 | 1193 "movq %%mm7, %%mm6 \n\t" // 0 |
1181 "pcmpgtw %%mm2, %%mm6 \n\t" | 1194 "pcmpgtw %%mm2, %%mm6 \n\t" |
1182 "pxor %%mm6, %%mm2 \n\t" | 1195 "pxor %%mm6, %%mm2 \n\t" |
1183 "psubw %%mm6, %%mm2 \n\t" // |2L0 - 5L1 + 5L2 - 2L3| | 1196 "psubw %%mm6, %%mm2 \n\t" // |2L0 - 5L1 + 5L2 - 2L3| |
1184 "movq %%mm7, %%mm6 \n\t" // 0 | 1197 "movq %%mm7, %%mm6 \n\t" // 0 |
1185 "pcmpgtw %%mm3, %%mm6 \n\t" | 1198 "pcmpgtw %%mm3, %%mm6 \n\t" |
1186 "pxor %%mm6, %%mm3 \n\t" | 1199 "pxor %%mm6, %%mm3 \n\t" |
1187 "psubw %%mm6, %%mm3 \n\t" // |2H0 - 5H1 + 5H2 - 2H3| | 1200 "psubw %%mm6, %%mm3 \n\t" // |2H0 - 5H1 + 5H2 - 2H3| |
1201 #endif | |
1188 | 1202 |
1189 #ifdef HAVE_MMX2 | 1203 #ifdef HAVE_MMX2 |
1190 "pminsw %%mm2, %%mm0 \n\t" | 1204 "pminsw %%mm2, %%mm0 \n\t" |
1191 "pminsw %%mm3, %%mm1 \n\t" | 1205 "pminsw %%mm3, %%mm1 \n\t" |
1192 #else | 1206 #else |
1979 PAVGB(t0, lx) /* (src[-1] + src[+1])/2 */\ | 1993 PAVGB(t0, lx) /* (src[-1] + src[+1])/2 */\ |
1980 PAVGB(sx, lx) /* (src[-1] + 2src[0] + src[+1])/4 */\ | 1994 PAVGB(sx, lx) /* (src[-1] + 2src[0] + src[+1])/4 */\ |
1981 PAVGB(lx, pplx) \ | 1995 PAVGB(lx, pplx) \ |
1982 "movq " #lx ", temp1 \n\t"\ | 1996 "movq " #lx ", temp1 \n\t"\ |
1983 "movq temp0, " #lx " \n\t"\ | 1997 "movq temp0, " #lx " \n\t"\ |
1984 "psubusb " #lx ", " #t1 " \n\t"\ | 1998 "psubusb " #lx ", " #t1 " \n\t"\ |
1985 "psubusb " #lx ", " #t0 " \n\t"\ | 1999 "psubusb " #lx ", " #t0 " \n\t"\ |
1986 "psubusb " #lx ", " #sx " \n\t"\ | 2000 "psubusb " #lx ", " #sx " \n\t"\ |
1987 "movq b00, " #lx " \n\t"\ | 2001 "movq b00, " #lx " \n\t"\ |
1988 "pcmpeqb " #lx ", " #t1 " \n\t" /* src[-1] > a ? 0 : -1*/\ | 2002 "pcmpeqb " #lx ", " #t1 " \n\t" /* src[-1] > a ? 0 : -1*/\ |
1989 "pcmpeqb " #lx ", " #t0 " \n\t" /* src[+1] > a ? 0 : -1*/\ | 2003 "pcmpeqb " #lx ", " #t0 " \n\t" /* src[+1] > a ? 0 : -1*/\ |
1990 "pcmpeqb " #lx ", " #sx " \n\t" /* src[0] > a ? 0 : -1*/\ | 2004 "pcmpeqb " #lx ", " #sx " \n\t" /* src[0] > a ? 0 : -1*/\ |
1991 "paddb " #t1 ", " #t0 " \n\t"\ | 2005 "paddb " #t1 ", " #t0 " \n\t"\ |
1992 "paddb " #t0 ", " #sx " \n\t"\ | 2006 "paddb " #t0 ", " #sx " \n\t"\ |
1993 \ | 2007 \ |
1994 PAVGB(plx, pplx) /* filtered */\ | 2008 PAVGB(plx, pplx) /* filtered */\ |
1995 "movq " #dst ", " #t0 " \n\t" /* dst */\ | 2009 "movq " #dst ", " #t0 " \n\t" /* dst */\ |
2000 PMINUB(t1, pplx, t0)\ | 2014 PMINUB(t1, pplx, t0)\ |
2001 "paddb " #sx ", " #ppsx " \n\t"\ | 2015 "paddb " #sx ", " #ppsx " \n\t"\ |
2002 "paddb " #psx ", " #ppsx " \n\t"\ | 2016 "paddb " #psx ", " #ppsx " \n\t"\ |
2003 "#paddb b02, " #ppsx " \n\t"\ | 2017 "#paddb b02, " #ppsx " \n\t"\ |
2004 "pand b08, " #ppsx " \n\t"\ | 2018 "pand b08, " #ppsx " \n\t"\ |
2005 "pcmpeqb " #lx ", " #ppsx " \n\t"\ | 2019 "pcmpeqb " #lx ", " #ppsx " \n\t"\ |
2006 "pand " #ppsx ", " #pplx " \n\t"\ | 2020 "pand " #ppsx ", " #pplx " \n\t"\ |
2007 "pandn " #dst ", " #ppsx " \n\t"\ | 2021 "pandn " #dst ", " #ppsx " \n\t"\ |
2008 "por " #pplx ", " #ppsx " \n\t"\ | 2022 "por " #pplx ", " #ppsx " \n\t"\ |
2009 "movq " #ppsx ", " #dst " \n\t"\ | 2023 "movq " #ppsx ", " #dst " \n\t"\ |
2010 "movq temp1, " #lx " \n\t" | 2024 "movq temp1, " #lx " \n\t" |
2011 | 2025 |
2012 /* | 2026 /* |
2013 0000000 | 2027 0000000 |
2994 #endif | 3008 #endif |
2995 #ifdef TIMING | 3009 #ifdef TIMING |
2996 long long memcpyTime=0, vertTime=0, horizTime=0, sumTime; | 3010 long long memcpyTime=0, vertTime=0, horizTime=0, sumTime; |
2997 sumTime= rdtsc(); | 3011 sumTime= rdtsc(); |
2998 #endif | 3012 #endif |
3013 //mode= 0x7F; | |
2999 | 3014 |
3000 if(tempDst==NULL) | 3015 if(tempDst==NULL) |
3001 { | 3016 { |
3002 tempDst= (uint8_t*)memalign(8, 1024*24); | 3017 tempDst= (uint8_t*)memalign(8, 1024*24); |
3003 tempSrc= (uint8_t*)memalign(8, 1024*24); | 3018 tempSrc= (uint8_t*)memalign(8, 1024*24); |