comparison postproc/postprocess.c @ 2570:af43a83122fc

minor speedup, cleanup
author michael
date Tue, 30 Oct 2001 21:14:02 +0000
parents 42d5846eeb51
children 3b05a6b4d870
comparison
legend: equal, deleted, inserted, replaced
2569:30b736e7feef 2570:af43a83122fc
60 compare the quality & speed of all filters 60 compare the quality & speed of all filters
61 split this huge file 61 split this huge file
62 fix warnings (unused vars, ...) 62 fix warnings (unused vars, ...)
63 noise reduction filters 63 noise reduction filters
64 border remover 64 border remover
65 optimize c versions
65 ... 66 ...
66 67
67 Notes: 68 Notes:
68 */ 69 */
69 70
415 static inline void doVertLowPass(uint8_t *src, int stride, int QP) 416 static inline void doVertLowPass(uint8_t *src, int stride, int QP)
416 { 417 {
417 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) 418 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
418 src+= stride*3; 419 src+= stride*3;
419 asm volatile( //"movv %0 %1 %2\n\t" 420 asm volatile( //"movv %0 %1 %2\n\t"
420 "pushl %0 \n\t"
421 "movq pQPb, %%mm0 \n\t" // QP,..., QP 421 "movq pQPb, %%mm0 \n\t" // QP,..., QP
422 422
423 "movq (%0), %%mm6 \n\t" 423 "movq (%0), %%mm6 \n\t"
424 "movq (%0, %1), %%mm5 \n\t" 424 "movq (%0, %1), %%mm5 \n\t"
425 "movq %%mm5, %%mm1 \n\t" 425 "movq %%mm5, %%mm1 \n\t"
533 PAVGB(%%mm7, %%mm5) // 11 6 /8 533 PAVGB(%%mm7, %%mm5) // 11 6 /8
534 534
535 PAVGB(%%mm3, %%mm0) // 112 /4 535 PAVGB(%%mm3, %%mm0) // 112 /4
536 PAVGB(%%mm0, %%mm5) // 112246 /16 536 PAVGB(%%mm0, %%mm5) // 112246 /16
537 "movq %%mm5, (%%eax, %1, 4) \n\t" // X 537 "movq %%mm5, (%%eax, %1, 4) \n\t" // X
538 "popl %0\n\t" 538 "subl %1, %0 \n\t"
539 539
540 : 540 :
541 : "r" (src), "r" (stride) 541 : "r" (src), "r" (stride)
542 : "%eax", "%ebx" 542 : "%eax", "%ebx"
543 ); 543 );
1165 "psubw %%mm2, %%mm0 \n\t" // 2L4 - 5L5 + 5L6 - 2L7 1165 "psubw %%mm2, %%mm0 \n\t" // 2L4 - 5L5 + 5L6 - 2L7
1166 "psubw %%mm3, %%mm1 \n\t" // 2H4 - 5H5 + 5H6 - 2H7 1166 "psubw %%mm3, %%mm1 \n\t" // 2H4 - 5H5 + 5H6 - 2H7
1167 1167
1168 "movq temp0, %%mm2 \n\t" // 2L0 - 5L1 + 5L2 - 2L3 1168 "movq temp0, %%mm2 \n\t" // 2L0 - 5L1 + 5L2 - 2L3
1169 "movq temp1, %%mm3 \n\t" // 2H0 - 5H1 + 5H2 - 2H3 1169 "movq temp1, %%mm3 \n\t" // 2H0 - 5H1 + 5H2 - 2H3
1170 //FIXME pxor, psubw, pmax for abs 1170
1171 #ifdef HAVE_MMX2
1172 "movq %%mm7, %%mm6 \n\t" // 0
1173 "psubw %%mm0, %%mm6 \n\t"
1174 "pmaxsw %%mm6, %%mm0 \n\t" // |2L4 - 5L5 + 5L6 - 2L7|
1175 "movq %%mm7, %%mm6 \n\t" // 0
1176 "psubw %%mm1, %%mm6 \n\t"
1177 "pmaxsw %%mm6, %%mm1 \n\t" // |2H4 - 5H5 + 5H6 - 2H7|
1178 "movq %%mm7, %%mm6 \n\t" // 0
1179 "psubw %%mm2, %%mm6 \n\t"
1180 "pmaxsw %%mm6, %%mm2 \n\t" // |2L0 - 5L1 + 5L2 - 2L3|
1181 "movq %%mm7, %%mm6 \n\t" // 0
1182 "psubw %%mm3, %%mm6 \n\t"
1183 "pmaxsw %%mm6, %%mm3 \n\t" // |2H0 - 5H1 + 5H2 - 2H3|
1184 #else
1171 "movq %%mm7, %%mm6 \n\t" // 0 1185 "movq %%mm7, %%mm6 \n\t" // 0
1172 "pcmpgtw %%mm0, %%mm6 \n\t" 1186 "pcmpgtw %%mm0, %%mm6 \n\t"
1173 "pxor %%mm6, %%mm0 \n\t" 1187 "pxor %%mm6, %%mm0 \n\t"
1174 "psubw %%mm6, %%mm0 \n\t" // |2L4 - 5L5 + 5L6 - 2L7| 1188 "psubw %%mm6, %%mm0 \n\t" // |2L4 - 5L5 + 5L6 - 2L7|
1175 "movq %%mm7, %%mm6 \n\t" // 0 1189 "movq %%mm7, %%mm6 \n\t" // 0
1176 "pcmpgtw %%mm1, %%mm6 \n\t" 1190 "pcmpgtw %%mm1, %%mm6 \n\t"
1177 "pxor %%mm6, %%mm1 \n\t" 1191 "pxor %%mm6, %%mm1 \n\t"
1178 "psubw %%mm6, %%mm1 \n\t" // |2H4 - 5H5 + 5H6 - 2H7| 1192 "psubw %%mm6, %%mm1 \n\t" // |2H4 - 5H5 + 5H6 - 2H7|
1179
1180 "movq %%mm7, %%mm6 \n\t" // 0 1193 "movq %%mm7, %%mm6 \n\t" // 0
1181 "pcmpgtw %%mm2, %%mm6 \n\t" 1194 "pcmpgtw %%mm2, %%mm6 \n\t"
1182 "pxor %%mm6, %%mm2 \n\t" 1195 "pxor %%mm6, %%mm2 \n\t"
1183 "psubw %%mm6, %%mm2 \n\t" // |2L0 - 5L1 + 5L2 - 2L3| 1196 "psubw %%mm6, %%mm2 \n\t" // |2L0 - 5L1 + 5L2 - 2L3|
1184 "movq %%mm7, %%mm6 \n\t" // 0 1197 "movq %%mm7, %%mm6 \n\t" // 0
1185 "pcmpgtw %%mm3, %%mm6 \n\t" 1198 "pcmpgtw %%mm3, %%mm6 \n\t"
1186 "pxor %%mm6, %%mm3 \n\t" 1199 "pxor %%mm6, %%mm3 \n\t"
1187 "psubw %%mm6, %%mm3 \n\t" // |2H0 - 5H1 + 5H2 - 2H3| 1200 "psubw %%mm6, %%mm3 \n\t" // |2H0 - 5H1 + 5H2 - 2H3|
1201 #endif
1188 1202
1189 #ifdef HAVE_MMX2 1203 #ifdef HAVE_MMX2
1190 "pminsw %%mm2, %%mm0 \n\t" 1204 "pminsw %%mm2, %%mm0 \n\t"
1191 "pminsw %%mm3, %%mm1 \n\t" 1205 "pminsw %%mm3, %%mm1 \n\t"
1192 #else 1206 #else
1979 PAVGB(t0, lx) /* (src[-1] + src[+1])/2 */\ 1993 PAVGB(t0, lx) /* (src[-1] + src[+1])/2 */\
1980 PAVGB(sx, lx) /* (src[-1] + 2src[0] + src[+1])/4 */\ 1994 PAVGB(sx, lx) /* (src[-1] + 2src[0] + src[+1])/4 */\
1981 PAVGB(lx, pplx) \ 1995 PAVGB(lx, pplx) \
1982 "movq " #lx ", temp1 \n\t"\ 1996 "movq " #lx ", temp1 \n\t"\
1983 "movq temp0, " #lx " \n\t"\ 1997 "movq temp0, " #lx " \n\t"\
1984 "psubusb " #lx ", " #t1 " \n\t"\ 1998 "psubusb " #lx ", " #t1 " \n\t"\
1985 "psubusb " #lx ", " #t0 " \n\t"\ 1999 "psubusb " #lx ", " #t0 " \n\t"\
1986 "psubusb " #lx ", " #sx " \n\t"\ 2000 "psubusb " #lx ", " #sx " \n\t"\
1987 "movq b00, " #lx " \n\t"\ 2001 "movq b00, " #lx " \n\t"\
1988 "pcmpeqb " #lx ", " #t1 " \n\t" /* src[-1] > a ? 0 : -1*/\ 2002 "pcmpeqb " #lx ", " #t1 " \n\t" /* src[-1] > a ? 0 : -1*/\
1989 "pcmpeqb " #lx ", " #t0 " \n\t" /* src[+1] > a ? 0 : -1*/\ 2003 "pcmpeqb " #lx ", " #t0 " \n\t" /* src[+1] > a ? 0 : -1*/\
1990 "pcmpeqb " #lx ", " #sx " \n\t" /* src[0] > a ? 0 : -1*/\ 2004 "pcmpeqb " #lx ", " #sx " \n\t" /* src[0] > a ? 0 : -1*/\
1991 "paddb " #t1 ", " #t0 " \n\t"\ 2005 "paddb " #t1 ", " #t0 " \n\t"\
1992 "paddb " #t0 ", " #sx " \n\t"\ 2006 "paddb " #t0 ", " #sx " \n\t"\
1993 \ 2007 \
1994 PAVGB(plx, pplx) /* filtered */\ 2008 PAVGB(plx, pplx) /* filtered */\
1995 "movq " #dst ", " #t0 " \n\t" /* dst */\ 2009 "movq " #dst ", " #t0 " \n\t" /* dst */\
2000 PMINUB(t1, pplx, t0)\ 2014 PMINUB(t1, pplx, t0)\
2001 "paddb " #sx ", " #ppsx " \n\t"\ 2015 "paddb " #sx ", " #ppsx " \n\t"\
2002 "paddb " #psx ", " #ppsx " \n\t"\ 2016 "paddb " #psx ", " #ppsx " \n\t"\
2003 "#paddb b02, " #ppsx " \n\t"\ 2017 "#paddb b02, " #ppsx " \n\t"\
2004 "pand b08, " #ppsx " \n\t"\ 2018 "pand b08, " #ppsx " \n\t"\
2005 "pcmpeqb " #lx ", " #ppsx " \n\t"\ 2019 "pcmpeqb " #lx ", " #ppsx " \n\t"\
2006 "pand " #ppsx ", " #pplx " \n\t"\ 2020 "pand " #ppsx ", " #pplx " \n\t"\
2007 "pandn " #dst ", " #ppsx " \n\t"\ 2021 "pandn " #dst ", " #ppsx " \n\t"\
2008 "por " #pplx ", " #ppsx " \n\t"\ 2022 "por " #pplx ", " #ppsx " \n\t"\
2009 "movq " #ppsx ", " #dst " \n\t"\ 2023 "movq " #ppsx ", " #dst " \n\t"\
2010 "movq temp1, " #lx " \n\t" 2024 "movq temp1, " #lx " \n\t"
2011 2025
2012 /* 2026 /*
2013 0000000 2027 0000000
2994 #endif 3008 #endif
2995 #ifdef TIMING 3009 #ifdef TIMING
2996 long long memcpyTime=0, vertTime=0, horizTime=0, sumTime; 3010 long long memcpyTime=0, vertTime=0, horizTime=0, sumTime;
2997 sumTime= rdtsc(); 3011 sumTime= rdtsc();
2998 #endif 3012 #endif
3013 //mode= 0x7F;
2999 3014
3000 if(tempDst==NULL) 3015 if(tempDst==NULL)
3001 { 3016 {
3002 tempDst= (uint8_t*)memalign(8, 1024*24); 3017 tempDst= (uint8_t*)memalign(8, 1024*24);
3003 tempSrc= (uint8_t*)memalign(8, 1024*24); 3018 tempSrc= (uint8_t*)memalign(8, 1024*24);