comparison postprocess_template.c @ 118:bdd1788fb53b libpostproc

Change the semantics of CONFIG_*, HAVE_* and ARCH_*: they are now always defined, to either 0 or 1.
author aurel
date Tue, 13 Jan 2009 23:44:16 +0000
parents bf8f52662dc3
children 4a1602d552aa
117:3a76063f4145 118:bdd1788fb53b
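
For readers unfamiliar with the convention change, a minimal sketch of what it means for the guards in this file (illustrative only, not part of the changeset; it simply mirrors the REAL_PAVGB hunk below): under the old convention configure defined HAVE_MMX2 only when MMX2 support was enabled, so tests had to use #ifdef / #elif defined(); under the new convention the macro is always defined, to 0 or 1, so a plain #if expression works and combined conditions such as HAVE_MMX2 || HAVE_3DNOW can be written directly.

    /* Illustrative sketch only, not part of the changeset. */

    /* Old convention: HAVE_MMX2 exists only when the feature is enabled. */
    #ifdef HAVE_MMX2
    #define REAL_PAVGB(a,b) "pavgb " #a ", " #b " \n\t"
    #elif defined (HAVE_3DNOW)
    #define REAL_PAVGB(a,b) "pavgusb " #a ", " #b " \n\t"
    #endif

    /* New convention: HAVE_MMX2 and HAVE_3DNOW are always defined to 0 or 1,
     * so plain #if / #elif expressions (and || combinations) work, and
     * -Wundef can flag any macro that configure forgot to define. */
    #if HAVE_MMX2
    #define REAL_PAVGB(a,b) "pavgb " #a ", " #b " \n\t"
    #elif HAVE_3DNOW
    #define REAL_PAVGB(a,b) "pavgusb " #a ", " #b " \n\t"
    #endif
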
29 29
30 #undef PAVGB 30 #undef PAVGB
31 #undef PMINUB 31 #undef PMINUB
32 #undef PMAXUB 32 #undef PMAXUB
33 33
34 #ifdef HAVE_MMX2 34 #if HAVE_MMX2
35 #define REAL_PAVGB(a,b) "pavgb " #a ", " #b " \n\t" 35 #define REAL_PAVGB(a,b) "pavgb " #a ", " #b " \n\t"
36 #elif defined (HAVE_3DNOW) 36 #elif HAVE_3DNOW
37 #define REAL_PAVGB(a,b) "pavgusb " #a ", " #b " \n\t" 37 #define REAL_PAVGB(a,b) "pavgusb " #a ", " #b " \n\t"
38 #endif 38 #endif
39 #define PAVGB(a,b) REAL_PAVGB(a,b) 39 #define PAVGB(a,b) REAL_PAVGB(a,b)
40 40
41 #ifdef HAVE_MMX2 41 #if HAVE_MMX2
42 #define PMINUB(a,b,t) "pminub " #a ", " #b " \n\t" 42 #define PMINUB(a,b,t) "pminub " #a ", " #b " \n\t"
43 #elif defined (HAVE_MMX) 43 #elif HAVE_MMX
44 #define PMINUB(b,a,t) \ 44 #define PMINUB(b,a,t) \
45 "movq " #a ", " #t " \n\t"\ 45 "movq " #a ", " #t " \n\t"\
46 "psubusb " #b ", " #t " \n\t"\ 46 "psubusb " #b ", " #t " \n\t"\
47 "psubb " #t ", " #a " \n\t" 47 "psubb " #t ", " #a " \n\t"
48 #endif 48 #endif
49 49
50 #ifdef HAVE_MMX2 50 #if HAVE_MMX2
51 #define PMAXUB(a,b) "pmaxub " #a ", " #b " \n\t" 51 #define PMAXUB(a,b) "pmaxub " #a ", " #b " \n\t"
52 #elif defined (HAVE_MMX) 52 #elif HAVE_MMX
53 #define PMAXUB(a,b) \ 53 #define PMAXUB(a,b) \
54 "psubusb " #a ", " #b " \n\t"\ 54 "psubusb " #a ", " #b " \n\t"\
55 "paddb " #a ", " #b " \n\t" 55 "paddb " #a ", " #b " \n\t"
56 #endif 56 #endif
57 57
58 //FIXME? |255-0| = 1 (should not be a problem ...) 58 //FIXME? |255-0| = 1 (should not be a problem ...)
59 #ifdef HAVE_MMX 59 #if HAVE_MMX
60 /** 60 /**
61 * Check if the middle 8x8 Block in the given 8x16 block is flat 61 * Check if the middle 8x8 Block in the given 8x16 block is flat
62 */ 62 */
63 static inline int RENAME(vertClassify)(uint8_t src[], int stride, PPContext *c){ 63 static inline int RENAME(vertClassify)(uint8_t src[], int stride, PPContext *c){
64 int numEq= 0, dcOk; 64 int numEq= 0, dcOk;
134 "pcmpgtb %%mm6, %%mm2 \n\t" 134 "pcmpgtb %%mm6, %%mm2 \n\t"
135 "paddb %%mm2, %%mm0 \n\t" 135 "paddb %%mm2, %%mm0 \n\t"
136 "psubusb %%mm3, %%mm4 \n\t" 136 "psubusb %%mm3, %%mm4 \n\t"
137 137
138 " \n\t" 138 " \n\t"
139 #ifdef HAVE_MMX2 139 #if HAVE_MMX2
140 "pxor %%mm7, %%mm7 \n\t" 140 "pxor %%mm7, %%mm7 \n\t"
141 "psadbw %%mm7, %%mm0 \n\t" 141 "psadbw %%mm7, %%mm0 \n\t"
142 #else 142 #else
143 "movq %%mm0, %%mm1 \n\t" 143 "movq %%mm0, %%mm1 \n\t"
144 "psrlw $8, %%mm0 \n\t" 144 "psrlw $8, %%mm0 \n\t"
174 174
175 /** 175 /**
176 * Do a vertical low pass filter on the 8x16 block (only write to the 8x8 block in the middle) 176 * Do a vertical low pass filter on the 8x16 block (only write to the 8x8 block in the middle)
177 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 177 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16
178 */ 178 */
179 #ifndef HAVE_ALTIVEC 179 #if !HAVE_ALTIVEC
180 static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, PPContext *c) 180 static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, PPContext *c)
181 { 181 {
182 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) 182 #if HAVE_MMX2 || HAVE_3DNOW
183 src+= stride*3; 183 src+= stride*3;
184 __asm__ volatile( //"movv %0 %1 %2\n\t" 184 __asm__ volatile( //"movv %0 %1 %2\n\t"
185 "movq %2, %%mm0 \n\t" // QP,..., QP 185 "movq %2, %%mm0 \n\t" // QP,..., QP
186 "pxor %%mm4, %%mm4 \n\t" 186 "pxor %%mm4, %%mm4 \n\t"
187 187
304 304
305 : 305 :
306 : "r" (src), "r" ((x86_reg)stride), "m" (c->pQPb) 306 : "r" (src), "r" ((x86_reg)stride), "m" (c->pQPb)
307 : "%"REG_a, "%"REG_c 307 : "%"REG_a, "%"REG_c
308 ); 308 );
309 #else //defined (HAVE_MMX2) || defined (HAVE_3DNOW) 309 #else //HAVE_MMX2 || HAVE_3DNOW
310 const int l1= stride; 310 const int l1= stride;
311 const int l2= stride + l1; 311 const int l2= stride + l1;
312 const int l3= stride + l2; 312 const int l3= stride + l2;
313 const int l4= stride + l3; 313 const int l4= stride + l3;
314 const int l5= stride + l4; 314 const int l5= stride + l4;
343 src[l7]= (sums[6] + sums[8] + 2*src[l7])>>4; 343 src[l7]= (sums[6] + sums[8] + 2*src[l7])>>4;
344 src[l8]= (sums[7] + sums[9] + 2*src[l8])>>4; 344 src[l8]= (sums[7] + sums[9] + 2*src[l8])>>4;
345 345
346 src++; 346 src++;
347 } 347 }
348 #endif //defined (HAVE_MMX2) || defined (HAVE_3DNOW) 348 #endif //HAVE_MMX2 || HAVE_3DNOW
349 } 349 }
350 #endif //HAVE_ALTIVEC 350 #endif //HAVE_ALTIVEC
351 351
352 #if 0 352 #if 0
353 /** 353 /**
362 x/8 = 1 362 x/8 = 1
363 1 12 12 23 363 1 12 12 23
364 */ 364 */
365 static inline void RENAME(vertRK1Filter)(uint8_t *src, int stride, int QP) 365 static inline void RENAME(vertRK1Filter)(uint8_t *src, int stride, int QP)
366 { 366 {
367 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) 367 #if HAVE_MMX2 || HAVE_3DNOW
368 src+= stride*3; 368 src+= stride*3;
369 // FIXME rounding 369 // FIXME rounding
370 __asm__ volatile( 370 __asm__ volatile(
371 "pxor %%mm7, %%mm7 \n\t" // 0 371 "pxor %%mm7, %%mm7 \n\t" // 0
372 "movq "MANGLE(b80)", %%mm6 \n\t" // MIN_SIGNED_BYTE 372 "movq "MANGLE(b80)", %%mm6 \n\t" // MIN_SIGNED_BYTE
424 424
425 : 425 :
426 : "r" (src), "r" ((x86_reg)stride) 426 : "r" (src), "r" ((x86_reg)stride)
427 : "%"REG_a, "%"REG_c 427 : "%"REG_a, "%"REG_c
428 ); 428 );
429 #else //defined (HAVE_MMX2) || defined (HAVE_3DNOW) 429 #else //HAVE_MMX2 || HAVE_3DNOW
430 const int l1= stride; 430 const int l1= stride;
431 const int l2= stride + l1; 431 const int l2= stride + l1;
432 const int l3= stride + l2; 432 const int l3= stride + l2;
433 const int l4= stride + l3; 433 const int l4= stride + l3;
434 const int l5= stride + l4; 434 const int l5= stride + l4;
447 src[x+l5] -=v>>1; 447 src[x+l5] -=v>>1;
448 src[x+l6] -=v>>3; 448 src[x+l6] -=v>>3;
449 } 449 }
450 } 450 }
451 451
452 #endif //defined (HAVE_MMX2) || defined (HAVE_3DNOW) 452 #endif //HAVE_MMX2 || HAVE_3DNOW
453 } 453 }
454 #endif //0 454 #endif //0
455 455
456 /** 456 /**
457 * Experimental Filter 1 457 * Experimental Filter 1
460 * can only smooth blocks at the expected locations (it cannot smooth them if they did move) 460 * can only smooth blocks at the expected locations (it cannot smooth them if they did move)
461 * MMX2 version does correct clipping C version does not 461 * MMX2 version does correct clipping C version does not
462 */ 462 */
463 static inline void RENAME(vertX1Filter)(uint8_t *src, int stride, PPContext *co) 463 static inline void RENAME(vertX1Filter)(uint8_t *src, int stride, PPContext *co)
464 { 464 {
465 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) 465 #if HAVE_MMX2 || HAVE_3DNOW
466 src+= stride*3; 466 src+= stride*3;
467 467
468 __asm__ volatile( 468 __asm__ volatile(
469 "pxor %%mm7, %%mm7 \n\t" // 0 469 "pxor %%mm7, %%mm7 \n\t" // 0
470 "lea (%0, %1), %%"REG_a" \n\t" 470 "lea (%0, %1), %%"REG_a" \n\t"
546 546
547 : 547 :
548 : "r" (src), "r" ((x86_reg)stride), "m" (co->pQPb) 548 : "r" (src), "r" ((x86_reg)stride), "m" (co->pQPb)
549 : "%"REG_a, "%"REG_c 549 : "%"REG_a, "%"REG_c
550 ); 550 );
551 #else //defined (HAVE_MMX2) || defined (HAVE_3DNOW) 551 #else //HAVE_MMX2 || HAVE_3DNOW
552 552
553 const int l1= stride; 553 const int l1= stride;
554 const int l2= stride + l1; 554 const int l2= stride + l1;
555 const int l3= stride + l2; 555 const int l3= stride + l2;
556 const int l4= stride + l3; 556 const int l4= stride + l3;
580 src[l6] -=v>>2; 580 src[l6] -=v>>2;
581 src[l7] -=v>>3; 581 src[l7] -=v>>3;
582 } 582 }
583 src++; 583 src++;
584 } 584 }
585 #endif //defined (HAVE_MMX2) || defined (HAVE_3DNOW) 585 #endif //HAVE_MMX2 || HAVE_3DNOW
586 } 586 }
587 587
588 #ifndef HAVE_ALTIVEC 588 #if !HAVE_ALTIVEC
589 static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext *c) 589 static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext *c)
590 { 590 {
591 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) 591 #if HAVE_MMX2 || HAVE_3DNOW
592 /* 592 /*
593 uint8_t tmp[16]; 593 uint8_t tmp[16];
594 const int l1= stride; 594 const int l1= stride;
595 const int l2= stride + l1; 595 const int l2= stride + l1;
596 const int l3= stride + l2; 596 const int l3= stride + l2;
867 } 867 }
868 } 868 }
869 } 869 }
870 } 870 }
871 */ 871 */
872 #elif defined (HAVE_MMX) 872 #elif HAVE_MMX
873 src+= stride*4; 873 src+= stride*4;
874 __asm__ volatile( 874 __asm__ volatile(
875 "pxor %%mm7, %%mm7 \n\t" 875 "pxor %%mm7, %%mm7 \n\t"
876 "lea -40(%%"REG_SP"), %%"REG_c" \n\t" // make space for 4 8-byte vars 876 "lea -40(%%"REG_SP"), %%"REG_c" \n\t" // make space for 4 8-byte vars
877 "and "ALIGN_MASK", %%"REG_c" \n\t" // align 877 "and "ALIGN_MASK", %%"REG_c" \n\t" // align
976 "psubw %%mm3, %%mm1 \n\t" // 2H4 - 5H5 + 5H6 - 2H7 976 "psubw %%mm3, %%mm1 \n\t" // 2H4 - 5H5 + 5H6 - 2H7
977 977
978 "movq (%%"REG_c"), %%mm2 \n\t" // 2L0 - 5L1 + 5L2 - 2L3 978 "movq (%%"REG_c"), %%mm2 \n\t" // 2L0 - 5L1 + 5L2 - 2L3
979 "movq 8(%%"REG_c"), %%mm3 \n\t" // 2H0 - 5H1 + 5H2 - 2H3 979 "movq 8(%%"REG_c"), %%mm3 \n\t" // 2H0 - 5H1 + 5H2 - 2H3
980 980
981 #ifdef HAVE_MMX2 981 #if HAVE_MMX2
982 "movq %%mm7, %%mm6 \n\t" // 0 982 "movq %%mm7, %%mm6 \n\t" // 0
983 "psubw %%mm0, %%mm6 \n\t" 983 "psubw %%mm0, %%mm6 \n\t"
984 "pmaxsw %%mm6, %%mm0 \n\t" // |2L4 - 5L5 + 5L6 - 2L7| 984 "pmaxsw %%mm6, %%mm0 \n\t" // |2L4 - 5L5 + 5L6 - 2L7|
985 "movq %%mm7, %%mm6 \n\t" // 0 985 "movq %%mm7, %%mm6 \n\t" // 0
986 "psubw %%mm1, %%mm6 \n\t" 986 "psubw %%mm1, %%mm6 \n\t"
1008 "pcmpgtw %%mm3, %%mm6 \n\t" 1008 "pcmpgtw %%mm3, %%mm6 \n\t"
1009 "pxor %%mm6, %%mm3 \n\t" 1009 "pxor %%mm6, %%mm3 \n\t"
1010 "psubw %%mm6, %%mm3 \n\t" // |2H0 - 5H1 + 5H2 - 2H3| 1010 "psubw %%mm6, %%mm3 \n\t" // |2H0 - 5H1 + 5H2 - 2H3|
1011 #endif 1011 #endif
1012 1012
1013 #ifdef HAVE_MMX2 1013 #if HAVE_MMX2
1014 "pminsw %%mm2, %%mm0 \n\t" 1014 "pminsw %%mm2, %%mm0 \n\t"
1015 "pminsw %%mm3, %%mm1 \n\t" 1015 "pminsw %%mm3, %%mm1 \n\t"
1016 #else 1016 #else
1017 "movq %%mm0, %%mm6 \n\t" 1017 "movq %%mm0, %%mm6 \n\t"
1018 "psubusw %%mm2, %%mm6 \n\t" 1018 "psubusw %%mm2, %%mm6 \n\t"
1072 "pxor %%mm6, %%mm2 \n\t" 1072 "pxor %%mm6, %%mm2 \n\t"
1073 "pxor %%mm7, %%mm3 \n\t" 1073 "pxor %%mm7, %%mm3 \n\t"
1074 "pand %%mm2, %%mm4 \n\t" 1074 "pand %%mm2, %%mm4 \n\t"
1075 "pand %%mm3, %%mm5 \n\t" 1075 "pand %%mm3, %%mm5 \n\t"
1076 1076
1077 #ifdef HAVE_MMX2 1077 #if HAVE_MMX2
1078 "pminsw %%mm0, %%mm4 \n\t" 1078 "pminsw %%mm0, %%mm4 \n\t"
1079 "pminsw %%mm1, %%mm5 \n\t" 1079 "pminsw %%mm1, %%mm5 \n\t"
1080 #else 1080 #else
1081 "movq %%mm4, %%mm2 \n\t" 1081 "movq %%mm4, %%mm2 \n\t"
1082 "psubusw %%mm0, %%mm2 \n\t" 1082 "psubusw %%mm0, %%mm2 \n\t"
1099 1099
1100 : "+r" (src) 1100 : "+r" (src)
1101 : "r" ((x86_reg)stride), "m" (c->pQPb) 1101 : "r" ((x86_reg)stride), "m" (c->pQPb)
1102 : "%"REG_a, "%"REG_c 1102 : "%"REG_a, "%"REG_c
1103 ); 1103 );
1104 #else //defined (HAVE_MMX2) || defined (HAVE_3DNOW) 1104 #else //HAVE_MMX2 || HAVE_3DNOW
1105 const int l1= stride; 1105 const int l1= stride;
1106 const int l2= stride + l1; 1106 const int l2= stride + l1;
1107 const int l3= stride + l2; 1107 const int l3= stride + l2;
1108 const int l4= stride + l3; 1108 const int l4= stride + l3;
1109 const int l5= stride + l4; 1109 const int l5= stride + l4;
1137 src[l4]-= d; 1137 src[l4]-= d;
1138 src[l5]+= d; 1138 src[l5]+= d;
1139 } 1139 }
1140 src++; 1140 src++;
1141 } 1141 }
1142 #endif //defined (HAVE_MMX2) || defined (HAVE_3DNOW) 1142 #endif //HAVE_MMX2 || HAVE_3DNOW
1143 } 1143 }
1144 #endif //HAVE_ALTIVEC 1144 #endif //HAVE_ALTIVEC
1145 1145
1146 #ifndef HAVE_ALTIVEC 1146 #if !HAVE_ALTIVEC
1147 static inline void RENAME(dering)(uint8_t src[], int stride, PPContext *c) 1147 static inline void RENAME(dering)(uint8_t src[], int stride, PPContext *c)
1148 { 1148 {
1149 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) 1149 #if HAVE_MMX2 || HAVE_3DNOW
1150 __asm__ volatile( 1150 __asm__ volatile(
1151 "pxor %%mm6, %%mm6 \n\t" 1151 "pxor %%mm6, %%mm6 \n\t"
1152 "pcmpeqb %%mm7, %%mm7 \n\t" 1152 "pcmpeqb %%mm7, %%mm7 \n\t"
1153 "movq %2, %%mm0 \n\t" 1153 "movq %2, %%mm0 \n\t"
1154 "punpcklbw %%mm6, %%mm0 \n\t" 1154 "punpcklbw %%mm6, %%mm0 \n\t"
1162 1162
1163 // 0 1 2 3 4 5 6 7 8 9 1163 // 0 1 2 3 4 5 6 7 8 9
1164 // %0 eax eax+%1 eax+2%1 %0+4%1 edx edx+%1 edx+2%1 %0+8%1 edx+4%1 1164 // %0 eax eax+%1 eax+2%1 %0+4%1 edx edx+%1 edx+2%1 %0+8%1 edx+4%1
1165 1165
1166 #undef FIND_MIN_MAX 1166 #undef FIND_MIN_MAX
1167 #ifdef HAVE_MMX2 1167 #if HAVE_MMX2
1168 #define REAL_FIND_MIN_MAX(addr)\ 1168 #define REAL_FIND_MIN_MAX(addr)\
1169 "movq " #addr ", %%mm0 \n\t"\ 1169 "movq " #addr ", %%mm0 \n\t"\
1170 "pminub %%mm0, %%mm7 \n\t"\ 1170 "pminub %%mm0, %%mm7 \n\t"\
1171 "pmaxub %%mm0, %%mm6 \n\t" 1171 "pmaxub %%mm0, %%mm6 \n\t"
1172 #else 1172 #else
1189 FIND_MIN_MAX((%%REGd, %1, 2)) 1189 FIND_MIN_MAX((%%REGd, %1, 2))
1190 FIND_MIN_MAX((%0, %1, 8)) 1190 FIND_MIN_MAX((%0, %1, 8))
1191 1191
1192 "movq %%mm7, %%mm4 \n\t" 1192 "movq %%mm7, %%mm4 \n\t"
1193 "psrlq $8, %%mm7 \n\t" 1193 "psrlq $8, %%mm7 \n\t"
1194 #ifdef HAVE_MMX2 1194 #if HAVE_MMX2
1195 "pminub %%mm4, %%mm7 \n\t" // min of pixels 1195 "pminub %%mm4, %%mm7 \n\t" // min of pixels
1196 "pshufw $0xF9, %%mm7, %%mm4 \n\t" 1196 "pshufw $0xF9, %%mm7, %%mm4 \n\t"
1197 "pminub %%mm4, %%mm7 \n\t" // min of pixels 1197 "pminub %%mm4, %%mm7 \n\t" // min of pixels
1198 "pshufw $0xFE, %%mm7, %%mm4 \n\t" 1198 "pshufw $0xFE, %%mm7, %%mm4 \n\t"
1199 "pminub %%mm4, %%mm7 \n\t" 1199 "pminub %%mm4, %%mm7 \n\t"
1214 #endif 1214 #endif
1215 1215
1216 1216
1217 "movq %%mm6, %%mm4 \n\t" 1217 "movq %%mm6, %%mm4 \n\t"
1218 "psrlq $8, %%mm6 \n\t" 1218 "psrlq $8, %%mm6 \n\t"
1219 #ifdef HAVE_MMX2 1219 #if HAVE_MMX2
1220 "pmaxub %%mm4, %%mm6 \n\t" // max of pixels 1220 "pmaxub %%mm4, %%mm6 \n\t" // max of pixels
1221 "pshufw $0xF9, %%mm6, %%mm4 \n\t" 1221 "pshufw $0xF9, %%mm6, %%mm4 \n\t"
1222 "pmaxub %%mm4, %%mm6 \n\t" 1222 "pmaxub %%mm4, %%mm6 \n\t"
1223 "pshufw $0xFE, %%mm6, %%mm4 \n\t" 1223 "pshufw $0xFE, %%mm6, %%mm4 \n\t"
1224 "pmaxub %%mm4, %%mm6 \n\t" 1224 "pmaxub %%mm4, %%mm6 \n\t"
1368 1368
1369 "1: \n\t" 1369 "1: \n\t"
1370 : : "r" (src), "r" ((x86_reg)stride), "m" (c->pQPb), "m"(c->pQPb2) 1370 : : "r" (src), "r" ((x86_reg)stride), "m" (c->pQPb), "m"(c->pQPb2)
1371 : "%"REG_a, "%"REG_d, "%"REG_c 1371 : "%"REG_a, "%"REG_d, "%"REG_c
1372 ); 1372 );
1373 #else //defined (HAVE_MMX2) || defined (HAVE_3DNOW) 1373 #else //HAVE_MMX2 || HAVE_3DNOW
1374 int y; 1374 int y;
1375 int min=255; 1375 int min=255;
1376 int max=0; 1376 int max=0;
1377 int avg; 1377 int avg;
1378 uint8_t *p; 1378 uint8_t *p;
1485 } 1485 }
1486 } 1486 }
1487 // src[0] = src[7]=src[stride*7]=src[stride*7 + 7]=255; 1487 // src[0] = src[7]=src[stride*7]=src[stride*7 + 7]=255;
1488 } 1488 }
1489 #endif 1489 #endif
1490 #endif //defined (HAVE_MMX2) || defined (HAVE_3DNOW) 1490 #endif //HAVE_MMX2 || HAVE_3DNOW
1491 } 1491 }
1492 #endif //HAVE_ALTIVEC 1492 #endif //HAVE_ALTIVEC
1493 1493
1494 /** 1494 /**
1495 * Deinterlaces the given block by linearly interpolating every second line. 1495 * Deinterlaces the given block by linearly interpolating every second line.
1497 * lines 0-3 have been passed through the deblock / dering filters already, but can be read, too. 1497 * lines 0-3 have been passed through the deblock / dering filters already, but can be read, too.
1498 * lines 4-12 will be read into the deblocking filter and should be deinterlaced 1498 * lines 4-12 will be read into the deblocking filter and should be deinterlaced
1499 */ 1499 */
1500 static inline void RENAME(deInterlaceInterpolateLinear)(uint8_t src[], int stride) 1500 static inline void RENAME(deInterlaceInterpolateLinear)(uint8_t src[], int stride)
1501 { 1501 {
1502 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) 1502 #if HAVE_MMX2 || HAVE_3DNOW
1503 src+= 4*stride; 1503 src+= 4*stride;
1504 __asm__ volatile( 1504 __asm__ volatile(
1505 "lea (%0, %1), %%"REG_a" \n\t" 1505 "lea (%0, %1), %%"REG_a" \n\t"
1506 "lea (%%"REG_a", %1, 4), %%"REG_c" \n\t" 1506 "lea (%%"REG_a", %1, 4), %%"REG_c" \n\t"
1507 // 0 1 2 3 4 5 6 7 8 9 1507 // 0 1 2 3 4 5 6 7 8 9
1550 * lines 4-12 will be read into the deblocking filter and should be deinterlaced 1550 * lines 4-12 will be read into the deblocking filter and should be deinterlaced
1551 * this filter will read lines 3-15 and write 7-13 1551 * this filter will read lines 3-15 and write 7-13
1552 */ 1552 */
1553 static inline void RENAME(deInterlaceInterpolateCubic)(uint8_t src[], int stride) 1553 static inline void RENAME(deInterlaceInterpolateCubic)(uint8_t src[], int stride)
1554 { 1554 {
1555 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) 1555 #if HAVE_MMX2 || HAVE_3DNOW
1556 src+= stride*3; 1556 src+= stride*3;
1557 __asm__ volatile( 1557 __asm__ volatile(
1558 "lea (%0, %1), %%"REG_a" \n\t" 1558 "lea (%0, %1), %%"REG_a" \n\t"
1559 "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t" 1559 "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t"
1560 "lea (%%"REG_d", %1, 4), %%"REG_c" \n\t" 1560 "lea (%%"REG_d", %1, 4), %%"REG_c" \n\t"
1592 DEINT_CUBIC((%%REGd, %1), (%0, %1, 8) , (%%REGd, %1, 4), (%%REGc) , (%%REGc, %1, 2)) 1592 DEINT_CUBIC((%%REGd, %1), (%0, %1, 8) , (%%REGd, %1, 4), (%%REGc) , (%%REGc, %1, 2))
1593 1593
1594 : : "r" (src), "r" ((x86_reg)stride) 1594 : : "r" (src), "r" ((x86_reg)stride)
1595 : "%"REG_a, "%"REG_d, "%"REG_c 1595 : "%"REG_a, "%"REG_d, "%"REG_c
1596 ); 1596 );
1597 #else //defined (HAVE_MMX2) || defined (HAVE_3DNOW) 1597 #else //HAVE_MMX2 || HAVE_3DNOW
1598 int x; 1598 int x;
1599 src+= stride*3; 1599 src+= stride*3;
1600 for(x=0; x<8; x++){ 1600 for(x=0; x<8; x++){
1601 src[stride*3] = CLIP((-src[0] + 9*src[stride*2] + 9*src[stride*4] - src[stride*6])>>4); 1601 src[stride*3] = CLIP((-src[0] + 9*src[stride*2] + 9*src[stride*4] - src[stride*6])>>4);
1602 src[stride*5] = CLIP((-src[stride*2] + 9*src[stride*4] + 9*src[stride*6] - src[stride*8])>>4); 1602 src[stride*5] = CLIP((-src[stride*2] + 9*src[stride*4] + 9*src[stride*6] - src[stride*8])>>4);
1603 src[stride*7] = CLIP((-src[stride*4] + 9*src[stride*6] + 9*src[stride*8] - src[stride*10])>>4); 1603 src[stride*7] = CLIP((-src[stride*4] + 9*src[stride*6] + 9*src[stride*8] - src[stride*10])>>4);
1604 src[stride*9] = CLIP((-src[stride*6] + 9*src[stride*8] + 9*src[stride*10] - src[stride*12])>>4); 1604 src[stride*9] = CLIP((-src[stride*6] + 9*src[stride*8] + 9*src[stride*10] - src[stride*12])>>4);
1605 src++; 1605 src++;
1606 } 1606 }
1607 #endif //defined (HAVE_MMX2) || defined (HAVE_3DNOW) 1607 #endif //HAVE_MMX2 || HAVE_3DNOW
1608 } 1608 }
1609 1609
1610 /** 1610 /**
1611 * Deinterlaces the given block by filtering every second line with a (-1 4 2 4 -1) filter. 1611 * Deinterlaces the given block by filtering every second line with a (-1 4 2 4 -1) filter.
1612 * will be called for every 8x8 block and can read & write from line 4-15 1612 * will be called for every 8x8 block and can read & write from line 4-15
1614 * lines 4-12 will be read into the deblocking filter and should be deinterlaced 1614 * lines 4-12 will be read into the deblocking filter and should be deinterlaced
1615 * this filter will read lines 4-13 and write 5-11 1615 * this filter will read lines 4-13 and write 5-11
1616 */ 1616 */
1617 static inline void RENAME(deInterlaceFF)(uint8_t src[], int stride, uint8_t *tmp) 1617 static inline void RENAME(deInterlaceFF)(uint8_t src[], int stride, uint8_t *tmp)
1618 { 1618 {
1619 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) 1619 #if HAVE_MMX2 || HAVE_3DNOW
1620 src+= stride*4; 1620 src+= stride*4;
1621 __asm__ volatile( 1621 __asm__ volatile(
1622 "lea (%0, %1), %%"REG_a" \n\t" 1622 "lea (%0, %1), %%"REG_a" \n\t"
1623 "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t" 1623 "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t"
1624 "pxor %%mm7, %%mm7 \n\t" 1624 "pxor %%mm7, %%mm7 \n\t"
1663 1663
1664 "movq %%mm0, (%2) \n\t" 1664 "movq %%mm0, (%2) \n\t"
1665 : : "r" (src), "r" ((x86_reg)stride), "r"(tmp) 1665 : : "r" (src), "r" ((x86_reg)stride), "r"(tmp)
1666 : "%"REG_a, "%"REG_d 1666 : "%"REG_a, "%"REG_d
1667 ); 1667 );
1668 #else //defined (HAVE_MMX2) || defined (HAVE_3DNOW) 1668 #else //HAVE_MMX2 || HAVE_3DNOW
1669 int x; 1669 int x;
1670 src+= stride*4; 1670 src+= stride*4;
1671 for(x=0; x<8; x++){ 1671 for(x=0; x<8; x++){
1672 int t1= tmp[x]; 1672 int t1= tmp[x];
1673 int t2= src[stride*1]; 1673 int t2= src[stride*1];
1681 src[stride*7]= CLIP((-t2 + 4*src[stride*6] + 2*t1 + 4*src[stride*8] - src[stride*9] + 4)>>3); 1681 src[stride*7]= CLIP((-t2 + 4*src[stride*6] + 2*t1 + 4*src[stride*8] - src[stride*9] + 4)>>3);
1682 tmp[x]= t1; 1682 tmp[x]= t1;
1683 1683
1684 src++; 1684 src++;
1685 } 1685 }
1686 #endif //defined (HAVE_MMX2) || defined (HAVE_3DNOW) 1686 #endif //HAVE_MMX2 || HAVE_3DNOW
1687 } 1687 }
1688 1688
1689 /** 1689 /**
1690 * Deinterlaces the given block by filtering every line with a (-1 2 6 2 -1) filter. 1690 * Deinterlaces the given block by filtering every line with a (-1 2 6 2 -1) filter.
1691 * will be called for every 8x8 block and can read & write from line 4-15 1691 * will be called for every 8x8 block and can read & write from line 4-15
1693 * lines 4-12 will be read into the deblocking filter and should be deinterlaced 1693 * lines 4-12 will be read into the deblocking filter and should be deinterlaced
1694 * this filter will read lines 4-13 and write 4-11 1694 * this filter will read lines 4-13 and write 4-11
1695 */ 1695 */
1696 static inline void RENAME(deInterlaceL5)(uint8_t src[], int stride, uint8_t *tmp, uint8_t *tmp2) 1696 static inline void RENAME(deInterlaceL5)(uint8_t src[], int stride, uint8_t *tmp, uint8_t *tmp2)
1697 { 1697 {
1698 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) 1698 #if HAVE_MMX2 || HAVE_3DNOW
1699 src+= stride*4; 1699 src+= stride*4;
1700 __asm__ volatile( 1700 __asm__ volatile(
1701 "lea (%0, %1), %%"REG_a" \n\t" 1701 "lea (%0, %1), %%"REG_a" \n\t"
1702 "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t" 1702 "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t"
1703 "pxor %%mm7, %%mm7 \n\t" 1703 "pxor %%mm7, %%mm7 \n\t"
1753 "movq %%mm0, (%2) \n\t" 1753 "movq %%mm0, (%2) \n\t"
1754 "movq %%mm1, (%3) \n\t" 1754 "movq %%mm1, (%3) \n\t"
1755 : : "r" (src), "r" ((x86_reg)stride), "r"(tmp), "r"(tmp2) 1755 : : "r" (src), "r" ((x86_reg)stride), "r"(tmp), "r"(tmp2)
1756 : "%"REG_a, "%"REG_d 1756 : "%"REG_a, "%"REG_d
1757 ); 1757 );
1758 #else //defined (HAVE_MMX2) || defined (HAVE_3DNOW) 1758 #else //HAVE_MMX2 || HAVE_3DNOW
1759 int x; 1759 int x;
1760 src+= stride*4; 1760 src+= stride*4;
1761 for(x=0; x<8; x++){ 1761 for(x=0; x<8; x++){
1762 int t1= tmp[x]; 1762 int t1= tmp[x];
1763 int t2= tmp2[x]; 1763 int t2= tmp2[x];
1782 tmp[x]= t3; 1782 tmp[x]= t3;
1783 tmp2[x]= t1; 1783 tmp2[x]= t1;
1784 1784
1785 src++; 1785 src++;
1786 } 1786 }
1787 #endif //defined (HAVE_MMX2) || defined (HAVE_3DNOW) 1787 #endif //HAVE_MMX2 || HAVE_3DNOW
1788 } 1788 }
1789 1789
1790 /** 1790 /**
1791 * Deinterlaces the given block by filtering all lines with a (1 2 1) filter. 1791 * Deinterlaces the given block by filtering all lines with a (1 2 1) filter.
1792 * will be called for every 8x8 block and can read & write from line 4-15 1792 * will be called for every 8x8 block and can read & write from line 4-15
1794 * lines 4-12 will be read into the deblocking filter and should be deinterlaced 1794 * lines 4-12 will be read into the deblocking filter and should be deinterlaced
1795 * this filter will read lines 4-13 and write 4-11 1795 * this filter will read lines 4-13 and write 4-11
1796 */ 1796 */
1797 static inline void RENAME(deInterlaceBlendLinear)(uint8_t src[], int stride, uint8_t *tmp) 1797 static inline void RENAME(deInterlaceBlendLinear)(uint8_t src[], int stride, uint8_t *tmp)
1798 { 1798 {
1799 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) 1799 #if HAVE_MMX2 || HAVE_3DNOW
1800 src+= 4*stride; 1800 src+= 4*stride;
1801 __asm__ volatile( 1801 __asm__ volatile(
1802 "lea (%0, %1), %%"REG_a" \n\t" 1802 "lea (%0, %1), %%"REG_a" \n\t"
1803 "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t" 1803 "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t"
1804 // 0 1 2 3 4 5 6 7 8 9 1804 // 0 1 2 3 4 5 6 7 8 9
1841 "movq %%mm1, (%2) \n\t" 1841 "movq %%mm1, (%2) \n\t"
1842 1842
1843 : : "r" (src), "r" ((x86_reg)stride), "r" (tmp) 1843 : : "r" (src), "r" ((x86_reg)stride), "r" (tmp)
1844 : "%"REG_a, "%"REG_d 1844 : "%"REG_a, "%"REG_d
1845 ); 1845 );
1846 #else //defined (HAVE_MMX2) || defined (HAVE_3DNOW) 1846 #else //HAVE_MMX2 || HAVE_3DNOW
1847 int a, b, c, x; 1847 int a, b, c, x;
1848 src+= 4*stride; 1848 src+= 4*stride;
1849 1849
1850 for(x=0; x<2; x++){ 1850 for(x=0; x<2; x++){
1851 a= *(uint32_t*)&tmp[stride*0]; 1851 a= *(uint32_t*)&tmp[stride*0];
1884 1884
1885 *(uint32_t*)&tmp[stride*0]= c; 1885 *(uint32_t*)&tmp[stride*0]= c;
1886 src += 4; 1886 src += 4;
1887 tmp += 4; 1887 tmp += 4;
1888 } 1888 }
1889 #endif //defined (HAVE_MMX2) || defined (HAVE_3DNOW) 1889 #endif //HAVE_MMX2 || HAVE_3DNOW
1890 } 1890 }
1891 1891
1892 /** 1892 /**
1893 * Deinterlaces the given block by applying a median filter to every second line. 1893 * Deinterlaces the given block by applying a median filter to every second line.
1894 * will be called for every 8x8 block and can read & write from line 4-15, 1894 * will be called for every 8x8 block and can read & write from line 4-15,
1895 * lines 0-3 have been passed through the deblock / dering filters already, but can be read, too. 1895 * lines 0-3 have been passed through the deblock / dering filters already, but can be read, too.
1896 * lines 4-12 will be read into the deblocking filter and should be deinterlaced 1896 * lines 4-12 will be read into the deblocking filter and should be deinterlaced
1897 */ 1897 */
1898 static inline void RENAME(deInterlaceMedian)(uint8_t src[], int stride) 1898 static inline void RENAME(deInterlaceMedian)(uint8_t src[], int stride)
1899 { 1899 {
1900 #ifdef HAVE_MMX 1900 #if HAVE_MMX
1901 src+= 4*stride; 1901 src+= 4*stride;
1902 #ifdef HAVE_MMX2 1902 #if HAVE_MMX2
1903 __asm__ volatile( 1903 __asm__ volatile(
1904 "lea (%0, %1), %%"REG_a" \n\t" 1904 "lea (%0, %1), %%"REG_a" \n\t"
1905 "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t" 1905 "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t"
1906 // 0 1 2 3 4 5 6 7 8 9 1906 // 0 1 2 3 4 5 6 7 8 9
1907 // %0 eax eax+%1 eax+2%1 %0+4%1 edx edx+%1 edx+2%1 %0+8%1 edx+4%1 1907 // %0 eax eax+%1 eax+2%1 %0+4%1 edx edx+%1 edx+2%1 %0+8%1 edx+4%1
2010 src++; 2010 src++;
2011 } 2011 }
2012 #endif //HAVE_MMX 2012 #endif //HAVE_MMX
2013 } 2013 }
2014 2014
2015 #ifdef HAVE_MMX 2015 #if HAVE_MMX
2016 /** 2016 /**
2017 * transposes and shift the given 8x8 Block into dst1 and dst2 2017 * transposes and shift the given 8x8 Block into dst1 and dst2
2018 */ 2018 */
2019 static inline void RENAME(transpose1)(uint8_t *dst1, uint8_t *dst2, uint8_t *src, int srcStride) 2019 static inline void RENAME(transpose1)(uint8_t *dst1, uint8_t *dst2, uint8_t *src, int srcStride)
2020 { 2020 {
2178 ); 2178 );
2179 } 2179 }
2180 #endif //HAVE_MMX 2180 #endif //HAVE_MMX
2181 //static long test=0; 2181 //static long test=0;
2182 2182
2183 #ifndef HAVE_ALTIVEC 2183 #if !HAVE_ALTIVEC
2184 static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride, 2184 static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride,
2185 uint8_t *tempBlurred, uint32_t *tempBlurredPast, int *maxNoise) 2185 uint8_t *tempBlurred, uint32_t *tempBlurredPast, int *maxNoise)
2186 { 2186 {
2187 // to save a register (FIXME do this outside of the loops) 2187 // to save a register (FIXME do this outside of the loops)
2188 tempBlurredPast[127]= maxNoise[0]; 2188 tempBlurredPast[127]= maxNoise[0];
2189 tempBlurredPast[128]= maxNoise[1]; 2189 tempBlurredPast[128]= maxNoise[1];
2190 tempBlurredPast[129]= maxNoise[2]; 2190 tempBlurredPast[129]= maxNoise[2];
2191 2191
2192 #define FAST_L2_DIFF 2192 #define FAST_L2_DIFF
2193 //#define L1_DIFF //u should change the thresholds too if u try that one 2193 //#define L1_DIFF //u should change the thresholds too if u try that one
2194 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) 2194 #if HAVE_MMX2 || HAVE_3DNOW
2195 __asm__ volatile( 2195 __asm__ volatile(
2196 "lea (%2, %2, 2), %%"REG_a" \n\t" // 3*stride 2196 "lea (%2, %2, 2), %%"REG_a" \n\t" // 3*stride
2197 "lea (%2, %2, 4), %%"REG_d" \n\t" // 5*stride 2197 "lea (%2, %2, 4), %%"REG_d" \n\t" // 5*stride
2198 "lea (%%"REG_d", %2, 2), %%"REG_c" \n\t" // 7*stride 2198 "lea (%%"REG_d", %2, 2), %%"REG_c" \n\t" // 7*stride
2199 // 0 1 2 3 4 5 6 7 8 9 2199 // 0 1 2 3 4 5 6 7 8 9
2477 "4: \n\t" 2477 "4: \n\t"
2478 2478
2479 :: "r" (src), "r" (tempBlurred), "r"((x86_reg)stride), "m" (tempBlurredPast) 2479 :: "r" (src), "r" (tempBlurred), "r"((x86_reg)stride), "m" (tempBlurredPast)
2480 : "%"REG_a, "%"REG_d, "%"REG_c, "memory" 2480 : "%"REG_a, "%"REG_d, "%"REG_c, "memory"
2481 ); 2481 );
2482 #else //defined (HAVE_MMX2) || defined (HAVE_3DNOW) 2482 #else //HAVE_MMX2 || HAVE_3DNOW
2483 { 2483 {
2484 int y; 2484 int y;
2485 int d=0; 2485 int d=0;
2486 // int sysd=0; 2486 // int sysd=0;
2487 int i; 2487 int i;
2560 } 2560 }
2561 } 2561 }
2562 } 2562 }
2563 } 2563 }
2564 } 2564 }
2565 #endif //defined (HAVE_MMX2) || defined (HAVE_3DNOW) 2565 #endif //HAVE_MMX2 || HAVE_3DNOW
2566 } 2566 }
2567 #endif //HAVE_ALTIVEC 2567 #endif //HAVE_ALTIVEC
2568 2568
2569 #ifdef HAVE_MMX 2569 #if HAVE_MMX
2570 /** 2570 /**
2571 * accurate deblock filter 2571 * accurate deblock filter
2572 */ 2572 */
2573 static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int stride, PPContext *c){ 2573 static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int stride, PPContext *c){
2574 int64_t dc_mask, eq_mask, both_masks; 2574 int64_t dc_mask, eq_mask, both_masks;
2968 "psubw %%mm3, %%mm1 \n\t" // 2H4 - 5H5 + 5H6 - 2H7 2968 "psubw %%mm3, %%mm1 \n\t" // 2H4 - 5H5 + 5H6 - 2H7
2969 2969
2970 "movq (%%"REG_c"), %%mm2 \n\t" // 2L0 - 5L1 + 5L2 - 2L3 2970 "movq (%%"REG_c"), %%mm2 \n\t" // 2L0 - 5L1 + 5L2 - 2L3
2971 "movq 8(%%"REG_c"), %%mm3 \n\t" // 2H0 - 5H1 + 5H2 - 2H3 2971 "movq 8(%%"REG_c"), %%mm3 \n\t" // 2H0 - 5H1 + 5H2 - 2H3
2972 2972
2973 #ifdef HAVE_MMX2 2973 #if HAVE_MMX2
2974 "movq %%mm7, %%mm6 \n\t" // 0 2974 "movq %%mm7, %%mm6 \n\t" // 0
2975 "psubw %%mm0, %%mm6 \n\t" 2975 "psubw %%mm0, %%mm6 \n\t"
2976 "pmaxsw %%mm6, %%mm0 \n\t" // |2L4 - 5L5 + 5L6 - 2L7| 2976 "pmaxsw %%mm6, %%mm0 \n\t" // |2L4 - 5L5 + 5L6 - 2L7|
2977 "movq %%mm7, %%mm6 \n\t" // 0 2977 "movq %%mm7, %%mm6 \n\t" // 0
2978 "psubw %%mm1, %%mm6 \n\t" 2978 "psubw %%mm1, %%mm6 \n\t"
3000 "pcmpgtw %%mm3, %%mm6 \n\t" 3000 "pcmpgtw %%mm3, %%mm6 \n\t"
3001 "pxor %%mm6, %%mm3 \n\t" 3001 "pxor %%mm6, %%mm3 \n\t"
3002 "psubw %%mm6, %%mm3 \n\t" // |2H0 - 5H1 + 5H2 - 2H3| 3002 "psubw %%mm6, %%mm3 \n\t" // |2H0 - 5H1 + 5H2 - 2H3|
3003 #endif 3003 #endif
3004 3004
3005 #ifdef HAVE_MMX2 3005 #if HAVE_MMX2
3006 "pminsw %%mm2, %%mm0 \n\t" 3006 "pminsw %%mm2, %%mm0 \n\t"
3007 "pminsw %%mm3, %%mm1 \n\t" 3007 "pminsw %%mm3, %%mm1 \n\t"
3008 #else 3008 #else
3009 "movq %%mm0, %%mm6 \n\t" 3009 "movq %%mm0, %%mm6 \n\t"
3010 "psubusw %%mm2, %%mm6 \n\t" 3010 "psubusw %%mm2, %%mm6 \n\t"
3064 "pxor %%mm6, %%mm2 \n\t" 3064 "pxor %%mm6, %%mm2 \n\t"
3065 "pxor %%mm7, %%mm3 \n\t" 3065 "pxor %%mm7, %%mm3 \n\t"
3066 "pand %%mm2, %%mm4 \n\t" 3066 "pand %%mm2, %%mm4 \n\t"
3067 "pand %%mm3, %%mm5 \n\t" 3067 "pand %%mm3, %%mm5 \n\t"
3068 3068
3069 #ifdef HAVE_MMX2 3069 #if HAVE_MMX2
3070 "pminsw %%mm0, %%mm4 \n\t" 3070 "pminsw %%mm0, %%mm4 \n\t"
3071 "pminsw %%mm1, %%mm5 \n\t" 3071 "pminsw %%mm1, %%mm5 \n\t"
3072 #else 3072 #else
3073 "movq %%mm4, %%mm2 \n\t" 3073 "movq %%mm4, %%mm2 \n\t"
3074 "psubusw %%mm0, %%mm2 \n\t" 3074 "psubusw %%mm0, %%mm2 \n\t"
3114 #undef SCALED_CPY 3114 #undef SCALED_CPY
3115 3115
3116 static inline void RENAME(blockCopy)(uint8_t dst[], int dstStride, const uint8_t src[], int srcStride, 3116 static inline void RENAME(blockCopy)(uint8_t dst[], int dstStride, const uint8_t src[], int srcStride,
3117 int levelFix, int64_t *packedOffsetAndScale) 3117 int levelFix, int64_t *packedOffsetAndScale)
3118 { 3118 {
3119 #ifndef HAVE_MMX 3119 #if !HAVE_MMX
3120 int i; 3120 int i;
3121 #endif 3121 #endif
3122 if(levelFix){ 3122 if(levelFix){
3123 #ifdef HAVE_MMX 3123 #if HAVE_MMX
3124 __asm__ volatile( 3124 __asm__ volatile(
3125 "movq (%%"REG_a"), %%mm2 \n\t" // packedYOffset 3125 "movq (%%"REG_a"), %%mm2 \n\t" // packedYOffset
3126 "movq 8(%%"REG_a"), %%mm3 \n\t" // packedYScale 3126 "movq 8(%%"REG_a"), %%mm3 \n\t" // packedYScale
3127 "lea (%2,%4), %%"REG_a" \n\t" 3127 "lea (%2,%4), %%"REG_a" \n\t"
3128 "lea (%3,%5), %%"REG_d" \n\t" 3128 "lea (%3,%5), %%"REG_d" \n\t"
3129 "pxor %%mm4, %%mm4 \n\t" 3129 "pxor %%mm4, %%mm4 \n\t"
3130 #ifdef HAVE_MMX2 3130 #if HAVE_MMX2
3131 #define REAL_SCALED_CPY(src1, src2, dst1, dst2) \ 3131 #define REAL_SCALED_CPY(src1, src2, dst1, dst2) \
3132 "movq " #src1 ", %%mm0 \n\t"\ 3132 "movq " #src1 ", %%mm0 \n\t"\
3133 "movq " #src1 ", %%mm5 \n\t"\ 3133 "movq " #src1 ", %%mm5 \n\t"\
3134 "movq " #src2 ", %%mm1 \n\t"\ 3134 "movq " #src2 ", %%mm1 \n\t"\
3135 "movq " #src2 ", %%mm6 \n\t"\ 3135 "movq " #src2 ", %%mm6 \n\t"\
3201 for(i=0; i<8; i++) 3201 for(i=0; i<8; i++)
3202 memcpy( &(dst[dstStride*i]), 3202 memcpy( &(dst[dstStride*i]),
3203 &(src[srcStride*i]), BLOCK_SIZE); 3203 &(src[srcStride*i]), BLOCK_SIZE);
3204 #endif //HAVE_MMX 3204 #endif //HAVE_MMX
3205 }else{ 3205 }else{
3206 #ifdef HAVE_MMX 3206 #if HAVE_MMX
3207 __asm__ volatile( 3207 __asm__ volatile(
3208 "lea (%0,%2), %%"REG_a" \n\t" 3208 "lea (%0,%2), %%"REG_a" \n\t"
3209 "lea (%1,%3), %%"REG_d" \n\t" 3209 "lea (%1,%3), %%"REG_d" \n\t"
3210 3210
3211 #define REAL_SIMPLE_CPY(src1, src2, dst1, dst2) \ 3211 #define REAL_SIMPLE_CPY(src1, src2, dst1, dst2) \
3241 /** 3241 /**
3242 * Duplicates the given 8 src pixels ? times upward 3242 * Duplicates the given 8 src pixels ? times upward
3243 */ 3243 */
3244 static inline void RENAME(duplicate)(uint8_t src[], int stride) 3244 static inline void RENAME(duplicate)(uint8_t src[], int stride)
3245 { 3245 {
3246 #ifdef HAVE_MMX 3246 #if HAVE_MMX
3247 __asm__ volatile( 3247 __asm__ volatile(
3248 "movq (%0), %%mm0 \n\t" 3248 "movq (%0), %%mm0 \n\t"
3249 "add %1, %0 \n\t" 3249 "add %1, %0 \n\t"
3250 "movq %%mm0, (%0) \n\t" 3250 "movq %%mm0, (%0) \n\t"
3251 "movq %%mm0, (%0, %1) \n\t" 3251 "movq %%mm0, (%0, %1) \n\t"
3278 #endif 3278 #endif
3279 int black=0, white=255; // blackest black and whitest white in the picture 3279 int black=0, white=255; // blackest black and whitest white in the picture
3280 int QPCorrecture= 256*256; 3280 int QPCorrecture= 256*256;
3281 3281
3282 int copyAhead; 3282 int copyAhead;
3283 #ifdef HAVE_MMX 3283 #if HAVE_MMX
3284 int i; 3284 int i;
3285 #endif 3285 #endif
3286 3286
3287 const int qpHShift= isColor ? 4-c.hChromaSubSample : 4; 3287 const int qpHShift= isColor ? 4-c.hChromaSubSample : 4;
3288 const int qpVShift= isColor ? 4-c.vChromaSubSample : 4; 3288 const int qpVShift= isColor ? 4-c.vChromaSubSample : 4;
3291 uint64_t * const yHistogram= c.yHistogram; 3291 uint64_t * const yHistogram= c.yHistogram;
3292 uint8_t * const tempSrc= srcStride > 0 ? c.tempSrc : c.tempSrc - 23*srcStride; 3292 uint8_t * const tempSrc= srcStride > 0 ? c.tempSrc : c.tempSrc - 23*srcStride;
3293 uint8_t * const tempDst= dstStride > 0 ? c.tempDst : c.tempDst - 23*dstStride; 3293 uint8_t * const tempDst= dstStride > 0 ? c.tempDst : c.tempDst - 23*dstStride;
3294 //const int mbWidth= isColor ? (width+7)>>3 : (width+15)>>4; 3294 //const int mbWidth= isColor ? (width+7)>>3 : (width+15)>>4;
3295 3295
3296 #ifdef HAVE_MMX 3296 #if HAVE_MMX
3297 for(i=0; i<57; i++){ 3297 for(i=0; i<57; i++){
3298 int offset= ((i*c.ppMode.baseDcDiff)>>8) + 1; 3298 int offset= ((i*c.ppMode.baseDcDiff)>>8) + 1;
3299 int threshold= offset*2 + 1; 3299 int threshold= offset*2 + 1;
3300 c.mmxDcOffset[i]= 0x7F - offset; 3300 c.mmxDcOffset[i]= 0x7F - offset;
3301 c.mmxDcThreshold[i]= 0x7F - threshold; 3301 c.mmxDcThreshold[i]= 0x7F - threshold;
3349 clipped-= yHistogram[white]; 3349 clipped-= yHistogram[white];
3350 } 3350 }
3351 3351
3352 scale= (double)(c.ppMode.maxAllowedY - c.ppMode.minAllowedY) / (double)(white-black); 3352 scale= (double)(c.ppMode.maxAllowedY - c.ppMode.minAllowedY) / (double)(white-black);
3353 3353
3354 #ifdef HAVE_MMX2 3354 #if HAVE_MMX2
3355 c.packedYScale= (uint16_t)(scale*256.0 + 0.5); 3355 c.packedYScale= (uint16_t)(scale*256.0 + 0.5);
3356 c.packedYOffset= (((black*c.packedYScale)>>8) - c.ppMode.minAllowedY) & 0xFFFF; 3356 c.packedYOffset= (((black*c.packedYScale)>>8) - c.ppMode.minAllowedY) & 0xFFFF;
3357 #else 3357 #else
3358 c.packedYScale= (uint16_t)(scale*1024.0 + 0.5); 3358 c.packedYScale= (uint16_t)(scale*1024.0 + 0.5);
3359 c.packedYOffset= (black - c.ppMode.minAllowedY) & 0xFFFF; 3359 c.packedYOffset= (black - c.ppMode.minAllowedY) & 0xFFFF;
3382 // From this point on it is guaranteed that we can read and write 16 lines downward 3382 // From this point on it is guaranteed that we can read and write 16 lines downward
3383 // finish 1 block before the next otherwise we might have a problem 3383 // finish 1 block before the next otherwise we might have a problem
3384 // with the L1 Cache of the P4 ... or only a few blocks at a time or soemthing 3384 // with the L1 Cache of the P4 ... or only a few blocks at a time or soemthing
3385 for(x=0; x<width; x+=BLOCK_SIZE){ 3385 for(x=0; x<width; x+=BLOCK_SIZE){
3386 3386
3387 #ifdef HAVE_MMX2 3387 #if HAVE_MMX2
3388 /* 3388 /*
3389 prefetchnta(srcBlock + (((x>>2)&6) + 5)*srcStride + 32); 3389 prefetchnta(srcBlock + (((x>>2)&6) + 5)*srcStride + 32);
3390 prefetchnta(srcBlock + (((x>>2)&6) + 6)*srcStride + 32); 3390 prefetchnta(srcBlock + (((x>>2)&6) + 6)*srcStride + 32);
3391 prefetcht0(dstBlock + (((x>>2)&6) + 5)*dstStride + 32); 3391 prefetcht0(dstBlock + (((x>>2)&6) + 5)*dstStride + 32);
3392 prefetcht0(dstBlock + (((x>>2)&6) + 6)*dstStride + 32); 3392 prefetcht0(dstBlock + (((x>>2)&6) + 6)*dstStride + 32);
3409 :: "r" (srcBlock), "r" ((x86_reg)srcStride), "r" (dstBlock), "r" ((x86_reg)dstStride), 3409 :: "r" (srcBlock), "r" ((x86_reg)srcStride), "r" (dstBlock), "r" ((x86_reg)dstStride),
3410 "g" ((x86_reg)x), "g" ((x86_reg)copyAhead) 3410 "g" ((x86_reg)x), "g" ((x86_reg)copyAhead)
3411 : "%"REG_a, "%"REG_d 3411 : "%"REG_a, "%"REG_d
3412 ); 3412 );
3413 3413
3414 #elif defined(HAVE_3DNOW) 3414 #elif HAVE_3DNOW
3415 //FIXME check if this is faster on an 3dnow chip or if it is faster without the prefetch or ... 3415 //FIXME check if this is faster on an 3dnow chip or if it is faster without the prefetch or ...
3416 /* prefetch(srcBlock + (((x>>3)&3) + 5)*srcStride + 32); 3416 /* prefetch(srcBlock + (((x>>3)&3) + 5)*srcStride + 32);
3417 prefetch(srcBlock + (((x>>3)&3) + 9)*srcStride + 32); 3417 prefetch(srcBlock + (((x>>3)&3) + 9)*srcStride + 32);
3418 prefetchw(dstBlock + (((x>>3)&3) + 5)*dstStride + 32); 3418 prefetchw(dstBlock + (((x>>3)&3) + 5)*dstStride + 32);
3419 prefetchw(dstBlock + (((x>>3)&3) + 9)*dstStride + 32); 3419 prefetchw(dstBlock + (((x>>3)&3) + 9)*dstStride + 32);
3455 3455
3456 for(y=0; y<height; y+=BLOCK_SIZE){ 3456 for(y=0; y<height; y+=BLOCK_SIZE){
3457 //1% speedup if these are here instead of the inner loop 3457 //1% speedup if these are here instead of the inner loop
3458 const uint8_t *srcBlock= &(src[y*srcStride]); 3458 const uint8_t *srcBlock= &(src[y*srcStride]);
3459 uint8_t *dstBlock= &(dst[y*dstStride]); 3459 uint8_t *dstBlock= &(dst[y*dstStride]);
3460 #ifdef HAVE_MMX 3460 #if HAVE_MMX
3461 uint8_t *tempBlock1= c.tempBlocks; 3461 uint8_t *tempBlock1= c.tempBlocks;
3462 uint8_t *tempBlock2= c.tempBlocks + 8; 3462 uint8_t *tempBlock2= c.tempBlocks + 8;
3463 #endif 3463 #endif
3464 const int8_t *QPptr= &QPs[(y>>qpVShift)*QPStride]; 3464 const int8_t *QPptr= &QPs[(y>>qpVShift)*QPStride];
3465 int8_t *nonBQPptr= &c.nonBQPTable[(y>>qpVShift)*FFABS(QPStride)]; 3465 int8_t *nonBQPptr= &c.nonBQPTable[(y>>qpVShift)*FFABS(QPStride)];
3491 // From this point on it is guaranteed that we can read and write 16 lines downward 3491 // From this point on it is guaranteed that we can read and write 16 lines downward
3492 // finish 1 block before the next otherwise we might have a problem 3492 // finish 1 block before the next otherwise we might have a problem
3493 // with the L1 Cache of the P4 ... or only a few blocks at a time or soemthing 3493 // with the L1 Cache of the P4 ... or only a few blocks at a time or soemthing
3494 for(x=0; x<width; x+=BLOCK_SIZE){ 3494 for(x=0; x<width; x+=BLOCK_SIZE){
3495 const int stride= dstStride; 3495 const int stride= dstStride;
3496 #ifdef HAVE_MMX 3496 #if HAVE_MMX
3497 uint8_t *tmpXchg; 3497 uint8_t *tmpXchg;
3498 #endif 3498 #endif
3499 if(isColor){ 3499 if(isColor){
3500 QP= QPptr[x>>qpHShift]; 3500 QP= QPptr[x>>qpHShift];
3501 c.nonBQP= nonBQPptr[x>>qpHShift]; 3501 c.nonBQP= nonBQPptr[x>>qpHShift];
3505 c.nonBQP= nonBQPptr[x>>4]; 3505 c.nonBQP= nonBQPptr[x>>4];
3506 c.nonBQP= (c.nonBQP* QPCorrecture + 256*128)>>16; 3506 c.nonBQP= (c.nonBQP* QPCorrecture + 256*128)>>16;
3507 yHistogram[ srcBlock[srcStride*12 + 4] ]++; 3507 yHistogram[ srcBlock[srcStride*12 + 4] ]++;
3508 } 3508 }
3509 c.QP= QP; 3509 c.QP= QP;
3510 #ifdef HAVE_MMX 3510 #if HAVE_MMX
3511 __asm__ volatile( 3511 __asm__ volatile(
3512 "movd %1, %%mm7 \n\t" 3512 "movd %1, %%mm7 \n\t"
3513 "packuswb %%mm7, %%mm7 \n\t" // 0, 0, 0, QP, 0, 0, 0, QP 3513 "packuswb %%mm7, %%mm7 \n\t" // 0, 0, 0, QP, 0, 0, 0, QP
3514 "packuswb %%mm7, %%mm7 \n\t" // 0,QP, 0, QP, 0,QP, 0, QP 3514 "packuswb %%mm7, %%mm7 \n\t" // 0,QP, 0, QP, 0,QP, 0, QP
3515 "packuswb %%mm7, %%mm7 \n\t" // QP,..., QP 3515 "packuswb %%mm7, %%mm7 \n\t" // QP,..., QP
3518 : "r" (QP) 3518 : "r" (QP)
3519 ); 3519 );
3520 #endif 3520 #endif
3521 3521
3522 3522
3523 #ifdef HAVE_MMX2 3523 #if HAVE_MMX2
3524 /* 3524 /*
3525 prefetchnta(srcBlock + (((x>>2)&6) + 5)*srcStride + 32); 3525 prefetchnta(srcBlock + (((x>>2)&6) + 5)*srcStride + 32);
3526 prefetchnta(srcBlock + (((x>>2)&6) + 6)*srcStride + 32); 3526 prefetchnta(srcBlock + (((x>>2)&6) + 6)*srcStride + 32);
3527 prefetcht0(dstBlock + (((x>>2)&6) + 5)*dstStride + 32); 3527 prefetcht0(dstBlock + (((x>>2)&6) + 5)*dstStride + 32);
3528 prefetcht0(dstBlock + (((x>>2)&6) + 6)*dstStride + 32); 3528 prefetcht0(dstBlock + (((x>>2)&6) + 6)*dstStride + 32);
3545 :: "r" (srcBlock), "r" ((x86_reg)srcStride), "r" (dstBlock), "r" ((x86_reg)dstStride), 3545 :: "r" (srcBlock), "r" ((x86_reg)srcStride), "r" (dstBlock), "r" ((x86_reg)dstStride),
3546 "g" ((x86_reg)x), "g" ((x86_reg)copyAhead) 3546 "g" ((x86_reg)x), "g" ((x86_reg)copyAhead)
3547 : "%"REG_a, "%"REG_d 3547 : "%"REG_a, "%"REG_d
3548 ); 3548 );
3549 3549
3550 #elif defined(HAVE_3DNOW) 3550 #elif HAVE_3DNOW
3551 //FIXME check if this is faster on an 3dnow chip or if it is faster without the prefetch or ... 3551 //FIXME check if this is faster on an 3dnow chip or if it is faster without the prefetch or ...
3552 /* prefetch(srcBlock + (((x>>3)&3) + 5)*srcStride + 32); 3552 /* prefetch(srcBlock + (((x>>3)&3) + 5)*srcStride + 32);
3553 prefetch(srcBlock + (((x>>3)&3) + 9)*srcStride + 32); 3553 prefetch(srcBlock + (((x>>3)&3) + 9)*srcStride + 32);
3554 prefetchw(dstBlock + (((x>>3)&3) + 5)*dstStride + 32); 3554 prefetchw(dstBlock + (((x>>3)&3) + 5)*dstStride + 32);
3555 prefetchw(dstBlock + (((x>>3)&3) + 9)*dstStride + 32); 3555 prefetchw(dstBlock + (((x>>3)&3) + 9)*dstStride + 32);
3589 }else if(mode & V_A_DEBLOCK){ 3589 }else if(mode & V_A_DEBLOCK){
3590 RENAME(do_a_deblock)(dstBlock, stride, 1, &c); 3590 RENAME(do_a_deblock)(dstBlock, stride, 1, &c);
3591 } 3591 }
3592 } 3592 }
3593 3593
3594 #ifdef HAVE_MMX 3594 #if HAVE_MMX
3595 RENAME(transpose1)(tempBlock1, tempBlock2, dstBlock, dstStride); 3595 RENAME(transpose1)(tempBlock1, tempBlock2, dstBlock, dstStride);
3596 #endif 3596 #endif
3597 /* check if we have a previous block to deblock it with dstBlock */ 3597 /* check if we have a previous block to deblock it with dstBlock */
3598 if(x - 8 >= 0){ 3598 if(x - 8 >= 0){
3599 #ifdef HAVE_MMX 3599 #if HAVE_MMX
3600 if(mode & H_X1_FILTER) 3600 if(mode & H_X1_FILTER)
3601 RENAME(vertX1Filter)(tempBlock1, 16, &c); 3601 RENAME(vertX1Filter)(tempBlock1, 16, &c);
3602 else if(mode & H_DEBLOCK){ 3602 else if(mode & H_DEBLOCK){
3603 //START_TIMER 3603 //START_TIMER
3604 const int t= RENAME(vertClassify)(tempBlock1, 16, &c); 3604 const int t= RENAME(vertClassify)(tempBlock1, 16, &c);
3615 3615
3616 #else 3616 #else
3617 if(mode & H_X1_FILTER) 3617 if(mode & H_X1_FILTER)
3618 horizX1Filter(dstBlock-4, stride, QP); 3618 horizX1Filter(dstBlock-4, stride, QP);
3619 else if(mode & H_DEBLOCK){ 3619 else if(mode & H_DEBLOCK){
3620 #ifdef HAVE_ALTIVEC 3620 #if HAVE_ALTIVEC
3621 DECLARE_ALIGNED(16, unsigned char, tempBlock[272]); 3621 DECLARE_ALIGNED(16, unsigned char, tempBlock[272]);
3622 transpose_16x8_char_toPackedAlign_altivec(tempBlock, dstBlock - (4 + 1), stride); 3622 transpose_16x8_char_toPackedAlign_altivec(tempBlock, dstBlock - (4 + 1), stride);
3623 3623
3624 const int t=vertClassify_altivec(tempBlock-48, 16, &c); 3624 const int t=vertClassify_altivec(tempBlock-48, 16, &c);
3625 if(t==1) { 3625 if(t==1) {
3657 } 3657 }
3658 3658
3659 dstBlock+=8; 3659 dstBlock+=8;
3660 srcBlock+=8; 3660 srcBlock+=8;
3661 3661
3662 #ifdef HAVE_MMX 3662 #if HAVE_MMX
3663 tmpXchg= tempBlock1; 3663 tmpXchg= tempBlock1;
3664 tempBlock1= tempBlock2; 3664 tempBlock1= tempBlock2;
3665 tempBlock2 = tmpXchg; 3665 tempBlock2 = tmpXchg;
3666 #endif 3666 #endif
3667 } 3667 }
3697 + dstBlock[x +11*dstStride] + dstBlock[x +12*dstStride]; 3697 + dstBlock[x +11*dstStride] + dstBlock[x +12*dstStride];
3698 + dstBlock[x +13*dstStride] 3698 + dstBlock[x +13*dstStride]
3699 + dstBlock[x +14*dstStride] + dstBlock[x +15*dstStride]; 3699 + dstBlock[x +14*dstStride] + dstBlock[x +15*dstStride];
3700 }*/ 3700 }*/
3701 } 3701 }
3702 #ifdef HAVE_3DNOW 3702 #if HAVE_3DNOW
3703 __asm__ volatile("femms"); 3703 __asm__ volatile("femms");
3704 #elif defined (HAVE_MMX) 3704 #elif HAVE_MMX
3705 __asm__ volatile("emms"); 3705 __asm__ volatile("emms");
3706 #endif 3706 #endif
3707 3707
3708 #ifdef DEBUG_BRIGHTNESS 3708 #ifdef DEBUG_BRIGHTNESS
3709 if(!isColor){ 3709 if(!isColor){