libpostproc: comparison of postprocess_template.c @ 121:dd89aa84269b

HAVE_3DNOW --> HAVE_AMD3DNOW to sync with latest configure changes.
author diego
date Sun, 25 Jan 2009 19:57:52 +0000
parents 4a1602d552aa
children 1500ae6cf66c
comparing 120:e86e6ea21776 with 121:dd89aa84269b
31  #undef PMINUB
32  #undef PMAXUB
33
34  #if HAVE_MMX2
35  #define REAL_PAVGB(a,b) "pavgb " #a ", " #b " \n\t"
- 36  #elif HAVE_3DNOW
+ 36  #elif HAVE_AMD3DNOW
37  #define REAL_PAVGB(a,b) "pavgusb " #a ", " #b " \n\t"
38  #endif
39  #define PAVGB(a,b) REAL_PAVGB(a,b)
40
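Both branches of the macro wrap the same primitive: a packed unsigned-byte average with upward rounding. pavgb (MMX2) and pavgusb (3DNow!) both compute (a + b + 1) >> 1 per byte lane, which is why one PAVGB() name can cover either instruction set. A scalar sketch of one lane:

#include <stdint.h>

/* Scalar equivalent of one PAVGB lane: average two unsigned bytes,
 * rounding up, exactly what pavgb (MMX2) and pavgusb (3DNow!) do
 * for each of the eight packed bytes. */
static inline uint8_t pavgb_scalar(uint8_t a, uint8_t b)
{
    return (uint8_t)((a + b + 1) >> 1);
}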
41  #if HAVE_MMX2
177  * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16
178  */
179  #if !HAVE_ALTIVEC
180  static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, PPContext *c)
181  {
- 182  #if HAVE_MMX2 || HAVE_3DNOW
+ 182  #if HAVE_MMX2 || HAVE_AMD3DNOW
183  src+= stride*3;
184  __asm__ volatile( //"movv %0 %1 %2\n\t"
185  "movq %2, %%mm0 \n\t" // QP,..., QP
186  "pxor %%mm4, %%mm4 \n\t"
187
304
305  :
306  : "r" (src), "r" ((x86_reg)stride), "m" (c->pQPb)
307  : "%"REG_a, "%"REG_c
308  );
- 309  #else //HAVE_MMX2 || HAVE_3DNOW
+ 309  #else //HAVE_MMX2 || HAVE_AMD3DNOW
310  const int l1= stride;
311  const int l2= stride + l1;
312  const int l3= stride + l2;
313  const int l4= stride + l3;
314  const int l5= stride + l4;
343  src[l7]= (sums[6] + sums[8] + 2*src[l7])>>4;
344  src[l8]= (sums[7] + sums[9] + 2*src[l8])>>4;
345
346  src++;
347  }
- 348  #endif //HAVE_MMX2 || HAVE_3DNOW
+ 348  #endif //HAVE_MMX2 || HAVE_AMD3DNOW
349  }
350  #endif //HAVE_ALTIVEC
351
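The C fallback of doVertLowPass (mostly elided above) realizes the documented 9-tap kernel (1,1,2,2,4,2,2,1,1)/16 per column; the surviving lines 343-344 show its running-sums form, (sums[i] + sums[i+2] + 2*src[l]) >> 4. A direct per-column sketch of the same kernel follows; the edge clamping and the +8 rounding here are assumptions (the real filter conditions edge samples on QP):

#include <stdint.h>

/* Hypothetical per-column sketch of the 9-tap (1,1,2,2,4,2,2,1,1)/16
 * vertical lowpass. Edge samples are clamped to the column ends and
 * +8 rounds the >>4; both are assumptions. Assumes height <= 16. */
static void vert_lowpass_column(uint8_t *col, int stride, int height)
{
    static const int tap[9] = {1, 1, 2, 2, 4, 2, 2, 1, 1};
    uint8_t in[16];
    int y, k;
    for (y = 0; y < height && y < 16; y++)
        in[y] = col[y*stride];          /* snapshot so in-place writes don't feed back */
    for (y = 0; y < height; y++) {
        int sum = 8;                    /* rounding term, assumed */
        for (k = 0; k < 9; k++) {
            int yy = y + k - 4;
            if (yy < 0) yy = 0;
            if (yy >= height) yy = height - 1;
            sum += tap[k] * in[yy];
        }
        col[y*stride] = (uint8_t)(sum >> 4);
    }
}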
352  #if 0
353  /**
362  x/8 = 1
363  1    12   12   23
364  */
365  static inline void RENAME(vertRK1Filter)(uint8_t *src, int stride, int QP)
366  {
- 367  #if HAVE_MMX2 || HAVE_3DNOW
+ 367  #if HAVE_MMX2 || HAVE_AMD3DNOW
368  src+= stride*3;
369  // FIXME rounding
370  __asm__ volatile(
371  "pxor %%mm7, %%mm7 \n\t" // 0
372  "movq "MANGLE(b80)", %%mm6 \n\t" // MIN_SIGNED_BYTE
424
425  :
426  : "r" (src), "r" ((x86_reg)stride)
427  : "%"REG_a, "%"REG_c
428  );
- 429  #else //HAVE_MMX2 || HAVE_3DNOW
+ 429  #else //HAVE_MMX2 || HAVE_AMD3DNOW
430  const int l1= stride;
431  const int l2= stride + l1;
432  const int l3= stride + l2;
433  const int l4= stride + l3;
434  const int l5= stride + l4;
447  src[x+l5] -=v>>1;
448  src[x+l6] -=v>>3;
449  }
450  }
451
- 452  #endif //HAVE_MMX2 || HAVE_3DNOW
+ 452  #endif //HAVE_MMX2 || HAVE_AMD3DNOW
453  }
454  #endif //0
455
456  /**
457  * Experimental Filter 1
460  * can only smooth blocks at the expected locations (it cannot smooth them if they did move)
461  * MMX2 version does correct clipping C version does not
462  */
463  static inline void RENAME(vertX1Filter)(uint8_t *src, int stride, PPContext *co)
464  {
- 465  #if HAVE_MMX2 || HAVE_3DNOW
+ 465  #if HAVE_MMX2 || HAVE_AMD3DNOW
466  src+= stride*3;
467
468  __asm__ volatile(
469  "pxor %%mm7, %%mm7 \n\t" // 0
470  "lea (%0, %1), %%"REG_a" \n\t"
546
547  :
548  : "r" (src), "r" ((x86_reg)stride), "m" (co->pQPb)
549  : "%"REG_a, "%"REG_c
550  );
- 551  #else //HAVE_MMX2 || HAVE_3DNOW
+ 551  #else //HAVE_MMX2 || HAVE_AMD3DNOW
552
553  const int l1= stride;
554  const int l2= stride + l1;
555  const int l3= stride + l2;
556  const int l4= stride + l3;
580  src[l6] -=v>>2;
581  src[l7] -=v>>3;
582  }
583  src++;
584  }
- 585  #endif //HAVE_MMX2 || HAVE_3DNOW
+ 585  #endif //HAVE_MMX2 || HAVE_AMD3DNOW
586  }
587
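Everything this changeset touches follows one dispatch skeleton: an inline-asm path guarded by HAVE_MMX2 || HAVE_AMD3DNOW (the macro being renamed) and a portable C fallback in the #else branch, compiled once per RENAME() instantiation. Schematically, with someFilter as a placeholder name, not a function in this file:

static inline void RENAME(someFilter)(uint8_t *src, int stride, PPContext *c)
{
#if HAVE_MMX2 || HAVE_AMD3DNOW
    __asm__ volatile(
        "" /* MMX2/3DNow! body; PAVGB() expands to pavgb or pavgusb */
        : : "r" (src), "r" ((x86_reg)stride), "m" (c->pQPb)
        : "%"REG_a);
#else /* HAVE_MMX2 || HAVE_AMD3DNOW */
    /* plain C fallback with the same observable behaviour */
#endif
}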
588  #if !HAVE_ALTIVEC
589  static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext *c)
590  {
- 591  #if HAVE_MMX2 || HAVE_3DNOW
+ 591  #if HAVE_MMX2 || HAVE_AMD3DNOW
592  /*
593  uint8_t tmp[16];
594  const int l1= stride;
595  const int l2= stride + l1;
596  const int l3= stride + l2;
1099
1100  : "+r" (src)
1101  : "r" ((x86_reg)stride), "m" (c->pQPb)
1102  : "%"REG_a, "%"REG_c
1103  );
- 1104  #else //HAVE_MMX2 || HAVE_3DNOW
+ 1104  #else //HAVE_MMX2 || HAVE_AMD3DNOW
1105  const int l1= stride;
1106  const int l2= stride + l1;
1107  const int l3= stride + l2;
1108  const int l4= stride + l3;
1109  const int l5= stride + l4;
1137  src[l4]-= d;
1138  src[l5]+= d;
1139  }
1140  src++;
1141  }
- 1142  #endif //HAVE_MMX2 || HAVE_3DNOW
+ 1142  #endif //HAVE_MMX2 || HAVE_AMD3DNOW
1143  }
1144  #endif //HAVE_ALTIVEC
1145
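The surviving tail of the doVertDefFilter fallback (src[l4] -= d; src[l5] += d;) is the classic symmetric deblock step: one correction d pulls the two pixels straddling the 8x8 boundary toward each other. A hypothetical helper isolating that step; deriving and clipping d against QP happens in the elided code:

#include <stdint.h>

/* Hypothetical sketch of the final deblock step visible above. d is
 * assumed to be the already-clipped correction; lines 4 and 5 straddle
 * the horizontal 8x8 block boundary. */
static void adjust_boundary(uint8_t *src, int stride, int d)
{
    src[4*stride] -= d;  /* last line of the upper block  */
    src[5*stride] += d;  /* first line of the lower block */
}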
1146  #if !HAVE_ALTIVEC
1147  static inline void RENAME(dering)(uint8_t src[], int stride, PPContext *c)
1148  {
- 1149  #if HAVE_MMX2 || HAVE_3DNOW
+ 1149  #if HAVE_MMX2 || HAVE_AMD3DNOW
1150  __asm__ volatile(
1151  "pxor %%mm6, %%mm6 \n\t"
1152  "pcmpeqb %%mm7, %%mm7 \n\t"
1153  "movq %2, %%mm0 \n\t"
1154  "punpcklbw %%mm6, %%mm0 \n\t"
1368
1369  "1: \n\t"
1370  : : "r" (src), "r" ((x86_reg)stride), "m" (c->pQPb), "m"(c->pQPb2)
1371  : "%"REG_a, "%"REG_d, "%"REG_c
1372  );
- 1373  #else //HAVE_MMX2 || HAVE_3DNOW
+ 1373  #else //HAVE_MMX2 || HAVE_AMD3DNOW
1374  int y;
1375  int min=255;
1376  int max=0;
1377  int avg;
1378  uint8_t *p;
1485  }
1486  }
1487  // src[0] = src[7]=src[stride*7]=src[stride*7 + 7]=255;
1488  }
1489  #endif
- 1490  #endif //HAVE_MMX2 || HAVE_3DNOW
+ 1490  #endif //HAVE_MMX2 || HAVE_AMD3DNOW
1491  }
1492  #endif //HAVE_ALTIVEC
1493
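The declarations that survive from the C dering path (min=255, max=0, avg, a row pointer p) point at its first stage: scan the block for its extremes and derive a midpoint used to classify pixels as ringing artifacts. A hypothetical sketch of that opening scan; the classification and the QP tests against c->pQPb/c->pQPb2 are elided:

#include <stdint.h>

/* Hypothetical sketch of the dering opening: find the block's min and
 * max, then a midpoint for classifying pixels. Rounding is assumed. */
static int block_midpoint(const uint8_t *src, int stride)
{
    int min = 255, max = 0;
    for (int y = 0; y < 8; y++) {
        const uint8_t *p = src + y*stride;
        for (int x = 0; x < 8; x++) {
            if (p[x] < min) min = p[x];
            if (p[x] > max) max = p[x];
        }
    }
    return (min + max + 1) >> 1;
}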
1494  /**
1495  * Deinterlaces the given block by linearly interpolating every second line.
1497  * lines 0-3 have been passed through the deblock / dering filters already, but can be read, too.
1498  * lines 4-12 will be read into the deblocking filter and should be deinterlaced
1499  */
1500  static inline void RENAME(deInterlaceInterpolateLinear)(uint8_t src[], int stride)
1501  {
- 1502  #if HAVE_MMX2 || HAVE_3DNOW
+ 1502  #if HAVE_MMX2 || HAVE_AMD3DNOW
1503  src+= 4*stride;
1504  __asm__ volatile(
1505  "lea (%0, %1), %%"REG_a" \n\t"
1506  "lea (%%"REG_a", %1, 4), %%"REG_c" \n\t"
1507  // 0 1 2 3 4 5 6 7 8 9
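The C fallback of this function falls outside the excerpt, but the doc comment pins down the operation: every second line becomes the average of its two neighbours. A minimal sketch; the round-up (+1, matching PAVGB) is an assumption:

#include <stdint.h>

/* Minimal sketch of the linear deinterlacer on an 8-pixel-wide block:
 * odd lines are rebuilt as the average of the even lines around them.
 * Rounding up matches PAVGB; the elided C fallback may differ. */
static void deinterlace_linear_sketch(uint8_t *src, int stride)
{
    for (int x = 0; x < 8; x++) {
        for (int y = 1; y < 8; y += 2)
            src[y*stride] = (uint8_t)((src[(y-1)*stride] + src[(y+1)*stride] + 1) >> 1);
        src++;
    }
}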
1550  * lines 4-12 will be read into the deblocking filter and should be deinterlaced
1551  * this filter will read lines 3-15 and write 7-13
1552  */
1553  static inline void RENAME(deInterlaceInterpolateCubic)(uint8_t src[], int stride)
1554  {
- 1555  #if HAVE_MMX2 || HAVE_3DNOW
+ 1555  #if HAVE_MMX2 || HAVE_AMD3DNOW
1556  src+= stride*3;
1557  __asm__ volatile(
1558  "lea (%0, %1), %%"REG_a" \n\t"
1559  "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t"
1560  "lea (%%"REG_d", %1, 4), %%"REG_c" \n\t"
1592  DEINT_CUBIC((%%REGd, %1), (%0, %1, 8) , (%%REGd, %1, 4), (%%REGc) , (%%REGc, %1, 2))
1593
1594  : : "r" (src), "r" ((x86_reg)stride)
1595  : "%"REG_a, "%"REG_d, "%"REG_c
1596  );
- 1597  #else //HAVE_MMX2 || HAVE_3DNOW
+ 1597  #else //HAVE_MMX2 || HAVE_AMD3DNOW
1598  int x;
1599  src+= stride*3;
1600  for(x=0; x<8; x++){
1601  src[stride*3] = CLIP((-src[0] + 9*src[stride*2] + 9*src[stride*4] - src[stride*6])>>4);
1602  src[stride*5] = CLIP((-src[stride*2] + 9*src[stride*4] + 9*src[stride*6] - src[stride*8])>>4);
1603  src[stride*7] = CLIP((-src[stride*4] + 9*src[stride*6] + 9*src[stride*8] - src[stride*10])>>4);
1604  src[stride*9] = CLIP((-src[stride*6] + 9*src[stride*8] + 9*src[stride*10] - src[stride*12])>>4);
1605  src++;
1606  }
- 1607  #endif //HAVE_MMX2 || HAVE_3DNOW
+ 1607  #endif //HAVE_MMX2 || HAVE_AMD3DNOW
1608  }
1609
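The (-1, 9, 9, -1)/16 taps in the fallback are the standard 4-point cubic half-pel interpolator. The weights sum to 16, so after the >>4 a flat area passes through unchanged, and CLIP only matters for overshoot near sharp edges. A quick check of the flat-input property:

#include <assert.h>

/* With four equal inputs p: (-p + 9p + 9p - p) >> 4 == 16p >> 4 == p. */
static void check_cubic_dc(void)
{
    for (int p = 0; p < 256; p++)
        assert(((-p + 9*p + 9*p - p) >> 4) == p);
}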
1610  /**
1611  * Deinterlaces the given block by filtering every second line with a (-1 4 2 4 -1) filter.
1612  * will be called for every 8x8 block and can read & write from line 4-15
1614  * lines 4-12 will be read into the deblocking filter and should be deinterlaced
1615  * this filter will read lines 4-13 and write 5-11
1616  */
1617  static inline void RENAME(deInterlaceFF)(uint8_t src[], int stride, uint8_t *tmp)
1618  {
- 1619  #if HAVE_MMX2 || HAVE_3DNOW
+ 1619  #if HAVE_MMX2 || HAVE_AMD3DNOW
1620  src+= stride*4;
1621  __asm__ volatile(
1622  "lea (%0, %1), %%"REG_a" \n\t"
1623  "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t"
1624  "pxor %%mm7, %%mm7 \n\t"
1663
1664  "movq %%mm0, (%2) \n\t"
1665  : : "r" (src), "r" ((x86_reg)stride), "r"(tmp)
1666  : "%"REG_a, "%"REG_d
1667  );
- 1668  #else //HAVE_MMX2 || HAVE_3DNOW
+ 1668  #else //HAVE_MMX2 || HAVE_AMD3DNOW
1669  int x;
1670  src+= stride*4;
1671  for(x=0; x<8; x++){
1672  int t1= tmp[x];
1673  int t2= src[stride*1];
1681  src[stride*7]= CLIP((-t2 + 4*src[stride*6] + 2*t1 + 4*src[stride*8] - src[stride*9] + 4)>>3);
1682  tmp[x]= t1;
1683
1684  src++;
1685  }
- 1686  #endif //HAVE_MMX2 || HAVE_3DNOW
+ 1686  #endif //HAVE_MMX2 || HAVE_AMD3DNOW
1687  }
1688
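Line 1681 shows the (-1 4 2 4 -1) tap order concretely: the sample being rebuilt contributes weight 2, its field neighbours weight 4, the outer samples are subtracted, and +4 rounds before the >>3. The taps sum to 8, so flat input survives; tmp carries the last processed line into the next block row so the filter chains across 8x8 blocks. Checking the normalization:

#include <assert.h>

/* (-1 4 2 4 -1)/8 with +4 rounding: constant input p maps to p,
 * since (8p + 4) >> 3 == p. */
static void check_ff_dc(void)
{
    for (int p = 0; p < 256; p++)
        assert(((-p + 4*p + 2*p + 4*p - p + 4) >> 3) == p);
}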
1689  /**
1690  * Deinterlaces the given block by filtering every line with a (-1 2 6 2 -1) filter.
1691  * will be called for every 8x8 block and can read & write from line 4-15
1693  * lines 4-12 will be read into the deblocking filter and should be deinterlaced
1694  * this filter will read lines 4-13 and write 4-11
1695  */
1696  static inline void RENAME(deInterlaceL5)(uint8_t src[], int stride, uint8_t *tmp, uint8_t *tmp2)
1697  {
- 1698  #if HAVE_MMX2 || HAVE_3DNOW
+ 1698  #if HAVE_MMX2 || HAVE_AMD3DNOW
1699  src+= stride*4;
1700  __asm__ volatile(
1701  "lea (%0, %1), %%"REG_a" \n\t"
1702  "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t"
1703  "pxor %%mm7, %%mm7 \n\t"
1753  "movq %%mm0, (%2) \n\t"
1754  "movq %%mm1, (%3) \n\t"
1755  : : "r" (src), "r" ((x86_reg)stride), "r"(tmp), "r"(tmp2)
1756  : "%"REG_a, "%"REG_d
1757  );
- 1758  #else //HAVE_MMX2 || HAVE_3DNOW
+ 1758  #else //HAVE_MMX2 || HAVE_AMD3DNOW
1759  int x;
1760  src+= stride*4;
1761  for(x=0; x<8; x++){
1762  int t1= tmp[x];
1763  int t2= tmp2[x];
1782  tmp[x]= t3;
1783  tmp2[x]= t1;
1784
1785  src++;
1786  }
- 1787  #endif //HAVE_MMX2 || HAVE_3DNOW
+ 1787  #endif //HAVE_MMX2 || HAVE_AMD3DNOW
1788  }
1789
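deInterlaceL5 needs two lines of history, hence the pair tmp/tmp2: the 5-tap (-1 2 6 2 -1) kernel reaches two rows above the current line, and those rows have already been overwritten in place (t1/t2 are restored at the top of the loop and saved again at the bottom). The taps again sum to 8; assuming the same +4 rounding as the FF filter:

#include <assert.h>

/* (-1 2 6 2 -1)/8: constant input is preserved (+4 rounding assumed,
 * analogous to the FF filter; the exact term is elided above). */
static void check_l5_dc(void)
{
    for (int p = 0; p < 256; p++)
        assert(((-p + 2*p + 6*p + 2*p - p + 4) >> 3) == p);
}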
1790  /**
1791  * Deinterlaces the given block by filtering all lines with a (1 2 1) filter.
1792  * will be called for every 8x8 block and can read & write from line 4-15
1794  * lines 4-12 will be read into the deblocking filter and should be deinterlaced
1795  * this filter will read lines 4-13 and write 4-11
1796  */
1797  static inline void RENAME(deInterlaceBlendLinear)(uint8_t src[], int stride, uint8_t *tmp)
1798  {
- 1799  #if HAVE_MMX2 || HAVE_3DNOW
+ 1799  #if HAVE_MMX2 || HAVE_AMD3DNOW
1800  src+= 4*stride;
1801  __asm__ volatile(
1802  "lea (%0, %1), %%"REG_a" \n\t"
1803  "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t"
1804  // 0 1 2 3 4 5 6 7 8 9
1841  "movq %%mm1, (%2) \n\t"
1842
1843  : : "r" (src), "r" ((x86_reg)stride), "r" (tmp)
1844  : "%"REG_a, "%"REG_d
1845  );
- 1846  #else //HAVE_MMX2 || HAVE_3DNOW
+ 1846  #else //HAVE_MMX2 || HAVE_AMD3DNOW
1847  int a, b, c, x;
1848  src+= 4*stride;
1849
1850  for(x=0; x<2; x++){
1851  a= *(uint32_t*)&tmp[stride*0];
1884
1885  *(uint32_t*)&tmp[stride*0]= c;
1886  src += 4;
1887  tmp += 4;
1888  }
- 1889  #endif //HAVE_MMX2 || HAVE_3DNOW
+ 1889  #endif //HAVE_MMX2 || HAVE_AMD3DNOW
1890  }
1891
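The fallback packs four pixels into a uint32_t and filters them in parallel with carry-masking tricks, which is why the outer loop runs x<2 for an 8-pixel-wide block; tmp again carries the line above across block rows. A scalar sketch of the (1 2 1)/4 vertical blend it implements; the +2 rounding is an assumption:

#include <stdint.h>

/* Scalar sketch of the (1 2 1)/4 vertical blend on one column.
 * 'above' stands in for the history carried by tmp; rounding (+2)
 * is assumed. The caller must keep col[lines*stride] readable. */
static void blend121_column(uint8_t *col, int stride, int above, int lines)
{
    int prev = above;
    for (int y = 0; y < lines; y++) {
        int cur  = col[y*stride];
        int next = col[(y+1)*stride];
        col[y*stride] = (uint8_t)((prev + 2*cur + next + 2) >> 2);
        prev = cur;  /* next line must see the unfiltered value */
    }
}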
1892  /**
1893  * Deinterlaces the given block by applying a median filter to every second line.
1894  * will be called for every 8x8 block and can read & write from line 4-15,
2189  tempBlurredPast[128]= maxNoise[1];
2190  tempBlurredPast[129]= maxNoise[2];
2191
2192  #define FAST_L2_DIFF
2193  //#define L1_DIFF //u should change the thresholds too if u try that one
- 2194  #if HAVE_MMX2 || HAVE_3DNOW
+ 2194  #if HAVE_MMX2 || HAVE_AMD3DNOW
2195  __asm__ volatile(
2196  "lea (%2, %2, 2), %%"REG_a" \n\t" // 3*stride
2197  "lea (%2, %2, 4), %%"REG_d" \n\t" // 5*stride
2198  "lea (%%"REG_d", %2, 2), %%"REG_c" \n\t" // 7*stride
2199  // 0 1 2 3 4 5 6 7 8 9
2477  "4: \n\t"
2478
2479  :: "r" (src), "r" (tempBlurred), "r"((x86_reg)stride), "m" (tempBlurredPast)
2480  : "%"REG_a, "%"REG_d, "%"REG_c, "memory"
2481  );
- 2482  #else //HAVE_MMX2 || HAVE_3DNOW
+ 2482  #else //HAVE_MMX2 || HAVE_AMD3DNOW
2483  {
2484  int y;
2485  int d=0;
2486  // int sysd=0;
2487  int i;
2560  }
2561  }
2562  }
2563  }
2564  }
- 2565  #endif //HAVE_MMX2 || HAVE_3DNOW
+ 2565  #endif //HAVE_MMX2 || HAVE_AMD3DNOW
2566  }
2567  #endif //HAVE_ALTIVEC
2568
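The temporal noise reducer measures how far the current block drifted from its blurred history tempBlurred: FAST_L2_DIFF selects a squared-difference metric (L1_DIFF an absolute one, needing different thresholds, per the comment above), and the accumulated d is compared against the maxNoise values cached in tempBlurredPast to decide how strongly to blend the two blocks. A hypothetical sketch of the metric accumulation alone, assuming both buffers share one stride; the threshold and blend logic is elided:

#include <stdint.h>

/* Hypothetical sketch: L2 distance between the current 8x8 block and
 * its temporally blurred history. The comparison against the cached
 * maxNoise thresholds and the actual blending are elided. */
static int block_l2_diff(const uint8_t *src, const uint8_t *blurred, int stride)
{
    int d = 0;
    for (int y = 0; y < 8; y++)
        for (int x = 0; x < 8; x++) {
            const int diff = src[y*stride + x] - blurred[y*stride + x];
            d += diff*diff;
        }
    return d;
}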
2569  #if HAVE_MMX
2570  /**
3409  :: "r" (srcBlock), "r" ((x86_reg)srcStride), "r" (dstBlock), "r" ((x86_reg)dstStride),
3410  "g" ((x86_reg)x), "g" ((x86_reg)copyAhead)
3411  : "%"REG_a, "%"REG_d
3412  );
3413
- 3414  #elif HAVE_3DNOW
+ 3414  #elif HAVE_AMD3DNOW
3415  //FIXME check if this is faster on an 3dnow chip or if it is faster without the prefetch or ...
3416  /* prefetch(srcBlock + (((x>>3)&3) + 5)*srcStride + 32);
3417  prefetch(srcBlock + (((x>>3)&3) + 9)*srcStride + 32);
3418  prefetchw(dstBlock + (((x>>3)&3) + 5)*dstStride + 32);
3419  prefetchw(dstBlock + (((x>>3)&3) + 9)*dstStride + 32);
3545  :: "r" (srcBlock), "r" ((x86_reg)srcStride), "r" (dstBlock), "r" ((x86_reg)dstStride),
3546  "g" ((x86_reg)x), "g" ((x86_reg)copyAhead)
3547  : "%"REG_a, "%"REG_d
3548  );
3549
- 3550  #elif HAVE_3DNOW
+ 3550  #elif HAVE_AMD3DNOW
3551  //FIXME check if this is faster on an 3dnow chip or if it is faster without the prefetch or ...
3552  /* prefetch(srcBlock + (((x>>3)&3) + 5)*srcStride + 32);
3553  prefetch(srcBlock + (((x>>3)&3) + 9)*srcStride + 32);
3554  prefetchw(dstBlock + (((x>>3)&3) + 5)*dstStride + 32);
3555  prefetchw(dstBlock + (((x>>3)&3) + 9)*dstStride + 32);
3697  + dstBlock[x +11*dstStride] + dstBlock[x +12*dstStride];
3698  + dstBlock[x +13*dstStride]
3699  + dstBlock[x +14*dstStride] + dstBlock[x +15*dstStride];
3700  }*/
3701  }
- 3702  #if HAVE_3DNOW
+ 3702  #if HAVE_AMD3DNOW
3703  __asm__ volatile("femms");
3704  #elif HAVE_MMX
3705  __asm__ volatile("emms");
3706  #endif
3707