diff libmpeg2/motion_comp_alpha.c @ 12932:d0a8810e155c

Importing libmpeg2 from mpeg2dec-0.4.0b
author henry
date Mon, 02 Aug 2004 11:26:43 +0000
parents 89b48bc6c441
children 25337a2147e7
line wrap: on
line diff
--- a/libmpeg2/motion_comp_alpha.c	Mon Aug 02 07:58:21 2004 +0000
+++ b/libmpeg2/motion_comp_alpha.c	Mon Aug 02 11:26:43 2004 +0000
@@ -1,6 +1,6 @@
 /*
  * motion_comp_alpha.c
- * Copyright (C) 2002 Falk Hueffner <falk@debian.org>
+ * Copyright (C) 2002-2003 Falk Hueffner <falk@debian.org>
  *
  * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
  * See http://libmpeg2.sourceforge.net/ for updates.
@@ -27,135 +27,136 @@
 #include <inttypes.h>
 
 #include "mpeg2.h"
+#include "attributes.h"
 #include "mpeg2_internal.h"
 #include "alpha_asm.h"
 
-static inline uint64_t avg2(uint64_t a, uint64_t b)
+static inline uint64_t avg2 (uint64_t a, uint64_t b)
 {
-    return (a | b) - (((a ^ b) & BYTE_VEC(0xfe)) >> 1);    
+    return (a | b) - (((a ^ b) & BYTE_VEC (0xfe)) >> 1);
 }
 
 // Load two unaligned quadwords from addr. This macro only works if
 // addr is actually unaligned.
-#define ULOAD16(ret_l, ret_r, addr)			\
+#define ULOAD16(ret_l,ret_r,addr)			\
     do {						\
-	uint64_t _l = ldq_u(addr +  0);			\
-	uint64_t _m = ldq_u(addr +  8);			\
-	uint64_t _r = ldq_u(addr + 16);			\
-	ret_l = extql(_l, addr) | extqh(_m, addr);	\
-	ret_r = extql(_m, addr) | extqh(_r, addr);	\
+	uint64_t _l = ldq_u (addr +  0);		\
+	uint64_t _m = ldq_u (addr +  8);		\
+	uint64_t _r = ldq_u (addr + 16);		\
+	ret_l = extql (_l, addr) | extqh (_m, addr);	\
+	ret_r = extql (_m, addr) | extqh (_r, addr);	\
     } while (0)
 
 // Load two aligned quadwords from addr.
-#define ALOAD16(ret_l, ret_r, addr)			\
+#define ALOAD16(ret_l,ret_r,addr)			\
     do {						\
-	ret_l = ldq(addr);				\
-	ret_r = ldq(addr + 8);				\
+	ret_l = ldq (addr);				\
+	ret_r = ldq (addr + 8);				\
     } while (0)
 
-#define OP8(LOAD, LOAD16, STORE)		\
+#define OP8(LOAD,LOAD16,STORE)			\
     do {					\
-	STORE(LOAD(pixels), block);		\
+	STORE (LOAD (pixels), block);		\
 	pixels += line_size;			\
 	block += line_size;			\
     } while (--h)
 
-#define OP16(LOAD, LOAD16, STORE)		\
+#define OP16(LOAD,LOAD16,STORE)			\
     do {					\
 	uint64_t l, r;				\
-	LOAD16(l, r, pixels);			\
-	STORE(l, block);			\
-	STORE(r, block + 8);			\
+	LOAD16 (l, r, pixels);			\
+	STORE (l, block);			\
+	STORE (r, block + 8);			\
 	pixels += line_size;			\
 	block += line_size;			\
     } while (--h)
 
-#define OP8_X2(LOAD, LOAD16, STORE)			\
+#define OP8_X2(LOAD,LOAD16,STORE)			\
     do {						\
 	uint64_t p0, p1;				\
 							\
-	p0 = LOAD(pixels);				\
+	p0 = LOAD (pixels);				\
 	p1 = p0 >> 8 | ((uint64_t) pixels[8] << 56);	\
-	STORE(avg2(p0, p1), block);			\
+	STORE (avg2 (p0, p1), block);			\
 	pixels += line_size;				\
 	block += line_size;				\
     } while (--h)
 
-#define OP16_X2(LOAD, LOAD16, STORE)				\
+#define OP16_X2(LOAD,LOAD16,STORE)				\
     do {							\
 	uint64_t p0, p1;					\
 								\
-	LOAD16(p0, p1, pixels);					\
-	STORE(avg2(p0, p0 >> 8 | p1 << 56), block);		\
-	STORE(avg2(p1, p1 >> 8 | (uint64_t) pixels[16] << 56),	\
-	      block + 8);					\
+	LOAD16 (p0, p1, pixels);				\
+	STORE (avg2(p0, p0 >> 8 | p1 << 56), block);		\
+	STORE (avg2(p1, p1 >> 8 | (uint64_t) pixels[16] << 56),	\
+	       block + 8);					\
 	pixels += line_size;					\
 	block += line_size;					\
     } while (--h)
 
-#define OP8_Y2(LOAD, LOAD16, STORE)		\
+#define OP8_Y2(LOAD,LOAD16,STORE)		\
     do {					\
 	uint64_t p0, p1;			\
-	p0 = LOAD(pixels);			\
+	p0 = LOAD (pixels);			\
 	pixels += line_size;			\
-	p1 = LOAD(pixels);			\
+	p1 = LOAD (pixels);			\
 	do {					\
-	    uint64_t av = avg2(p0, p1);		\
+	    uint64_t av = avg2 (p0, p1);	\
 	    if (--h == 0) line_size = 0;	\
 	    pixels += line_size;		\
 	    p0 = p1;				\
-	    p1 = LOAD(pixels);			\
-	    STORE(av, block);			\
+	    p1 = LOAD (pixels);			\
+	    STORE (av, block);			\
 	    block += line_size;			\
 	} while (h);				\
     } while (0)
 
-#define OP16_Y2(LOAD, LOAD16, STORE)		\
+#define OP16_Y2(LOAD,LOAD16,STORE)		\
     do {					\
 	uint64_t p0l, p0r, p1l, p1r;		\
-	LOAD16(p0l, p0r, pixels);		\
+	LOAD16 (p0l, p0r, pixels);		\
 	pixels += line_size;			\
-	LOAD16(p1l, p1r, pixels);		\
+	LOAD16 (p1l, p1r, pixels);		\
 	do {					\
 	    uint64_t avl, avr;			\
 	    if (--h == 0) line_size = 0;	\
-	    avl = avg2(p0l, p1l);		\
-	    avr = avg2(p0r, p1r);		\
+	    avl = avg2 (p0l, p1l);		\
+	    avr = avg2 (p0r, p1r);		\
 	    p0l = p1l;				\
 	    p0r = p1r;				\
 	    pixels += line_size;		\
-	    LOAD16(p1l, p1r, pixels);		\
-	    STORE(avl, block);			\
-	    STORE(avr, block + 8);		\
+	    LOAD16 (p1l, p1r, pixels);		\
+	    STORE (avl, block);			\
+	    STORE (avr, block + 8);		\
 	    block += line_size;			\
 	} while (h);				\
     } while (0)
 
-#define OP8_XY2(LOAD, LOAD16, STORE)				\
+#define OP8_XY2(LOAD,LOAD16,STORE)				\
     do {							\
 	uint64_t pl, ph;					\
-	uint64_t p1 = LOAD(pixels);				\
+	uint64_t p1 = LOAD (pixels);				\
 	uint64_t p2 = p1 >> 8 | ((uint64_t) pixels[8] << 56);	\
 								\
-	ph = ((p1 & ~BYTE_VEC(0x03)) >> 2)			\
-	   + ((p2 & ~BYTE_VEC(0x03)) >> 2);			\
-	pl = (p1 & BYTE_VEC(0x03))				\
-	   + (p2 & BYTE_VEC(0x03));				\
+	ph = (((p1 & ~BYTE_VEC (0x03)) >> 2) +			\
+	      ((p2 & ~BYTE_VEC (0x03)) >> 2));			\
+	pl = ((p1 & BYTE_VEC (0x03)) +				\
+	      (p2 & BYTE_VEC (0x03)));				\
 								\
 	do {							\
 	    uint64_t npl, nph;					\
 								\
 	    pixels += line_size;				\
-	    p1 = LOAD(pixels);					\
+	    p1 = LOAD (pixels);					\
 	    p2 = (p1 >> 8) | ((uint64_t) pixels[8] << 56);	\
-	    nph = ((p1 & ~BYTE_VEC(0x03)) >> 2)			\
-	        + ((p2 & ~BYTE_VEC(0x03)) >> 2);		\
-	    npl = (p1 & BYTE_VEC(0x03))				\
-	        + (p2 & BYTE_VEC(0x03));			\
+	    nph = (((p1 & ~BYTE_VEC (0x03)) >> 2) +		\
+	           ((p2 & ~BYTE_VEC (0x03)) >> 2));		\
+	    npl = ((p1 & BYTE_VEC (0x03)) +			\
+	           (p2 & BYTE_VEC (0x03)));			\
 								\
-	    STORE(ph + nph					\
-		  + (((pl + npl + BYTE_VEC(0x02)) >> 2)		\
-		     & BYTE_VEC(0x03)), block);			\
+	    STORE (ph + nph +					\
+		   (((pl + npl + BYTE_VEC (0x02)) >> 2) &	\
+		    BYTE_VEC (0x03)), block);			\
 								\
 	    block += line_size;					\
             pl = npl;						\
@@ -163,44 +164,44 @@
 	} while (--h);						\
     } while (0)
 
-#define OP16_XY2(LOAD, LOAD16, STORE)				\
+#define OP16_XY2(LOAD,LOAD16,STORE)				\
     do {							\
 	uint64_t p0, p1, p2, p3, pl_l, ph_l, pl_r, ph_r;	\
-	LOAD16(p0, p2, pixels);					\
+	LOAD16 (p0, p2, pixels);				\
 	p1 = p0 >> 8 | (p2 << 56);				\
-	p3 = p2 >> 8 | ((uint64_t) pixels[16] << 56);		\
+	p3 = p2 >> 8 | ((uint64_t)pixels[16] << 56);		\
 								\
-	ph_l = ((p0 & ~BYTE_VEC(0x03)) >> 2)			\
-	     + ((p1 & ~BYTE_VEC(0x03)) >> 2);			\
-	pl_l = (p0 & BYTE_VEC(0x03))				\
-	     + (p1 & BYTE_VEC(0x03));				\
-	ph_r = ((p2 & ~BYTE_VEC(0x03)) >> 2)			\
-	     + ((p3 & ~BYTE_VEC(0x03)) >> 2);			\
-	pl_r = (p2 & BYTE_VEC(0x03))				\
-	     + (p3 & BYTE_VEC(0x03));				\
+	ph_l = (((p0 & ~BYTE_VEC (0x03)) >> 2) +		\
+	        ((p1 & ~BYTE_VEC (0x03)) >> 2));		\
+	pl_l = ((p0 & BYTE_VEC (0x03)) +			\
+	        (p1 & BYTE_VEC(0x03)));				\
+	ph_r = (((p2 & ~BYTE_VEC (0x03)) >> 2) +		\
+	        ((p3 & ~BYTE_VEC (0x03)) >> 2));		\
+	pl_r = ((p2 & BYTE_VEC (0x03)) +			\
+	        (p3 & BYTE_VEC (0x03)));			\
 								\
 	do {							\
 	    uint64_t npl_l, nph_l, npl_r, nph_r;		\
 								\
 	    pixels += line_size;				\
-	    LOAD16(p0, p2, pixels);				\
+	    LOAD16 (p0, p2, pixels);				\
 	    p1 = p0 >> 8 | (p2 << 56);				\
-	    p3 = p2 >> 8 | ((uint64_t) pixels[16] << 56);	\
-	    nph_l = ((p0 & ~BYTE_VEC(0x03)) >> 2)		\
-		  + ((p1 & ~BYTE_VEC(0x03)) >> 2);		\
-	    npl_l = (p0 & BYTE_VEC(0x03))			\
-		  + (p1 & BYTE_VEC(0x03));			\
-	    nph_r = ((p2 & ~BYTE_VEC(0x03)) >> 2)		\
-		  + ((p3 & ~BYTE_VEC(0x03)) >> 2);		\
-	    npl_r = (p2 & BYTE_VEC(0x03))			\
-		  + (p3 & BYTE_VEC(0x03));			\
+	    p3 = p2 >> 8 | ((uint64_t)pixels[16] << 56);	\
+	    nph_l = (((p0 & ~BYTE_VEC (0x03)) >> 2) +		\
+		     ((p1 & ~BYTE_VEC (0x03)) >> 2));		\
+	    npl_l = ((p0 & BYTE_VEC (0x03)) +			\
+		     (p1 & BYTE_VEC (0x03)));			\
+	    nph_r = (((p2 & ~BYTE_VEC (0x03)) >> 2) +		\
+		     ((p3 & ~BYTE_VEC (0x03)) >> 2));		\
+	    npl_r = ((p2 & BYTE_VEC (0x03)) +			\
+		     (p3 & BYTE_VEC (0x03)));			\
 								\
-	    STORE(ph_l + nph_l					\
-		  + (((pl_l + npl_l + BYTE_VEC(0x02)) >> 2)	\
-		     & BYTE_VEC(0x03)), block);			\
-	    STORE(ph_r + nph_r					\
-		  + (((pl_r + npl_r + BYTE_VEC(0x02)) >> 2)	\
-		     & BYTE_VEC(0x03)), block + 8);		\
+	    STORE (ph_l + nph_l +				\
+		   (((pl_l + npl_l + BYTE_VEC (0x02)) >> 2) &	\
+		    BYTE_VEC(0x03)), block);			\
+	    STORE (ph_r + nph_r +				\
+		   (((pl_r + npl_r + BYTE_VEC (0x02)) >> 2) &	\
+		    BYTE_VEC(0x03)), block + 8);		\
 								\
 	    block += line_size;					\
 	    pl_l = npl_l;					\
@@ -210,34 +211,33 @@
 	} while (--h);						\
     } while (0)
 
-#define MAKE_OP(OPNAME, SIZE, SUFF, OPKIND, STORE)			\
+#define MAKE_OP(OPNAME,SIZE,SUFF,OPKIND,STORE)				\
 static void MC_ ## OPNAME ## _ ## SUFF ## _ ## SIZE ## _alpha		\
 	(uint8_t *restrict block, const uint8_t *restrict pixels,	\
 	 int line_size, int h)						\
 {									\
     if ((uint64_t) pixels & 0x7) {					\
-	OPKIND(uldq, ULOAD16, STORE);					\
+	OPKIND (uldq, ULOAD16, STORE);					\
     } else {								\
-	OPKIND(ldq, ALOAD16, STORE);					\
+	OPKIND (ldq, ALOAD16, STORE);					\
     }									\
 }
 
-#define PIXOP(OPNAME, STORE)			\
-    MAKE_OP(OPNAME, 8,  o,  OP8,      STORE);	\
-    MAKE_OP(OPNAME, 8,  x,  OP8_X2,   STORE);	\
-    MAKE_OP(OPNAME, 8,  y,  OP8_Y2,   STORE);	\
-    MAKE_OP(OPNAME, 8,  xy, OP8_XY2,  STORE);	\
-    MAKE_OP(OPNAME, 16, o,  OP16,     STORE);	\
-    MAKE_OP(OPNAME, 16, x,  OP16_X2,  STORE);	\
-    MAKE_OP(OPNAME, 16, y,  OP16_Y2,  STORE);	\
-    MAKE_OP(OPNAME, 16, xy, OP16_XY2, STORE);
+#define PIXOP(OPNAME,STORE)			\
+    MAKE_OP (OPNAME, 8,  o,  OP8,      STORE);	\
+    MAKE_OP (OPNAME, 8,  x,  OP8_X2,   STORE);	\
+    MAKE_OP (OPNAME, 8,  y,  OP8_Y2,   STORE);	\
+    MAKE_OP (OPNAME, 8,  xy, OP8_XY2,  STORE);	\
+    MAKE_OP (OPNAME, 16, o,  OP16,     STORE);	\
+    MAKE_OP (OPNAME, 16, x,  OP16_X2,  STORE);	\
+    MAKE_OP (OPNAME, 16, y,  OP16_Y2,  STORE);	\
+    MAKE_OP (OPNAME, 16, xy, OP16_XY2, STORE);
 
-#define STORE(l, b) stq(l, b)
-PIXOP(put, STORE);
-
+#define STORE(l,b) stq (l, b)
+PIXOP (put, STORE);
 #undef STORE
-#define STORE(l, b) stq(avg2(l, ldq(b)), b);
-PIXOP(avg, STORE);
+#define STORE(l,b) stq (avg2 (l, ldq (b)), b);
+PIXOP (avg, STORE);
 
 mpeg2_mc_t mpeg2_mc_alpha = {
     { MC_put_o_16_alpha, MC_put_x_16_alpha,