# HG changeset patch
# User michael
# Date 1005074209 0
# Node ID 2cbecedb261603335fbc99e7e4d07ac840aa4aa6
# Parent 01dbf100b4f8a09b1b423a62b60b22b0fb6e9203
15/16 bpp dithering

diff -r 01dbf100b4f8 -r 2cbecedb2616 postproc/yuv2rgb_template.c
--- a/postproc/yuv2rgb_template.c	Tue Nov 06 18:42:22 2001 +0000
+++ b/postproc/yuv2rgb_template.c	Tue Nov 06 19:16:49 2001 +0000
@@ -23,7 +23,7 @@
  * along with GNU Make; see the file COPYING. If not, write to
  * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
  *
- * 15 and 24 bpp support from Michael Niedermayer (michaelni@gmx.at)
+ * 15,24 bpp and dithering from Michael Niedermayer (michaelni@gmx.at)
  */
 
 #include 
@@ -38,6 +38,8 @@
 #include "rgb2rgb.h"
 #include "../mmx_defs.h"
 
+#define DITHER1XBPP
+
 /* hope these constant values are cache line aligned */
 uint64_t __attribute__((aligned(8))) mmx_80w = 0x0080008000800080;
 uint64_t __attribute__((aligned(8))) mmx_10w = 0x1010101010101010;
@@ -58,6 +60,22 @@
 uint64_t __attribute__((aligned(8))) M24B= 0xFF0000FF0000FF00LL;
 uint64_t __attribute__((aligned(8))) M24C= 0x0000FF0000FF0000LL;
 
+// the volatile is required because gcc otherwise optimizes some writes away not knowing that these
+// are read in the asm block
+volatile uint64_t __attribute__((aligned(8))) b5Dither;
+volatile uint64_t __attribute__((aligned(8))) g5Dither;
+volatile uint64_t __attribute__((aligned(8))) g6Dither;
+volatile uint64_t __attribute__((aligned(8))) r5Dither;
+
+uint64_t __attribute__((aligned(8))) dither4[2]={
+	0x0103010301030103LL,
+	0x0200020002000200LL,};
+
+uint64_t __attribute__((aligned(8))) dither8[2]={
+	0x0602060206020602LL,
+	0x0004000400040004LL,};
+
+
 #define YUV2RGB \
     /* Do the multiply part of the conversion for even and odd pixels,
@@ -150,6 +168,11 @@
 	uint8_t *_pu = pu;
 	uint8_t *_pv = pv;
 
+	b5Dither= dither8[y&1];
+	g6Dither= dither4[y&1];
+	g5Dither= dither8[y&1];
+	r5Dither= dither8[(y+1)&1];
+
 	/* load data for start of next scan line */
 	__asm__ __volatile__ (
 		 "movd (%1), %%mm0;" /* Load 4 Cb 00 00 00 00 u3 u2 u1 u0 */
@@ -171,6 +194,11 @@
 		     */
 		     YUV2RGB
 
+#ifdef DITHER1XBPP
+			"paddusb b5Dither, %%mm0;"
+			"paddusb g6Dither, %%mm2;"
+			"paddusb r5Dither, %%mm1;"
+#endif
 		     /* mask unneeded bits off */
 		     "pand mmx_redmask, %%mm0;" /* b7b6b5b4 b3_0_0_0 b7b6b5b4 b3_0_0_0 */
 		     "pand mmx_grnmask, %%mm2;" /* g7g6g5g4 g3g2_0_0 g7g6g5g4 g3g2_0_0 */
@@ -241,6 +269,11 @@
 	uint8_t *_pu = pu;
 	uint8_t *_pv = pv;
 
+	b5Dither= dither8[y&1];
+	g6Dither= dither4[y&1];
+	g5Dither= dither8[y&1];
+	r5Dither= dither8[(y+1)&1];
+
 	/* load data for start of next scan line */
 	__asm__ __volatile__ (
 		 "movd (%1), %%mm0;" /* Load 4 Cb 00 00 00 00 u3 u2 u1 u0 */
@@ -256,6 +289,12 @@
 	__asm__ __volatile__ (
 		     YUV2RGB
 
+#ifdef DITHER1XBPP
+			"paddusb b5Dither, %%mm0 \n\t"
+			"paddusb g5Dither, %%mm2 \n\t"
+			"paddusb r5Dither, %%mm1 \n\t"
+#endif
+
 		     /* mask unneeded bits off */
 		     "pand mmx_redmask, %%mm0;" /* b7b6b5b4 b3_0_0_0 b7b6b5b4 b3_0_0_0 */
 		     "pand mmx_redmask, %%mm2;" /* g7g6g5g4 g3_0_0_0 g7g6g5g4 g3_0_0_0 */
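
Note for reference: the dithering added here is a two-row ordered dither. Before each 8-bit colour channel is masked down to 5 or 6 bits, a small per-pixel offset taken from dither8 (5-bit channels) or dither4 (the 6-bit green channel of 16 bpp) is added with a saturating byte add (paddusb); the row alternates per scanline (y&1), and red takes the opposite row to blue ((y+1)&1). The plain-C sketch below illustrates the same idea outside the MMX code. It is not part of the patch: the function and table names are invented, and the byte values assume the 64-bit constants above are read in little-endian byte order.

/* Plain-C sketch of the patch's ordered dithering (illustrative only).
 * rgb565_dither() and the table names are invented for this example. */
#include <stdint.h>

/* Rows of dither8/dither4 as per-pixel byte offsets (little-endian byte order). */
static const uint8_t dither_5bit[2][4] = { { 2, 6, 2, 6 }, { 4, 0, 4, 0 } };
static const uint8_t dither_6bit[2][4] = { { 3, 1, 3, 1 }, { 0, 2, 0, 2 } };

/* Saturating byte add, i.e. what paddusb does for a single byte lane. */
static inline uint8_t sat_add_u8(uint8_t a, uint8_t b)
{
    unsigned s = (unsigned)a + b;
    return s > 255 ? 255 : (uint8_t)s;
}

/* Pack one 8-bit RGB pixel at column x of scanline y into RGB565 with dithering. */
static uint16_t rgb565_dither(uint8_t r, uint8_t g, uint8_t b, int x, int y)
{
    uint8_t rd = sat_add_u8(r, dither_5bit[(y + 1) & 1][x & 3]); /* red uses the opposite row */
    uint8_t gd = sat_add_u8(g, dither_6bit[y & 1][x & 3]);
    uint8_t bd = sat_add_u8(b, dither_5bit[y & 1][x & 3]);

    return (uint16_t)(((rd >> 3) << 11) | ((gd >> 2) << 5) | (bd >> 3));
}

Keeping the offsets below one quantization step (0-6 for the 5-bit channels, 0-3 for the 6-bit green channel) spreads the truncation error across neighbouring pixels and scanlines without noticeably brightening the image, and the saturating add keeps near-white values from wrapping around.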