annotate postproc/swscale_template.c @ 2316:bcb229557e9b

fixed alignment (static variables where sometimes not 8-byte aligned) added half uv interpolation support added prefetch BGR15 support in MMX (untested) (so BGR15,16,24,32 are supported) special unscaled height version (not much faster but it doesnt interpolate uv vertically)
author michael
date Sat, 20 Oct 2001 21:12:09 +0000
parents 7a89cb124e81
children 7d3542955132
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2216
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
1
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
2 // Software scaling and colorspace conversion routines for MPlayer
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
3
2269
95c48204bcd9 (C) fixed
arpi
parents: 2267
diff changeset
4 // Original C implementation by A'rpi/ESP-team <arpi@thot.banki.hu>
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
5 // current version mostly by Michael Niedermayer (michaelni@gmx.at)
2316
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
6 // the parts written by michael are under GNU GPL
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
7
2232
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
8 #include <inttypes.h>
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
9 #include "../config.h"
2316
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
10 #include "swscale.h"
2232
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
11
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
12 //#undef HAVE_MMX2
2232
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
13 //#undef HAVE_MMX
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
14 //#undef ARCH_X86
2316
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
15 #define DITHER1XBPP
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
16 int fullUVIpol=0;
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
17 //disables the unscaled height version
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
18 int allwaysIpol=0;
2232
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
19
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
20 #define RET 0xC3 //near return opcode
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
21 /*
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
22 NOTES
2216
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
23
2316
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
24 known BUGS with known cause (no bugreports please!, but patches are welcome :) )
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
25 horizontal MMX2 scaler reads 1-7 samples too much (might cause a sig11)
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
26
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
27 Supported output formats BGR15 BGR16 BGR24 BGR32 (15,24 are untested)
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
28 BGR15 & BGR16 MMX versions support dithering
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
29 Special versions: fast Y 1:1 scaling (no interpolation in y direction)
2216
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
30
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
31 TODO
2316
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
32 more intelligent misalignment avoidance for the horizontal scaler
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
33 */
2216
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
34
2316
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
35 #define ABS(a) ((a) > 0 ? (a) : (-(a)))
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
36
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
37 #ifdef HAVE_MMX2
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
38 #define PAVGB(a,b) "pavgb " #a ", " #b " \n\t"
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
39 #elif defined (HAVE_3DNOW)
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
40 #define PAVGB(a,b) "pavgusb " #a ", " #b " \n\t"
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
41 #endif
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
42
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
43 #ifdef HAVE_MMX2
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
44 #define MOVNTQ(a,b) "movntq " #a ", " #b " \n\t"
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
45 #else
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
46 #define MOVNTQ(a,b) "movq " #a ", " #b " \n\t"
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
47 #endif
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
48
2232
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
49
2316
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
50 #ifdef HAVE_MMX
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
51 static uint64_t __attribute__((aligned(8))) yCoeff= 0x2568256825682568LL;
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
52 static uint64_t __attribute__((aligned(8))) ubCoeff= 0x3343334333433343LL;
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
53 static uint64_t __attribute__((aligned(8))) vrCoeff= 0x40cf40cf40cf40cfLL;
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
54 static uint64_t __attribute__((aligned(8))) ugCoeff= 0xE5E2E5E2E5E2E5E2LL;
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
55 static uint64_t __attribute__((aligned(8))) vgCoeff= 0xF36EF36EF36EF36ELL;
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
56 static uint64_t __attribute__((aligned(8))) w400= 0x0400040004000400LL;
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
57 static uint64_t __attribute__((aligned(8))) w80= 0x0080008000800080LL;
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
58 static uint64_t __attribute__((aligned(8))) w10= 0x0010001000100010LL;
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
59 static uint64_t __attribute__((aligned(8))) bm00001111=0x00000000FFFFFFFFLL;
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
60 static uint64_t __attribute__((aligned(8))) bm00000111=0x0000000000FFFFFFLL;
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
61 static uint64_t __attribute__((aligned(8))) bm11111000=0xFFFFFFFFFF000000LL;
2232
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
62
2316
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
63 static uint64_t __attribute__((aligned(8))) b16Dither= 0x0004000400040004LL;
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
64 static uint64_t __attribute__((aligned(8))) b16Dither1=0x0004000400040004LL;
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
65 static uint64_t __attribute__((aligned(8))) b16Dither2=0x0602060206020602LL;
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
66 static uint64_t __attribute__((aligned(8))) g16Dither= 0x0002000200020002LL;
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
67 static uint64_t __attribute__((aligned(8))) g16Dither1=0x0002000200020002LL;
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
68 static uint64_t __attribute__((aligned(8))) g16Dither2=0x0301030103010301LL;
2232
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
69
2316
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
70 static uint64_t __attribute__((aligned(8))) b16Mask= 0x001F001F001F001FLL;
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
71 static uint64_t __attribute__((aligned(8))) g16Mask= 0x07E007E007E007E0LL;
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
72 static uint64_t __attribute__((aligned(8))) r16Mask= 0xF800F800F800F800LL;
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
73 static uint64_t __attribute__((aligned(8))) b15Mask= 0x001F001F001F001FLL;
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
74 static uint64_t __attribute__((aligned(8))) g15Mask= 0x03E003E003E003E0LL;
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
75 static uint64_t __attribute__((aligned(8))) r15Mask= 0x7C007C007C007C00LL;
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
76
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
77 static uint64_t __attribute__((aligned(8))) temp0;
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
78 static uint64_t __attribute__((aligned(8))) asm_yalpha1;
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
79 static uint64_t __attribute__((aligned(8))) asm_uvalpha1;
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
80 #endif
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
81
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
82 // temporary storage for 4 yuv lines:
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
83 // 16bit for now (mmx likes it more compact)
2316
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
84 #ifdef HAVE_MMX
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
85 static uint16_t __attribute__((aligned(8))) pix_buf_y[4][2048];
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
86 static uint16_t __attribute__((aligned(8))) pix_buf_uv[2][2048*2];
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
87 #else
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
88 static uint16_t pix_buf_y[4][2048];
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
89 static uint16_t pix_buf_uv[2][2048*2];
2316
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
90 #endif
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
91
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
92 // clipping helper table for C implementations:
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
93 static unsigned char clip_table[768];
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
94
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
95 // yuv->rgb conversion tables:
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
96 static int yuvtab_2568[256];
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
97 static int yuvtab_3343[256];
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
98 static int yuvtab_0c92[256];
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
99 static int yuvtab_1a1e[256];
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
100 static int yuvtab_40cf[256];
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
101
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
102
2232
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
103 static uint8_t funnyYCode[10000];
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
104 static uint8_t funnyUVCode[10000];
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
105
2316
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
/*
 * FULL_YSCALEYUV2RGB: inline-asm fragment (MMX) that blends two luma lines and
 * two chroma lines vertically and converts the result to B, G and R.
 *
 * Operand names used in the comments (buf0/buf1 = %0/%1, uvbuf0/uvbuf1 = %2/%3,
 * yalpha1 = %6, uvalpha1 = %7) follow the original inline comments; the actual
 * operand bindings come from the asm statement that expands this macro, which
 * is not part of this definition.
 *
 * Each pass handles four 16-bit samples (one movq per line). The second chroma
 * plane is read 4096 bytes (2048 16-bit entries) after the first one.
 *
 * The fragment zeroes mm7 and eax, then opens local label "1:"; the index
 * update and the branch back to "1:" are not in this macro and must be supplied
 * by the code that follows it.
 *
 * On exit from the fragment: mm3 = B, mm1 = G, mm0 = R, each saturated to
 * unsigned bytes by packuswb (the same four bytes appear in both halves of the
 * register).
 */
106 #define FULL_YSCALEYUV2RGB \
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
107 "pxor %%mm7, %%mm7 \n\t" /* mm7 = 0 (not used again inside this macro) */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
108 "movd %6, %%mm6 \n\t" /*yalpha1*/\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
109 "punpcklwd %%mm6, %%mm6 \n\t" /* the two punpcklwd copy the low word of mm6 into all four words */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
110 "punpcklwd %%mm6, %%mm6 \n\t"\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
111 "movd %7, %%mm5 \n\t" /*uvalpha1*/\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
112 "punpcklwd %%mm5, %%mm5 \n\t" /* same broadcast for uvalpha1 in mm5 */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
113 "punpcklwd %%mm5, %%mm5 \n\t"\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
114 "xorl %%eax, %%eax \n\t" /* eax = 0, sample index */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
115 "1: \n\t"\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
116 "movq (%0, %%eax, 2), %%mm0 \n\t" /*buf0[eax]*/\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
117 "movq (%1, %%eax, 2), %%mm1 \n\t" /*buf1[eax]*/\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
118 "movq (%2, %%eax,2), %%mm2 \n\t" /* uvbuf0[eax]*/\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
119 "movq (%3, %%eax,2), %%mm3 \n\t" /* uvbuf1[eax]*/\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
120 "psubw %%mm1, %%mm0 \n\t" /* buf0[eax] - buf1[eax]*/\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
121 "psubw %%mm3, %%mm2 \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
122 "pmulhw %%mm6, %%mm0 \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
123 "pmulhw %%mm5, %%mm2 \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
124 "psraw $4, %%mm1 \n\t" /* buf1[eax] >>4 (mm1 still holds the plain buf1 samples) */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
125 "movq 4096(%2, %%eax,2), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
126 "psraw $4, %%mm3 \n\t" /* uvbuf1[eax] >>4 (mm3 still holds the plain uvbuf1 samples) */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
127 "paddw %%mm0, %%mm1 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
128 "movq 4096(%3, %%eax,2), %%mm0 \n\t" /* uvbuf1[eax+2048]*/\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
129 "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax]uvalpha1 + uvbuf1[eax](1-uvalpha1)*/\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
130 "psubw %%mm0, %%mm4 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
131 "psubw w80, %%mm1 \n\t" /* 8(Y-16)*/\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
132 "psubw w400, %%mm3 \n\t" /* 8(U-128)*/\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
133 "pmulhw yCoeff, %%mm1 \n\t" /* luma term, later added to B, R and G */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
134 \
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
135 \
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
136 "pmulhw %%mm5, %%mm4 \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
137 "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
138 "pmulhw ubCoeff, %%mm3 \n\t" /* U contribution to B */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
139 "psraw $4, %%mm0 \n\t" /* uvbuf1[eax+2048] >>4 (mm0 was reloaded with uvbuf1[eax+2048] above) */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
140 "pmulhw ugCoeff, %%mm2 \n\t" /* U contribution to G */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
141 "paddw %%mm4, %%mm0 \n\t" /* uvbuf0[eax+2048]uvalpha1 + uvbuf1[eax+2048](1-uvalpha1)*/\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
142 "psubw w400, %%mm0 \n\t" /* (V-128)8*/\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
143 \
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
144 \
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
145 "movq %%mm0, %%mm4 \n\t" /* (V-128)8*/\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
146 "pmulhw vrCoeff, %%mm0 \n\t" /* V contribution to R */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
147 "pmulhw vgCoeff, %%mm4 \n\t" /* V contribution to G */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
148 "paddw %%mm1, %%mm3 \n\t" /* B*/\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
149 "paddw %%mm1, %%mm0 \n\t" /* R*/\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
150 "packuswb %%mm3, %%mm3 \n\t" /* B saturated to unsigned bytes */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
151 \
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
152 "packuswb %%mm0, %%mm0 \n\t" /* R saturated to unsigned bytes */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
153 "paddw %%mm4, %%mm2 \n\t" /* U and V contributions to G summed */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
154 "paddw %%mm2, %%mm1 \n\t" /* G*/\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
155 \
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
156 "packuswb %%mm1, %%mm1 \n\t" /* G saturated to unsigned bytes */
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
157
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
158 #define YSCALEYUV2RGB \
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
159 "movd %6, %%mm6 \n\t" /*yalpha1*/\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
160 "punpcklwd %%mm6, %%mm6 \n\t"\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
161 "punpcklwd %%mm6, %%mm6 \n\t"\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
162 "movq %%mm6, asm_yalpha1 \n\t"\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
163 "movd %7, %%mm5 \n\t" /*uvalpha1*/\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
164 "punpcklwd %%mm5, %%mm5 \n\t"\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
165 "punpcklwd %%mm5, %%mm5 \n\t"\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
166 "movq %%mm5, asm_uvalpha1 \n\t"\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
167 "xorl %%eax, %%eax \n\t"\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
168 "1: \n\t"\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
169 "movq (%2, %%eax), %%mm2 \n\t" /* uvbuf0[eax]*/\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
170 "movq (%3, %%eax), %%mm3 \n\t" /* uvbuf1[eax]*/\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
171 "movq 4096(%2, %%eax), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
172 "movq 4096(%3, %%eax), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
173 "psubw %%mm3, %%mm2 \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
174 "psubw %%mm4, %%mm5 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
175 "movq asm_uvalpha1, %%mm0 \n\t"\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
176 "pmulhw %%mm0, %%mm2 \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
177 "pmulhw %%mm0, %%mm5 \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
178 "psraw $4, %%mm3 \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
179 "psraw $4, %%mm4 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
180 "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax]uvalpha1 - uvbuf1[eax](1-uvalpha1)*/\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
181 "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048]uvalpha1 - uvbuf1[eax+2048](1-uvalpha1)*/\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
182 "psubw w400, %%mm3 \n\t" /* (U-128)8*/\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
183 "psubw w400, %%mm4 \n\t" /* (V-128)8*/\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
184 "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
185 "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
186 "pmulhw ugCoeff, %%mm3 \n\t"\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
187 "pmulhw vgCoeff, %%mm4 \n\t"\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
188 /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
189 "movq (%0, %%eax, 2), %%mm0 \n\t" /*buf0[eax]*/\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
190 "movq (%1, %%eax, 2), %%mm1 \n\t" /*buf1[eax]*/\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
191 "movq 8(%0, %%eax, 2), %%mm6 \n\t" /*buf0[eax]*/\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
192 "movq 8(%1, %%eax, 2), %%mm7 \n\t" /*buf1[eax]*/\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
193 "psubw %%mm1, %%mm0 \n\t" /* buf0[eax] - buf1[eax]*/\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
194 "psubw %%mm7, %%mm6 \n\t" /* buf0[eax] - buf1[eax]*/\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
195 "pmulhw asm_yalpha1, %%mm0 \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
196 "pmulhw asm_yalpha1, %%mm6 \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
197 "psraw $4, %%mm1 \n\t" /* buf0[eax] - buf1[eax] >>4*/\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
198 "psraw $4, %%mm7 \n\t" /* buf0[eax] - buf1[eax] >>4*/\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
199 "paddw %%mm0, %%mm1 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
200 "paddw %%mm6, %%mm7 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
201 "pmulhw ubCoeff, %%mm2 \n\t"\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
202 "pmulhw vrCoeff, %%mm5 \n\t"\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
203 "psubw w80, %%mm1 \n\t" /* 8(Y-16)*/\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
204 "psubw w80, %%mm7 \n\t" /* 8(Y-16)*/\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
205 "pmulhw yCoeff, %%mm1 \n\t"\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
206 "pmulhw yCoeff, %%mm7 \n\t"\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
207 /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
208 "paddw %%mm3, %%mm4 \n\t"\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
209 "movq %%mm2, %%mm0 \n\t"\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
210 "movq %%mm5, %%mm6 \n\t"\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
211 "movq %%mm4, %%mm3 \n\t"\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
212 "punpcklwd %%mm2, %%mm2 \n\t"\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
213 "punpcklwd %%mm5, %%mm5 \n\t"\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
214 "punpcklwd %%mm4, %%mm4 \n\t"\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
215 "paddw %%mm1, %%mm2 \n\t"\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
216 "paddw %%mm1, %%mm5 \n\t"\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
217 "paddw %%mm1, %%mm4 \n\t"\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
218 "punpckhwd %%mm0, %%mm0 \n\t"\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
219 "punpckhwd %%mm6, %%mm6 \n\t"\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
220 "punpckhwd %%mm3, %%mm3 \n\t"\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
221 "paddw %%mm7, %%mm0 \n\t"\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
222 "paddw %%mm7, %%mm6 \n\t"\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
223 "paddw %%mm7, %%mm3 \n\t"\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
224 /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
225 "packuswb %%mm0, %%mm2 \n\t"\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
226 "packuswb %%mm6, %%mm5 \n\t"\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
227 "packuswb %%mm3, %%mm4 \n\t"\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
228 "pxor %%mm7, %%mm7 \n\t"
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
229
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
/*
 * YSCALEYUV2RGB1 - loop head plus YUV->RGB arithmetic for ONE input line.
 *
 * Unlike the two-line variant, nothing is blended here: each buffer is loaded
 * once and only shifted right by 4.
 *
 * Per iteration:
 *   - 4 chroma words are read from (%2 + eax) and 4 more from 4096 bytes
 *     (2048 words) further on; eax is used unscaled for these loads.
 *   - 8 luma words are read from (%1 + eax*2) and the following 8 bytes.
 *   - every chroma word is duplicated (punpck?wd reg,reg) so that 4 chroma
 *     samples cover 8 luma samples.
 *
 * On exit from the macro text: mm2 = 8 B bytes, mm4 = 8 G bytes,
 * mm5 = 8 R bytes (packuswb, i.e. unsigned saturation), mm7 = 0.
 * mm0, mm1, mm3 and mm6 are scratch. eax is zeroed and local label "1:" is
 * opened here; the closing "jb 1b" is not part of this macro.
 *
 * w400, w80, ugCoeff, vgCoeff, ubCoeff, vrCoeff and yCoeff are symbols
 * referenced as memory operands; their definitions are outside this macro.
 *
 * NOTE(review): luma is read through operand %1 while the comments call it
 * buf0 (the two-line macro comments %1 as buf1) - confirm against the operand
 * list of the asm statement that uses this macro.
 */
230 #define YSCALEYUV2RGB1 \
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
231 "xorl %%eax, %%eax \n\t" /* eax = 0 (loop index) */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
232 "1: \n\t" /* loop entry, target of a later "jb 1b" */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
233 "movq (%2, %%eax), %%mm3 \n\t" /* uvbuf0[eax]*/\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
234 "movq 4096(%2, %%eax), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
235 "psraw $4, %%mm3 \n\t" /* uvbuf0[eax] >>4 (single line, no subtraction) */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
236 "psraw $4, %%mm4 \n\t" /* uvbuf0[eax+2048] >>4 (single line, no subtraction) */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
237 "psubw w400, %%mm3 \n\t" /* (U-128)8*/\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
238 "psubw w400, %%mm4 \n\t" /* (V-128)8*/\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
239 "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
240 "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
241 "pmulhw ugCoeff, %%mm3 \n\t"\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
242 "pmulhw vgCoeff, %%mm4 \n\t"\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
243 /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
244 "movq (%1, %%eax, 2), %%mm1 \n\t" /*buf0[eax]*/\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
245 "movq 8(%1, %%eax, 2), %%mm7 \n\t" /*buf0[eax+4] (next 4 words)*/\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
246 "psraw $4, %%mm1 \n\t" /* buf0[eax] >>4 (single line, no subtraction) */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
247 "psraw $4, %%mm7 \n\t" /* buf0[eax+4] >>4 (single line, no subtraction) */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
248 "pmulhw ubCoeff, %%mm2 \n\t"\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
249 "pmulhw vrCoeff, %%mm5 \n\t"\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
250 "psubw w80, %%mm1 \n\t" /* 8(Y-16)*/\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
251 "psubw w80, %%mm7 \n\t" /* 8(Y-16)*/\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
252 "pmulhw yCoeff, %%mm1 \n\t"\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
253 "pmulhw yCoeff, %%mm7 \n\t"\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
254 /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
255 "paddw %%mm3, %%mm4 \n\t" /* mm4 = ug + vg */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
256 "movq %%mm2, %%mm0 \n\t" /* copy ub for the high half */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
257 "movq %%mm5, %%mm6 \n\t" /* copy vr for the high half */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
258 "movq %%mm4, %%mm3 \n\t" /* copy ug+vg for the high half */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
259 "punpcklwd %%mm2, %%mm2 \n\t" /* duplicate low 2 chroma words -> 4 words */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
260 "punpcklwd %%mm5, %%mm5 \n\t"\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
261 "punpcklwd %%mm4, %%mm4 \n\t"\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
262 "paddw %%mm1, %%mm2 \n\t" /* B1 = Y1 + ub */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
263 "paddw %%mm1, %%mm5 \n\t" /* R1 = Y1 + vr */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
264 "paddw %%mm1, %%mm4 \n\t" /* G1 = Y1 + ug + vg */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
265 "punpckhwd %%mm0, %%mm0 \n\t" /* duplicate high 2 chroma words -> 4 words */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
266 "punpckhwd %%mm6, %%mm6 \n\t"\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
267 "punpckhwd %%mm3, %%mm3 \n\t"\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
268 "paddw %%mm7, %%mm0 \n\t" /* B2 = Y2 + ub */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
269 "paddw %%mm7, %%mm6 \n\t" /* R2 = Y2 + vr */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
270 "paddw %%mm7, %%mm3 \n\t" /* G2 = Y2 + ug + vg */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
271 /* mm0=B2, mm2=B1, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
272 "packuswb %%mm0, %%mm2 \n\t" /* mm2 = 8 B bytes (B1 low, B2 high) */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
273 "packuswb %%mm6, %%mm5 \n\t" /* mm5 = 8 R bytes (R1 low, R2 high) */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
274 "packuswb %%mm3, %%mm4 \n\t" /* mm4 = 8 G bytes (G1 low, G2 high) */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
275 "pxor %%mm7, %%mm7 \n\t" /* mm7 = 0 */
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
276
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
/*
 * WRITEBGR32 - interleave 8 packed B/G/R bytes into 8 four-byte pixels, store
 * them and close the pixel loop.
 *
 * Expects mm2 = 8 B bytes, mm4 = 8 G bytes, mm5 = 8 R bytes, mm7 = 0.
 * The inline comments list bytes from most to least significant, so "0RGB"
 * is B,G,R,0 in increasing address order on x86.
 *
 * Stores 4 quadwords (32 bytes) at %4 + eax*4 through MOVNTQ (a macro defined
 * outside this block), adds 8 to eax and jumps back to local label "1:" while
 * eax is below operand %5 (unsigned compare). Label "1:" is not defined here;
 * it has to come from the asm text emitted before this macro.
 * Clobbers mm0-mm3, mm5, mm6 and eax.
 */
277 #define WRITEBGR32 \
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
278 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
279 "movq %%mm2, %%mm1 \n\t" /* B */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
280 "movq %%mm5, %%mm6 \n\t" /* R */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
281 "punpcklbw %%mm4, %%mm2 \n\t" /* GBGBGBGB 0 */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
282 "punpcklbw %%mm7, %%mm5 \n\t" /* 0R0R0R0R 0 */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
283 "punpckhbw %%mm4, %%mm1 \n\t" /* GBGBGBGB 2 */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
284 "punpckhbw %%mm7, %%mm6 \n\t" /* 0R0R0R0R 2 */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
285 "movq %%mm2, %%mm0 \n\t" /* GBGBGBGB 0 */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
286 "movq %%mm1, %%mm3 \n\t" /* GBGBGBGB 2 */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
287 "punpcklwd %%mm5, %%mm0 \n\t" /* 0RGB0RGB 0 */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
288 "punpckhwd %%mm5, %%mm2 \n\t" /* 0RGB0RGB 1 */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
289 "punpcklwd %%mm6, %%mm1 \n\t" /* 0RGB0RGB 2 */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
290 "punpckhwd %%mm6, %%mm3 \n\t" /* 0RGB0RGB 3 */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
291 \
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
292 MOVNTQ(%%mm0, (%4, %%eax, 4)) /* pixels 0-1 */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
293 MOVNTQ(%%mm2, 8(%4, %%eax, 4)) /* pixels 2-3 */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
294 MOVNTQ(%%mm1, 16(%4, %%eax, 4)) /* pixels 4-5 */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
295 MOVNTQ(%%mm3, 24(%4, %%eax, 4)) /* pixels 6-7 */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
296 \
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
297 "addl $8, %%eax \n\t" /* 8 pixels written */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
298 "cmpl %5, %%eax \n\t" /* compare index with operand %5 */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
299 " jb 1b \n\t" /* loop while eax < %5 (unsigned) */
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
300
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
/*
 * WRITEBGR16 - pack 8 B/G/R byte triples into 8 sixteen-bit pixels, store
 * them and close the pixel loop.
 *
 * Expects mm2 = 8 B bytes, mm4 = 8 G bytes, mm5 = 8 R bytes and mm7 = 0
 * (mm7 is the zero source for every punpck?bw below).
 *
 * Per 16-bit word: B>>3 lands in bits 0-4, G<<3 is ANDed with g16Mask and
 * R<<8 is ANDed with r16Mask, then all three are ORed together.
 * g16Mask / r16Mask are defined outside this block - presumably 0x07E0 and
 * 0xF800 per word (5-6-5 layout); TODO confirm.
 *
 * The low 4 pixels are built in mm2, the high 4 in mm1; both are stored
 * through MOVNTQ (macro defined outside this block) at %4 + eax*2 and the
 * following 8 bytes. eax is then advanced by 8 and control returns to local
 * label "1:" while eax is below operand %5 (unsigned compare). Label "1:"
 * must be supplied by the asm text emitted before this macro.
 * Clobbers mm1-mm6 and eax.
 */
301 #define WRITEBGR16 \
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
302 "movq %%mm2, %%mm1 \n\t" /* B */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
303 "movq %%mm4, %%mm3 \n\t" /* G */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
304 "movq %%mm5, %%mm6 \n\t" /* R */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
305 \
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
306 "punpcklbw %%mm7, %%mm3 \n\t" /* 0G0G0G0G */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
307 "punpcklbw %%mm7, %%mm2 \n\t" /* 0B0B0B0B */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
308 "punpcklbw %%mm7, %%mm5 \n\t" /* 0R0R0R0R */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
309 \
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
310 "psrlw $3, %%mm2 \n\t" /* B: keep top 5 bits, now in bits 0-4 */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
311 "psllw $3, %%mm3 \n\t" /* G: move up to bits 3-10 */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
312 "psllw $8, %%mm5 \n\t" /* R: move up to bits 8-15 */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
313 \
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
314 "pand g16Mask, %%mm3 \n\t" /* drop G bits outside the green field */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
315 "pand r16Mask, %%mm5 \n\t" /* drop R bits outside the red field */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
316 \
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
317 "por %%mm3, %%mm2 \n\t" /* B | G */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
318 "por %%mm5, %%mm2 \n\t" /* mm2 = pixels 0-3 */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
319 \
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
320 "punpckhbw %%mm7, %%mm4 \n\t" /* 0G0G0G0G */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
321 "punpckhbw %%mm7, %%mm1 \n\t" /* 0B0B0B0B */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
322 "punpckhbw %%mm7, %%mm6 \n\t" /* 0R0R0R0R */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
323 \
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
324 "psrlw $3, %%mm1 \n\t" /* same packing for the high 4 pixels */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
325 "psllw $3, %%mm4 \n\t"\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
326 "psllw $8, %%mm6 \n\t"\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
327 \
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
328 "pand g16Mask, %%mm4 \n\t"\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
329 "pand r16Mask, %%mm6 \n\t"\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
330 \
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
331 "por %%mm4, %%mm1 \n\t"\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
332 "por %%mm6, %%mm1 \n\t" /* mm1 = pixels 4-7 */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
333 \
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
334 MOVNTQ(%%mm2, (%4, %%eax, 2)) /* pixels 0-3 */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
335 MOVNTQ(%%mm1, 8(%4, %%eax, 2)) /* pixels 4-7 */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
336 \
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
337 "addl $8, %%eax \n\t" /* 8 pixels written */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
338 "cmpl %5, %%eax \n\t" /* compare index with operand %5 */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
339 " jb 1b \n\t" /* loop while eax < %5 (unsigned) */
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
340
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
/*
 * WRITEBGR15 - pack 8 B/G/R byte triples into 8 fifteen-bit pixels (one
 * 16-bit word each), store them and close the pixel loop.
 *
 * Same structure as the 16-bit writer, with different shifts and masks.
 * Expects mm2 = 8 B bytes, mm4 = 8 G bytes, mm5 = 8 R bytes and mm7 = 0
 * (mm7 is the zero source for every punpck?bw below).
 *
 * Per 16-bit word: B>>3 lands in bits 0-4, G<<2 is ANDed with g15Mask and
 * R<<7 is ANDed with r15Mask, then all three are ORed together.
 * g15Mask / r15Mask are defined outside this block - presumably 0x03E0 and
 * 0x7C00 per word (5-5-5 layout); TODO confirm.
 *
 * The low 4 pixels are built in mm2, the high 4 in mm1; both are stored
 * through MOVNTQ (macro defined outside this block) at %4 + eax*2 and the
 * following 8 bytes. eax is then advanced by 8 and control returns to local
 * label "1:" while eax is below operand %5 (unsigned compare). Label "1:"
 * must be supplied by the asm text emitted before this macro.
 * Clobbers mm1-mm6 and eax.
 */
341 #define WRITEBGR15 \
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
342 "movq %%mm2, %%mm1 \n\t" /* B */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
343 "movq %%mm4, %%mm3 \n\t" /* G */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
344 "movq %%mm5, %%mm6 \n\t" /* R */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
345 \
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
346 "punpcklbw %%mm7, %%mm3 \n\t" /* 0G0G0G0G */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
347 "punpcklbw %%mm7, %%mm2 \n\t" /* 0B0B0B0B */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
348 "punpcklbw %%mm7, %%mm5 \n\t" /* 0R0R0R0R */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
349 \
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
350 "psrlw $3, %%mm2 \n\t" /* B: keep top 5 bits, now in bits 0-4 */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
351 "psllw $2, %%mm3 \n\t" /* G: move up to bits 2-9 */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
352 "psllw $7, %%mm5 \n\t" /* R: move up to bits 7-14 */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
353 \
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
354 "pand g15Mask, %%mm3 \n\t" /* drop G bits outside the green field */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
355 "pand r15Mask, %%mm5 \n\t" /* drop R bits outside the red field */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
356 \
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
357 "por %%mm3, %%mm2 \n\t" /* B | G */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
358 "por %%mm5, %%mm2 \n\t" /* mm2 = pixels 0-3 */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
359 \
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
360 "punpckhbw %%mm7, %%mm4 \n\t" /* 0G0G0G0G */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
361 "punpckhbw %%mm7, %%mm1 \n\t" /* 0B0B0B0B */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
362 "punpckhbw %%mm7, %%mm6 \n\t" /* 0R0R0R0R */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
363 \
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
364 "psrlw $3, %%mm1 \n\t" /* same packing for the high 4 pixels */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
365 "psllw $2, %%mm4 \n\t"\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
366 "psllw $7, %%mm6 \n\t"\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
367 \
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
368 "pand g15Mask, %%mm4 \n\t"\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
369 "pand r15Mask, %%mm6 \n\t"\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
370 \
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
371 "por %%mm4, %%mm1 \n\t"\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
372 "por %%mm6, %%mm1 \n\t" /* mm1 = pixels 4-7 */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
373 \
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
374 MOVNTQ(%%mm2, (%4, %%eax, 2)) /* pixels 0-3 */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
375 MOVNTQ(%%mm1, 8(%4, %%eax, 2)) /* pixels 4-7 */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
376 \
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
377 "addl $8, %%eax \n\t" /* 8 pixels written */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
378 "cmpl %5, %%eax \n\t" /* compare index with operand %5 */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
379 " jb 1b \n\t" /* loop while eax < %5 (unsigned) */
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
380 // FIXME find a faster way to shuffle it to BGR24
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
381 #define WRITEBGR24 \
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
382 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
383 "movq %%mm2, %%mm1 \n\t" /* B */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
384 "movq %%mm5, %%mm6 \n\t" /* R */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
385 "punpcklbw %%mm4, %%mm2 \n\t" /* GBGBGBGB 0 */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
386 "punpcklbw %%mm7, %%mm5 \n\t" /* 0R0R0R0R 0 */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
387 "punpckhbw %%mm4, %%mm1 \n\t" /* GBGBGBGB 2 */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
388 "punpckhbw %%mm7, %%mm6 \n\t" /* 0R0R0R0R 2 */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
389 "movq %%mm2, %%mm0 \n\t" /* GBGBGBGB 0 */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
390 "movq %%mm1, %%mm3 \n\t" /* GBGBGBGB 2 */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
391 "punpcklbw %%mm5, %%mm0 \n\t" /* 0RGB0RGB 0 */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
392 "punpckhbw %%mm5, %%mm2 \n\t" /* 0RGB0RGB 1 */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
393 "punpcklbw %%mm6, %%mm1 \n\t" /* 0RGB0RGB 2 */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
394 "punpckhbw %%mm6, %%mm3 \n\t" /* 0RGB0RGB 3 */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
395 \
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
396 "movq %%mm0, %%mm4 \n\t" /* 0RGB0RGB 0 */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
397 "psrlq $8, %%mm0 \n\t" /* 00RGB0RG 0 */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
398 "pand bm00000111, %%mm4 \n\t" /* 00000RGB 0 */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
399 "pand bm11111000, %%mm0 \n\t" /* 00RGB000 0.5 */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
400 "por %%mm4, %%mm0 \n\t" /* 00RGBRGB 0 */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
401 "movq %%mm2, %%mm4 \n\t" /* 0RGB0RGB 1 */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
402 "psllq $48, %%mm2 \n\t" /* GB000000 1 */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
403 "por %%mm2, %%mm0 \n\t" /* GBRGBRGB 0 */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
404 \
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
405 "movq %%mm4, %%mm2 \n\t" /* 0RGB0RGB 1 */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
406 "psrld $16, %%mm4 \n\t" /* 000R000R 1 */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
407 "psrlq $24, %%mm2 \n\t" /* 0000RGB0 1.5 */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
408 "por %%mm4, %%mm2 \n\t" /* 000RRGBR 1 */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
409 "pand bm00001111, %%mm2 \n\t" /* 0000RGBR 1 */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
410 "movq %%mm1, %%mm4 \n\t" /* 0RGB0RGB 2 */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
411 "psrlq $8, %%mm1 \n\t" /* 00RGB0RG 2 */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
412 "pand bm00000111, %%mm4 \n\t" /* 00000RGB 2 */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
413 "pand bm11111000, %%mm1 \n\t" /* 00RGB000 2.5 */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
414 "por %%mm4, %%mm1 \n\t" /* 00RGBRGB 2 */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
415 "movq %%mm1, %%mm4 \n\t" /* 00RGBRGB 2 */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
416 "psllq $32, %%mm1 \n\t" /* BRGB0000 2 */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
417 "por %%mm1, %%mm2 \n\t" /* BRGBRGBR 1 */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
418 \
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
419 "psrlq $32, %%mm4 \n\t" /* 000000RG 2.5 */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
420 "movq %%mm3, %%mm5 \n\t" /* 0RGB0RGB 3 */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
421 "psrlq $8, %%mm3 \n\t" /* 00RGB0RG 3 */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
422 "pand bm00000111, %%mm5 \n\t" /* 00000RGB 3 */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
423 "pand bm11111000, %%mm3 \n\t" /* 00RGB000 3.5 */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
424 "por %%mm5, %%mm3 \n\t" /* 00RGBRGB 3 */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
425 "psllq $16, %%mm3 \n\t" /* RGBRGB00 3 */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
426 "por %%mm4, %%mm3 \n\t" /* RGBRGBRG 2.5 */\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
427 \
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
428 "leal (%%eax, %%eax, 2), %%ebx \n\t"\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
429 MOVNTQ(%%mm0, (%4, %%ebx))\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
430 MOVNTQ(%%mm2, 8(%4, %%ebx))\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
431 MOVNTQ(%%mm3, 16(%4, %%ebx))\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
432 \
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
433 "addl $8, %%eax \n\t"\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
434 "cmpl %5, %%eax \n\t"\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
435 " jb 1b \n\t"
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
436
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
437
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
438 /**
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
439 * Vertically scale YV12 to RGB: linearly interpolate between two source lines and convert to RGB in a single pass
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
440 */
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
441 static inline void yuv2rgbX(uint16_t *buf0, uint16_t *buf1, uint16_t *uvbuf0, uint16_t *uvbuf1,
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
442 uint8_t *dest, int dstw, int yalpha, int uvalpha, int dstbpp)
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
443 {
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
444 int yalpha1=yalpha^4095;
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
445 int uvalpha1=uvalpha^4095;
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
446 int i;
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
447
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
448 if(fullUVIpol)
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
449 {
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
450
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
451 #ifdef HAVE_MMX
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
452 if(dstbpp == 32)
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
453 {
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
454 asm volatile(
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
455
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
456
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
457 FULL_YSCALEYUV2RGB
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
458 "punpcklbw %%mm1, %%mm3 \n\t" // BGBGBGBG
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
459 "punpcklbw %%mm7, %%mm0 \n\t" // R0R0R0R0
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
460
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
461 "movq %%mm3, %%mm1 \n\t"
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
462 "punpcklwd %%mm0, %%mm3 \n\t" // BGR0BGR0
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
463 "punpckhwd %%mm0, %%mm1 \n\t" // BGR0BGR0
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
464
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
465 MOVNTQ(%%mm3, (%4, %%eax, 4))
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
466 MOVNTQ(%%mm1, 8(%4, %%eax, 4))
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
467
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
468 "addl $4, %%eax \n\t"
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
469 "cmpl %5, %%eax \n\t"
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
470 " jb 1b \n\t"
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
471
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
472
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
473 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstw),
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
474 "m" (yalpha1), "m" (uvalpha1)
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
475 : "%eax"
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
476 );
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
477 }
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
478 else if(dstbpp==24)
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
479 {
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
480 asm volatile(
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
481
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
482 FULL_YSCALEYUV2RGB
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
483
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
484 // lsb ... msb
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
485 "punpcklbw %%mm1, %%mm3 \n\t" // BGBGBGBG
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
486 "punpcklbw %%mm7, %%mm0 \n\t" // R0R0R0R0
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
487
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
488 "movq %%mm3, %%mm1 \n\t"
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
489 "punpcklwd %%mm0, %%mm3 \n\t" // BGR0BGR0
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
490 "punpckhwd %%mm0, %%mm1 \n\t" // BGR0BGR0
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
491
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
492 "movq %%mm3, %%mm2 \n\t" // BGR0BGR0
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
493 "psrlq $8, %%mm3 \n\t" // GR0BGR00
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
494 "pand bm00000111, %%mm2 \n\t" // BGR00000
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
495 "pand bm11111000, %%mm3 \n\t" // 000BGR00
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
496 "por %%mm2, %%mm3 \n\t" // BGRBGR00
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
497 "movq %%mm1, %%mm2 \n\t"
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
498 "psllq $48, %%mm1 \n\t" // 000000BG
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
499 "por %%mm1, %%mm3 \n\t" // BGRBGRBG
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
500
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
501 "movq %%mm2, %%mm1 \n\t" // BGR0BGR0
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
502 "psrld $16, %%mm2 \n\t" // R000R000
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
503 "psrlq $24, %%mm1 \n\t" // 0BGR0000
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
504 "por %%mm2, %%mm1 \n\t" // RBGRR000
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
505
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
506 "movl %4, %%ebx \n\t"
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
507 "addl %%eax, %%ebx \n\t"
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
508
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
509 #ifdef HAVE_MMX2
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
510 //FIXME Alignment
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
511 "movntq %%mm3, (%%ebx, %%eax, 2)\n\t"
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
512 "movntq %%mm1, 8(%%ebx, %%eax, 2)\n\t"
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
513 #else
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
514 "movd %%mm3, (%%ebx, %%eax, 2) \n\t"
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
515 "psrlq $32, %%mm3 \n\t"
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
516 "movd %%mm3, 4(%%ebx, %%eax, 2) \n\t"
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
517 "movd %%mm1, 8(%%ebx, %%eax, 2) \n\t"
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
518 #endif
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
519 "addl $4, %%eax \n\t"
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
520 "cmpl %5, %%eax \n\t"
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
521 " jb 1b \n\t"
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
522
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
523 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest), "m" (dstw),
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
524 "m" (yalpha1), "m" (uvalpha1)
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
525 : "%eax", "%ebx"
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
526 );
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
527 }
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
528 else if(dstbpp==15)
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
529 {
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
530 asm volatile(
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
531
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
532 FULL_YSCALEYUV2RGB
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
533 #ifdef DITHER1XBPP
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
534 "paddusb b16Dither, %%mm1 \n\t"
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
535 "paddusb b16Dither, %%mm0 \n\t"
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
536 "paddusb b16Dither, %%mm3 \n\t"
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
537 #endif
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
538 "punpcklbw %%mm7, %%mm1 \n\t" // 0G0G0G0G
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
539 "punpcklbw %%mm7, %%mm3 \n\t" // 0B0B0B0B
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
540 "punpcklbw %%mm7, %%mm0 \n\t" // 0R0R0R0R
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
541
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
542 "psrlw $3, %%mm3 \n\t"
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
543 "psllw $2, %%mm1 \n\t"
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
544 "psllw $7, %%mm0 \n\t"
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
545 "pand g15Mask, %%mm1 \n\t"
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
546 "pand r15Mask, %%mm0 \n\t"
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
547
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
548 "por %%mm3, %%mm1 \n\t"
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
549 "por %%mm1, %%mm0 \n\t"
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
550
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
551 MOVNTQ(%%mm0, (%4, %%eax, 2))
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
552
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
553 "addl $4, %%eax \n\t"
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
554 "cmpl %5, %%eax \n\t"
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
555 " jb 1b \n\t"
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
556
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
557 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstw),
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
558 "m" (yalpha1), "m" (uvalpha1)
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
559 : "%eax"
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
560 );
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
561 }
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
562 else if(dstbpp==16)
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
563 {
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
564 asm volatile(
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
565
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
566 FULL_YSCALEYUV2RGB
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
567 #ifdef DITHER1XBPP
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
568 "paddusb g16Dither, %%mm1 \n\t"
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
569 "paddusb b16Dither, %%mm0 \n\t"
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
570 "paddusb b16Dither, %%mm3 \n\t"
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
571 #endif
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
572 "punpcklbw %%mm7, %%mm1 \n\t" // 0G0G0G0G
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
573 "punpcklbw %%mm7, %%mm3 \n\t" // 0B0B0B0B
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
574 "punpcklbw %%mm7, %%mm0 \n\t" // 0R0R0R0R
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
575
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
576 "psrlw $3, %%mm3 \n\t"
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
577 "psllw $3, %%mm1 \n\t"
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
578 "psllw $8, %%mm0 \n\t"
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
579 "pand g16Mask, %%mm1 \n\t"
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
580 "pand r16Mask, %%mm0 \n\t"
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
581
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
582 "por %%mm3, %%mm1 \n\t"
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
583 "por %%mm1, %%mm0 \n\t"
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
584
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
585 MOVNTQ(%%mm0, (%4, %%eax, 2))
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
586
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
587 "addl $4, %%eax \n\t"
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
588 "cmpl %5, %%eax \n\t"
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
589 " jb 1b \n\t"
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
590
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
591 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstw),
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
592 "m" (yalpha1), "m" (uvalpha1)
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
593 : "%eax"
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
594 );
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
595 }
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
596 #else
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
597 if(dstbpp==32 || dstbpp==24)
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
598 {
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
599 for(i=0;i<dstw;i++){
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
600 // vertical linear interpolation && yuv2rgb in a single step:
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
601 int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)];
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
602 int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19);
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
603 int V=((uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>19);
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
604 dest[0]=clip_table[((Y + yuvtab_3343[U]) >>13)];
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
605 dest[1]=clip_table[((Y + yuvtab_0c92[V] + yuvtab_1a1e[U]) >>13)];
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
606 dest[2]=clip_table[((Y + yuvtab_40cf[V]) >>13)];
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
607 dest+=dstbpp>>3;
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
608 }
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
609 }
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
610 else if(dstbpp==16)
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
611 {
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
612 for(i=0;i<dstw;i++){
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
613 // vertical linear interpolation && yuv2rgb in a single step:
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
614 int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)];
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
615 int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19);
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
616 int V=((uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>19);
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
617
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
618 ((uint16_t*)dest)[0] =
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
619 (clip_table[((Y + yuvtab_3343[U]) >>13)]>>3) |
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
620 (clip_table[((Y + yuvtab_0c92[V] + yuvtab_1a1e[U]) >>13)]<<3)&0x07E0 |
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
621 (clip_table[((Y + yuvtab_40cf[V]) >>13)]<<8)&0xF800;
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
622 dest+=2;
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
623 }
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
624 }
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
625 else if(dstbpp==15)
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
626 {
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
627 for(i=0;i<dstw;i++){
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
628 // vertical linear interpolation && yuv2rgb in a single step:
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
629 int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)];
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
630 int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19);
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
631 int V=((uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>19);
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
632
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
633 ((uint16_t*)dest)[0] =
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
634 (clip_table[((Y + yuvtab_3343[U]) >>13)]>>3) |
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
635 (clip_table[((Y + yuvtab_0c92[V] + yuvtab_1a1e[U]) >>13)]<<2)&0x03E0 |
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
636 (clip_table[((Y + yuvtab_40cf[V]) >>13)]<<7)&0x7C00;
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
637 dest+=2;
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
638 }
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
639 }
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
640 #endif
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
641 }//FULL_UV_IPOL
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
642 else
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
643 {
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
644 #ifdef HAVE_MMX
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
645 if(dstbpp == 32)
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
646 {
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
647 asm volatile(
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
648 YSCALEYUV2RGB
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
649 WRITEBGR32
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
650
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
651 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstw),
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
652 "m" (yalpha1), "m" (uvalpha1)
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
653 : "%eax"
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
654 );
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
655 }
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
656 else if(dstbpp==24)
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
657 {
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
658 asm volatile(
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
659 YSCALEYUV2RGB
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
660 WRITEBGR24
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
661
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
662 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstw),
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
663 "m" (yalpha1), "m" (uvalpha1)
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
664 : "%eax", "%ebx"
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
665 );
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
666 }
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
667 else if(dstbpp==15)
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
668 {
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
669 asm volatile(
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
670 YSCALEYUV2RGB
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
671 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
672 #ifdef DITHER1XBPP
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
673 "paddusb b16Dither, %%mm2 \n\t"
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
674 "paddusb b16Dither, %%mm4 \n\t"
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
675 "paddusb b16Dither, %%mm5 \n\t"
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
676 #endif
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
677
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
678 WRITEBGR15
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
679
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
680 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstw),
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
681 "m" (yalpha1), "m" (uvalpha1)
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
682 : "%eax"
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
683 );
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
684 }
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
685 else if(dstbpp==16)
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
686 {
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
687 asm volatile(
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
688 YSCALEYUV2RGB
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
689 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
690 #ifdef DITHER1XBPP
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
691 "paddusb g16Dither, %%mm2 \n\t"
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
692 "paddusb b16Dither, %%mm4 \n\t"
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
693 "paddusb b16Dither, %%mm5 \n\t"
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
694 #endif
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
695
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
696 WRITEBGR16
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
697
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
698 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstw),
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
699 "m" (yalpha1), "m" (uvalpha1)
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
700 : "%eax"
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
701 );
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
702 }
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
703 #else
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
704 //FIXME unroll C loop and dont recalculate UV
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
705 if(dstbpp==32 || dstbpp==24)
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
706 {
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
707 for(i=0;i<dstw;i++){
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
708 // vertical linear interpolation && yuv2rgb in a single step:
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
709 int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)];
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
710 int U=((uvbuf0[i/2]*uvalpha1+uvbuf1[i/2]*uvalpha)>>19);
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
711 int V=((uvbuf0[i/2+2048]*uvalpha1+uvbuf1[i/2+2048]*uvalpha)>>19);
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
712 dest[0]=clip_table[((Y + yuvtab_3343[U]) >>13)];
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
713 dest[1]=clip_table[((Y + yuvtab_0c92[V] + yuvtab_1a1e[U]) >>13)];
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
714 dest[2]=clip_table[((Y + yuvtab_40cf[V]) >>13)];
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
715 dest+=dstbpp>>3;
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
716 }
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
717 }
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
718 else if(dstbpp==16)
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
719 {
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
720 for(i=0;i<dstw;i++){
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
721 // vertical linear interpolation && yuv2rgb in a single step:
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
722 int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)];
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
723 int U=((uvbuf0[i/2]*uvalpha1+uvbuf1[i/2]*uvalpha)>>19);
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
724 int V=((uvbuf0[i/2+2048]*uvalpha1+uvbuf1[i/2+2048]*uvalpha)>>19);
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
725
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
726 ((uint16_t*)dest)[0] =
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
727 (clip_table[((Y + yuvtab_3343[U]) >>13)]>>3) |
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
728 (clip_table[((Y + yuvtab_0c92[V] + yuvtab_1a1e[U]) >>13)]<<3)&0x07E0 |
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
729 (clip_table[((Y + yuvtab_40cf[V]) >>13)]<<8)&0xF800;
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
730 dest+=2;
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
731 }
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
732 }
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
733 else if(dstbpp==15)
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
734 {
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
735 for(i=0;i<dstw;i++){
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
736 // vertical linear interpolation && yuv2rgb in a single step:
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
737 int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)];
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
738 int U=((uvbuf0[i/2]*uvalpha1+uvbuf1[i/2]*uvalpha)>>19);
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
739 int V=((uvbuf0[i/2+2048]*uvalpha1+uvbuf1[i/2+2048]*uvalpha)>>19);
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
740
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
741 ((uint16_t*)dest)[0] =
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
742 (clip_table[((Y + yuvtab_3343[U]) >>13)]>>3) |
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
743 (clip_table[((Y + yuvtab_0c92[V] + yuvtab_1a1e[U]) >>13)]<<2)&0x03E0 |
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
744 (clip_table[((Y + yuvtab_40cf[V]) >>13)]<<7)&0x7C00;
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
745 dest+=2;
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
746 }
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
747 }
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
748 #endif
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
749 } //!FULL_UV_IPOL
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
750 }
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
751
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
752 /**
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
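753 * YV12 to RGB without vertical scaling: the C path takes luma from buf0 alone (no interpolation) but still blends chroma between uvbuf0 and uvbuf1; defers to yuv2rgbX when fullUVIpol or allwaysIpol is set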
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
754 */
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
755 static inline void yuv2rgb1(uint16_t *buf0, uint16_t *buf1, uint16_t *uvbuf0, uint16_t *uvbuf1,
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
756 uint8_t *dest, int dstw, int yalpha, int uvalpha, int dstbpp)
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
757 {
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
758 int yalpha1=yalpha^4095;
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
759 int uvalpha1=uvalpha^4095;
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
760 int i;
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
761 if(fullUVIpol || allwaysIpol)
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
762 {
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
763 yuv2rgbX(buf0, buf1, uvbuf0, uvbuf1, dest, dstw, yalpha, uvalpha, dstbpp);
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
764 return;
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
765 }
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
766 #ifdef HAVE_MMX
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
767 if(dstbpp == 32)
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
768 {
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
769 asm volatile(
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
770 YSCALEYUV2RGB1
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
771 WRITEBGR32
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
772 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstw),
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
773 "m" (yalpha1), "m" (uvalpha1)
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
774 : "%eax"
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
775 );
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
776 }
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
777 else if(dstbpp==24)
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
778 {
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
779 asm volatile(
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
780 YSCALEYUV2RGB1
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
781 WRITEBGR24
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
782 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstw),
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
783 "m" (yalpha1), "m" (uvalpha1)
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
784 : "%eax", "%ebx"
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
785 );
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
786 }
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
787 else if(dstbpp==15)
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
788 {
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
789 asm volatile(
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
790 YSCALEYUV2RGB1
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
791 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
792 #ifdef DITHER1XBPP
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
793 "paddusb b16Dither, %%mm2 \n\t"
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
794 "paddusb b16Dither, %%mm4 \n\t"
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
795 "paddusb b16Dither, %%mm5 \n\t"
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
796 #endif
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
797 WRITEBGR15
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
798 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstw),
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
799 "m" (yalpha1), "m" (uvalpha1)
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
800 : "%eax"
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
801 );
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
802 }
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
803 else if(dstbpp==16)
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
804 {
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
805 asm volatile(
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
806 YSCALEYUV2RGB1
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
807 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
808 #ifdef DITHER1XBPP
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
809 "paddusb g16Dither, %%mm2 \n\t"
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
810 "paddusb b16Dither, %%mm4 \n\t"
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
811 "paddusb b16Dither, %%mm5 \n\t"
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
812 #endif
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
813
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
814 WRITEBGR16
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
815 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstw),
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
816 "m" (yalpha1), "m" (uvalpha1)
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
817 : "%eax"
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
818 );
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
819 }
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
820 #else
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
821 //FIXME unroll C loop and dont recalculate UV
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
822 if(dstbpp==32 || dstbpp==24)
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
823 {
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
824 for(i=0;i<dstw;i++){
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
825 // vertical linear interpolation && yuv2rgb in a single step:
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
826 int Y=yuvtab_2568[buf0[i]>>7];
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
827 int U=((uvbuf0[i/2]*uvalpha1+uvbuf1[i/2]*uvalpha)>>19);
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
828 int V=((uvbuf0[i/2+2048]*uvalpha1+uvbuf1[i/2+2048]*uvalpha)>>19);
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
829 dest[0]=clip_table[((Y + yuvtab_3343[U]) >>13)];
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
830 dest[1]=clip_table[((Y + yuvtab_0c92[V] + yuvtab_1a1e[U]) >>13)];
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
831 dest[2]=clip_table[((Y + yuvtab_40cf[V]) >>13)];
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
832 dest+=dstbpp>>3;
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
833 }
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
834 }
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
835 else if(dstbpp==16)
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
836 {
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
837 for(i=0;i<dstw;i++){
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
838 // vertical linear interpolation && yuv2rgb in a single step:
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
839 int Y=yuvtab_2568[buf0[i]>>7];
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
840 int U=((uvbuf0[i/2]*uvalpha1+uvbuf1[i/2]*uvalpha)>>19);
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
841 int V=((uvbuf0[i/2+2048]*uvalpha1+uvbuf1[i/2+2048]*uvalpha)>>19);
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
842
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
843 ((uint16_t*)dest)[0] =
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
844 (clip_table[((Y + yuvtab_3343[U]) >>13)]>>3) |
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
845 (clip_table[((Y + yuvtab_0c92[V] + yuvtab_1a1e[U]) >>13)]<<3)&0x07E0 |
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
846 (clip_table[((Y + yuvtab_40cf[V]) >>13)]<<8)&0xF800;
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
847 dest+=2;
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
848 }
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
849 }
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
850 else if(dstbpp==15)
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
851 {
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
852 for(i=0;i<dstw;i++){
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
853 // vertical linear interpolation && yuv2rgb in a single step:
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
854 int Y=yuvtab_2568[buf0[i]>>7];
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
855 int U=((uvbuf0[i/2]*uvalpha1+uvbuf1[i/2]*uvalpha)>>19);
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
856 int V=((uvbuf0[i/2+2048]*uvalpha1+uvbuf1[i/2+2048]*uvalpha)>>19);
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
857
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
858 ((uint16_t*)dest)[0] =
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
859 (clip_table[((Y + yuvtab_3343[U]) >>13)]>>3) |
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
860 (clip_table[((Y + yuvtab_0c92[V] + yuvtab_1a1e[U]) >>13)]<<2)&0x03E0 |
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
861 (clip_table[((Y + yuvtab_40cf[V]) >>13)]<<7)&0x7C00;
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
862 dest+=2;
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
863 }
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
864 }
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
865 #endif
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
866 }
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
867
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
868
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
869
2232
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
870
2216
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
871 // *** bilinear scaling and yuv->rgb conversion of yv12 slices:
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
872 // *** Note: it's called multiple times while decoding a frame, first time y==0
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
873 // *** Designed to upscale, but may work for downscale too.
2274
9ee34c6950e0 xinc scaled by 16 instead of 8
arpi
parents: 2271
diff changeset
874 // s_xinc = (src_width << 16) / dst_width
2216
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
875 // s_yinc = (src_height << 16) / dst_height
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
876 void SwScale_YV12slice_brg24(unsigned char* srcptr[],int stride[], int y, int h,
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
877 unsigned char* dstptr, int dststride, int dstw, int dstbpp,
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
878 unsigned int s_xinc,unsigned int s_yinc){
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
879
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
880 // scaling factors:
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
881 //static int s_yinc=(vo_dga_src_height<<16)/vo_dga_vp_height;
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
882 //static int s_xinc=(vo_dga_src_width<<8)/vo_dga_vp_width;
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
883
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
884 unsigned int s_xinc2;
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
885
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
886 static int s_srcypos; // points to the dst Pixels center in the source (0 is the center of pixel 0,0 in src)
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
887 static int s_ypos;
2216
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
888
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
889 // last horizontally interpolated lines, used to avoid unnecessary calculations
2216
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
890 static int s_last_ypos;
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
891 static int s_last_y1pos;
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
892
2232
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
893 static int static_dstw;
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
894
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
895 #ifdef HAVE_MMX2
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
896 // used to detect a horizontal size change
2232
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
897 static int old_dstw= -1;
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
898 static int old_s_xinc= -1;
2316
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
899 #endif
2232
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
900
2279
9b9c3363abbe horizontal scaling bugs fixed, should be mostly bugfree now
michael
parents: 2274
diff changeset
901 int canMMX2BeUsed=0;
9b9c3363abbe horizontal scaling bugs fixed, should be mostly bugfree now
michael
parents: 2274
diff changeset
902 int srcWidth= (dstw*s_xinc + 0x8000)>>16;
2316
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
903 int dstUVw= fullUVIpol ? dstw : dstw/2;
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
904
2216
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
905
2270
56ca174d8169 vertical lines bugfix
michael
parents: 2269
diff changeset
906 #ifdef HAVE_MMX2
2279
9b9c3363abbe horizontal scaling bugs fixed, should be mostly bugfree now
michael
parents: 2274
diff changeset
907 canMMX2BeUsed= (s_xinc <= 0x10000 && (dstw&31)==0 && (srcWidth&15)==0) ? 1 : 0;
2270
56ca174d8169 vertical lines bugfix
michael
parents: 2269
diff changeset
908 #endif
56ca174d8169 vertical lines bugfix
michael
parents: 2269
diff changeset
909
2279
9b9c3363abbe horizontal scaling bugs fixed, should be mostly bugfree now
michael
parents: 2274
diff changeset
910 // match pixel 0 of the src to pixel 0 of dst and match pixel n-2 of src to pixel n-2 of dst
9b9c3363abbe horizontal scaling bugs fixed, should be mostly bugfree now
michael
parents: 2274
diff changeset
911 // n-2 is the last chrominance sample available
9b9c3363abbe horizontal scaling bugs fixed, should be mostly bugfree now
michael
parents: 2274
diff changeset
912 // FIXME this is not perfect, but no one should notice the difference, the more correct variant
9b9c3363abbe horizontal scaling bugs fixed, should be mostly bugfree now
michael
parents: 2274
diff changeset
913 // would be like the vertical one, but that would require some special code for the
9b9c3363abbe horizontal scaling bugs fixed, should be mostly bugfree now
michael
parents: 2274
diff changeset
914 // first and last pixel
9b9c3363abbe horizontal scaling bugs fixed, should be mostly bugfree now
michael
parents: 2274
diff changeset
915 if(canMMX2BeUsed) s_xinc+= 20;
9b9c3363abbe horizontal scaling bugs fixed, should be mostly bugfree now
michael
parents: 2274
diff changeset
916 else s_xinc = ((srcWidth-2)<<16)/(dstw-2) - 20;
9b9c3363abbe horizontal scaling bugs fixed, should be mostly bugfree now
michael
parents: 2274
diff changeset
917
2316
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
918 if(fullUVIpol) s_xinc2= s_xinc>>1;
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
919 else s_xinc2= s_xinc;
2271
a8d57bbc4413 horizontal lines bugfix
michael
parents: 2270
diff changeset
920 // force calculation of the horizontal interpolation of the first line
a8d57bbc4413 horizontal lines bugfix
michael
parents: 2270
diff changeset
921 s_last_ypos=-99;
a8d57bbc4413 horizontal lines bugfix
michael
parents: 2270
diff changeset
922 s_last_y1pos=-99;
a8d57bbc4413 horizontal lines bugfix
michael
parents: 2270
diff changeset
923
2216
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
924 if(y==0){
2297
7a89cb124e81 first line bug fixed (happend if the image was scaled down or 1:1)
michael
parents: 2279
diff changeset
925 s_srcypos=-0x8000;
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
926 s_ypos=0;
2232
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
927 #ifdef HAVE_MMX2
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
928 // can't downscale !!!
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
929 if((old_s_xinc != s_xinc || old_dstw!=dstw) && canMMX2BeUsed)
2232
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
930 {
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
931 uint8_t *fragment;
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
932 int imm8OfPShufW1;
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
933 int imm8OfPShufW2;
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
934 int fragmentLength;
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
935
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
936 int xpos, xx, xalpha, i;
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
937
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
938 old_s_xinc= s_xinc;
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
939 old_dstw= dstw;
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
940
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
941 static_dstw= dstw;
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
942
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
943 // create an optimized horizontal scaling routine
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
944
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
945 //code fragment
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
946
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
947 asm volatile(
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
948 "jmp 9f \n\t"
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
949 // Begin
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
950 "0: \n\t"
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
951 "movq (%%esi), %%mm0 \n\t" //FIXME Alignment
2232
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
952 "movq %%mm0, %%mm1 \n\t"
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
953 "psrlq $8, %%mm0 \n\t"
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
954 "punpcklbw %%mm7, %%mm1 \n\t"
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
955 "movq %%mm2, %%mm3 \n\t"
2232
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
956 "punpcklbw %%mm7, %%mm0 \n\t"
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
957 "addw %%bx, %%cx \n\t" //2*xalpha += (4*s_xinc)&0xFFFF
2232
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
958 "pshufw $0xFF, %%mm1, %%mm1 \n\t"
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
959 "1: \n\t"
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
960 "adcl %%edx, %%esi \n\t" //xx+= (4*s_xinc)>>16 + carry
2232
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
961 "pshufw $0xFF, %%mm0, %%mm0 \n\t"
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
962 "2: \n\t"
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
963 "psrlw $9, %%mm3 \n\t"
2232
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
964 "psubw %%mm1, %%mm0 \n\t"
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
965 "pmullw %%mm3, %%mm0 \n\t"
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
966 "paddw %%mm6, %%mm2 \n\t" // 2*alpha += xpos&0xFFFF
2232
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
967 "psllw $7, %%mm1 \n\t"
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
968 "paddw %%mm1, %%mm0 \n\t"
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
969
2232
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
970 "movq %%mm0, (%%edi, %%eax) \n\t"
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
971
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
972 "addl $8, %%eax \n\t"
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
973 // End
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
974 "9: \n\t"
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
975 // "int $3\n\t"
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
976 "leal 0b, %0 \n\t"
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
977 "leal 1b, %1 \n\t"
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
978 "leal 2b, %2 \n\t"
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
979 "decl %1 \n\t"
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
980 "decl %2 \n\t"
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
981 "subl %0, %1 \n\t"
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
982 "subl %0, %2 \n\t"
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
983 "leal 9b, %3 \n\t"
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
984 "subl %0, %3 \n\t"
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
985 :"=r" (fragment), "=r" (imm8OfPShufW1), "=r" (imm8OfPShufW2),
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
986 "=r" (fragmentLength)
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
987 );
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
988
2279
9b9c3363abbe horizontal scaling bugs fixed, should be mostly bugfree now
michael
parents: 2274
diff changeset
989 xpos= 0; //s_xinc/2 - 0x8000; // difference between pixel centers
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
990
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
991 /* choose xinc so that all 8 parts fit exactly
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
992 Note: we cannot use just 1 part because it would not fit in the code cache */
2279
9b9c3363abbe horizontal scaling bugs fixed, should be mostly bugfree now
michael
parents: 2274
diff changeset
993 // s_xinc2_diff= -((((s_xinc2*(dstw/8))&0xFFFF))/(dstw/8))-10;
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
994 // s_xinc_diff= -((((s_xinc*(dstw/8))&0xFFFF))/(dstw/8));
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
995 #ifdef ALT_ERROR
2279
9b9c3363abbe horizontal scaling bugs fixed, should be mostly bugfree now
michael
parents: 2274
diff changeset
996 // s_xinc2_diff+= ((0x10000/(dstw/8)));
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
997 #endif
2279
9b9c3363abbe horizontal scaling bugs fixed, should be mostly bugfree now
michael
parents: 2274
diff changeset
998 // s_xinc_diff= s_xinc2_diff*2;
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
999
2279
9b9c3363abbe horizontal scaling bugs fixed, should be mostly bugfree now
michael
parents: 2274
diff changeset
1000 // s_xinc2+= s_xinc2_diff;
9b9c3363abbe horizontal scaling bugs fixed, should be mostly bugfree now
michael
parents: 2274
diff changeset
1001 // s_xinc+= s_xinc_diff;
2270
56ca174d8169 vertical lines bugfix
michael
parents: 2269
diff changeset
1002
2279
9b9c3363abbe horizontal scaling bugs fixed, should be mostly bugfree now
michael
parents: 2274
diff changeset
1003 // old_s_xinc= s_xinc;
2270
56ca174d8169 vertical lines bugfix
michael
parents: 2269
diff changeset
1004
2232
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1005 for(i=0; i<dstw/8; i++)
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1006 {
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1007 int xx=xpos>>16;
2232
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1008
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1009 if((i&3) == 0)
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1010 {
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1011 int a=0;
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1012 int b=((xpos+s_xinc)>>16) - xx;
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1013 int c=((xpos+s_xinc*2)>>16) - xx;
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1014 int d=((xpos+s_xinc*3)>>16) - xx;
2232
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1015
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1016 memcpy(funnyYCode + fragmentLength*i/4, fragment, fragmentLength);
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1017
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1018 funnyYCode[fragmentLength*i/4 + imm8OfPShufW1]=
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1019 funnyYCode[fragmentLength*i/4 + imm8OfPShufW2]=
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1020 a | (b<<2) | (c<<4) | (d<<6);
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1021
2316
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
1022 // if we don't need to read 8 bytes then don't :), reduces the chance of
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
1023 // crossing a cache line
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
1024 if(d<3) funnyYCode[fragmentLength*i/4 + 1]= 0x6E;
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
1025
2232
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1026 funnyYCode[fragmentLength*(i+4)/4]= RET;
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1027 }
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1028 xpos+=s_xinc;
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1029 }
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1030
2279
9b9c3363abbe horizontal scaling bugs fixed, should be mostly bugfree now
michael
parents: 2274
diff changeset
1031 xpos= 0; //s_xinc2/2 - 0x10000; // difference between centers of chrom samples
2316
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
1032 for(i=0; i<dstUVw/8; i++)
2232
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1033 {
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1034 int xx=xpos>>16;
2232
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1035
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1036 if((i&3) == 0)
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1037 {
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1038 int a=0;
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1039 int b=((xpos+s_xinc2)>>16) - xx;
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1040 int c=((xpos+s_xinc2*2)>>16) - xx;
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1041 int d=((xpos+s_xinc2*3)>>16) - xx;
2232
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1042
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1043 memcpy(funnyUVCode + fragmentLength*i/4, fragment, fragmentLength);
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1044
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1045 funnyUVCode[fragmentLength*i/4 + imm8OfPShufW1]=
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1046 funnyUVCode[fragmentLength*i/4 + imm8OfPShufW2]=
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1047 a | (b<<2) | (c<<4) | (d<<6);
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1048
2316
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
1049 // if we don't need to read 8 bytes then don't :), reduces the chance of
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
1050 // crossing a cache line
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
1051 if(d<3) funnyUVCode[fragmentLength*i/4 + 1]= 0x6E;
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
1052
2232
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1053 funnyUVCode[fragmentLength*(i+4)/4]= RET;
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1054 }
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1055 xpos+=s_xinc2;
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1056 }
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1057 // funnyCode[0]= RET;
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1058 }
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1059
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1060 #endif // HAVE_MMX2
2216
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
1061 } // reset counters
2232
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1062
2270
56ca174d8169 vertical lines bugfix
michael
parents: 2269
diff changeset
1063
2216
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
1064 while(1){
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
1065 unsigned char *dest=dstptr+dststride*s_ypos;
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1066 int y0=(s_srcypos + 0xFFFF)>>16; // first luminance source line number below the dst line
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1067 // points to the dst Pixels center in the source (0 is the center of pixel 0,0 in src)
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1068 int srcuvpos= s_srcypos + s_yinc/2 - 0x8000;
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1069 int y1=(srcuvpos + 0x1FFFF)>>17; // first chrominance source line number below the dst line
2316
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
1070 int yalpha=((s_srcypos-1)&0xFFFF)>>4;
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
1071 int uvalpha=((srcuvpos-1)&0x1FFFF)>>5;
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1072 uint16_t *buf0=pix_buf_y[y0&1]; // top line of the interpolated slice
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1073 uint16_t *buf1=pix_buf_y[((y0+1)&1)]; // bottom line of the interpolated slice
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1074 uint16_t *uvbuf0=pix_buf_uv[y1&1]; // top line of the interpolated slice
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1075 uint16_t *uvbuf1=pix_buf_uv[(y1+1)&1]; // bottom line of the interpolated slice
2216
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
1076 int i;
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
1077
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1078 // if this is before the first line then use only the first src line
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1079 if(y0==0) buf0= buf1;
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1080 if(y1==0) uvbuf0= uvbuf1; // yes we do have to check this, its not the same as y0==0
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1081
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1082 if(y0>=y+h) break; // FIXME wrong, skips last lines, but they are dupliactes anyway
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1083
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1084 // if this is after the last line then use only the last src line
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1085 if(y0>=y+h)
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1086 {
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1087 buf1= buf0;
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1088 s_last_ypos=y0;
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1089 }
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1090 if(y1>=(y+h)/2)
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1091 {
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1092 uvbuf1= uvbuf0;
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1093 s_last_y1pos=y1;
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1094 }
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1095
2216
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
1096
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
1097 s_ypos++; s_srcypos+=s_yinc;
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
1098
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1099 //only interpolate the src line horizontally if we didn't do it already
2216
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
1100 if(s_last_ypos!=y0){
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
1101 unsigned char *src=srcptr[0]+(y0-y)*stride[0];
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
1102 unsigned int xpos=0;
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
1103 s_last_ypos=y0;
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
1104 // *** horizontal scale Y line to temp buffer
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1105 #ifdef ARCH_X86
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1106
2232
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1107 #ifdef HAVE_MMX2
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1108 if(canMMX2BeUsed)
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1109 {
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1110 asm volatile(
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1111 "pxor %%mm7, %%mm7 \n\t"
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1112 "pxor %%mm2, %%mm2 \n\t" // 2*xalpha
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1113 "movd %5, %%mm6 \n\t" // s_xinc&0xFFFF
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1114 "punpcklwd %%mm6, %%mm6 \n\t"
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1115 "punpcklwd %%mm6, %%mm6 \n\t"
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1116 "movq %%mm6, %%mm2 \n\t"
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1117 "psllq $16, %%mm2 \n\t"
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1118 "paddw %%mm6, %%mm2 \n\t"
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1119 "psllq $16, %%mm2 \n\t"
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1120 "paddw %%mm6, %%mm2 \n\t"
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1121 "psllq $16, %%mm2 \n\t" //0,t,2t,3t t=s_xinc&0xFF
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1122 "movq %%mm2, temp0 \n\t"
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1123 "movd %4, %%mm6 \n\t" //(s_xinc*4)&0xFFFF
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1124 "punpcklwd %%mm6, %%mm6 \n\t"
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1125 "punpcklwd %%mm6, %%mm6 \n\t"
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1126 "xorl %%eax, %%eax \n\t" // i
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1127 "movl %0, %%esi \n\t" // src
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1128 "movl %1, %%edi \n\t" // buf1
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1129 "movl %3, %%edx \n\t" // (s_xinc*4)>>16
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1130 "xorl %%ecx, %%ecx \n\t"
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1131 "xorl %%ebx, %%ebx \n\t"
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1132 "movw %4, %%bx \n\t" // (s_xinc*4)&0xFFFF
2316
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
1133 #ifdef HAVE_MMX2
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
1134 #define FUNNY_Y_CODE \
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
1135 "prefetchnta 1024(%%esi) \n\t"\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
1136 "prefetchnta 1056(%%esi) \n\t"\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
1137 "prefetchnta 1088(%%esi) \n\t"\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
1138 "call funnyYCode \n\t"\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
1139 "movq temp0, %%mm2 \n\t"\
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1140 "xorl %%ecx, %%ecx \n\t"
2316
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
1141 #else
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
1142 #define FUNNY_Y_CODE \
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
1143 "call funnyYCode \n\t"\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
1144 "movq temp0, %%mm2 \n\t"\
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1145 "xorl %%ecx, %%ecx \n\t"
2316
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
1146 #endif
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
1147 FUNNY_Y_CODE
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
1148 FUNNY_Y_CODE
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
1149 FUNNY_Y_CODE
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
1150 FUNNY_Y_CODE
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
1151 FUNNY_Y_CODE
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
1152 FUNNY_Y_CODE
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
1153 FUNNY_Y_CODE
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
1154 FUNNY_Y_CODE
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
1155
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1156 :: "m" (src), "m" (buf1), "m" (dstw), "m" ((s_xinc*4)>>16),
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1157 "m" ((s_xinc*4)&0xFFFF), "m" (s_xinc&0xFFFF)
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1158 : "%eax", "%ebx", "%ecx", "%edx", "%esi", "%edi"
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1159 );
2279
9b9c3363abbe horizontal scaling bugs fixed, should be mostly bugfree now
michael
parents: 2274
diff changeset
1160 for(i=dstw-1; (i*s_xinc)>>16 >=srcWidth-1; i--) buf1[i] = src[srcWidth-1]*128;
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1161 }
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1162 else
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1163 {
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1164 #endif
2316
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
1165 //NO MMX just normal asm ...
2232
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1166 asm volatile(
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1167 "xorl %%eax, %%eax \n\t" // i
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1168 "xorl %%ebx, %%ebx \n\t" // xx
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1169 "xorl %%ecx, %%ecx \n\t" // 2*xalpha
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1170 "1: \n\t"
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1171 "movzbl (%0, %%ebx), %%edi \n\t" //src[xx]
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1172 "movzbl 1(%0, %%ebx), %%esi \n\t" //src[xx+1]
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1173 "subl %%edi, %%esi \n\t" //src[xx+1] - src[xx]
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1174 "imull %%ecx, %%esi \n\t" //(src[xx+1] - src[xx])*2*xalpha
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1175 "shll $16, %%edi \n\t"
2232
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1176 "addl %%edi, %%esi \n\t" //src[xx+1]*2*xalpha + src[xx]*(1-2*xalpha)
2233
5d8a0bc932c4 fixed out of regs "bug"
michael
parents: 2232
diff changeset
1177 "movl %1, %%edi \n\t"
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1178 "shrl $9, %%esi \n\t"
2233
5d8a0bc932c4 fixed out of regs "bug"
michael
parents: 2232
diff changeset
1179 "movw %%si, (%%edi, %%eax, 2) \n\t"
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1180 "addw %4, %%cx \n\t" //2*xalpha += s_xinc&0xFF
2232
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1181 "adcl %3, %%ebx \n\t" //xx+= s_xinc>>8 + carry
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1182
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1183 "movzbl (%0, %%ebx), %%edi \n\t" //src[xx]
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1184 "movzbl 1(%0, %%ebx), %%esi \n\t" //src[xx+1]
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1185 "subl %%edi, %%esi \n\t" //src[xx+1] - src[xx]
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1186 "imull %%ecx, %%esi \n\t" //(src[xx+1] - src[xx])*2*xalpha
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1187 "shll $16, %%edi \n\t"
2232
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1188 "addl %%edi, %%esi \n\t" //src[xx+1]*2*xalpha + src[xx]*(1-2*xalpha)
2233
5d8a0bc932c4 fixed out of regs "bug"
michael
parents: 2232
diff changeset
1189 "movl %1, %%edi \n\t"
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1190 "shrl $9, %%esi \n\t"
2233
5d8a0bc932c4 fixed out of regs "bug"
michael
parents: 2232
diff changeset
1191 "movw %%si, 2(%%edi, %%eax, 2) \n\t"
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1192 "addw %4, %%cx \n\t" //2*xalpha += s_xinc&0xFF
2232
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1193 "adcl %3, %%ebx \n\t" //xx+= s_xinc>>8 + carry
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1194
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1195
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1196 "addl $2, %%eax \n\t"
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1197 "cmpl %2, %%eax \n\t"
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1198 " jb 1b \n\t"
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1199
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1200
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1201 :: "r" (src), "m" (buf1), "m" (dstw), "m" (s_xinc>>16), "m" (s_xinc&0xFFFF)
2232
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1202 : "%eax", "%ebx", "%ecx", "%edi", "%esi"
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1203 );
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1204 #ifdef HAVE_MMX2
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1205 } //if MMX2 cant be used
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1206 #endif
2232
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1207 #else
2216
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
1208 for(i=0;i<dstw;i++){
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1209 register unsigned int xx=xpos>>16;
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1210 register unsigned int xalpha=(xpos&0xFFFF)>>9;
2232
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1211 buf1[i]=(src[xx]*(xalpha^127)+src[xx+1]*xalpha);
2216
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
1212 xpos+=s_xinc;
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
1213 }
2232
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1214 #endif
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1215 }
2216
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
1216 // *** horizontal scale U and V lines to temp buffer
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1217 if(s_last_y1pos!=y1){
2216
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
1218 unsigned char *src1=srcptr[1]+(y1-y/2)*stride[1];
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
1219 unsigned char *src2=srcptr[2]+(y1-y/2)*stride[2];
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1220 int xpos=0;
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1221 s_last_y1pos= y1;
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1222 #ifdef ARCH_X86
2232
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1223 #ifdef HAVE_MMX2
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1224 if(canMMX2BeUsed)
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1225 {
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1226 asm volatile(
2232
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1227 "pxor %%mm7, %%mm7 \n\t"
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1228 "pxor %%mm2, %%mm2 \n\t" // 2*xalpha
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1229 "movd %5, %%mm6 \n\t" // s_xinc&0xFFFF
2232
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1230 "punpcklwd %%mm6, %%mm6 \n\t"
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1231 "punpcklwd %%mm6, %%mm6 \n\t"
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1232 "movq %%mm6, %%mm2 \n\t"
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1233 "psllq $16, %%mm2 \n\t"
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1234 "paddw %%mm6, %%mm2 \n\t"
2232
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1235 "psllq $16, %%mm2 \n\t"
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1236 "paddw %%mm6, %%mm2 \n\t"
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1237 "psllq $16, %%mm2 \n\t" //0,t,2t,3t t=s_xinc&0xFFFF
2232
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1238 "movq %%mm2, temp0 \n\t"
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1239 "movd %4, %%mm6 \n\t" //(s_xinc*4)&0xFFFF
2232
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1240 "punpcklwd %%mm6, %%mm6 \n\t"
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1241 "punpcklwd %%mm6, %%mm6 \n\t"
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1242 "xorl %%eax, %%eax \n\t" // i
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1243 "movl %0, %%esi \n\t" // src
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1244 "movl %1, %%edi \n\t" // buf1
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1245 "movl %3, %%edx \n\t" // (s_xinc*4)>>16
2232
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1246 "xorl %%ecx, %%ecx \n\t"
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1247 "xorl %%ebx, %%ebx \n\t"
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1248 "movw %4, %%bx \n\t" // (s_xinc*4)&0xFFFF
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1249
2316
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
1250 #ifdef HAVE_MMX2
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1251 #define FUNNYUVCODE \
2316
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
1252 "prefetchnta 1024(%%esi) \n\t"\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
1253 "prefetchnta 1056(%%esi) \n\t"\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
1254 "prefetchnta 1088(%%esi) \n\t"\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
1255 "call funnyUVCode \n\t"\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
1256 "movq temp0, %%mm2 \n\t"\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
1257 "xorl %%ecx, %%ecx \n\t"
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
1258 #else
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
1259 #define FUNNYUVCODE \
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
1260 "call funnyUVCode \n\t"\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
1261 "movq temp0, %%mm2 \n\t"\
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
1262 "xorl %%ecx, %%ecx \n\t"
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
1263 #endif
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1265 FUNNYUVCODE
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1266 FUNNYUVCODE
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1267 FUNNYUVCODE
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1268 FUNNYUVCODE
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1269
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1270 FUNNYUVCODE
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1271 FUNNYUVCODE
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1272 FUNNYUVCODE
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1273 FUNNYUVCODE
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1274
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1275
2232
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1276 "xorl %%eax, %%eax \n\t" // i
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1277 "movl %6, %%esi \n\t" // src
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1278 "movl %1, %%edi \n\t" // buf1
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1279 "addl $4096, %%edi \n\t"
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1280
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1281 FUNNYUVCODE
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1282 FUNNYUVCODE
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1283 FUNNYUVCODE
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1284 FUNNYUVCODE
2232
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1285
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1286 FUNNYUVCODE
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1287 FUNNYUVCODE
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1288 FUNNYUVCODE
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1289 FUNNYUVCODE
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1290
2316
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
1291 :: "m" (src1), "m" (uvbuf1), "m" (dstUVw), "m" ((s_xinc2*4)>>16),
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1292 "m" ((s_xinc2*4)&0xFFFF), "m" (s_xinc2&0xFFFF), "m" (src2)
2232
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1293 : "%eax", "%ebx", "%ecx", "%edx", "%esi", "%edi"
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1294 );
2316
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
1295 for(i=dstUVw-1; (i*s_xinc2)>>16 >=srcWidth/2-1; i--)
2279
9b9c3363abbe horizontal scaling bugs fixed, should be mostly bugfree now
michael
parents: 2274
diff changeset
1296 {
9b9c3363abbe horizontal scaling bugs fixed, should be mostly bugfree now
michael
parents: 2274
diff changeset
1297 uvbuf1[i] = src1[srcWidth/2-1]*128;
9b9c3363abbe horizontal scaling bugs fixed, should be mostly bugfree now
michael
parents: 2274
diff changeset
1298 uvbuf1[i+2048] = src2[srcWidth/2-1]*128;
9b9c3363abbe horizontal scaling bugs fixed, should be mostly bugfree now
michael
parents: 2274
diff changeset
1299 }
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1300 }
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1301 else
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1302 {
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1303 #endif
2232
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1304 asm volatile(
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1305 "xorl %%eax, %%eax \n\t" // i
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1306 "xorl %%ebx, %%ebx \n\t" // xx
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1307 "xorl %%ecx, %%ecx \n\t" // 2*xalpha
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1308 "1: \n\t"
2233
5d8a0bc932c4 fixed out of regs "bug"
michael
parents: 2232
diff changeset
1309 "movl %0, %%esi \n\t"
5d8a0bc932c4 fixed out of regs "bug"
michael
parents: 2232
diff changeset
1310 "movzbl (%%esi, %%ebx), %%edi \n\t" //src[xx]
5d8a0bc932c4 fixed out of regs "bug"
michael
parents: 2232
diff changeset
1311 "movzbl 1(%%esi, %%ebx), %%esi \n\t" //src[xx+1]
2232
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1312 "subl %%edi, %%esi \n\t" //src[xx+1] - src[xx]
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1313 "imull %%ecx, %%esi \n\t" //(src[xx+1] - src[xx])*2*xalpha
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1314 "shll $16, %%edi \n\t"
2232
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1315 "addl %%edi, %%esi \n\t" //src[xx+1]*2*xalpha + src[xx]*(1-2*xalpha)
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1316 "movl %1, %%edi \n\t"
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1317 "shrl $9, %%esi \n\t"
2232
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1318 "movw %%si, (%%edi, %%eax, 2) \n\t"
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1319
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1320 "movzbl (%5, %%ebx), %%edi \n\t" //src[xx]
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1321 "movzbl 1(%5, %%ebx), %%esi \n\t" //src[xx+1]
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1322 "subl %%edi, %%esi \n\t" //src[xx+1] - src[xx]
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1323 "imull %%ecx, %%esi \n\t" //(src[xx+1] - src[xx])*2*xalpha
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1324 "shll $16, %%edi \n\t"
2232
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1325 "addl %%edi, %%esi \n\t" //src[xx+1]*2*xalpha + src[xx]*(1-2*xalpha)
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1326 "movl %1, %%edi \n\t"
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1327 "shrl $9, %%esi \n\t"
2232
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1328 "movw %%si, 4096(%%edi, %%eax, 2)\n\t"
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1329
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1330 "addw %4, %%cx \n\t" //2*xalpha += s_xinc&0xFF
2232
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1331 "adcl %3, %%ebx \n\t" //xx+= s_xinc>>8 + carry
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1332 "addl $1, %%eax \n\t"
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1333 "cmpl %2, %%eax \n\t"
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1334 " jb 1b \n\t"
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1335
2316
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
1336 :: "m" (src1), "m" (uvbuf1), "m" (dstUVw), "m" (s_xinc2>>16), "m" (s_xinc2&0xFFFF),
2232
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1337 "r" (src2)
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1338 : "%eax", "%ebx", "%ecx", "%edi", "%esi"
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1339 );
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1340 #ifdef HAVE_MMX2
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1341 } //if MMX2 cant be used
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1342 #endif
2232
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1343 #else
2316
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
1344 for(i=0;i<dstUVw;i++){
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1345 register unsigned int xx=xpos>>16;
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1346 register unsigned int xalpha=(xpos&0xFFFF)>>9;
2232
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1347 uvbuf1[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha);
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1348 uvbuf1[i+2048]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha);
2216
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
1349 xpos+=s_xinc2;
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
1350 }
2232
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1351 #endif
2297
7a89cb124e81 first line bug fixed (happend if the image was scaled down or 1:1)
michael
parents: 2279
diff changeset
1352 // if this is the line before the first line
7a89cb124e81 first line bug fixed (happend if the image was scaled down or 1:1)
michael
parents: 2279
diff changeset
1353 if(s_srcypos == s_xinc - 0x8000)
7a89cb124e81 first line bug fixed (happend if the image was scaled down or 1:1)
michael
parents: 2279
diff changeset
1354 {
7a89cb124e81 first line bug fixed (happend if the image was scaled down or 1:1)
michael
parents: 2279
diff changeset
1355 s_srcypos= s_yinc/2 - 0x8000;
7a89cb124e81 first line bug fixed (happend if the image was scaled down or 1:1)
michael
parents: 2279
diff changeset
1356 continue;
7a89cb124e81 first line bug fixed (happend if the image was scaled down or 1:1)
michael
parents: 2279
diff changeset
1357 }
2216
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
1358 }
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
1359
2316
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
1360 if(ABS(s_yinc - 0x10000) < 10)
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
1361 yuv2rgb1(buf0, buf1, uvbuf0, uvbuf1, dest, dstw, yalpha, uvalpha, dstbpp);
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
1362 else
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
1363 yuv2rgbX(buf0, buf1, uvbuf0, uvbuf1, dest, dstw, yalpha, uvalpha, dstbpp);
2237
00a46cd41edd 24bpp support (untested)
michael
parents: 2236
diff changeset
1364
2316
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
1365 #ifdef HAVE_MMX
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
1366 b16Dither= b16Dither1;
2232
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1367 b16Dither1= b16Dither2;
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1368 b16Dither2= b16Dither;
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1369
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1370 g16Dither= g16Dither1;
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1371 g16Dither1= g16Dither2;
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
1372 g16Dither2= g16Dither;
2316
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
1373 #endif
2216
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
1374 }
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
1375
2236
32027d53eacb added (f)emms
arpi
parents: 2235
diff changeset
1376 #ifdef HAVE_3DNOW
32027d53eacb added (f)emms
arpi
parents: 2235
diff changeset
1377 asm volatile("femms");
32027d53eacb added (f)emms
arpi
parents: 2235
diff changeset
1378 #elif defined (HAVE_MMX)
32027d53eacb added (f)emms
arpi
parents: 2235
diff changeset
1379 asm volatile("emms");
32027d53eacb added (f)emms
arpi
parents: 2235
diff changeset
1380 #endif
2216
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
1381 }
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
1382
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
1383
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
1384 void SwScale_Init(){
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
1385 // generating tables:
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
1386 int i;
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
1387 for(i=0;i<256;i++){
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
1388 clip_table[i]=0;
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
1389 clip_table[i+256]=i;
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
1390 clip_table[i+512]=255;
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
1391 yuvtab_2568[i]=(0x2568*(i-16))+(256<<13);
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
1392 yuvtab_3343[i]=0x3343*(i-128);
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
1393 yuvtab_0c92[i]=-0x0c92*(i-128);
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
1394 yuvtab_1a1e[i]=-0x1a1e*(i-128);
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
1395 yuvtab_40cf[i]=0x40cf*(i-128);
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
1396 }
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
1397
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
1398 }