Mercurial > mplayer.hg
annotate postproc/swscale_template.c @ 3319:66134af21278
fixed to check that SNDCTL_DSP_CHANNELS actually grants the requested number of channels
author | steve |
---|---|
date | Tue, 04 Dec 2001 17:54:08 +0000 |
parents | 5f4cf3b52d60 |
children | e87c59969d17 |
rev | line source |
---|---|
2216 | 1 |
2 // Software scaling and colorspace conversion routines for MPlayer | |
3 | |
2269 | 4 // Original C implementation by A'rpi/ESP-team <arpi@thot.banki.hu> |
2264
7851375ea156
increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents:
2237
diff
changeset
|
5 // current version mostly by Michael Niedermayer (michaelni@gmx.at) |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
6 // the parts written by michael are under GNU GPL |
2264
7851375ea156
increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents:
2237
diff
changeset
|
7 |
3272 | 8 /* TODO |
9 Move static / global vars into a struct so multiple scalers can be used | |
10 write vertical cubic upscale / linear downscale stuff | |
11 */ | |
12 | |
2540 | 13 #undef MOVNTQ |
2680 | 14 #undef PAVGB |
3136 | 15 #undef PREFETCH |
16 #undef PREFETCHW | |
17 #undef EMMS | |
18 #undef SFENCE | |
19 | |
20 #ifdef HAVE_3DNOW | |
21 /* On K6 femms is faster than emms. On K7 femms is directly mapped to emms. */ | |
22 #define EMMS "femms" | |
23 #else | |
24 #define EMMS "emms" | |
25 #endif | |
26 | |
27 #ifdef HAVE_3DNOW | |
28 #define PREFETCH "prefetch" | |
29 #define PREFETCHW "prefetchw" | |
30 #elif defined ( HAVE_MMX2 ) | |
31 #define PREFETCH "prefetchnta" | |
32 #define PREFETCHW "prefetcht0" | |
33 #else | |
34 #define PREFETCH "/nop" | |
35 #define PREFETCHW "/nop" | |
36 #endif | |
37 | |
38 #ifdef HAVE_MMX2 | |
39 #define SFENCE "sfence" | |
40 #else | |
41 #define SFENCE "/nop" | |
42 #endif | |
2232
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
43 |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
44 #ifdef HAVE_MMX2 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
45 #define PAVGB(a,b) "pavgb " #a ", " #b " \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
46 #elif defined (HAVE_3DNOW) |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
47 #define PAVGB(a,b) "pavgusb " #a ", " #b " \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
48 #endif |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
49 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
50 #ifdef HAVE_MMX2 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
51 #define MOVNTQ(a,b) "movntq " #a ", " #b " \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
52 #else |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
53 #define MOVNTQ(a,b) "movq " #a ", " #b " \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
54 #endif |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
55 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
56 #define FULL_YSCALEYUV2RGB \ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
57 "pxor %%mm7, %%mm7 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
58 "movd %6, %%mm6 \n\t" /*yalpha1*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
59 "punpcklwd %%mm6, %%mm6 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
60 "punpcklwd %%mm6, %%mm6 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
61 "movd %7, %%mm5 \n\t" /*uvalpha1*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
62 "punpcklwd %%mm5, %%mm5 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
63 "punpcklwd %%mm5, %%mm5 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
64 "xorl %%eax, %%eax \n\t"\ |
2800
7847d6b7ad3d
.balign or we'll align by 64kb on some architectures
michael
parents:
2799
diff
changeset
|
65 ".balign 16 \n\t"\ |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
66 "1: \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
67 "movq (%0, %%eax, 2), %%mm0 \n\t" /*buf0[eax]*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
68 "movq (%1, %%eax, 2), %%mm1 \n\t" /*buf1[eax]*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
69 "movq (%2, %%eax,2), %%mm2 \n\t" /* uvbuf0[eax]*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
70 "movq (%3, %%eax,2), %%mm3 \n\t" /* uvbuf1[eax]*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
71 "psubw %%mm1, %%mm0 \n\t" /* buf0[eax] - buf1[eax]*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
72 "psubw %%mm3, %%mm2 \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
73 "pmulhw %%mm6, %%mm0 \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
74 "pmulhw %%mm5, %%mm2 \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
75 "psraw $4, %%mm1 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
76 "movq 4096(%2, %%eax,2), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
77 "psraw $4, %%mm3 \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
78 "paddw %%mm0, %%mm1 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
79 "movq 4096(%3, %%eax,2), %%mm0 \n\t" /* uvbuf1[eax+2048]*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
80 "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax]uvalpha1 - uvbuf1[eax](1-uvalpha1)*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
81 "psubw %%mm0, %%mm4 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
82 "psubw w80, %%mm1 \n\t" /* 8(Y-16)*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
83 "psubw w400, %%mm3 \n\t" /* 8(U-128)*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
84 "pmulhw yCoeff, %%mm1 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
85 \ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
86 \ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
87 "pmulhw %%mm5, %%mm4 \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
88 "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
89 "pmulhw ubCoeff, %%mm3 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
90 "psraw $4, %%mm0 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
91 "pmulhw ugCoeff, %%mm2 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
92 "paddw %%mm4, %%mm0 \n\t" /* uvbuf0[eax+2048]uvalpha1 - uvbuf1[eax+2048](1-uvalpha1)*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
93 "psubw w400, %%mm0 \n\t" /* (V-128)8*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
94 \ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
95 \ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
96 "movq %%mm0, %%mm4 \n\t" /* (V-128)8*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
97 "pmulhw vrCoeff, %%mm0 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
98 "pmulhw vgCoeff, %%mm4 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
99 "paddw %%mm1, %%mm3 \n\t" /* B*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
100 "paddw %%mm1, %%mm0 \n\t" /* R*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
101 "packuswb %%mm3, %%mm3 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
102 \ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
103 "packuswb %%mm0, %%mm0 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
104 "paddw %%mm4, %%mm2 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
105 "paddw %%mm2, %%mm1 \n\t" /* G*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
106 \ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
107 "packuswb %%mm1, %%mm1 \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
108 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
109 #define YSCALEYUV2RGB \ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
110 "movd %6, %%mm6 \n\t" /*yalpha1*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
111 "punpcklwd %%mm6, %%mm6 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
112 "punpcklwd %%mm6, %%mm6 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
113 "movq %%mm6, asm_yalpha1 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
114 "movd %7, %%mm5 \n\t" /*uvalpha1*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
115 "punpcklwd %%mm5, %%mm5 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
116 "punpcklwd %%mm5, %%mm5 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
117 "movq %%mm5, asm_uvalpha1 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
118 "xorl %%eax, %%eax \n\t"\ |
2800
7847d6b7ad3d
.balign or we'll align by 64kb on some architectures
michael
parents:
2799
diff
changeset
|
119 ".balign 16 \n\t"\ |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
120 "1: \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
121 "movq (%2, %%eax), %%mm2 \n\t" /* uvbuf0[eax]*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
122 "movq (%3, %%eax), %%mm3 \n\t" /* uvbuf1[eax]*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
123 "movq 4096(%2, %%eax), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
124 "movq 4096(%3, %%eax), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
125 "psubw %%mm3, %%mm2 \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
126 "psubw %%mm4, %%mm5 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
127 "movq asm_uvalpha1, %%mm0 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
128 "pmulhw %%mm0, %%mm2 \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
129 "pmulhw %%mm0, %%mm5 \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
130 "psraw $4, %%mm3 \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
131 "psraw $4, %%mm4 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
132 "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax]uvalpha1 - uvbuf1[eax](1-uvalpha1)*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
133 "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048]uvalpha1 - uvbuf1[eax+2048](1-uvalpha1)*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
134 "psubw w400, %%mm3 \n\t" /* (U-128)8*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
135 "psubw w400, %%mm4 \n\t" /* (V-128)8*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
136 "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
137 "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
138 "pmulhw ugCoeff, %%mm3 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
139 "pmulhw vgCoeff, %%mm4 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
140 /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
141 "movq (%0, %%eax, 2), %%mm0 \n\t" /*buf0[eax]*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
142 "movq (%1, %%eax, 2), %%mm1 \n\t" /*buf1[eax]*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
143 "movq 8(%0, %%eax, 2), %%mm6 \n\t" /*buf0[eax]*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
144 "movq 8(%1, %%eax, 2), %%mm7 \n\t" /*buf1[eax]*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
145 "psubw %%mm1, %%mm0 \n\t" /* buf0[eax] - buf1[eax]*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
146 "psubw %%mm7, %%mm6 \n\t" /* buf0[eax] - buf1[eax]*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
147 "pmulhw asm_yalpha1, %%mm0 \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
148 "pmulhw asm_yalpha1, %%mm6 \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
149 "psraw $4, %%mm1 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
150 "psraw $4, %%mm7 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
151 "paddw %%mm0, %%mm1 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
152 "paddw %%mm6, %%mm7 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
153 "pmulhw ubCoeff, %%mm2 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
154 "pmulhw vrCoeff, %%mm5 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
155 "psubw w80, %%mm1 \n\t" /* 8(Y-16)*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
156 "psubw w80, %%mm7 \n\t" /* 8(Y-16)*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
157 "pmulhw yCoeff, %%mm1 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
158 "pmulhw yCoeff, %%mm7 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
159 /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
160 "paddw %%mm3, %%mm4 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
161 "movq %%mm2, %%mm0 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
162 "movq %%mm5, %%mm6 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
163 "movq %%mm4, %%mm3 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
164 "punpcklwd %%mm2, %%mm2 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
165 "punpcklwd %%mm5, %%mm5 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
166 "punpcklwd %%mm4, %%mm4 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
167 "paddw %%mm1, %%mm2 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
168 "paddw %%mm1, %%mm5 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
169 "paddw %%mm1, %%mm4 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
170 "punpckhwd %%mm0, %%mm0 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
171 "punpckhwd %%mm6, %%mm6 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
172 "punpckhwd %%mm3, %%mm3 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
173 "paddw %%mm7, %%mm0 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
174 "paddw %%mm7, %%mm6 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
175 "paddw %%mm7, %%mm3 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
176 /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
177 "packuswb %%mm0, %%mm2 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
178 "packuswb %%mm6, %%mm5 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
179 "packuswb %%mm3, %%mm4 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
180 "pxor %%mm7, %%mm7 \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
181 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
182 #define YSCALEYUV2RGB1 \ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
183 "xorl %%eax, %%eax \n\t"\ |
2800
7847d6b7ad3d
.balign or we'll align by 64kb on some architectures
michael
parents:
2799
diff
changeset
|
184 ".balign 16 \n\t"\ |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
185 "1: \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
186 "movq (%2, %%eax), %%mm3 \n\t" /* uvbuf0[eax]*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
187 "movq 4096(%2, %%eax), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
188 "psraw $4, %%mm3 \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
189 "psraw $4, %%mm4 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
190 "psubw w400, %%mm3 \n\t" /* (U-128)8*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
191 "psubw w400, %%mm4 \n\t" /* (V-128)8*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
192 "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
193 "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
194 "pmulhw ugCoeff, %%mm3 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
195 "pmulhw vgCoeff, %%mm4 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
196 /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\ |
2569
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
197 "movq (%0, %%eax, 2), %%mm1 \n\t" /*buf0[eax]*/\ |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
198 "movq 8(%0, %%eax, 2), %%mm7 \n\t" /*buf0[eax]*/\ |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
199 "psraw $4, %%mm1 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
200 "psraw $4, %%mm7 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
201 "pmulhw ubCoeff, %%mm2 \n\t"\ |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
202 "pmulhw vrCoeff, %%mm5 \n\t"\ |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
203 "psubw w80, %%mm1 \n\t" /* 8(Y-16)*/\ |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
204 "psubw w80, %%mm7 \n\t" /* 8(Y-16)*/\ |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
205 "pmulhw yCoeff, %%mm1 \n\t"\ |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
206 "pmulhw yCoeff, %%mm7 \n\t"\ |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
207 /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\ |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
208 "paddw %%mm3, %%mm4 \n\t"\ |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
209 "movq %%mm2, %%mm0 \n\t"\ |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
210 "movq %%mm5, %%mm6 \n\t"\ |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
211 "movq %%mm4, %%mm3 \n\t"\ |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
212 "punpcklwd %%mm2, %%mm2 \n\t"\ |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
213 "punpcklwd %%mm5, %%mm5 \n\t"\ |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
214 "punpcklwd %%mm4, %%mm4 \n\t"\ |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
215 "paddw %%mm1, %%mm2 \n\t"\ |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
216 "paddw %%mm1, %%mm5 \n\t"\ |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
217 "paddw %%mm1, %%mm4 \n\t"\ |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
218 "punpckhwd %%mm0, %%mm0 \n\t"\ |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
219 "punpckhwd %%mm6, %%mm6 \n\t"\ |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
220 "punpckhwd %%mm3, %%mm3 \n\t"\ |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
221 "paddw %%mm7, %%mm0 \n\t"\ |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
222 "paddw %%mm7, %%mm6 \n\t"\ |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
223 "paddw %%mm7, %%mm3 \n\t"\ |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
224 /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\ |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
225 "packuswb %%mm0, %%mm2 \n\t"\ |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
226 "packuswb %%mm6, %%mm5 \n\t"\ |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
227 "packuswb %%mm3, %%mm4 \n\t"\ |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
228 "pxor %%mm7, %%mm7 \n\t" |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
229 |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
/*
 * YSCALEYUV2RGB1b: loop head for the 1:1 vertical scale that also does
 * vertical chrominance interpolation.
 *
 * Expands to asm text that
 *   - zeroes eax and opens the 16-byte aligned local label "1:"; the loop is
 *     NOT closed here, a WRITEBGR* macro must follow (they end in "jb 1b"),
 *   - adds the chroma words read through %2 and %3 (U at offset 0, V at
 *     +4096 bytes) and shifts the sum right by 5 (logical shift),
 *   - reads two quadwords of luma through %0 and shifts them right by 4,
 *   - applies the w400 / w80 offsets and the ug/vg/ub/vr/y coefficients
 *     (memory constants defined elsewhere in this file),
 *   - leaves unsigned-saturated packed bytes in mm2 (B), mm4 (G), mm5 (R)
 *     and mm7 = 0, which is the input contract stated by the WRITEBGR* macros.
 *
 * The operand numbers (%0, %2, %3) are bound by the enclosing asm statement,
 * which is not part of this macro; the buf0 / uvbuf0 / uvbuf1 names in the
 * inline comments follow the existing annotations.
 * Clobbers eax and mm0-mm7.
 */
#define YSCALEYUV2RGB1b \
		"xorl %%eax, %%eax \n\t"\
		".balign 16 \n\t"\
		"1: \n\t"\
		"movq (%2, %%eax), %%mm2 \n\t" /* uvbuf0[eax]*/\
		"movq (%3, %%eax), %%mm3 \n\t" /* uvbuf1[eax]*/\
		"movq 4096(%2, %%eax), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\
		"movq 4096(%3, %%eax), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\
		"paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\
		"paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/\
		"psrlw $5, %%mm3 \n\t"\
		"psrlw $5, %%mm4 \n\t"\
		"psubw w400, %%mm3 \n\t" /* (U-128)8*/\
		"psubw w400, %%mm4 \n\t" /* (V-128)8*/\
		"movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\
		"movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\
		"pmulhw ugCoeff, %%mm3 \n\t"\
		"pmulhw vgCoeff, %%mm4 \n\t"\
	/* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
		"movq (%0, %%eax, 2), %%mm1 \n\t" /* buf0[eax]*/\
		"movq 8(%0, %%eax, 2), %%mm7 \n\t" /* buf0[eax+4]*/\
		"psraw $4, %%mm1 \n\t" /* buf0[eax] >>4*/\
		"psraw $4, %%mm7 \n\t" /* buf0[eax+4] >>4*/\
		"pmulhw ubCoeff, %%mm2 \n\t"\
		"pmulhw vrCoeff, %%mm5 \n\t"\
		"psubw w80, %%mm1 \n\t" /* 8(Y-16)*/\
		"psubw w80, %%mm7 \n\t" /* 8(Y-16)*/\
		"pmulhw yCoeff, %%mm1 \n\t"\
		"pmulhw yCoeff, %%mm7 \n\t"\
	/* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
		"paddw %%mm3, %%mm4 \n\t"\
		"movq %%mm2, %%mm0 \n\t"\
		"movq %%mm5, %%mm6 \n\t"\
		"movq %%mm4, %%mm3 \n\t"\
		"punpcklwd %%mm2, %%mm2 \n\t"\
		"punpcklwd %%mm5, %%mm5 \n\t"\
		"punpcklwd %%mm4, %%mm4 \n\t"\
		"paddw %%mm1, %%mm2 \n\t"\
		"paddw %%mm1, %%mm5 \n\t"\
		"paddw %%mm1, %%mm4 \n\t"\
		"punpckhwd %%mm0, %%mm0 \n\t"\
		"punpckhwd %%mm6, %%mm6 \n\t"\
		"punpckhwd %%mm3, %%mm3 \n\t"\
		"paddw %%mm7, %%mm0 \n\t"\
		"paddw %%mm7, %%mm6 \n\t"\
		"paddw %%mm7, %%mm3 \n\t"\
		/* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\
		"packuswb %%mm0, %%mm2 \n\t"\
		"packuswb %%mm6, %%mm5 \n\t"\
		"packuswb %%mm3, %%mm4 \n\t"\
		"pxor %%mm7, %%mm7 \n\t"

/*
 * WRITEBGR32: loop tail that stores 8 pixels as 32-bit values and closes the
 * loop opened at label "1:" by one of the YSCALEYUV2RGB* macros.
 *
 * In:  mm2 = 8 packed B bytes, mm4 = G, mm5 = R, mm7 = 0.
 * Out: four quadwords written through MOVNTQ at (%4, eax, 4) + 0/8/16/24;
 *      per the byte-lane comments each pixel is laid out 0RGB
 *      (most significant byte first).
 * Then eax += 8 and the loop repeats while eax is below %5 (unsigned compare).
 *
 * %4 and %5 are bound by the enclosing asm statement and MOVNTQ is defined
 * elsewhere in this file. Clobbers eax and mm0-mm3, mm5, mm6.
 */
#define WRITEBGR32 \
		/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
		"movq %%mm2, %%mm1 \n\t" /* B */\
		"movq %%mm5, %%mm6 \n\t" /* R */\
		"punpcklbw %%mm4, %%mm2 \n\t" /* GBGBGBGB 0 */\
		"punpcklbw %%mm7, %%mm5 \n\t" /* 0R0R0R0R 0 */\
		"punpckhbw %%mm4, %%mm1 \n\t" /* GBGBGBGB 2 */\
		"punpckhbw %%mm7, %%mm6 \n\t" /* 0R0R0R0R 2 */\
		"movq %%mm2, %%mm0 \n\t" /* GBGBGBGB 0 */\
		"movq %%mm1, %%mm3 \n\t" /* GBGBGBGB 2 */\
		"punpcklwd %%mm5, %%mm0 \n\t" /* 0RGB0RGB 0 */\
		"punpckhwd %%mm5, %%mm2 \n\t" /* 0RGB0RGB 1 */\
		"punpcklwd %%mm6, %%mm1 \n\t" /* 0RGB0RGB 2 */\
		"punpckhwd %%mm6, %%mm3 \n\t" /* 0RGB0RGB 3 */\
\
		MOVNTQ(%%mm0, (%4, %%eax, 4))\
		MOVNTQ(%%mm2, 8(%4, %%eax, 4))\
		MOVNTQ(%%mm1, 16(%4, %%eax, 4))\
		MOVNTQ(%%mm3, 24(%4, %%eax, 4))\
\
		"addl $8, %%eax \n\t"\
		"cmpl %5, %%eax \n\t"\
		" jb 1b \n\t"

/*
 * WRITEBGR16: loop tail that packs 8 pixels into 16-bit words and closes the
 * loop opened at label "1:".
 *
 * In:  mm2 = 8 packed B bytes, mm4 = G, mm5 = R, mm7 = 0.
 * Steps: each channel is masked (bF8 for B and R, bFC for G; the masks are
 * memory constants defined elsewhere, presumably 0xF8 / 0xFC per byte),
 * B is shifted right by 3, R is interleaved into the high byte of each word,
 * and G (widened to words) is shifted left by 3 and OR-ed in.
 * Out: two quadwords written through MOVNTQ at (%4, eax, 2) + 0/8.
 * Then eax += 8 and the loop repeats while eax is below %5 (unsigned compare).
 *
 * Clobbers eax and mm1-mm5.
 */
#define WRITEBGR16 \
		"pand bF8, %%mm2 \n\t" /* B */\
		"pand bFC, %%mm4 \n\t" /* G */\
		"pand bF8, %%mm5 \n\t" /* R */\
		"psrlq $3, %%mm2 \n\t"\
\
		"movq %%mm2, %%mm1 \n\t"\
		"movq %%mm4, %%mm3 \n\t"\
\
		"punpcklbw %%mm7, %%mm3 \n\t"\
		"punpcklbw %%mm5, %%mm2 \n\t"\
		"punpckhbw %%mm7, %%mm4 \n\t"\
		"punpckhbw %%mm5, %%mm1 \n\t"\
\
		"psllq $3, %%mm3 \n\t"\
		"psllq $3, %%mm4 \n\t"\
\
		"por %%mm3, %%mm2 \n\t"\
		"por %%mm4, %%mm1 \n\t"\
\
		MOVNTQ(%%mm2, (%4, %%eax, 2))\
		MOVNTQ(%%mm1, 8(%4, %%eax, 2))\
\
		"addl $8, %%eax \n\t"\
		"cmpl %5, %%eax \n\t"\
		" jb 1b \n\t"

/*
 * WRITEBGR15: loop tail that packs 8 pixels into 15-bit colour stored in
 * 16-bit words and closes the loop opened at label "1:".
 *
 * In:  mm2 = 8 packed B bytes, mm4 = G, mm5 = R, mm7 = 0.
 * Steps: all three channels are masked with bF8 (memory constant defined
 * elsewhere, presumably 0xF8 per byte), B is shifted right by 3 and R right
 * by 1, R is interleaved into the high byte of each word, and G (widened to
 * words) is shifted left by 2 and OR-ed in.
 * Out: two quadwords written through MOVNTQ at (%4, eax, 2) + 0/8.
 * Then eax += 8 and the loop repeats while eax is below %5 (unsigned compare).
 *
 * Clobbers eax and mm1-mm5.
 */
#define WRITEBGR15 \
		"pand bF8, %%mm2 \n\t" /* B */\
		"pand bF8, %%mm4 \n\t" /* G */\
		"pand bF8, %%mm5 \n\t" /* R */\
		"psrlq $3, %%mm2 \n\t"\
		"psrlq $1, %%mm5 \n\t"\
\
		"movq %%mm2, %%mm1 \n\t"\
		"movq %%mm4, %%mm3 \n\t"\
\
		"punpcklbw %%mm7, %%mm3 \n\t"\
		"punpcklbw %%mm5, %%mm2 \n\t"\
		"punpckhbw %%mm7, %%mm4 \n\t"\
		"punpckhbw %%mm5, %%mm1 \n\t"\
\
		"psllq $2, %%mm3 \n\t"\
		"psllq $2, %%mm4 \n\t"\
\
		"por %%mm3, %%mm2 \n\t"\
		"por %%mm4, %%mm1 \n\t"\
\
		MOVNTQ(%%mm2, (%4, %%eax, 2))\
		MOVNTQ(%%mm1, 8(%4, %%eax, 2))\
\
		"addl $8, %%eax \n\t"\
		"cmpl %5, %%eax \n\t"\
		" jb 1b \n\t"

/*
 * WRITEBGR24OLD: earlier 24-bit store variant (plain MMX, mask based).
 * WRITEBGR24 is mapped to WRITEBGR24MMX or WRITEBGR24MMX2 further down,
 * never to this macro.
 *
 * In:  mm2 = 8 packed B bytes, mm4 = G, mm5 = R, mm7 = 0.
 * Steps: builds four 0RGB0RGB quadwords as in WRITEBGR32, squeezes out the
 * zero bytes with shifts and the bm00000111 / bm11111000 / bm00001111 masks
 * (memory constants defined elsewhere), and splices the results into three
 * quadwords (24 bytes = 8 pixels).
 * Out: three quadwords written through MOVNTQ at (ebx) + 0/8/16, then
 * ebx += 24, eax += 8, and the loop repeats while eax is below %5
 * (unsigned compare).
 *
 * ebx is used as the running destination pointer and must be set up by the
 * enclosing asm statement (not visible in this macro).
 * Clobbers eax, ebx and mm0-mm6.
 */
#define WRITEBGR24OLD \
		/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
		"movq %%mm2, %%mm1 \n\t" /* B */\
		"movq %%mm5, %%mm6 \n\t" /* R */\
		"punpcklbw %%mm4, %%mm2 \n\t" /* GBGBGBGB 0 */\
		"punpcklbw %%mm7, %%mm5 \n\t" /* 0R0R0R0R 0 */\
		"punpckhbw %%mm4, %%mm1 \n\t" /* GBGBGBGB 2 */\
		"punpckhbw %%mm7, %%mm6 \n\t" /* 0R0R0R0R 2 */\
		"movq %%mm2, %%mm0 \n\t" /* GBGBGBGB 0 */\
		"movq %%mm1, %%mm3 \n\t" /* GBGBGBGB 2 */\
		"punpcklwd %%mm5, %%mm0 \n\t" /* 0RGB0RGB 0 */\
		"punpckhwd %%mm5, %%mm2 \n\t" /* 0RGB0RGB 1 */\
		"punpcklwd %%mm6, %%mm1 \n\t" /* 0RGB0RGB 2 */\
		"punpckhwd %%mm6, %%mm3 \n\t" /* 0RGB0RGB 3 */\
\
		"movq %%mm0, %%mm4 \n\t" /* 0RGB0RGB 0 */\
		"psrlq $8, %%mm0 \n\t" /* 00RGB0RG 0 */\
		"pand bm00000111, %%mm4 \n\t" /* 00000RGB 0 */\
		"pand bm11111000, %%mm0 \n\t" /* 00RGB000 0.5 */\
		"por %%mm4, %%mm0 \n\t" /* 00RGBRGB 0 */\
		"movq %%mm2, %%mm4 \n\t" /* 0RGB0RGB 1 */\
		"psllq $48, %%mm2 \n\t" /* GB000000 1 */\
		"por %%mm2, %%mm0 \n\t" /* GBRGBRGB 0 */\
\
		"movq %%mm4, %%mm2 \n\t" /* 0RGB0RGB 1 */\
		"psrld $16, %%mm4 \n\t" /* 000R000R 1 */\
		"psrlq $24, %%mm2 \n\t" /* 0000RGB0 1.5 */\
		"por %%mm4, %%mm2 \n\t" /* 000RRGBR 1 */\
		"pand bm00001111, %%mm2 \n\t" /* 0000RGBR 1 */\
		"movq %%mm1, %%mm4 \n\t" /* 0RGB0RGB 2 */\
		"psrlq $8, %%mm1 \n\t" /* 00RGB0RG 2 */\
		"pand bm00000111, %%mm4 \n\t" /* 00000RGB 2 */\
		"pand bm11111000, %%mm1 \n\t" /* 00RGB000 2.5 */\
		"por %%mm4, %%mm1 \n\t" /* 00RGBRGB 2 */\
		"movq %%mm1, %%mm4 \n\t" /* 00RGBRGB 2 */\
		"psllq $32, %%mm1 \n\t" /* BRGB0000 2 */\
		"por %%mm1, %%mm2 \n\t" /* BRGBRGBR 1 */\
\
		"psrlq $32, %%mm4 \n\t" /* 000000RG 2.5 */\
		"movq %%mm3, %%mm5 \n\t" /* 0RGB0RGB 3 */\
		"psrlq $8, %%mm3 \n\t" /* 00RGB0RG 3 */\
		"pand bm00000111, %%mm5 \n\t" /* 00000RGB 3 */\
		"pand bm11111000, %%mm3 \n\t" /* 00RGB000 3.5 */\
		"por %%mm5, %%mm3 \n\t" /* 00RGBRGB 3 */\
		"psllq $16, %%mm3 \n\t" /* RGBRGB00 3 */\
		"por %%mm4, %%mm3 \n\t" /* RGBRGBRG 2.5 */\
\
		MOVNTQ(%%mm0, (%%ebx))\
		MOVNTQ(%%mm2, 8(%%ebx))\
		MOVNTQ(%%mm3, 16(%%ebx))\
		"addl $24, %%ebx \n\t"\
\
		"addl $8, %%eax \n\t"\
		"cmpl %5, %%eax \n\t"\
		" jb 1b \n\t"

/*
 * WRITEBGR24MMX: 24-bit store using plain MMX shifts and unpacks only
 * (no mask constants); selected as WRITEBGR24 when HAVE_MMX2 is not defined.
 *
 * In:  mm2 = 8 packed B bytes, mm4 = G, mm5 = R, mm7 = 0.
 * Steps: builds four 0RGB0RGB quadwords, turns each into 0RGBRGB0 with
 * psllq $40 + punpckhdq, then shifts and ORs neighbouring quadwords together
 * into three fully packed quadwords (24 bytes = 8 pixels).
 * Out: three quadwords written through MOVNTQ at (ebx) + 0/8/16, then
 * ebx += 24, eax += 8, and the loop repeats while eax is below %5
 * (unsigned compare).
 *
 * ebx is used as the running destination pointer and must be set up by the
 * enclosing asm statement (not visible in this macro).
 * Clobbers eax, ebx and mm0-mm7 (mm7 no longer holds zero afterwards).
 */
#define WRITEBGR24MMX \
		/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
		"movq %%mm2, %%mm1 \n\t" /* B */\
		"movq %%mm5, %%mm6 \n\t" /* R */\
		"punpcklbw %%mm4, %%mm2 \n\t" /* GBGBGBGB 0 */\
		"punpcklbw %%mm7, %%mm5 \n\t" /* 0R0R0R0R 0 */\
		"punpckhbw %%mm4, %%mm1 \n\t" /* GBGBGBGB 2 */\
		"punpckhbw %%mm7, %%mm6 \n\t" /* 0R0R0R0R 2 */\
		"movq %%mm2, %%mm0 \n\t" /* GBGBGBGB 0 */\
		"movq %%mm1, %%mm3 \n\t" /* GBGBGBGB 2 */\
		"punpcklwd %%mm5, %%mm0 \n\t" /* 0RGB0RGB 0 */\
		"punpckhwd %%mm5, %%mm2 \n\t" /* 0RGB0RGB 1 */\
		"punpcklwd %%mm6, %%mm1 \n\t" /* 0RGB0RGB 2 */\
		"punpckhwd %%mm6, %%mm3 \n\t" /* 0RGB0RGB 3 */\
\
		"movq %%mm0, %%mm4 \n\t" /* 0RGB0RGB 0 */\
		"movq %%mm2, %%mm6 \n\t" /* 0RGB0RGB 1 */\
		"movq %%mm1, %%mm5 \n\t" /* 0RGB0RGB 2 */\
		"movq %%mm3, %%mm7 \n\t" /* 0RGB0RGB 3 */\
\
		"psllq $40, %%mm0 \n\t" /* RGB00000 0 */\
		"psllq $40, %%mm2 \n\t" /* RGB00000 1 */\
		"psllq $40, %%mm1 \n\t" /* RGB00000 2 */\
		"psllq $40, %%mm3 \n\t" /* RGB00000 3 */\
\
		"punpckhdq %%mm4, %%mm0 \n\t" /* 0RGBRGB0 0 */\
		"punpckhdq %%mm6, %%mm2 \n\t" /* 0RGBRGB0 1 */\
		"punpckhdq %%mm5, %%mm1 \n\t" /* 0RGBRGB0 2 */\
		"punpckhdq %%mm7, %%mm3 \n\t" /* 0RGBRGB0 3 */\
\
		"psrlq $8, %%mm0 \n\t" /* 00RGBRGB 0 */\
		"movq %%mm2, %%mm6 \n\t" /* 0RGBRGB0 1 */\
		"psllq $40, %%mm2 \n\t" /* GB000000 1 */\
		"por %%mm2, %%mm0 \n\t" /* GBRGBRGB 0 */\
		MOVNTQ(%%mm0, (%%ebx))\
\
		"psrlq $24, %%mm6 \n\t" /* 0000RGBR 1 */\
		"movq %%mm1, %%mm5 \n\t" /* 0RGBRGB0 2 */\
		"psllq $24, %%mm1 \n\t" /* BRGB0000 2 */\
		"por %%mm1, %%mm6 \n\t" /* BRGBRGBR 1 */\
		MOVNTQ(%%mm6, 8(%%ebx))\
\
		"psrlq $40, %%mm5 \n\t" /* 000000RG 2 */\
		"psllq $8, %%mm3 \n\t" /* RGBRGB00 3 */\
		"por %%mm3, %%mm5 \n\t" /* RGBRGBRG 2 */\
		MOVNTQ(%%mm5, 16(%%ebx))\
\
		"addl $24, %%ebx \n\t"\
\
		"addl $8, %%eax \n\t"\
		"cmpl %5, %%eax \n\t"\
		" jb 1b \n\t"

/*
 * WRITEBGR24MMX2: 24-bit store using pshufw (MMX2) and the M24A / M24B / M24C
 * byte-select masks; selected as WRITEBGR24 when HAVE_MMX2 is defined.
 *
 * In:  mm2 = 8 packed B bytes, mm4 = G, mm5 = R (mm7 is overwritten with
 *      M24C, so the "mm7 = 0" left by the YSCALEYUV2RGB* head is not needed).
 * Steps: for each of the three output quadwords, pshufw replicates the
 * 16-bit word holding the wanted bytes of every channel, the M24* masks
 * (memory constants defined elsewhere) keep one byte lane per channel, and
 * the three channels are OR-ed together. G is shifted by one byte so its
 * lanes line up (left by 8 for the first quadword, right by 8 in mm4 for the
 * other two).
 * Out: three quadwords written through MOVNTQ at (ebx) + 0/8/16, then
 * ebx += 24, eax += 8, and the loop repeats while eax is below %5
 * (unsigned compare).
 *
 * ebx is used as the running destination pointer and must be set up by the
 * enclosing asm statement (not visible in this macro).
 * Clobbers eax, ebx, mm0, mm1, mm3, mm4, mm6, mm7.
 */
#define WRITEBGR24MMX2 \
		/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
		"movq M24A, %%mm0 \n\t"\
		"movq M24C, %%mm7 \n\t"\
		"pshufw $0x50, %%mm2, %%mm1 \n\t" /* B3 B2 B3 B2 B1 B0 B1 B0 */\
		"pshufw $0x50, %%mm4, %%mm3 \n\t" /* G3 G2 G3 G2 G1 G0 G1 G0 */\
		"pshufw $0x00, %%mm5, %%mm6 \n\t" /* R1 R0 R1 R0 R1 R0 R1 R0 */\
\
		"pand %%mm0, %%mm1 \n\t" /* B2 B1 B0 */\
		"pand %%mm0, %%mm3 \n\t" /* G2 G1 G0 */\
		"pand %%mm7, %%mm6 \n\t" /* R1 R0 */\
\
		"psllq $8, %%mm3 \n\t" /* G2 G1 G0 */\
		"por %%mm1, %%mm6 \n\t"\
		"por %%mm3, %%mm6 \n\t"\
		MOVNTQ(%%mm6, (%%ebx))\
\
		"psrlq $8, %%mm4 \n\t" /* 00 G7 G6 G5 G4 G3 G2 G1 */\
		"pshufw $0xA5, %%mm2, %%mm1 \n\t" /* B5 B4 B5 B4 B3 B2 B3 B2 */\
		"pshufw $0x55, %%mm4, %%mm3 \n\t" /* G4 G3 G4 G3 G4 G3 G4 G3 */\
		"pshufw $0xA5, %%mm5, %%mm6 \n\t" /* R5 R4 R5 R4 R3 R2 R3 R2 */\
\
		"pand M24B, %%mm1 \n\t" /* B5 B4 B3 */\
		"pand %%mm7, %%mm3 \n\t" /* G4 G3 */\
		"pand %%mm0, %%mm6 \n\t" /* R4 R3 R2 */\
\
		"por %%mm1, %%mm3 \n\t" /* B5 G4 B4 G3 B3 */\
		"por %%mm3, %%mm6 \n\t"\
		MOVNTQ(%%mm6, 8(%%ebx))\
\
		"pshufw $0xFF, %%mm2, %%mm1 \n\t" /* B7 B6 B7 B6 B7 B6 B7 B6 */\
		"pshufw $0xFA, %%mm4, %%mm3 \n\t" /* 00 G7 00 G7 G6 G5 G6 G5 */\
		"pshufw $0xFA, %%mm5, %%mm6 \n\t" /* R7 R6 R7 R6 R5 R4 R5 R4 */\
\
		"pand %%mm7, %%mm1 \n\t" /* B7 B6 */\
		"pand %%mm0, %%mm3 \n\t" /* G7 G6 G5 */\
		"pand M24B, %%mm6 \n\t" /* R7 R6 R5 */\
\
		"por %%mm1, %%mm3 \n\t"\
		"por %%mm3, %%mm6 \n\t"\
		MOVNTQ(%%mm6, 16(%%ebx))\
\
		"addl $24, %%ebx \n\t"\
\
		"addl $8, %%eax \n\t"\
		"cmpl %5, %%eax \n\t"\
		" jb 1b \n\t"

/*
 * Select the 24-bit writer for this build: the pshufw-based variant when
 * HAVE_MMX2 is defined, the plain-MMX variant otherwise.
 * Any previous definition is dropped first (presumably because this template
 * is compiled more than once with different HAVE_* settings).
 */
#undef WRITEBGR24
#ifdef HAVE_MMX2
#define WRITEBGR24 WRITEBGR24MMX2
#else
#define WRITEBGR24 WRITEBGR24MMX
#endif

3126 | 527 static inline void RENAME(yuv2yuv)(uint16_t *buf0, uint16_t *buf1, uint16_t *uvbuf0, uint16_t *uvbuf1, |
3209 | 528 uint8_t *dest, uint8_t *uDest, uint8_t *vDest, int dstW, int yalpha, int uvalpha) |
2519 | 529 { |
530 int yalpha1=yalpha^4095; | |
531 int uvalpha1=uvalpha^4095; | |
532 int i; | |
533 | |
3126 | 534 #ifdef ARCH_X86 |
2521 | 535 asm volatile ("\n\t"::: "memory"); |
3126 | 536 #endif |
2521 | 537 |
3209 | 538 for(i=0;i<dstW;i++) |
2519 | 539 { |
2572
f2353173d52c
c optimizations (array is faster than pointer) (16bpp variants tested and 2% faster)
michael
parents:
2569
diff
changeset
|
540 ((uint8_t*)dest)[i] = (buf0[i]*yalpha1+buf1[i]*yalpha)>>19; |
2519 | 541 } |
542 | |
543 if(uvalpha != -1) | |
544 { | |
3209 | 545 for(i=0; i<(dstW>>1); i++) |
2519 | 546 { |
2572
f2353173d52c
c optimizations (array is faster than pointer) (16bpp variants tested and 2% faster)
michael
parents:
2569
diff
changeset
|
547 ((uint8_t*)uDest)[i] = (uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19; |
f2353173d52c
c optimizations (array is faster than pointer) (16bpp variants tested and 2% faster)
michael
parents:
2569
diff
changeset
|
548 ((uint8_t*)vDest)[i] = (uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>19; |
2519 | 549 } |
550 } | |
551 } | |
552 | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
553 /** |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
554 * vertical scale YV12 to RGB |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
555 */ |
3126 | 556 static inline void RENAME(yuv2rgbX)(uint16_t *buf0, uint16_t *buf1, uint16_t *uvbuf0, uint16_t *uvbuf1, |
3209 | 557 uint8_t *dest, int dstW, int yalpha, int uvalpha, int dstbpp) |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
558 { |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
559 int yalpha1=yalpha^4095; |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
560 int uvalpha1=uvalpha^4095; |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
561 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
562 if(fullUVIpol) |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
563 { |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
564 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
565 #ifdef HAVE_MMX |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
566 if(dstbpp == 32) |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
567 { |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
568 asm volatile( |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
569 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
570 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
571 FULL_YSCALEYUV2RGB |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
572 "punpcklbw %%mm1, %%mm3 \n\t" // BGBGBGBG |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
573 "punpcklbw %%mm7, %%mm0 \n\t" // R0R0R0R0 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
574 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
575 "movq %%mm3, %%mm1 \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
576 "punpcklwd %%mm0, %%mm3 \n\t" // BGR0BGR0 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
577 "punpckhwd %%mm0, %%mm1 \n\t" // BGR0BGR0 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
578 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
579 MOVNTQ(%%mm3, (%4, %%eax, 4)) |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
580 MOVNTQ(%%mm1, 8(%4, %%eax, 4)) |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
581 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
582 "addl $4, %%eax \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
583 "cmpl %5, %%eax \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
584 " jb 1b \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
585 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
586 |
3209 | 587 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW), |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
588 "m" (yalpha1), "m" (uvalpha1) |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
589 : "%eax" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
590 ); |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
591 } |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
592 else if(dstbpp==24) |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
593 { |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
594 asm volatile( |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
595 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
596 FULL_YSCALEYUV2RGB |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
597 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
598 // lsb ... msb |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
599 "punpcklbw %%mm1, %%mm3 \n\t" // BGBGBGBG |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
600 "punpcklbw %%mm7, %%mm0 \n\t" // R0R0R0R0 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
601 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
602 "movq %%mm3, %%mm1 \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
603 "punpcklwd %%mm0, %%mm3 \n\t" // BGR0BGR0 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
604 "punpckhwd %%mm0, %%mm1 \n\t" // BGR0BGR0 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
605 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
606 "movq %%mm3, %%mm2 \n\t" // BGR0BGR0 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
607 "psrlq $8, %%mm3 \n\t" // GR0BGR00 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
608 "pand bm00000111, %%mm2 \n\t" // BGR00000 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
609 "pand bm11111000, %%mm3 \n\t" // 000BGR00 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
610 "por %%mm2, %%mm3 \n\t" // BGRBGR00 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
611 "movq %%mm1, %%mm2 \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
612 "psllq $48, %%mm1 \n\t" // 000000BG |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
613 "por %%mm1, %%mm3 \n\t" // BGRBGRBG |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
614 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
615 "movq %%mm2, %%mm1 \n\t" // BGR0BGR0 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
616 "psrld $16, %%mm2 \n\t" // R000R000 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
617 "psrlq $24, %%mm1 \n\t" // 0BGR0000 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
618 "por %%mm2, %%mm1 \n\t" // RBGRR000 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
619 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
620 "movl %4, %%ebx \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
621 "addl %%eax, %%ebx \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
622 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
623 #ifdef HAVE_MMX2 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
624 //FIXME Alignment |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
625 "movntq %%mm3, (%%ebx, %%eax, 2)\n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
626 "movntq %%mm1, 8(%%ebx, %%eax, 2)\n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
627 #else |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
628 "movd %%mm3, (%%ebx, %%eax, 2) \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
629 "psrlq $32, %%mm3 \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
630 "movd %%mm3, 4(%%ebx, %%eax, 2) \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
631 "movd %%mm1, 8(%%ebx, %%eax, 2) \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
632 #endif |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
633 "addl $4, %%eax \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
634 "cmpl %5, %%eax \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
635 " jb 1b \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
636 |
3209 | 637 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest), "m" (dstW), |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
638 "m" (yalpha1), "m" (uvalpha1) |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
639 : "%eax", "%ebx" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
640 ); |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
641 } |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
642 else if(dstbpp==15) |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
643 { |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
644 asm volatile( |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
645 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
646 FULL_YSCALEYUV2RGB |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
647 #ifdef DITHER1XBPP |
2748 | 648 "paddusb g5Dither, %%mm1 \n\t" |
649 "paddusb r5Dither, %%mm0 \n\t" | |
650 "paddusb b5Dither, %%mm3 \n\t" | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
651 #endif |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
652 "punpcklbw %%mm7, %%mm1 \n\t" // 0G0G0G0G |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
653 "punpcklbw %%mm7, %%mm3 \n\t" // 0B0B0B0B |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
654 "punpcklbw %%mm7, %%mm0 \n\t" // 0R0R0R0R |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
655 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
656 "psrlw $3, %%mm3 \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
657 "psllw $2, %%mm1 \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
658 "psllw $7, %%mm0 \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
659 "pand g15Mask, %%mm1 \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
660 "pand r15Mask, %%mm0 \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
661 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
662 "por %%mm3, %%mm1 \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
663 "por %%mm1, %%mm0 \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
664 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
665 MOVNTQ(%%mm0, (%4, %%eax, 2)) |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
666 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
667 "addl $4, %%eax \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
668 "cmpl %5, %%eax \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
669 " jb 1b \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
670 |
3209 | 671 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW), |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
672 "m" (yalpha1), "m" (uvalpha1) |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
673 : "%eax" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
674 ); |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
675 } |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
676 else if(dstbpp==16) |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
677 { |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
678 asm volatile( |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
679 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
680 FULL_YSCALEYUV2RGB |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
681 #ifdef DITHER1XBPP |
2748 | 682 "paddusb g6Dither, %%mm1 \n\t" |
683 "paddusb r5Dither, %%mm0 \n\t" | |
684 "paddusb b5Dither, %%mm3 \n\t" | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
685 #endif |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
686 "punpcklbw %%mm7, %%mm1 \n\t" // 0G0G0G0G |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
687 "punpcklbw %%mm7, %%mm3 \n\t" // 0B0B0B0B |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
688 "punpcklbw %%mm7, %%mm0 \n\t" // 0R0R0R0R |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
689 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
690 "psrlw $3, %%mm3 \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
691 "psllw $3, %%mm1 \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
692 "psllw $8, %%mm0 \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
693 "pand g16Mask, %%mm1 \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
694 "pand r16Mask, %%mm0 \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
695 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
696 "por %%mm3, %%mm1 \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
697 "por %%mm1, %%mm0 \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
698 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
699 MOVNTQ(%%mm0, (%4, %%eax, 2)) |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
700 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
701 "addl $4, %%eax \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
702 "cmpl %5, %%eax \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
703 " jb 1b \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
704 |
3209 | 705 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW), |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
706 "m" (yalpha1), "m" (uvalpha1) |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
707 : "%eax" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
708 ); |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
709 } |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
710 #else |
2521 | 711 asm volatile ("\n\t"::: "memory"); |
712 | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
713 if(dstbpp==32 || dstbpp==24) |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
714 { |
2671 | 715 int i; |
3209 | 716 for(i=0;i<dstW;i++){ |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
717 // vertical linear interpolation && yuv2rgb in a single step: |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
718 int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)]; |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
719 int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19); |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
720 int V=((uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>19); |
2503 | 721 dest[0]=clip_table[((Y + yuvtab_40cf[U]) >>13)]; |
722 dest[1]=clip_table[((Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13)]; | |
723 dest[2]=clip_table[((Y + yuvtab_3343[V]) >>13)]; | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
724 dest+=dstbpp>>3; |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
725 } |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
726 } |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
727 else if(dstbpp==16) |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
728 { |
2671 | 729 int i; |
3209 | 730 for(i=0;i<dstW;i++){ |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
731 // vertical linear interpolation && yuv2rgb in a single step: |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
732 int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)]; |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
733 int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19); |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
734 int V=((uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>19); |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
735 |
2572
f2353173d52c
c optimizations (array is faster than pointer) (16bpp variants tested and 2% faster)
michael
parents:
2569
diff
changeset
|
736 ((uint16_t*)dest)[i] = |
2584 | 737 clip_table16b[(Y + yuvtab_40cf[U]) >>13] | |
738 clip_table16g[(Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13] | | |
739 clip_table16r[(Y + yuvtab_3343[V]) >>13]; | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
740 } |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
741 } |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
742 else if(dstbpp==15) |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
743 { |
2671 | 744 int i; |
3209 | 745 for(i=0;i<dstW;i++){ |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
746 // vertical linear interpolation && yuv2rgb in a single step: |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
747 int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)]; |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
748 int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19); |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
749 int V=((uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>19); |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
750 |
2572
f2353173d52c
c optimizations (array is faster than pointer) (16bpp variants tested and 2% faster)
michael
parents:
2569
diff
changeset
|
751 ((uint16_t*)dest)[i] = |
2584 | 752 clip_table15b[(Y + yuvtab_40cf[U]) >>13] | |
753 clip_table15g[(Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13] | | |
754 clip_table15r[(Y + yuvtab_3343[V]) >>13]; | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
755 } |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
756 } |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
757 #endif |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
758 }//FULL_UV_IPOL |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
759 else |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
760 { |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
761 #ifdef HAVE_MMX |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
762 if(dstbpp == 32) |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
763 { |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
764 asm volatile( |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
765 YSCALEYUV2RGB |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
766 WRITEBGR32 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
767 |
3209 | 768 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW), |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
769 "m" (yalpha1), "m" (uvalpha1) |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
770 : "%eax" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
771 ); |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
772 } |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
773 else if(dstbpp==24) |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
774 { |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
775 asm volatile( |
2728 | 776 "movl %4, %%ebx \n\t" |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
777 YSCALEYUV2RGB |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
778 WRITEBGR24 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
779 |
3209 | 780 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest), "m" (dstW), |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
781 "m" (yalpha1), "m" (uvalpha1) |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
782 : "%eax", "%ebx" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
783 ); |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
784 } |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
785 else if(dstbpp==15) |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
786 { |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
787 asm volatile( |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
788 YSCALEYUV2RGB |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
789 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
790 #ifdef DITHER1XBPP |
2748 | 791 "paddusb b5Dither, %%mm2 \n\t" |
792 "paddusb g5Dither, %%mm4 \n\t" | |
793 "paddusb r5Dither, %%mm5 \n\t" | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
794 #endif |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
795 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
796 WRITEBGR15 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
797 |
3209 | 798 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW), |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
799 "m" (yalpha1), "m" (uvalpha1) |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
800 : "%eax" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
801 ); |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
802 } |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
803 else if(dstbpp==16) |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
804 { |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
805 asm volatile( |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
806 YSCALEYUV2RGB |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
807 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
808 #ifdef DITHER1XBPP |
2748 | 809 "paddusb b5Dither, %%mm2 \n\t" |
810 "paddusb g6Dither, %%mm4 \n\t" | |
811 "paddusb r5Dither, %%mm5 \n\t" | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
812 #endif |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
813 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
814 WRITEBGR16 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
815 |
3209 | 816 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW), |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
817 "m" (yalpha1), "m" (uvalpha1) |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
818 : "%eax" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
819 ); |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
820 } |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
821 #else |
2521 | 822 asm volatile ("\n\t"::: "memory"); |
823 | |
2575 | 824 if(dstbpp==32) |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
825 { |
2671 | 826 int i; |
3209 | 827 for(i=0; i<dstW-1; i+=2){ |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
828 // vertical linear interpolation && yuv2rgb in a single step: |
2575 | 829 int Y1=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)]; |
830 int Y2=yuvtab_2568[((buf0[i+1]*yalpha1+buf1[i+1]*yalpha)>>19)]; | |
2585 | 831 int U=((uvbuf0[i>>1]*uvalpha1+uvbuf1[i>>1]*uvalpha)>>19); |
832 int V=((uvbuf0[(i>>1)+2048]*uvalpha1+uvbuf1[(i>>1)+2048]*uvalpha)>>19); | |
2575 | 833 |
834 int Cb= yuvtab_40cf[U]; | |
835 int Cg= yuvtab_1a1e[V] + yuvtab_0c92[U]; | |
836 int Cr= yuvtab_3343[V]; | |
837 | |
838 dest[4*i+0]=clip_table[((Y1 + Cb) >>13)]; | |
839 dest[4*i+1]=clip_table[((Y1 + Cg) >>13)]; | |
840 dest[4*i+2]=clip_table[((Y1 + Cr) >>13)]; | |
841 | |
842 dest[4*i+4]=clip_table[((Y2 + Cb) >>13)]; | |
843 dest[4*i+5]=clip_table[((Y2 + Cg) >>13)]; | |
844 dest[4*i+6]=clip_table[((Y2 + Cr) >>13)]; | |
845 } | |
846 } | |
847 if(dstbpp==24) | |
848 { | |
2671 | 849 int i; |
3209 | 850 for(i=0; i<dstW-1; i+=2){ |
2575 | 851 // vertical linear interpolation && yuv2rgb in a single step: |
852 int Y1=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)]; | |
853 int Y2=yuvtab_2568[((buf0[i+1]*yalpha1+buf1[i+1]*yalpha)>>19)]; | |
2585 | 854 int U=((uvbuf0[i>>1]*uvalpha1+uvbuf1[i>>1]*uvalpha)>>19); |
855 int V=((uvbuf0[(i>>1)+2048]*uvalpha1+uvbuf1[(i>>1)+2048]*uvalpha)>>19); | |
2575 | 856 |
857 int Cb= yuvtab_40cf[U]; | |
858 int Cg= yuvtab_1a1e[V] + yuvtab_0c92[U]; | |
859 int Cr= yuvtab_3343[V]; | |
860 | |
861 dest[0]=clip_table[((Y1 + Cb) >>13)]; | |
862 dest[1]=clip_table[((Y1 + Cg) >>13)]; | |
863 dest[2]=clip_table[((Y1 + Cr) >>13)]; | |
864 | |
865 dest[3]=clip_table[((Y2 + Cb) >>13)]; | |
866 dest[4]=clip_table[((Y2 + Cg) >>13)]; | |
867 dest[5]=clip_table[((Y2 + Cr) >>13)]; | |
868 dest+=6; | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
869 } |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
870 } |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
871 else if(dstbpp==16) |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
872 { |
2671 | 873 int i; |
3209 | 874 for(i=0; i<dstW-1; i+=2){ |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
875 // vertical linear interpolation && yuv2rgb in a single step: |
2575 | 876 int Y1=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)]; |
877 int Y2=yuvtab_2568[((buf0[i+1]*yalpha1+buf1[i+1]*yalpha)>>19)]; | |
2585 | 878 int U=((uvbuf0[i>>1]*uvalpha1+uvbuf1[i>>1]*uvalpha)>>19); |
879 int V=((uvbuf0[(i>>1)+2048]*uvalpha1+uvbuf1[(i>>1)+2048]*uvalpha)>>19); | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
880 |
2575 | 881 int Cb= yuvtab_40cf[U]; |
882 int Cg= yuvtab_1a1e[V] + yuvtab_0c92[U]; | |
883 int Cr= yuvtab_3343[V]; | |
884 | |
2572
f2353173d52c
c optimizations (array is faster than pointer) (16bpp variants tested and 2% faster)
michael
parents:
2569
diff
changeset
|
885 ((uint16_t*)dest)[i] = |
2584 | 886 clip_table16b[(Y1 + Cb) >>13] | |
887 clip_table16g[(Y1 + Cg) >>13] | | |
888 clip_table16r[(Y1 + Cr) >>13]; | |
2575 | 889 |
890 ((uint16_t*)dest)[i+1] = | |
2584 | 891 clip_table16b[(Y2 + Cb) >>13] | |
892 clip_table16g[(Y2 + Cg) >>13] | | |
893 clip_table16r[(Y2 + Cr) >>13]; | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
894 } |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
895 } |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
896 else if(dstbpp==15) |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
897 { |
2671 | 898 int i; |
3209 | 899 for(i=0; i<dstW-1; i+=2){ |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
900 // vertical linear interpolation && yuv2rgb in a single step: |
2575 | 901 int Y1=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)]; |
902 int Y2=yuvtab_2568[((buf0[i+1]*yalpha1+buf1[i+1]*yalpha)>>19)]; | |
2585 | 903 int U=((uvbuf0[i>>1]*uvalpha1+uvbuf1[i>>1]*uvalpha)>>19); |
904 int V=((uvbuf0[(i>>1)+2048]*uvalpha1+uvbuf1[(i>>1)+2048]*uvalpha)>>19); | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
905 |
2575 | 906 int Cb= yuvtab_40cf[U]; |
907 int Cg= yuvtab_1a1e[V] + yuvtab_0c92[U]; | |
908 int Cr= yuvtab_3343[V]; | |
909 | |
2572
f2353173d52c
c optimizations (array is faster than pointer) (16bpp variants tested and 2% faster)
michael
parents:
2569
diff
changeset
|
910 ((uint16_t*)dest)[i] = |
2584 | 911 clip_table15b[(Y1 + Cb) >>13] | |
912 clip_table15g[(Y1 + Cg) >>13] | | |
913 clip_table15r[(Y1 + Cr) >>13]; | |
914 | |
2575 | 915 ((uint16_t*)dest)[i+1] = |
2584 | 916 clip_table15b[(Y2 + Cb) >>13] | |
917 clip_table15g[(Y2 + Cg) >>13] | | |
918 clip_table15r[(Y2 + Cr) >>13]; | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
919 } |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
920 } |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
921 #endif |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
922 } //!FULL_UV_IPOL |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
923 } |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
924 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
925 /** |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
926 * YV12 to RGB without vertical scaling or luma interpolation (the nearer luma line is used; chrominance may still be interpolated between lines) |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
927 */ |
3126 | 928 static inline void RENAME(yuv2rgb1)(uint16_t *buf0, uint16_t *buf1, uint16_t *uvbuf0, uint16_t *uvbuf1, |
3209 | 929 uint8_t *dest, int dstW, int yalpha, int uvalpha, int dstbpp) |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
930 { |
2671 | 931 int uvalpha1=uvalpha^4095; |
932 #ifdef HAVE_MMX | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
933 int yalpha1=yalpha^4095; |
2671 | 934 #endif |
935 | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
936 if(fullUVIpol || allwaysIpol) |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
937 { |
3209 | 938 RENAME(yuv2rgbX)(buf0, buf1, uvbuf0, uvbuf1, dest, dstW, yalpha, uvalpha, dstbpp); |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
939 return; |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
940 } |
2576 | 941 if( yalpha > 2048 ) buf0 = buf1; |
942 | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
943 #ifdef HAVE_MMX |
2569
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
944 if( uvalpha < 2048 ) // note this is not correct (shifts chrominance by 0.5 pixels) but its a bit faster |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
945 { |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
946 if(dstbpp == 32) |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
947 { |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
948 asm volatile( |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
949 YSCALEYUV2RGB1 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
950 WRITEBGR32 |
3209 | 951 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW), |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
952 "m" (yalpha1), "m" (uvalpha1) |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
953 : "%eax" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
954 ); |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
955 } |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
956 else if(dstbpp==24) |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
957 { |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
958 asm volatile( |
2728 | 959 "movl %4, %%ebx \n\t" |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
960 YSCALEYUV2RGB1 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
961 WRITEBGR24 |
3209 | 962 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest), "m" (dstW), |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
963 "m" (yalpha1), "m" (uvalpha1) |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
964 : "%eax", "%ebx" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
965 ); |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
966 } |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
967 else if(dstbpp==15) |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
968 { |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
969 asm volatile( |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
970 YSCALEYUV2RGB1 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
971 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
972 #ifdef DITHER1XBPP |
2748 | 973 "paddusb b5Dither, %%mm2 \n\t" |
974 "paddusb g5Dither, %%mm4 \n\t" | |
975 "paddusb r5Dither, %%mm5 \n\t" | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
976 #endif |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
977 WRITEBGR15 |
3209 | 978 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW), |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
979 "m" (yalpha1), "m" (uvalpha1) |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
980 : "%eax" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
981 ); |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
982 } |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
983 else if(dstbpp==16) |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
984 { |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
985 asm volatile( |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
986 YSCALEYUV2RGB1 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
987 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
988 #ifdef DITHER1XBPP |
2748 | 989 "paddusb b5Dither, %%mm2 \n\t" |
990 "paddusb g6Dither, %%mm4 \n\t" | |
991 "paddusb r5Dither, %%mm5 \n\t" | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
992 #endif |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
993 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
994 WRITEBGR16 |
3209 | 995 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW), |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
996 "m" (yalpha1), "m" (uvalpha1) |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
997 : "%eax" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
998 ); |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
999 } |
2569
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1000 } |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1001 else |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1002 { |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1003 if(dstbpp == 32) |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1004 { |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1005 asm volatile( |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1006 YSCALEYUV2RGB1b |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1007 WRITEBGR32 |
3209 | 1008 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW), |
2569
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1009 "m" (yalpha1), "m" (uvalpha1) |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1010 : "%eax" |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1011 ); |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1012 } |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1013 else if(dstbpp==24) |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1014 { |
2569
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1015 asm volatile( |
2728 | 1016 "movl %4, %%ebx \n\t" |
2569
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1017 YSCALEYUV2RGB1b |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1018 WRITEBGR24 |
3209 | 1019 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest), "m" (dstW), |
2569
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1020 "m" (yalpha1), "m" (uvalpha1) |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1021 : "%eax", "%ebx" |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1022 ); |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1023 } |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1024 else if(dstbpp==15) |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1025 { |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1026 asm volatile( |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1027 YSCALEYUV2RGB1b |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1028 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1029 #ifdef DITHER1XBPP |
2748 | 1030 "paddusb b5Dither, %%mm2 \n\t" |
1031 "paddusb g5Dither, %%mm4 \n\t" | |
1032 "paddusb r5Dither, %%mm5 \n\t" | |
2569
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1033 #endif |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1034 WRITEBGR15 |
3209 | 1035 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW), |
2569
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1036 "m" (yalpha1), "m" (uvalpha1) |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1037 : "%eax" |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1038 ); |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1039 } |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1040 else if(dstbpp==16) |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1041 { |
2569
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1042 asm volatile( |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1043 YSCALEYUV2RGB1b |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1044 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1045 #ifdef DITHER1XBPP |
2748 | 1046 "paddusb b5Dither, %%mm2 \n\t" |
1047 "paddusb g6Dither, %%mm4 \n\t" | |
1048 "paddusb r5Dither, %%mm5 \n\t" | |
2569
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1049 #endif |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1050 |
2569
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1051 WRITEBGR16 |
3209 | 1052 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW), |
2569
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1053 "m" (yalpha1), "m" (uvalpha1) |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1054 : "%eax" |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1055 ); |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1056 } |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1057 } |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1058 #else |
2576 | 1059 //FIXME write 2 versions (for even & odd lines) |
2569
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1060 asm volatile ("\n\t"::: "memory"); |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1061 |
2576 | 1062 if(dstbpp==32) |
2569
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1063 { |
2671 | 1064 int i; |
3209 | 1065 for(i=0; i<dstW-1; i+=2){ |
2569
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1066 // vertical linear interpolation && yuv2rgb in a single step: |
2576 | 1067 int Y1=yuvtab_2568[buf0[i]>>7]; |
1068 int Y2=yuvtab_2568[buf0[i+1]>>7]; | |
2585 | 1069 int U=((uvbuf0[i>>1]*uvalpha1+uvbuf1[i>>1]*uvalpha)>>19); |
1070 int V=((uvbuf0[(i>>1)+2048]*uvalpha1+uvbuf1[(i>>1)+2048]*uvalpha)>>19); | |
2576 | 1071 |
1072 int Cb= yuvtab_40cf[U]; | |
1073 int Cg= yuvtab_1a1e[V] + yuvtab_0c92[U]; | |
1074 int Cr= yuvtab_3343[V]; | |
1075 | |
1076 dest[4*i+0]=clip_table[((Y1 + Cb) >>13)]; | |
1077 dest[4*i+1]=clip_table[((Y1 + Cg) >>13)]; | |
1078 dest[4*i+2]=clip_table[((Y1 + Cr) >>13)]; | |
1079 | |
1080 dest[4*i+4]=clip_table[((Y2 + Cb) >>13)]; | |
1081 dest[4*i+5]=clip_table[((Y2 + Cg) >>13)]; | |
1082 dest[4*i+6]=clip_table[((Y2 + Cr) >>13)]; | |
1083 } | |
1084 } | |
1085 if(dstbpp==24) | |
1086 { | |
2671 | 1087 int i; |
3209 | 1088 for(i=0; i<dstW-1; i+=2){ |
2576 | 1089 // vertical linear interpolation && yuv2rgb in a single step: |
1090 int Y1=yuvtab_2568[buf0[i]>>7]; | |
1091 int Y2=yuvtab_2568[buf0[i+1]>>7]; | |
2585 | 1092 int U=((uvbuf0[i>>1]*uvalpha1+uvbuf1[i>>1]*uvalpha)>>19); |
1093 int V=((uvbuf0[(i>>1)+2048]*uvalpha1+uvbuf1[(i>>1)+2048]*uvalpha)>>19); | |
2576 | 1094 |
1095 int Cb= yuvtab_40cf[U]; | |
1096 int Cg= yuvtab_1a1e[V] + yuvtab_0c92[U]; | |
1097 int Cr= yuvtab_3343[V]; | |
1098 | |
1099 dest[0]=clip_table[((Y1 + Cb) >>13)]; | |
1100 dest[1]=clip_table[((Y1 + Cg) >>13)]; | |
1101 dest[2]=clip_table[((Y1 + Cr) >>13)]; | |
1102 | |
1103 dest[3]=clip_table[((Y2 + Cb) >>13)]; | |
1104 dest[4]=clip_table[((Y2 + Cg) >>13)]; | |
1105 dest[5]=clip_table[((Y2 + Cr) >>13)]; | |
1106 dest+=6; | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1107 } |
2569
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1108 } |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1109 else if(dstbpp==16) |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1110 { |
2671 | 1111 int i; |
3209 | 1112 for(i=0; i<dstW-1; i+=2){ |
2569
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1113 // vertical linear interpolation && yuv2rgb in a single step: |
2576 | 1114 int Y1=yuvtab_2568[buf0[i]>>7]; |
1115 int Y2=yuvtab_2568[buf0[i+1]>>7]; | |
2585 | 1116 int U=((uvbuf0[i>>1]*uvalpha1+uvbuf1[i>>1]*uvalpha)>>19); |
1117 int V=((uvbuf0[(i>>1)+2048]*uvalpha1+uvbuf1[(i>>1)+2048]*uvalpha)>>19); | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1118 |
2576 | 1119 int Cb= yuvtab_40cf[U]; |
1120 int Cg= yuvtab_1a1e[V] + yuvtab_0c92[U]; | |
1121 int Cr= yuvtab_3343[V]; | |
1122 | |
2572
f2353173d52c
c optimizations (array is faster than pointer) (16bpp variants tested and 2% faster)
michael
parents:
2569
diff
changeset
|
1123 ((uint16_t*)dest)[i] = |
2584 | 1124 clip_table16b[(Y1 + Cb) >>13] | |
1125 clip_table16g[(Y1 + Cg) >>13] | | |
1126 clip_table16r[(Y1 + Cr) >>13]; | |
2576 | 1127 |
1128 ((uint16_t*)dest)[i+1] = | |
2584 | 1129 clip_table16b[(Y2 + Cb) >>13] | |
1130 clip_table16g[(Y2 + Cg) >>13] | | |
1131 clip_table16r[(Y2 + Cr) >>13]; | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1132 } |
2569
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1133 } |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1134 else if(dstbpp==15) |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1135 { |
2671 | 1136 int i; |
3209 | 1137 for(i=0; i<dstW-1; i+=2){ |
2569
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1138 // vertical linear interpolation && yuv2rgb in a single step: |
2576 | 1139 int Y1=yuvtab_2568[buf0[i]>>7]; |
1140 int Y2=yuvtab_2568[buf0[i+1]>>7]; | |
2585 | 1141 int U=((uvbuf0[i>>1]*uvalpha1+uvbuf1[i>>1]*uvalpha)>>19); |
1142 int V=((uvbuf0[(i>>1)+2048]*uvalpha1+uvbuf1[(i>>1)+2048]*uvalpha)>>19); | |
2569
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1143 |
2576 | 1144 int Cb= yuvtab_40cf[U]; |
1145 int Cg= yuvtab_1a1e[V] + yuvtab_0c92[U]; | |
1146 int Cr= yuvtab_3343[V]; | |
1147 | |
2572
f2353173d52c
c optimizations (array is faster than pointer) (16bpp variants tested and 2% faster)
michael
parents:
2569
diff
changeset
|
1148 ((uint16_t*)dest)[i] = |
2584 | 1149 clip_table15b[(Y1 + Cb) >>13] | |
1150 clip_table15g[(Y1 + Cg) >>13] | | |
1151 clip_table15r[(Y1 + Cr) >>13]; | |
1152 | |
2576 | 1153 ((uint16_t*)dest)[i+1] = |
2584 | 1154 clip_table15b[(Y2 + Cb) >>13] | |
1155 clip_table15g[(Y2 + Cg) >>13] | | |
1156 clip_table15r[(Y2 + Cr) >>13]; | |
2569
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1157 } |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1158 } |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1159 #endif |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1160 } |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1161 |
3272 | 1162 // Bilinear / Bicubic scaling |
1163 static inline void RENAME(hScale)(int16_t *dst, int dstW, uint8_t *src, int srcW, int xInc, | |
1164 int16_t *filter, int16_t *filterPos, int filterSize) | |
1165 { | |
1166 #ifdef HAVE_MMX | |
1167 if(filterSize==4) // allways true for upscaling, sometimes for down too | |
1168 { | |
1169 int counter= -2*dstW; | |
1170 filter-= counter*2; | |
1171 filterPos-= counter/2; | |
1172 dst-= counter/2; | |
1173 asm volatile( | |
1174 "pxor %%mm7, %%mm7 \n\t" | |
1175 "movq w02, %%mm6 \n\t" | |
1176 "pushl %%ebp \n\t" // we use 7 regs here ... | |
1177 "movl %%eax, %%ebp \n\t" | |
1178 ".balign 16 \n\t" | |
1179 "1: \n\t" | |
1180 "movzwl (%2, %%ebp), %%eax \n\t" | |
1181 "movzwl 2(%2, %%ebp), %%ebx \n\t" | |
1182 "movq (%1, %%ebp, 4), %%mm1 \n\t" | |
1183 "movq 8(%1, %%ebp, 4), %%mm3 \n\t" | |
1184 "movd (%3, %%eax), %%mm0 \n\t" | |
1185 "movd (%3, %%ebx), %%mm2 \n\t" | |
1186 "punpcklbw %%mm7, %%mm0 \n\t" | |
1187 "punpcklbw %%mm7, %%mm2 \n\t" | |
1188 "pmaddwd %%mm1, %%mm0 \n\t" | |
1189 "pmaddwd %%mm2, %%mm3 \n\t" | |
1190 "psrad $8, %%mm0 \n\t" | |
1191 "psrad $8, %%mm3 \n\t" | |
1192 "packssdw %%mm3, %%mm0 \n\t" | |
1193 "pmaddwd %%mm6, %%mm0 \n\t" | |
1194 "packssdw %%mm0, %%mm0 \n\t" | |
1195 "movd %%mm0, (%4, %%ebp) \n\t" | |
1196 "addl $4, %%ebp \n\t" | |
1197 " jnc 1b \n\t" | |
1198 | |
1199 "popl %%ebp \n\t" | |
1200 : "+a" (counter) | |
1201 : "c" (filter), "d" (filterPos), "S" (src), "D" (dst) | |
1202 : "%ebx" | |
1203 ); | |
1204 } | |
1205 else if(filterSize==8) | |
1206 { | |
1207 int counter= -2*dstW; | |
1208 filter-= counter*4; | |
1209 filterPos-= counter/2; | |
1210 dst-= counter/2; | |
1211 asm volatile( | |
1212 "pxor %%mm7, %%mm7 \n\t" | |
1213 "movq w02, %%mm6 \n\t" | |
1214 "pushl %%ebp \n\t" // we use 7 regs here ... | |
1215 "movl %%eax, %%ebp \n\t" | |
1216 ".balign 16 \n\t" | |
1217 "1: \n\t" | |
1218 "movzwl (%2, %%ebp), %%eax \n\t" | |
1219 "movzwl 2(%2, %%ebp), %%ebx \n\t" | |
1220 "movq (%1, %%ebp, 8), %%mm1 \n\t" | |
1221 "movq 16(%1, %%ebp, 8), %%mm3 \n\t" | |
1222 "movd (%3, %%eax), %%mm0 \n\t" | |
1223 "movd (%3, %%ebx), %%mm2 \n\t" | |
1224 "punpcklbw %%mm7, %%mm0 \n\t" | |
1225 "punpcklbw %%mm7, %%mm2 \n\t" | |
1226 "pmaddwd %%mm1, %%mm0 \n\t" | |
1227 "pmaddwd %%mm2, %%mm3 \n\t" | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1228 |
3272 | 1229 "movq 8(%1, %%ebp, 8), %%mm1 \n\t" |
1230 "movq 24(%1, %%ebp, 8), %%mm5 \n\t" | |
1231 "movd 4(%3, %%eax), %%mm4 \n\t" | |
1232 "movd 4(%3, %%ebx), %%mm2 \n\t" | |
1233 "punpcklbw %%mm7, %%mm4 \n\t" | |
1234 "punpcklbw %%mm7, %%mm2 \n\t" | |
1235 "pmaddwd %%mm1, %%mm4 \n\t" | |
1236 "pmaddwd %%mm2, %%mm5 \n\t" | |
1237 "paddd %%mm4, %%mm0 \n\t" | |
1238 "paddd %%mm5, %%mm3 \n\t" | |
1239 | |
1240 "psrad $8, %%mm0 \n\t" | |
1241 "psrad $8, %%mm3 \n\t" | |
1242 "packssdw %%mm3, %%mm0 \n\t" | |
1243 "pmaddwd %%mm6, %%mm0 \n\t" | |
1244 "packssdw %%mm0, %%mm0 \n\t" | |
1245 "movd %%mm0, (%4, %%ebp) \n\t" | |
1246 "addl $4, %%ebp \n\t" | |
1247 " jnc 1b \n\t" | |
1248 | |
1249 "popl %%ebp \n\t" | |
1250 : "+a" (counter) | |
1251 : "c" (filter), "d" (filterPos), "S" (src), "D" (dst) | |
1252 : "%ebx" | |
1253 ); | |
1254 } | |
1255 else | |
1256 { | |
1257 int counter= -2*dstW; | |
1258 // filter-= counter*filterSize/2; | |
1259 filterPos-= counter/2; | |
1260 dst-= counter/2; | |
1261 asm volatile( | |
1262 "pxor %%mm7, %%mm7 \n\t" | |
1263 "movq w02, %%mm6 \n\t" | |
1264 ".balign 16 \n\t" | |
1265 "1: \n\t" | |
1266 "movl %2, %%ecx \n\t" | |
1267 "movzwl (%%ecx, %0), %%eax \n\t" | |
1268 "movzwl 2(%%ecx, %0), %%ebx \n\t" | |
1269 "movl %5, %%ecx \n\t" | |
1270 "pxor %%mm4, %%mm4 \n\t" | |
1271 "pxor %%mm5, %%mm5 \n\t" | |
1272 "2: \n\t" | |
1273 "movq (%1), %%mm1 \n\t" | |
1274 "movq (%1, %6), %%mm3 \n\t" | |
1275 "movd (%%ecx, %%eax), %%mm0 \n\t" | |
1276 "movd (%%ecx, %%ebx), %%mm2 \n\t" | |
1277 "punpcklbw %%mm7, %%mm0 \n\t" | |
1278 "punpcklbw %%mm7, %%mm2 \n\t" | |
1279 "pmaddwd %%mm1, %%mm0 \n\t" | |
1280 "pmaddwd %%mm2, %%mm3 \n\t" | |
1281 "paddd %%mm3, %%mm5 \n\t" | |
1282 "paddd %%mm0, %%mm4 \n\t" | |
1283 "addl $8, %1 \n\t" | |
1284 "addl $4, %%ecx \n\t" | |
1285 "cmpl %4, %%ecx \n\t" | |
1286 " jb 2b \n\t" | |
1287 "addl %6, %1 \n\t" | |
1288 "psrad $8, %%mm4 \n\t" | |
1289 "psrad $8, %%mm5 \n\t" | |
1290 "packssdw %%mm5, %%mm4 \n\t" | |
1291 "pmaddwd %%mm6, %%mm4 \n\t" | |
1292 "packssdw %%mm4, %%mm4 \n\t" | |
1293 "movl %3, %%eax \n\t" | |
1294 "movd %%mm4, (%%eax, %0) \n\t" | |
1295 "addl $4, %0 \n\t" | |
1296 " jnc 1b \n\t" | |
1297 | |
1298 : "+r" (counter) | |
1299 : "r" (filter), "m" (filterPos), "m" (dst), "m"(src+filterSize), | |
1300 "m" (src), "r" (filterSize*2) | |
3299 | 1301 : "%ebx", "%eax", "%ecx" |
3272 | 1302 ); |
1303 } | |
1304 #else | |
1305 int i; | |
1306 for(i=0; i<dstW; i++) | |
1307 { | |
1308 int j; | |
1309 int srcPos= filterPos[i]; | |
1310 int val=0; | |
1311 // printf("filterPos: %d\n", hFilterPos[i]); | |
1312 for(j=0; j<filterSize; j++) | |
1313 { | |
1314 // printf("filter: %d, src: %d\n", filter[i], src[srcPos + j]); | |
1315 val += ((int)src[srcPos + j])*filter[filterSize*i + j]; | |
1316 } | |
1317 // filter += hFilterSize; | |
1318 dst[i] = MIN(MAX(0, val>>7), (1<<15)-1); // the cubic equation does overflow ... | |
1319 // dst[i] = val>>7; | |
1320 } | |
1321 #endif | |
1322 } | |
1323 // *** horizontal scale Y line to temp buffer | |
3215 | 1324 static inline void RENAME(hyscale)(uint16_t *dst, int dstWidth, uint8_t *src, int srcW, int xInc) |
2469 | 1325 { |
3272 | 1326 if(sws_flags != SWS_FAST_BILINEAR) |
1327 { | |
1328 RENAME(hScale)(dst, dstWidth, src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize); | |
1329 } | |
1330 else // Fast Bilinear upscale / crap downscale | |
1331 { | |
2469 | 1332 #ifdef ARCH_X86 |
1333 #ifdef HAVE_MMX2 | |
2671 | 1334 int i; |
2469 | 1335 if(canMMX2BeUsed) |
1336 { | |
1337 asm volatile( | |
1338 "pxor %%mm7, %%mm7 \n\t" | |
1339 "pxor %%mm2, %%mm2 \n\t" // 2*xalpha | |
1340 "movd %5, %%mm6 \n\t" // xInc&0xFFFF | |
1341 "punpcklwd %%mm6, %%mm6 \n\t" | |
1342 "punpcklwd %%mm6, %%mm6 \n\t" | |
1343 "movq %%mm6, %%mm2 \n\t" | |
1344 "psllq $16, %%mm2 \n\t" | |
1345 "paddw %%mm6, %%mm2 \n\t" | |
1346 "psllq $16, %%mm2 \n\t" | |
1347 "paddw %%mm6, %%mm2 \n\t" | |
1348 "psllq $16, %%mm2 \n\t" //0,t,2t,3t t=xInc&0xFF | |
1349 "movq %%mm2, temp0 \n\t" | |
1350 "movd %4, %%mm6 \n\t" //(xInc*4)&0xFFFF | |
1351 "punpcklwd %%mm6, %%mm6 \n\t" | |
1352 "punpcklwd %%mm6, %%mm6 \n\t" | |
1353 "xorl %%eax, %%eax \n\t" // i | |
1354 "movl %0, %%esi \n\t" // src | |
1355 "movl %1, %%edi \n\t" // buf1 | |
1356 "movl %3, %%edx \n\t" // (xInc*4)>>16 | |
1357 "xorl %%ecx, %%ecx \n\t" | |
1358 "xorl %%ebx, %%ebx \n\t" | |
1359 "movw %4, %%bx \n\t" // (xInc*4)&0xFFFF | |
2520 | 1360 |
2469 | 1361 #define FUNNY_Y_CODE \ |
2520 | 1362 PREFETCH" 1024(%%esi) \n\t"\ |
1363 PREFETCH" 1056(%%esi) \n\t"\ | |
1364 PREFETCH" 1088(%%esi) \n\t"\ | |
2469 | 1365 "call funnyYCode \n\t"\ |
1366 "movq temp0, %%mm2 \n\t"\ | |
1367 "xorl %%ecx, %%ecx \n\t" | |
2520 | 1368 |
2469 | 1369 FUNNY_Y_CODE |
1370 FUNNY_Y_CODE | |
1371 FUNNY_Y_CODE | |
1372 FUNNY_Y_CODE | |
1373 FUNNY_Y_CODE | |
1374 FUNNY_Y_CODE | |
1375 FUNNY_Y_CODE | |
1376 FUNNY_Y_CODE | |
1377 | |
1378 :: "m" (src), "m" (dst), "m" (dstWidth), "m" ((xInc*4)>>16), | |
1379 "m" ((xInc*4)&0xFFFF), "m" (xInc&0xFFFF) | |
1380 : "%eax", "%ebx", "%ecx", "%edx", "%esi", "%edi" | |
1381 ); | |
3215 | 1382 for(i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) dst[i] = src[srcW-1]*128; |
2469 | 1383 } |
1384 else | |
1385 { | |
1386 #endif | |
1387 //NO MMX just normal asm ... | |
1388 asm volatile( | |
1389 "xorl %%eax, %%eax \n\t" // i | |
1390 "xorl %%ebx, %%ebx \n\t" // xx | |
1391 "xorl %%ecx, %%ecx \n\t" // 2*xalpha | |
2800
7847d6b7ad3d
.balign or weĦll align by 64kb on some architectures
michael
parents:
2799
diff
changeset
|
1392 ".balign 16 \n\t" |
2469 | 1393 "1: \n\t" |
1394 "movzbl (%0, %%ebx), %%edi \n\t" //src[xx] | |
1395 "movzbl 1(%0, %%ebx), %%esi \n\t" //src[xx+1] | |
1396 "subl %%edi, %%esi \n\t" //src[xx+1] - src[xx] | |
1397 "imull %%ecx, %%esi \n\t" //(src[xx+1] - src[xx])*2*xalpha | |
1398 "shll $16, %%edi \n\t" | |
1399 "addl %%edi, %%esi \n\t" //src[xx+1]*2*xalpha + src[xx]*(1-2*xalpha) | |
1400 "movl %1, %%edi \n\t" | |
1401 "shrl $9, %%esi \n\t" | |
1402 "movw %%si, (%%edi, %%eax, 2) \n\t" | |
1403 "addw %4, %%cx \n\t" //2*xalpha += xInc&0xFF | |
1404 "adcl %3, %%ebx \n\t" //xx+= xInc>>8 + carry | |
1405 | |
1406 "movzbl (%0, %%ebx), %%edi \n\t" //src[xx] | |
1407 "movzbl 1(%0, %%ebx), %%esi \n\t" //src[xx+1] | |
1408 "subl %%edi, %%esi \n\t" //src[xx+1] - src[xx] | |
1409 "imull %%ecx, %%esi \n\t" //(src[xx+1] - src[xx])*2*xalpha | |
1410 "shll $16, %%edi \n\t" | |
1411 "addl %%edi, %%esi \n\t" //src[xx+1]*2*xalpha + src[xx]*(1-2*xalpha) | |
1412 "movl %1, %%edi \n\t" | |
1413 "shrl $9, %%esi \n\t" | |
1414 "movw %%si, 2(%%edi, %%eax, 2) \n\t" | |
1415 "addw %4, %%cx \n\t" //2*xalpha += xInc&0xFF | |
1416 "adcl %3, %%ebx \n\t" //xx+= xInc>>8 + carry | |
1417 | |
1418 | |
1419 "addl $2, %%eax \n\t" | |
1420 "cmpl %2, %%eax \n\t" | |
1421 " jb 1b \n\t" | |
1422 | |
1423 | |
1424 :: "r" (src), "m" (dst), "m" (dstWidth), "m" (xInc>>16), "m" (xInc&0xFFFF) | |
1425 : "%eax", "%ebx", "%ecx", "%edi", "%esi" | |
1426 ); | |
1427 #ifdef HAVE_MMX2 | |
1428 } //if MMX2 cant be used | |
1429 #endif | |
1430 #else | |
2671 | 1431 int i; |
1432 unsigned int xpos=0; | |
1433 for(i=0;i<dstWidth;i++) | |
1434 { | |
1435 register unsigned int xx=xpos>>16; | |
1436 register unsigned int xalpha=(xpos&0xFFFF)>>9; | |
1437 dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha; | |
1438 xpos+=xInc; | |
1439 } | |
2469 | 1440 #endif |
3272 | 1441 } |
2469 | 1442 } |
1443 | |
3126 | 1444 inline static void RENAME(hcscale)(uint16_t *dst, int dstWidth, |
3215 | 1445 uint8_t *src1, uint8_t *src2, int srcW, int xInc) |
2469 | 1446 { |
3272 | 1447 if(sws_flags != SWS_FAST_BILINEAR) |
1448 { | |
1449 RENAME(hScale)(dst , dstWidth, src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize); | |
1450 RENAME(hScale)(dst+2048, dstWidth, src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize); | |
1451 } | |
1452 else // Fast Bilinear upscale / crap downscale | |
1453 { | |
2469 | 1454 #ifdef ARCH_X86 |
1455 #ifdef HAVE_MMX2 | |
2671 | 1456 int i; |
2469 | 1457 if(canMMX2BeUsed) |
1458 { | |
1459 asm volatile( | |
1460 "pxor %%mm7, %%mm7 \n\t" | |
1461 "pxor %%mm2, %%mm2 \n\t" // 2*xalpha | |
1462 "movd %5, %%mm6 \n\t" // xInc&0xFFFF | |
1463 "punpcklwd %%mm6, %%mm6 \n\t" | |
1464 "punpcklwd %%mm6, %%mm6 \n\t" | |
1465 "movq %%mm6, %%mm2 \n\t" | |
1466 "psllq $16, %%mm2 \n\t" | |
1467 "paddw %%mm6, %%mm2 \n\t" | |
1468 "psllq $16, %%mm2 \n\t" | |
1469 "paddw %%mm6, %%mm2 \n\t" | |
1470 "psllq $16, %%mm2 \n\t" //0,t,2t,3t t=xInc&0xFFFF | |
1471 "movq %%mm2, temp0 \n\t" | |
1472 "movd %4, %%mm6 \n\t" //(xInc*4)&0xFFFF | |
1473 "punpcklwd %%mm6, %%mm6 \n\t" | |
1474 "punpcklwd %%mm6, %%mm6 \n\t" | |
1475 "xorl %%eax, %%eax \n\t" // i | |
1476 "movl %0, %%esi \n\t" // src | |
1477 "movl %1, %%edi \n\t" // buf1 | |
1478 "movl %3, %%edx \n\t" // (xInc*4)>>16 | |
1479 "xorl %%ecx, %%ecx \n\t" | |
1480 "xorl %%ebx, %%ebx \n\t" | |
1481 "movw %4, %%bx \n\t" // (xInc*4)&0xFFFF | |
1482 | |
1483 #define FUNNYUVCODE \ | |
2520 | 1484 PREFETCH" 1024(%%esi) \n\t"\ |
1485 PREFETCH" 1056(%%esi) \n\t"\ | |
1486 PREFETCH" 1088(%%esi) \n\t"\ | |
2469 | 1487 "call funnyUVCode \n\t"\ |
1488 "movq temp0, %%mm2 \n\t"\ | |
1489 "xorl %%ecx, %%ecx \n\t" | |
1490 | |
1491 FUNNYUVCODE | |
1492 FUNNYUVCODE | |
1493 FUNNYUVCODE | |
1494 FUNNYUVCODE | |
1495 | |
1496 FUNNYUVCODE | |
1497 FUNNYUVCODE | |
1498 FUNNYUVCODE | |
1499 FUNNYUVCODE | |
1500 "xorl %%eax, %%eax \n\t" // i | |
1501 "movl %6, %%esi \n\t" // src | |
1502 "movl %1, %%edi \n\t" // buf1 | |
1503 "addl $4096, %%edi \n\t" | |
1504 | |
1505 FUNNYUVCODE | |
1506 FUNNYUVCODE | |
1507 FUNNYUVCODE | |
1508 FUNNYUVCODE | |
1509 | |
1510 FUNNYUVCODE | |
1511 FUNNYUVCODE | |
1512 FUNNYUVCODE | |
1513 FUNNYUVCODE | |
1514 | |
1515 :: "m" (src1), "m" (dst), "m" (dstWidth), "m" ((xInc*4)>>16), | |
1516 "m" ((xInc*4)&0xFFFF), "m" (xInc&0xFFFF), "m" (src2) | |
1517 : "%eax", "%ebx", "%ecx", "%edx", "%esi", "%edi" | |
1518 ); | |
3215 | 1519 for(i=dstWidth-1; (i*xInc)>>16 >=srcW/2-1; i--) |
2469 | 1520 { |
3215 | 1521 dst[i] = src1[srcW/2-1]*128; |
1522 dst[i+2048] = src2[srcW/2-1]*128; | |
2469 | 1523 } |
1524 } | |
1525 else | |
1526 { | |
1527 #endif | |
1528 asm volatile( | |
1529 "xorl %%eax, %%eax \n\t" // i | |
1530 "xorl %%ebx, %%ebx \n\t" // xx | |
1531 "xorl %%ecx, %%ecx \n\t" // 2*xalpha | |
2800
7847d6b7ad3d
.balign or weĦll align by 64kb on some architectures
michael
parents:
2799
diff
changeset
|
1532 ".balign 16 \n\t" |
2469 | 1533 "1: \n\t" |
1534 "movl %0, %%esi \n\t" | |
1535 "movzbl (%%esi, %%ebx), %%edi \n\t" //src[xx] | |
1536 "movzbl 1(%%esi, %%ebx), %%esi \n\t" //src[xx+1] | |
1537 "subl %%edi, %%esi \n\t" //src[xx+1] - src[xx] | |
1538 "imull %%ecx, %%esi \n\t" //(src[xx+1] - src[xx])*2*xalpha | |
1539 "shll $16, %%edi \n\t" | |
1540 "addl %%edi, %%esi \n\t" //src[xx+1]*2*xalpha + src[xx]*(1-2*xalpha) | |
1541 "movl %1, %%edi \n\t" | |
1542 "shrl $9, %%esi \n\t" | |
1543 "movw %%si, (%%edi, %%eax, 2) \n\t" | |
1544 | |
1545 "movzbl (%5, %%ebx), %%edi \n\t" //src[xx] | |
1546 "movzbl 1(%5, %%ebx), %%esi \n\t" //src[xx+1] | |
1547 "subl %%edi, %%esi \n\t" //src[xx+1] - src[xx] | |
1548 "imull %%ecx, %%esi \n\t" //(src[xx+1] - src[xx])*2*xalpha | |
1549 "shll $16, %%edi \n\t" | |
1550 "addl %%edi, %%esi \n\t" //src[xx+1]*2*xalpha + src[xx]*(1-2*xalpha) | |
1551 "movl %1, %%edi \n\t" | |
1552 "shrl $9, %%esi \n\t" | |
1553 "movw %%si, 4096(%%edi, %%eax, 2)\n\t" | |
1554 | |
1555 "addw %4, %%cx \n\t" //2*xalpha += xInc&0xFF | |
1556 "adcl %3, %%ebx \n\t" //xx+= xInc>>8 + carry | |
1557 "addl $1, %%eax \n\t" | |
1558 "cmpl %2, %%eax \n\t" | |
1559 " jb 1b \n\t" | |
1560 | |
1561 :: "m" (src1), "m" (dst), "m" (dstWidth), "m" (xInc>>16), "m" (xInc&0xFFFF), | |
1562 "r" (src2) | |
1563 : "%eax", "%ebx", "%ecx", "%edi", "%esi" | |
1564 ); | |
1565 #ifdef HAVE_MMX2 | |
1566 } //if MMX2 cant be used | |
1567 #endif | |
1568 #else | |
2671 | 1569 int i; |
1570 unsigned int xpos=0; | |
1571 for(i=0;i<dstWidth;i++) | |
1572 { | |
1573 register unsigned int xx=xpos>>16; | |
1574 register unsigned int xalpha=(xpos&0xFFFF)>>9; | |
1575 dst[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha); | |
1576 dst[i+2048]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha); | |
2566 | 1577 /* slower |
1578 dst[i]= (src1[xx]<<7) + (src1[xx+1] - src1[xx])*xalpha; | |
1579 dst[i+2048]=(src2[xx]<<7) + (src2[xx+1] - src2[xx])*xalpha; | |
1580 */ | |
2671 | 1581 xpos+=xInc; |
1582 } | |
2469 | 1583 #endif |
3272 | 1584 } |
1585 } | |
1586 | |
1587 static void inline RENAME(initFilter)(int16_t *filter, int16_t *filterPos, int *filterSize, int xInc, | |
1588 int srcW, int dstW) | |
1589 { | |
1590 int i; | |
1591 #ifdef HAVE_MMX | |
1592 asm volatile("emms\n\t"::: "memory"); //FIXME this shouldnt be required but it IS | |
1593 #endif | |
1594 | |
1595 if(xInc <= (1<<16)) // upscale / cubic interpolate | |
1596 { | |
1597 int i; | |
1598 int xDstInSrc; | |
1599 if(sws_flags==SWS_BICUBIC) *filterSize= 4; | |
1600 else *filterSize= 2; | |
1601 // printf("%d %d %d\n", filterSize, srcW, dstW); | |
1602 #ifdef HAVE_MMX | |
1603 *filterSize= (*filterSize +3) & (~3); // -> *filterSize %4 == 0 | |
1604 #endif | |
1605 xDstInSrc= xInc - 0x8000; | |
1606 for(i=0; i<dstW; i++) | |
1607 { | |
1608 int xx= (xDstInSrc>>16) - (*filterSize>>1) + 1; | |
1609 int j; | |
1610 | |
1611 filterPos[i]= xx; | |
1612 if(sws_flags == SWS_BICUBIC) | |
1613 { | |
1614 double d= ABS(((xx+1)<<16) - xDstInSrc)/(double)(1<<16); | |
1615 // int coeff; | |
1616 int y1,y2,y3,y4; | |
1617 double A= -0.75; | |
1618 // Equation is from VirtualDub | |
1619 y1 = (int)floor(0.5 + ( + A*d - 2.0*A*d*d + A*d*d*d) * 16384.0); | |
1620 y2 = (int)floor(0.5 + (+ 1.0 - (A+3.0)*d*d + (A+2.0)*d*d*d) * 16384.0); | |
1621 y3 = (int)floor(0.5 + ( - A*d + (2.0*A+3.0)*d*d - (A+2.0)*d*d*d) * 16384.0); | |
1622 y4 = (int)floor(0.5 + ( + A*d*d - A*d*d*d) * 16384.0); | |
1623 | |
1624 // printf("%d %d %d \n", coeff, (int)d, xDstInSrc); | |
1625 filter[i*(*filterSize) + 0]= y1; | |
1626 filter[i*(*filterSize) + 1]= y2; | |
1627 filter[i*(*filterSize) + 2]= y3; | |
1628 filter[i*(*filterSize) + 3]= y4; | |
1629 // printf("%1.3f %d, %d, %d, %d\n",d , y1, y2, y3, y4); | |
1630 } | |
1631 else | |
1632 { | |
1633 for(j=0; j<*filterSize; j++) | |
1634 { | |
1635 double d= ABS((xx<<16) - xDstInSrc)/(double)(1<<16); | |
1636 int coeff; | |
1637 coeff= (int)(0.5 + (1.0 - d)*(1<<14)); | |
1638 if(coeff<0) coeff=0; | |
1639 // printf("%d %d %d \n", coeff, (int)d, xDstInSrc); | |
1640 filter[i*(*filterSize) + j]= coeff; | |
1641 xx++; | |
1642 } | |
1643 } | |
1644 xDstInSrc+= xInc; | |
1645 } | |
1646 } | |
1647 else // downscale | |
1648 { | |
1649 int xDstInSrc; | |
1650 if(sws_flags==SWS_BICUBIC) *filterSize= (int)ceil(1 + 4.0*srcW / (double)dstW); | |
1651 else *filterSize= (int)ceil(1 + 2.0*srcW / (double)dstW); | |
1652 // printf("%d %d %d\n", *filterSize, srcW, dstW); | |
1653 #ifdef HAVE_MMX | |
1654 *filterSize= (*filterSize +3) & (~3); // -> *filterSize %4 == 0 | |
1655 #endif | |
1656 xDstInSrc= xInc - 0x8000; | |
1657 for(i=0; i<dstW; i++) | |
1658 { | |
1659 int xx= (int)((double)xDstInSrc/(double)(1<<16) - *filterSize*0.5 + 0.5); | |
1660 int j; | |
1661 | |
1662 filterPos[i]= xx; | |
1663 for(j=0; j<*filterSize; j++) | |
1664 { | |
1665 double d= ABS((xx<<16) - xDstInSrc)/(double)xInc; | |
1666 int coeff; | |
1667 if(sws_flags == SWS_BICUBIC) | |
1668 { | |
1669 double A= -0.75; | |
1670 // d*=2; | |
1671 // Equation is from VirtualDub | |
1672 if(d<1.0) | |
1673 coeff = (int)floor(0.5 + (1.0 - (A+3.0)*d*d | |
1674 + (A+2.0)*d*d*d) * (1<<14)); | |
1675 else if(d<2.0) | |
1676 coeff = (int)floor(0.5 + (-4.0*A + 8.0*A*d | |
1677 - 5.0*A*d*d + A*d*d*d) * (1<<14)); | |
1678 else | |
1679 coeff=0; | |
1680 } | |
1681 else | |
1682 { | |
1683 coeff= (int)(0.5 + (1.0 - d)*(1<<14)); | |
1684 if(coeff<0) coeff=0; | |
1685 } | |
1686 // printf("%d %d %d \n", coeff, (int)d, xDstInSrc); | |
1687 filter[i*(*filterSize) + j]= coeff; | |
1688 xx++; | |
1689 } | |
1690 xDstInSrc+= xInc; | |
1691 } | |
1692 } | |
1693 | |
1694 //fix borders | |
1695 for(i=0; i<dstW; i++) | |
1696 { | |
1697 int j; | |
1698 if(filterPos[i] < 0) | |
1699 { | |
1700 // Move filter coeffs left to compensate for filterPos | |
1701 for(j=1; j<*filterSize; j++) | |
1702 { | |
1703 int left= MAX(j + filterPos[i], 0); | |
1704 filter[i*(*filterSize) + left] += filter[i*(*filterSize) + j]; | |
1705 filter[i*(*filterSize) + j]=0; | |
1706 } | |
1707 filterPos[i]= 0; | |
1708 } | |
1709 | |
1710 if(filterPos[i] + *filterSize > srcW) | |
1711 { | |
1712 int shift= filterPos[i] + *filterSize - srcW; | |
1713 // Move filter coeffs right to compensate for filterPos | |
1714 for(j=*filterSize-2; j>=0; j--) | |
1715 { | |
1716 int right= MIN(j + shift, *filterSize-1); | |
1717 filter[i*(*filterSize) +right] += filter[i*(*filterSize) +j]; | |
1718 filter[i*(*filterSize) +j]=0; | |
1719 } | |
1720 filterPos[i]= srcW - *filterSize; | |
1721 } | |
1722 } | |
1723 | |
1724 //Normalize | |
1725 for(i=0; i<dstW; i++) | |
1726 { | |
1727 int j; | |
1728 double sum=0; | |
1729 double scale=1<<14; | |
1730 for(j=0; j<*filterSize; j++) | |
1731 { | |
1732 sum+= filter[i*(*filterSize) + j]; | |
1733 } | |
1734 scale/= sum; | |
1735 for(j=0; j<*filterSize; j++) | |
1736 { | |
1737 filter[i*(*filterSize) + j]= (int)(filter[i*(*filterSize) + j]*scale); | |
1738 } | |
1739 } | |
2469 | 1740 } |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1741 |
3209 | 1742 static void RENAME(SwScale_YV12slice)(unsigned char* srcptr[],int stride[], int srcSliceY , |
1743 int srcSliceH, uint8_t* dstptr[], int dststride, int dstbpp, | |
1744 int srcW, int srcH, int dstW, int dstH){ | |
2216 | 1745 |
1746 | |
2264
7851375ea156
increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents:
2237
diff
changeset
|
1747 unsigned int s_xinc2; |
3209 | 1748 //FIXME do we need the +-2 stuff? |
1749 unsigned int s_xinc= (srcW << 16) / dstW - 2; | |
1750 unsigned int s_yinc= (srcH << 16) / dstH + 2; | |
3215 | 1751 |
3209 | 1752 static int lumDstYInSrc; // points to the dst Pixels center in the source (0 is the center of pixel 0,0 in src) |
1753 static int dstY; | |
2216 | 1754 |
2264
7851375ea156
increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents:
2237
diff
changeset
|
1755 // last horizontally interpolated lines, used to avoid unnecessary calculations |
3209 | 1756 static int lastLumSrcY; |
1757 static int lastChrSrcY; | |
2264
7851375ea156
increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents:
2237
diff
changeset
|
1758 |
3272 | 1759 static int oldDstW= -1; |
1760 static int oldSrcW= -1; | |
2232
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
1761 |
2680 | 1762 int dstUVw; |
2638
d4211422b6cc
right green line bugfix for width not %8==0 (untested -vo vesa doesnt work)
michael
parents:
2585
diff
changeset
|
1763 int i; |
2216 | 1764 |
3209 | 1765 if(((dstW + 7)&(~7)) >= dststride) dstW&= ~7; |
2680 | 1766 |
3209 | 1767 dstUVw= fullUVIpol ? dstW : dstW/2; |
2680 | 1768 |
3215 | 1769 //printf("%d %d %d %d\n", srcW, srcH, dstW, dstH); |
1770 //printf("%d %d %d %d\n", s_xinc, s_yinc, srcSliceY, srcSliceH); | |
1771 | |
2270 | 1772 #ifdef HAVE_MMX2 |
3215 | 1773 canMMX2BeUsed= (s_xinc <= 0x10000 && (dstW&31)==0 && (srcW&15)==0) ? 1 : 0; |
2270 | 1774 #endif |
1775 | |
2279
9b9c3363abbe
horizontal scaling bugs fixed, should be mostly bugfree now
michael
parents:
2274
diff
changeset
|
1776 // match pixel 0 of the src to pixel 0 of dst and match pixel n-2 of src to pixel n-2 of dst |
9b9c3363abbe
horizontal scaling bugs fixed, should be mostly bugfree now
michael
parents:
2274
diff
changeset
|
1777 // n-2 is the last chrominance sample available |
9b9c3363abbe
horizontal scaling bugs fixed, should be mostly bugfree now
michael
parents:
2274
diff
changeset
|
1778 // FIXME this is not perfect, but no one should notice the difference, the more correct variant |
9b9c3363abbe
horizontal scaling bugs fixed, should be mostly bugfree now
michael
parents:
2274
diff
changeset
|
1779 // would be like the vertical one, but that would require some special code for the |
9b9c3363abbe
horizontal scaling bugs fixed, should be mostly bugfree now
michael
parents:
2274
diff
changeset
|
1780 // first and last pixel |
9b9c3363abbe
horizontal scaling bugs fixed, should be mostly bugfree now
michael
parents:
2274
diff
changeset
|
1781 if(canMMX2BeUsed) s_xinc+= 20; |
3215 | 1782 else s_xinc = ((srcW-2)<<16)/(dstW-2) - 20; |
2279
9b9c3363abbe
horizontal scaling bugs fixed, should be mostly bugfree now
michael
parents:
2274
diff
changeset
|
1783 |
2566 | 1784 if(fullUVIpol && !(dstbpp==12)) s_xinc2= s_xinc>>1; |
1785 else s_xinc2= s_xinc; | |
2271 | 1786 // force calculation of the horizontal interpolation of the first line |
1787 | |
3209 | 1788 if(srcSliceY ==0){ |
3215 | 1789 // printf("dstW %d, srcw %d, mmx2 %d\n", dstW, srcW, canMMX2BeUsed); |
3209 | 1790 lastLumSrcY=-99; |
1791 lastChrSrcY=-99; | |
1792 lumDstYInSrc= s_yinc/2 - 0x8000; | |
1793 dstY=0; | |
2638
d4211422b6cc
right green line bugfix for width not %8==0 (untested -vo vesa doesnt work)
michael
parents:
2585
diff
changeset
|
1794 |
d4211422b6cc
right green line bugfix for width not %8==0 (untested -vo vesa doesnt work)
michael
parents:
2585
diff
changeset
|
1795 // clean the buffers so that no green stuff is drawn if the width is not sane (%8=0) |
3209 | 1796 for(i=dstW-2; i<dstW+20; i++) |
2638
d4211422b6cc
right green line bugfix for width not %8==0 (untested -vo vesa doesnt work)
michael
parents:
2585
diff
changeset
|
1797 { |
d4211422b6cc
right green line bugfix for width not %8==0 (untested -vo vesa doesnt work)
michael
parents:
2585
diff
changeset
|
1798 pix_buf_uv[0][i] = pix_buf_uv[1][i] |
2680 | 1799 = pix_buf_uv[0][2048+i] = pix_buf_uv[1][2048+i] = 128*128; |
2638
d4211422b6cc
right green line bugfix for width not %8==0 (untested -vo vesa doesnt work)
michael
parents:
2585
diff
changeset
|
1800 pix_buf_uv[0][i/2] = pix_buf_uv[1][i/2] |
2680 | 1801 = pix_buf_uv[0][2048+i/2] = pix_buf_uv[1][2048+i/2] = 128*128; |
2638
d4211422b6cc
right green line bugfix for width not %8==0 (untested -vo vesa doesnt work)
michael
parents:
2585
diff
changeset
|
1802 pix_buf_y[0][i]= pix_buf_y[1][i]= 0; |
d4211422b6cc
right green line bugfix for width not %8==0 (untested -vo vesa doesnt work)
michael
parents:
2585
diff
changeset
|
1803 } |
3272 | 1804 |
1805 //precalculate horizontal scaler filter coefficients | |
1806 if(oldDstW!=dstW || oldSrcW!=srcW) | |
1807 { | |
1808 // int i; | |
1809 oldDstW= dstW; oldSrcW= srcW; | |
2638
d4211422b6cc
right green line bugfix for width not %8==0 (untested -vo vesa doesnt work)
michael
parents:
2585
diff
changeset
|
1810 |
3272 | 1811 RENAME(initFilter)(hLumFilter, hLumFilterPos, &hLumFilterSize, s_xinc, srcW, dstW); |
1812 RENAME(initFilter)(hChrFilter, hChrFilterPos, &hChrFilterSize, s_xinc2, srcW, dstW); | |
1813 | |
2232
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
1814 #ifdef HAVE_MMX2 |
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
1815 // cant downscale !!! |
3272 | 1816 if(canMMX2BeUsed) |
2232
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
1817 { |
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
1818 uint8_t *fragment; |
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
1819 int imm8OfPShufW1; |
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
1820 int imm8OfPShufW2; |
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
1821 int fragmentLength; |
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
1822 |
2671 | 1823 int xpos, i; |
2232
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
1824 |
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
1825 // create an optimized horizontal scaling routine |
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
1826 |
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
1827 //code fragment |
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
1828 |
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
1829 asm volatile( |
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
1830 "jmp 9f \n\t" |
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
1831 // Begin |
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
1832 "0: \n\t" |
2264
7851375ea156
increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents:
2237
diff
changeset
|
1833 "movq (%%esi), %%mm0 \n\t" //FIXME Alignment |
2232
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
1834 "movq %%mm0, %%mm1 \n\t" |
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
1835 "psrlq $8, %%mm0 \n\t" |
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
1836 "punpcklbw %%mm7, %%mm1 \n\t" |
2264
7851375ea156
increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents:
2237
diff
changeset
|
1837 "movq %%mm2, %%mm3 \n\t" |
2232
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
1838 "punpcklbw %%mm7, %%mm0 \n\t" |
2264
7851375ea156
increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents:
2237
diff
changeset
|
1839 "addw %%bx, %%cx \n\t" //2*xalpha += (4*s_xinc)&0xFFFF |
2232
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
1840 "pshufw $0xFF, %%mm1, %%mm1 \n\t" |
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
1841 "1: \n\t" |
2264
7851375ea156
increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents:
2237
diff
changeset
|
1842 "adcl %%edx, %%esi \n\t" //xx+= (4*s_xinc)>>16 + carry |
2232
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
1843 "pshufw $0xFF, %%mm0, %%mm0 \n\t" |
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
1844 "2: \n\t" |
2264
7851375ea156
increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents:
2237
diff
changeset
|
1845 "psrlw $9, %%mm3 \n\t" |
2232
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
1846 "psubw %%mm1, %%mm0 \n\t" |
2264
7851375ea156
increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents:
2237
diff
changeset
|
1847 "pmullw %%mm3, %%mm0 \n\t" |
7851375ea156
increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents:
2237
diff
changeset
|
1848 "paddw %%mm6, %%mm2 \n\t" // 2*alpha += xpos&0xFFFF |
2232
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
1849 "psllw $7, %%mm1 \n\t" |
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
1850 "paddw %%mm1, %%mm0 \n\t" |
2264
7851375ea156
increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents:
2237
diff
changeset
|
1851 |
2232
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
1852 "movq %%mm0, (%%edi, %%eax) \n\t" |
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
1853 |
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
1854 "addl $8, %%eax \n\t" |
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
1855 // End |
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
1856 "9: \n\t" |
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
1857 // "int $3\n\t" |
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
1858 "leal 0b, %0 \n\t" |
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
1859 "leal 1b, %1 \n\t" |
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
1860 "leal 2b, %2 \n\t" |
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
1861 "decl %1 \n\t" |
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
1862 "decl %2 \n\t" |
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
1863 "subl %0, %1 \n\t" |
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
1864 "subl %0, %2 \n\t" |
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
1865 "leal 9b, %3 \n\t" |
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
1866 "subl %0, %3 \n\t" |
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
1867 :"=r" (fragment), "=r" (imm8OfPShufW1), "=r" (imm8OfPShufW2), |
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
1868 "=r" (fragmentLength) |
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
1869 ); |
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
1870 |
2279
9b9c3363abbe
horizontal scaling bugs fixed, should be mostly bugfree now
michael
parents:
2274
diff
changeset
|
1871 xpos= 0; //s_xinc/2 - 0x8000; // difference between pixel centers |
2264
7851375ea156
increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents:
2237
diff
changeset
|
1872 |
3209 | 1873 for(i=0; i<dstW/8; i++) |
2232
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
1874 { |
2264
7851375ea156
increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents:
2237
diff
changeset
|
1875 int xx=xpos>>16; |
2232
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
1876 |
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
1877 if((i&3) == 0) |
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
1878 { |
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
1879 int a=0; |
2264
7851375ea156
increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents:
2237
diff
changeset
|
1880 int b=((xpos+s_xinc)>>16) - xx; |
7851375ea156
increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents:
2237
diff
changeset
|
1881 int c=((xpos+s_xinc*2)>>16) - xx; |
7851375ea156
increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents:
2237
diff
changeset
|
1882 int d=((xpos+s_xinc*3)>>16) - xx; |
2232
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
1883 |
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
1884 memcpy(funnyYCode + fragmentLength*i/4, fragment, fragmentLength); |
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
1885 |
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
1886 funnyYCode[fragmentLength*i/4 + imm8OfPShufW1]= |
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
1887 funnyYCode[fragmentLength*i/4 + imm8OfPShufW2]= |
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
1888 a | (b<<2) | (c<<4) | (d<<6); |
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
1889 |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1890 // if we don't need to read 8 bytes then don't :), reduces the chance of |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1891 // crossing a cache line |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1892 if(d<3) funnyYCode[fragmentLength*i/4 + 1]= 0x6E; |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1893 |
2232
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
1894 funnyYCode[fragmentLength*(i+4)/4]= RET; |
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
1895 } |
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
1896 xpos+=s_xinc; |
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
1897 } |
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
1898 |
2279
9b9c3363abbe
horizontal scaling bugs fixed, should be mostly bugfree now
michael
parents:
2274
diff
changeset
|
1899 xpos= 0; //s_xinc2/2 - 0x10000; // difference between centers of chrom samples |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1900 for(i=0; i<dstUVw/8; i++) |
2232
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
1901 { |
2264
7851375ea156
increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents:
2237
diff
changeset
|
1902 int xx=xpos>>16; |
2232
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
1903 |
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
1904 if((i&3) == 0) |
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
1905 { |
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
1906 int a=0; |
2264
7851375ea156
increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents:
2237
diff
changeset
|
1907 int b=((xpos+s_xinc2)>>16) - xx; |
7851375ea156
increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents:
2237
diff
changeset
|
1908 int c=((xpos+s_xinc2*2)>>16) - xx; |
7851375ea156
increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents:
2237
diff
changeset
|
1909 int d=((xpos+s_xinc2*3)>>16) - xx; |
2232
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
1910 |
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
1911 memcpy(funnyUVCode + fragmentLength*i/4, fragment, fragmentLength); |
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
1912 |
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
1913 funnyUVCode[fragmentLength*i/4 + imm8OfPShufW1]= |
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
1914 funnyUVCode[fragmentLength*i/4 + imm8OfPShufW2]= |
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
1915 a | (b<<2) | (c<<4) | (d<<6); |
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
1916 |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1917 // if we don't need to read 8 bytes then don't :), reduces the chance of |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1918 // crossing a cache line |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1919 if(d<3) funnyUVCode[fragmentLength*i/4 + 1]= 0x6E; |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1920 |
2232
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
1921 funnyUVCode[fragmentLength*(i+4)/4]= RET; |
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
1922 } |
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
1923 xpos+=s_xinc2; |
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
1924 } |
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
1925 // funnyCode[0]= RET; |
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
1926 } |
2264
7851375ea156
increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents:
2237
diff
changeset
|
1927 |
7851375ea156
increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents:
2237
diff
changeset
|
1928 #endif // HAVE_MMX2 |
3272 | 1929 } // Init stuff |
2216 | 1930 } // reset counters |
2232
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
1931 |
2216 | 1932 while(1){ |
3209 | 1933 unsigned char *dest =dstptr[0]+dststride*dstY; |
1934 unsigned char *uDest=dstptr[1]+(dststride>>1)*(dstY>>1); | |
1935 unsigned char *vDest=dstptr[2]+(dststride>>1)*(dstY>>1); | |
2519 | 1936 |
3209 | 1937 int lumSrcY=(lumDstYInSrc + 0xFFFF)>>16; // first luminance source line number below the dst line |
2264
7851375ea156
increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents:
2237
diff
changeset
|
1938 // points to the dst Pixels center in the source (0 is the center of pixel 0,0 in src) |
3209 | 1939 int chrDstYInSrc= dstbpp==12 ? lumDstYInSrc + s_yinc/2 - 0x8000 : |
1940 lumDstYInSrc - 0x8000; | |
1941 int chrSrcY=(chrDstYInSrc + 0x1FFFF)>>17; // first chrominance source line number below the dst line | |
1942 int yalpha= ((lumDstYInSrc-1)&0xFFFF )>>4; | |
1943 int uvalpha=((chrDstYInSrc-1)&0x1FFFF)>>5; | |
1944 uint16_t *buf0=pix_buf_y[ lumSrcY &1]; // top line of the interpolated slice | |
1945 uint16_t *buf1=pix_buf_y[(lumSrcY+1)&1]; // bottom line of the interpolated slice | |
1946 uint16_t *uvbuf0=pix_buf_uv[ chrSrcY &1]; // top line of the interpolated slice | |
1947 uint16_t *uvbuf1=pix_buf_uv[(chrSrcY+1)&1]; // bottom line of the interpolated slice | |
2216 | 1948 |
3215 | 1949 if(lumSrcY>=srcSliceY + srcSliceH && srcSliceY + srcSliceH < srcH) break; |
3209 | 1950 if(dstY >= dstH) break; |
3215 | 1951 |
3209 | 1952 // printf("lumSrcY:%d, dstY:%d, yalpha:%d\n", lumSrcY, dstY, yalpha*100/0x1000); |
2216 | 1953 |
3209 | 1954 if((dstY&1) && dstbpp==12) uvalpha=-1; |
1955 | |
1956 dstY++; lumDstYInSrc+=s_yinc; | |
2216 | 1957 |
2264
7851375ea156
increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents:
2237
diff
changeset
|
1958 //only interpolate the src line horizontally if we didn't do it already |
3209 | 1959 if(lastLumSrcY!=lumSrcY) |
2264
7851375ea156
increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents:
2237
diff
changeset
|
1960 { |
2469 | 1961 unsigned char *src; |
3215 | 1962 |
2469 | 1963 // skip if first line has been horiz scaled already
3209 | 1964 if(lastLumSrcY != lumSrcY-1) |
2469 | 1965 { |
1966 // check if first line is before any available src lines | |
3209 | 1967 if(lumSrcY-1 < srcSliceY ) src=srcptr[0]+(0 )*stride[0]; |
1968 else src=srcptr[0]+(lumSrcY-srcSliceY -1)*stride[0]; | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1969 |
3215 | 1970 RENAME(hyscale)(buf0, dstW, src, srcW, s_xinc); |
2469 | 1971 } |
1972 // check if second line is after any available src lines | |
3209 | 1973 if(lumSrcY-srcSliceY >= srcSliceH) src=srcptr[0]+(srcSliceH-1 )*stride[0]; |
1974 else src=srcptr[0]+(lumSrcY-srcSliceY )*stride[0]; | |
2469 | 1975 |
1976 // the min() is required to avoid reusing lines which were not available | |
3209 | 1977 lastLumSrcY= MIN(lumSrcY, srcSliceY +srcSliceH-1); |
3215 | 1978 RENAME(hyscale)(buf1, dstW, src, srcW, s_xinc); |
2264
7851375ea156
increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents:
2237
diff
changeset
|
1979 } |
3209 | 1980 // printf("%d %d %d %d\n", y, chrSrcY, lastChrSrcY, h); |
2216 | 1981 // *** horizontal scale U and V lines to temp buffer |
3209 | 1982 if(lastChrSrcY!=chrSrcY) |
2264
7851375ea156
increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents:
2237
diff
changeset
|
1983 { |
2469 | 1984 uint8_t *src1, *src2; |
1985 // skip if first line has been horiz scaled already | |
3209 | 1986 if(lastChrSrcY != chrSrcY-1) |
2469 | 1987 { |
1988 // check if first line is before any available src lines | |
3209 | 1989 if(chrSrcY-srcSliceY /2-1 < 0) |
2469 | 1990 { |
1991 src1= srcptr[1]+(0)*stride[1]; | |
1992 src2= srcptr[2]+(0)*stride[2]; | |
1993 }else{ | |
3209 | 1994 src1= srcptr[1]+(chrSrcY-srcSliceY /2-1)*stride[1]; |
1995 src2= srcptr[2]+(chrSrcY-srcSliceY /2-1)*stride[2]; | |
2469 | 1996 } |
3215 | 1997 RENAME(hcscale)(uvbuf0, dstUVw, src1, src2, srcW, s_xinc2); |
2469 | 1998 } |
2264
7851375ea156
increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents:
2237
diff
changeset
|
1999 |
2469 | 2000 // check if second line is after any available src lines |
3209 | 2001 if(chrSrcY - srcSliceY /2 >= srcSliceH/2) |
2279
9b9c3363abbe
horizontal scaling bugs fixed, should be mostly bugfree now
michael
parents:
2274
diff
changeset
|
2002 { |
3209 | 2003 src1= srcptr[1]+(srcSliceH/2-1)*stride[1]; |
2004 src2= srcptr[2]+(srcSliceH/2-1)*stride[2]; | |
2469 | 2005 }else{ |
3209 | 2006 src1= srcptr[1]+(chrSrcY-srcSliceY /2)*stride[1]; |
2007 src2= srcptr[2]+(chrSrcY-srcSliceY /2)*stride[2]; | |
2279
9b9c3363abbe
horizontal scaling bugs fixed, should be mostly bugfree now
michael
parents:
2274
diff
changeset
|
2008 } |
3215 | 2009 RENAME(hcscale)(uvbuf1, dstUVw, src1, src2, srcW, s_xinc2); |
2232
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
2010 |
2469 | 2011 // the min() is required to avoid reusing lines which were not available
3209 | 2012 lastChrSrcY= MIN(chrSrcY, srcSliceY /2+srcSliceH/2-1); |
2469 | 2013 } |
2748 | 2014 #ifdef HAVE_MMX |
3209 | 2015 b5Dither= dither8[dstY&1]; |
2016 g6Dither= dither4[dstY&1]; | |
2017 g5Dither= dither8[dstY&1]; | |
2018 r5Dither= dither8[(dstY+1)&1]; | |
2748 | 2019 #endif |
2232
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
2020 |
2519 | 2021 if(dstbpp==12) //YV12 |
3209 | 2022 RENAME(yuv2yuv)(buf0, buf1, uvbuf0, uvbuf1, dest, uDest, vDest, dstW, yalpha, uvalpha); |
2519 | 2023 else if(ABS(s_yinc - 0x10000) < 10) |
3209 | 2024 RENAME(yuv2rgb1)(buf0, buf1, uvbuf0, uvbuf1, dest, dstW, yalpha, uvalpha, dstbpp); |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
2025 else |
3209 | 2026 RENAME(yuv2rgbX)(buf0, buf1, uvbuf0, uvbuf1, dest, dstW, yalpha, uvalpha, dstbpp); |
2216 | 2027 } |
2534
cc9d3fd626f0
patch from Martin Decky <deckm1am@ss1000.ms.mff.cuni.cz> applied and unnecassery "memory" removed
michael
parents:
2521
diff
changeset
|
2028 |
cc9d3fd626f0
patch from Martin Decky <deckm1am@ss1000.ms.mff.cuni.cz> applied and unnecassery "memory" removed
michael
parents:
2521
diff
changeset
|
2029 #ifdef HAVE_MMX |
cc9d3fd626f0
patch from Martin Decky <deckm1am@ss1000.ms.mff.cuni.cz> applied and unnecassery "memory" removed
michael
parents:
2521
diff
changeset
|
2030 __asm __volatile(SFENCE:::"memory"); |
2566 | 2031 __asm __volatile(EMMS:::"memory"); |
2534
cc9d3fd626f0
patch from Martin Decky <deckm1am@ss1000.ms.mff.cuni.cz> applied and unnecassery "memory" removed
michael
parents:
2521
diff
changeset
|
2032 #endif |
2216 | 2033 } |