annotate libvisual/lv_video_mmx.c @ 25:66610fc7de2d trunk

[svn] Remove queue.png from build.list. *grumble*
author nenolod
date Tue, 25 Oct 2005 17:50:25 -0700
parents 0db4a1dc75c4
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
23
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
1 /* Libvisual - The audio visualisation framework.
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
2 *
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
3 * Copyright (C) 2004, 2005 Dennis Smit <ds@nerds-incorporated.org>
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
4 *
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
5 * Authors: Dennis Smit <ds@nerds-incorporated.org>
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
6 * Jean-Christophe Hoelt <jeko@ios-software.com>
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
7 *
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
8 * $Id:
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
9 *
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
10 * This program is free software; you can redistribute it and/or modify
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
11 * it under the terms of the GNU Lesser General Public License as
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
12 * published by the Free Software Foundation; either version 2.1
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
13 * of the License, or (at your option) any later version.
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
14 *
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
15 * This program is distributed in the hope that it will be useful,
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
18 * GNU Lesser General Public License for more details.
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
19 *
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
20 * You should have received a copy of the GNU Lesser General Public License
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
21 * along with this program; if not, write to the Free Software
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
23 */
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
24
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
25 #include <stdio.h>
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
26 #include <stdlib.h>
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
27 #include <unistd.h>
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
28 #include <string.h>
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
29
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
30 #include <lvconfig.h>
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
31 #include "lv_common.h"
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
32 #include "lv_video.h"
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
33
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
34 int _lv_blit_overlay_alpha32_mmx (VisVideo *dest, const VisVideo *src, int x, int y)
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
35 {
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
36 #ifdef VISUAL_ARCH_X86
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
37 uint8_t *destbuf;
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
38 uint8_t *srcbuf;
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
39 int lwidth = (x + src->width);
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
40 int lwidth4;
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
41 int lheight = (y + src->height);
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
42 int ya, xa;
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
43 uint8_t alpha;
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
44
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
45 if (lwidth > dest->width)
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
46 lwidth += dest->width - lwidth;
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
47
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
48 if (lheight > dest->height)
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
49 lheight += dest->height - lheight;
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
50
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
51 destbuf = dest->pixels;
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
52 srcbuf = src->pixels;
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
53
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
54 if (lwidth < 0)
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
55 return VISUAL_OK;
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
56
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
57 lwidth4 = lwidth * 4;
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
58
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
59 /* Reset some regs */
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
60 __asm __volatile
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
61 ("\n\t pxor %%mm6, %%mm6"
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
62 ::: "mm6");
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
63
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
64 destbuf += ((y > 0 ? y : 0) * dest->pitch) + (x > 0 ? x * 4 : 0);
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
65 srcbuf += ((y < 0 ? abs(y) : 0) * src->pitch) + (x < 0 ? abs(x) * 4 : 0);
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
66 for (ya = y > 0 ? y : 0; ya < lheight; ya++) {
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
67 for (xa = x > 0 ? x * 4 : 0; xa < lwidth4; xa += 4) {
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
68 /* pixel = ((alpha * ((src - dest)) / 255) + dest) */
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
69 __asm __volatile
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
70 ("\n\t movd %[spix], %%mm0"
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
71 "\n\t movd %[dpix], %%mm1"
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
72 "\n\t movq %%mm0, %%mm2"
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
73 "\n\t movq %%mm0, %%mm3"
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
74 "\n\t psrlq $24, %%mm2" /* The alpha */
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
75 "\n\t movq %%mm0, %%mm4"
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
76 "\n\t psrld $24, %%mm3"
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
77 "\n\t psrld $24, %%mm4"
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
78 "\n\t psllq $32, %%mm2"
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
79 "\n\t psllq $16, %%mm3"
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
80 "\n\t por %%mm4, %%mm2"
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
81 "\n\t punpcklbw %%mm6, %%mm0" /* interleaving dest */
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
82 "\n\t por %%mm3, %%mm2"
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
83 "\n\t punpcklbw %%mm6, %%mm1" /* interleaving source */
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
84 "\n\t psubsw %%mm1, %%mm0" /* (src - dest) part */
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
85 "\n\t pmullw %%mm2, %%mm0" /* alpha * (src - dest) */
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
86 "\n\t psrlw $8, %%mm0" /* / 256 */
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
87 "\n\t paddb %%mm1, %%mm0" /* + dest */
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
88 "\n\t packuswb %%mm0, %%mm0"
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
89 "\n\t movd %%mm0, %[dest]"
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
90 : [dest] "=m" (*destbuf)
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
91 : [dpix] "m" (*destbuf)
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
92 , [spix] "m" (*srcbuf)
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
93 : "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7");
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
94
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
95 destbuf += 4;
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
96 srcbuf += 4;
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
97 }
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
98
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
99 destbuf += (dest->pitch - ((lwidth - x) * 4)) - (x < 0 ? x * 4 : 0);
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
100 srcbuf += x < 0 ? abs(x) * 4 : 0;
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
101 srcbuf += x + src->width > dest->width ? ((x + (src->pitch / 4)) - dest->width) * 4 : 0;
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
102 }
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
103
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
104 __asm __volatile
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
105 ("\n\t emms");
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
106
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
107 return VISUAL_OK;
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
108 #else /* !VISUAL_ARCH_X86 */
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
109 return VISUAL_ERROR_CPU_INVALID_CODE;
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
110 #endif
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
111 }
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
112
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
113 int _lv_scale_bilinear_32_mmx (VisVideo *dest, const VisVideo *src)
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
114 {
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
115 #ifdef VISUAL_ARCH_X86
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
116 uint32_t y;
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
117 uint32_t u, v, du, dv; /* fixed point 16.16 */
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
118 uint32_t *dest_pixel, *src_pixel_rowu, *src_pixel_rowl;
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
119
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
120 dest_pixel = dest->pixels;
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
121
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
122 du = ((src->width - 1) << 16) / dest->width;
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
123 dv = ((src->height - 1) << 16) / dest->height;
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
124 v = 0;
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
125
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
126 __asm__ __volatile__ ("\n\temms");
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
127
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
128 for (y = dest->height; y--; v += dv) {
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
129 uint32_t x;
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
130 uint32_t fracU, fracV; /* fixed point 28.4 [0,1[ */
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
131
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
132 if (v >> 16 >= src->height - 1)
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
133 v -= 0x10000;
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
134
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
135 src_pixel_rowu = (src->pixel_rows[v >> 16]);
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
136 src_pixel_rowl = (src->pixel_rows[(v >> 16) + 1]);
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
137
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
138 /* fracV = frac(v) = v & 0xffff */
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
139 /* fixed point format convertion: fracV >>= 8) */
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
140 fracV = ((v & 0xffff) >> 12) | 0x100000;
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
141 u = 0;
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
142
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
143
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
144 for (x = dest->width - 1; x--; u += du) {
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
145
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
146 /* fracU = frac(u) = u & 0xffff */
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
147 /* fixed point format convertion: fracU >>= 8) */
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
148 fracU = ((u & 0xffff) >> 12) | 0x100000;
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
149
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
150 __asm__ __volatile__
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
151 ("\n\t pxor %%mm7, %%mm7"
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
152 /* Prefetching does not show improvement on my Duron (maybe due to its small cache?) */
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
153 /*"\n\t prefetch 64%[pixel_l]" / * only work on 3now!/SSE cpu */
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
154 /*"\n\t prefetchw 64%[output]" / * only work on 3now!/SSE cpu */
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
155
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
156 /* Computing coefs values (Thread #1 and #2) => ends on #C
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
157 *
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
158 * notice 0x10 = 1.0 (fixed point 28.4 - like fracU and fracV)
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
159 *
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
160 * coef[0] = (0x10 - fracU) * (0x10 - fracV); * UL=0 *
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
161 * coef[1] = (0x10 - fracU) * fracV; * LL=1 *
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
162 * coef[2] = fracU * (0x10 - fracV); * UR=2 *
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
163 * coef[3] = fracU * fracV; * LR=3 *
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
164 */
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
165
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
166 /*
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
167 * Unpacking colors (Thread #3 and #4)
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
168 */
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
169 /*
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
170 * Multiplying colors by coefs (Threads #5 and #6)
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
171 */
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
172 /*
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
173 * Adding colors together. (Thread #7)
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
174 */
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
175
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
176 "#1\n\t movd %[fracu], %%mm4" /* mm4 = [ 0 | 0 | 0x10 | fracU ] */
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
177 "#2\n\t movd %[fracv], %%mm6" /* mm6 = [ 0 | 0 | 0x10 | fracV ] */
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
178
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
179 "#1\n\t punpcklwd %%mm4, %%mm4" /* mm4 = [ 0x10 | 0x10 | fracU | fracU ] */
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
180 "#2\n\t movq %%mm6, %%mm3"
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
181
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
182 "#1\n\t pxor %%mm5, %%mm5"
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
183 "#2\n\t punpckldq %%mm6, %%mm6" /* mm6 = [ 0x10 | fracv | 0x10 | fracV ] */
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
184 "#3\n\t movq %[pixel_u], %%mm0" /* mm0 = [ col[0] | col[2] ] */
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
185
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
186 "#1\n\t punpckldq %%mm4, %%mm5" /* mm5 = [ fracU | fracU | 0 | 0 ] */
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
187 "#2\n\t punpcklwd %%mm7, %%mm3" /* mm3 = [ 0 | 0x10 | 0 | fracV ] */
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
188 "#3\n\t movq %%mm0, %%mm2"
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
189
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
190 "#1\n\t psubusw %%mm5, %%mm4" /* mm4 = [ 0x10-fracU | 0x10-fracU | fracU | fracU ] */
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
191 "#2\n\t punpckldq %%mm3, %%mm3" /* mm3 = [ 0 | fracV | 0 | fracV ] */
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
192 "#4\n\t movq %[pixel_l], %%mm1" /* mm1 = [ col[1] | col[3] ] */
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
193
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
194 "#2\n\t pslld $16, %%mm3" /* mm3 = [ fracV | 0 | fracV | 0 ] */
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
195 "#3\n\t punpcklbw %%mm7, %%mm0" /* mm0 = [ col[0] unpacked ] */
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
196
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
197 "#2\n\t psubusw %%mm3, %%mm6" /* mm6 = [ 0x10-fracV | fracV | 0x10-fracV | fracV ] */
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
198 "#4\n\t movq %%mm1, %%mm3"
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
199
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
200 "#C\n\t pmullw %%mm6, %%mm4" /* mm4 = [ coef[0]|coef[1]|coef[2]|coef[3] ] */
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
201 "#5\n\t movq %%mm4, %%mm5"
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
202
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
203 "#4\n\t punpcklbw %%mm7, %%mm1" /* mm1 = [ col[1] unpacked ] */
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
204 "#6\n\t punpckhwd %%mm4, %%mm4" /* mm4 = [ coef[1]|coef[1]|coef[0]|coef[0] ] */
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
205
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
206 "#3\n\t punpckhbw %%mm7, %%mm2" /* mm2 = [ col[2] unpacked ] */
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
207 "#5\n\t punpcklwd %%mm5, %%mm5" /* mm5 = [ coef[2]|coef[2]|coef[3]|coef[3] ] */
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
208
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
209 "#4\n\t punpckhbw %%mm7, %%mm3" /* mm3 = [ col[3] unpacked ] */
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
210 "#5\n\t movq %%mm5, %%mm6"
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
211
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
212 "#6\n\t movq %%mm4, %%mm7"
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
213 "#5\n\t punpcklwd %%mm6, %%mm6" /* mm6 = [ coef[3]|coef[3]|coef[3]|coef[3] ] */
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
214
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
215 "#6\n\t punpcklwd %%mm7, %%mm7" /* mm6 = [ coef[1]|coef[1]|coef[1]|coef[1] ] */
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
216 "#5\n\t pmullw %%mm6, %%mm3" /* mm3 = [ coef[3] * col[3] unpacked ] */
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
217
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
218 "#5\n\t punpckhwd %%mm5, %%mm5" /* mm5 = [ coef[2]|coef[2]|coef[2]|coef[2] ] */
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
219 "#6\n\t pmullw %%mm7, %%mm1" /* mm1 = [ coef[1] * col[1] unpacked ] */
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
220
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
221 "#5\n\t pmullw %%mm5, %%mm2" /* mm2 = [ coef[2] * col[2] unpacked ] */
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
222 "#6\n\t punpckhwd %%mm4, %%mm4" /* mm4 = [ coef[0]|coef[0]|coef[0]|coef[0] ] */
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
223
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
224 "#6\n\t pmullw %%mm4, %%mm0" /* mm0 = [ coef[0] * col[0] unpacked ] */
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
225 "#7\n\t paddw %%mm2, %%mm3"
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
226 "#7\n\t paddw %%mm1, %%mm0"
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
227
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
228 "#7\n\t paddw %%mm3, %%mm0"
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
229 "#7\n\t psrlw $8, %%mm0"
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
230
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
231 /* Unpacking the resulting pixel */
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
232 "\n\t packuswb %%mm7, %%mm0"
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
233 "\n\t movd %%mm0, %[output]"
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
234
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
235 : [output] "=m"(*dest_pixel)
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
236 : [pixel_u] "m"(src_pixel_rowu[u>>16])
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
237 , [pixel_l] "m"(src_pixel_rowl[u>>16])
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
238 , [fracu] "g"(fracU)
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
239 , [fracv] "g"(fracV)
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
240 : "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7");
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
241
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
242 ++dest_pixel;
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
243 }
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
244
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
245 memset (dest_pixel, 0, (dest->pitch - ((dest->width - 1) * 4)));
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
246 dest_pixel += (dest->pitch / 4) - ((dest->width - 1));
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
247
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
248 }
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
249
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
250 __asm__ __volatile__ ("\n\temms");
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
251
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
252 return VISUAL_OK;
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
253 #else /* !VISUAL_ARCH_X86 */
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
254 return VISUAL_ERROR_CPU_INVALID_CODE;
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
255 #endif
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
256 }
0db4a1dc75c4 [svn] libvisual.
nenolod
parents:
diff changeset
257