annotate x86/snowdsp_mmx.c @ 12530:63edd10ad4bc libavcodec tip

Try to fix crashes introduced by r25218 r25218 made assumptions about the existence of past reference frames that weren't necessarily true.
author darkshikari
date Tue, 28 Sep 2010 09:06:22 +0000
parents 9fef0a8ddd63
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1 /*
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2 * MMX and SSE2 optimized snow DSP utils
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
3 * Copyright (c) 2005-2006 Robert Edele <yartrebo@earthlink.net>
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
4 *
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
5 * This file is part of FFmpeg.
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
6 *
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
7 * FFmpeg is free software; you can redistribute it and/or
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
8 * modify it under the terms of the GNU Lesser General Public
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
9 * License as published by the Free Software Foundation; either
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
10 * version 2.1 of the License, or (at your option) any later version.
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
11 *
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
12 * FFmpeg is distributed in the hope that it will be useful,
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
15 * Lesser General Public License for more details.
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
16 *
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
17 * You should have received a copy of the GNU Lesser General Public
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
18 * License along with FFmpeg; if not, write to the Free Software
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
20 */
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
21
12456
a5ddb39627fd Rename FF_MM_ symbols related to CPU features flags as AV_CPU_FLAG_
stefano
parents: 12414
diff changeset
22 #include "libavutil/cpu.h"
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
23 #include "libavutil/x86_cpu.h"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
24 #include "libavcodec/avcodec.h"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
25 #include "libavcodec/snow.h"
11485
0f0cd6b5791f Separate DWT from snow and dsputil
mru
parents: 11373
diff changeset
26 #include "libavcodec/dwt.h"
11373
b57409c0c286 Add some missing #includes
mru
parents: 11369
diff changeset
27 #include "dsputil_mmx.h"
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
28
11485
0f0cd6b5791f Separate DWT from snow and dsputil
mru
parents: 11373
diff changeset
29 static void ff_snow_horizontal_compose97i_sse2(IDWTELEM *b, int width){
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
30 const int w2= (width+1)>>1;
11369
98970e51365a Remove DECLARE_ALIGNED_{8,16} macros
mru
parents: 10961
diff changeset
31 DECLARE_ALIGNED(16, IDWTELEM, temp)[width>>1];
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
32 const int w_l= (width>>1);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
33 const int w_r= w2 - 1;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
34 int i;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
35
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
36 { // Lift 0
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
37 IDWTELEM * const ref = b + w2 - 1;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
38 IDWTELEM b_0 = b[0]; //By allowing the first entry in b[0] to be calculated twice
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
39 // (the first time erroneously), we allow the SSE2 code to run an extra pass.
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
40 // The savings in code and time are well worth having to store this value and
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
41 // calculate b[0] correctly afterwards.
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
42
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
43 i = 0;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
44 __asm__ volatile(
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
45 "pcmpeqd %%xmm7, %%xmm7 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
46 "pcmpeqd %%xmm3, %%xmm3 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
47 "psllw $1, %%xmm3 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
48 "paddw %%xmm7, %%xmm3 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
49 "psllw $13, %%xmm3 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
50 ::);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
51 for(; i<w_l-15; i+=16){
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
52 __asm__ volatile(
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
53 "movdqu (%1), %%xmm1 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
54 "movdqu 16(%1), %%xmm5 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
55 "movdqu 2(%1), %%xmm2 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
56 "movdqu 18(%1), %%xmm6 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
57 "paddw %%xmm1, %%xmm2 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
58 "paddw %%xmm5, %%xmm6 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
59 "paddw %%xmm7, %%xmm2 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
60 "paddw %%xmm7, %%xmm6 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
61 "pmulhw %%xmm3, %%xmm2 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
62 "pmulhw %%xmm3, %%xmm6 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
63 "paddw (%0), %%xmm2 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
64 "paddw 16(%0), %%xmm6 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
65 "movdqa %%xmm2, (%0) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
66 "movdqa %%xmm6, 16(%0) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
67 :: "r"(&b[i]), "r"(&ref[i])
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
68 : "memory"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
69 );
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
70 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
71 snow_horizontal_compose_lift_lead_out(i, b, b, ref, width, w_l, 0, W_DM, W_DO, W_DS);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
72 b[0] = b_0 - ((W_DM * 2 * ref[1]+W_DO)>>W_DS);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
73 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
74
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
75 { // Lift 1
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
76 IDWTELEM * const dst = b+w2;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
77
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
78 i = 0;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
79 for(; (((x86_reg)&dst[i]) & 0x1F) && i<w_r; i++){
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
80 dst[i] = dst[i] - (b[i] + b[i + 1]);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
81 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
82 for(; i<w_r-15; i+=16){
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
83 __asm__ volatile(
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
84 "movdqu (%1), %%xmm1 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
85 "movdqu 16(%1), %%xmm5 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
86 "movdqu 2(%1), %%xmm2 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
87 "movdqu 18(%1), %%xmm6 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
88 "paddw %%xmm1, %%xmm2 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
89 "paddw %%xmm5, %%xmm6 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
90 "movdqa (%0), %%xmm0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
91 "movdqa 16(%0), %%xmm4 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
92 "psubw %%xmm2, %%xmm0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
93 "psubw %%xmm6, %%xmm4 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
94 "movdqa %%xmm0, (%0) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
95 "movdqa %%xmm4, 16(%0) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
96 :: "r"(&dst[i]), "r"(&b[i])
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
97 : "memory"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
98 );
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
99 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
100 snow_horizontal_compose_lift_lead_out(i, dst, dst, b, width, w_r, 1, W_CM, W_CO, W_CS);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
101 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
102
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
103 { // Lift 2
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
104 IDWTELEM * const ref = b+w2 - 1;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
105 IDWTELEM b_0 = b[0];
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
106
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
107 i = 0;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
108 __asm__ volatile(
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
109 "psllw $15, %%xmm7 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
110 "pcmpeqw %%xmm6, %%xmm6 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
111 "psrlw $13, %%xmm6 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
112 "paddw %%xmm7, %%xmm6 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
113 ::);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
114 for(; i<w_l-15; i+=16){
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
115 __asm__ volatile(
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
116 "movdqu (%1), %%xmm0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
117 "movdqu 16(%1), %%xmm4 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
118 "movdqu 2(%1), %%xmm1 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
119 "movdqu 18(%1), %%xmm5 \n\t" //FIXME try aligned reads and shifts
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
120 "paddw %%xmm6, %%xmm0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
121 "paddw %%xmm6, %%xmm4 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
122 "paddw %%xmm7, %%xmm1 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
123 "paddw %%xmm7, %%xmm5 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
124 "pavgw %%xmm1, %%xmm0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
125 "pavgw %%xmm5, %%xmm4 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
126 "psubw %%xmm7, %%xmm0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
127 "psubw %%xmm7, %%xmm4 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
128 "psraw $1, %%xmm0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
129 "psraw $1, %%xmm4 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
130 "movdqa (%0), %%xmm1 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
131 "movdqa 16(%0), %%xmm5 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
132 "paddw %%xmm1, %%xmm0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
133 "paddw %%xmm5, %%xmm4 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
134 "psraw $2, %%xmm0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
135 "psraw $2, %%xmm4 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
136 "paddw %%xmm1, %%xmm0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
137 "paddw %%xmm5, %%xmm4 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
138 "movdqa %%xmm0, (%0) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
139 "movdqa %%xmm4, 16(%0) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
140 :: "r"(&b[i]), "r"(&ref[i])
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
141 : "memory"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
142 );
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
143 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
144 snow_horizontal_compose_liftS_lead_out(i, b, b, ref, width, w_l);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
145 b[0] = b_0 + ((2 * ref[1] + W_BO-1 + 4 * b_0) >> W_BS);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
146 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
147
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
148 { // Lift 3
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
149 IDWTELEM * const src = b+w2;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
150
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
151 i = 0;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
152 for(; (((x86_reg)&temp[i]) & 0x1F) && i<w_r; i++){
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
153 temp[i] = src[i] - ((-W_AM*(b[i] + b[i+1]))>>W_AS);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
154 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
155 for(; i<w_r-7; i+=8){
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
156 __asm__ volatile(
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
157 "movdqu 2(%1), %%xmm2 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
158 "movdqu 18(%1), %%xmm6 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
159 "paddw (%1), %%xmm2 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
160 "paddw 16(%1), %%xmm6 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
161 "movdqu (%0), %%xmm0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
162 "movdqu 16(%0), %%xmm4 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
163 "paddw %%xmm2, %%xmm0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
164 "paddw %%xmm6, %%xmm4 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
165 "psraw $1, %%xmm2 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
166 "psraw $1, %%xmm6 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
167 "paddw %%xmm0, %%xmm2 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
168 "paddw %%xmm4, %%xmm6 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
169 "movdqa %%xmm2, (%2) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
170 "movdqa %%xmm6, 16(%2) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
171 :: "r"(&src[i]), "r"(&b[i]), "r"(&temp[i])
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
172 : "memory"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
173 );
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
174 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
175 snow_horizontal_compose_lift_lead_out(i, temp, src, b, width, w_r, 1, -W_AM, W_AO+1, W_AS);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
176 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
177
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
178 {
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
179 snow_interleave_line_header(&i, width, b, temp);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
180
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
181 for (; (i & 0x3E) != 0x3E; i-=2){
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
182 b[i+1] = temp[i>>1];
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
183 b[i] = b[i>>1];
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
184 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
185 for (i-=62; i>=0; i-=64){
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
186 __asm__ volatile(
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
187 "movdqa (%1), %%xmm0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
188 "movdqa 16(%1), %%xmm2 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
189 "movdqa 32(%1), %%xmm4 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
190 "movdqa 48(%1), %%xmm6 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
191 "movdqa (%1), %%xmm1 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
192 "movdqa 16(%1), %%xmm3 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
193 "movdqa 32(%1), %%xmm5 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
194 "movdqa 48(%1), %%xmm7 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
195 "punpcklwd (%2), %%xmm0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
196 "punpcklwd 16(%2), %%xmm2 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
197 "punpcklwd 32(%2), %%xmm4 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
198 "punpcklwd 48(%2), %%xmm6 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
199 "movdqa %%xmm0, (%0) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
200 "movdqa %%xmm2, 32(%0) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
201 "movdqa %%xmm4, 64(%0) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
202 "movdqa %%xmm6, 96(%0) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
203 "punpckhwd (%2), %%xmm1 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
204 "punpckhwd 16(%2), %%xmm3 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
205 "punpckhwd 32(%2), %%xmm5 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
206 "punpckhwd 48(%2), %%xmm7 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
207 "movdqa %%xmm1, 16(%0) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
208 "movdqa %%xmm3, 48(%0) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
209 "movdqa %%xmm5, 80(%0) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
210 "movdqa %%xmm7, 112(%0) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
211 :: "r"(&(b)[i]), "r"(&(b)[i>>1]), "r"(&(temp)[i>>1])
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
212 : "memory"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
213 );
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
214 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
215 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
216 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
217
11485
0f0cd6b5791f Separate DWT from snow and dsputil
mru
parents: 11373
diff changeset
218 static void ff_snow_horizontal_compose97i_mmx(IDWTELEM *b, int width){
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
219 const int w2= (width+1)>>1;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
220 IDWTELEM temp[width >> 1];
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
221 const int w_l= (width>>1);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
222 const int w_r= w2 - 1;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
223 int i;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
224
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
225 { // Lift 0
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
226 IDWTELEM * const ref = b + w2 - 1;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
227
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
228 i = 1;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
229 b[0] = b[0] - ((W_DM * 2 * ref[1]+W_DO)>>W_DS);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
230 __asm__ volatile(
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
231 "pcmpeqw %%mm7, %%mm7 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
232 "pcmpeqw %%mm3, %%mm3 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
233 "psllw $1, %%mm3 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
234 "paddw %%mm7, %%mm3 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
235 "psllw $13, %%mm3 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
236 ::);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
237 for(; i<w_l-7; i+=8){
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
238 __asm__ volatile(
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
239 "movq (%1), %%mm2 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
240 "movq 8(%1), %%mm6 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
241 "paddw 2(%1), %%mm2 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
242 "paddw 10(%1), %%mm6 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
243 "paddw %%mm7, %%mm2 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
244 "paddw %%mm7, %%mm6 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
245 "pmulhw %%mm3, %%mm2 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
246 "pmulhw %%mm3, %%mm6 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
247 "paddw (%0), %%mm2 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
248 "paddw 8(%0), %%mm6 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
249 "movq %%mm2, (%0) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
250 "movq %%mm6, 8(%0) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
251 :: "r"(&b[i]), "r"(&ref[i])
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
252 : "memory"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
253 );
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
254 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
255 snow_horizontal_compose_lift_lead_out(i, b, b, ref, width, w_l, 0, W_DM, W_DO, W_DS);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
256 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
257
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
258 { // Lift 1
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
259 IDWTELEM * const dst = b+w2;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
260
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
261 i = 0;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
262 for(; i<w_r-7; i+=8){
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
263 __asm__ volatile(
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
264 "movq (%1), %%mm2 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
265 "movq 8(%1), %%mm6 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
266 "paddw 2(%1), %%mm2 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
267 "paddw 10(%1), %%mm6 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
268 "movq (%0), %%mm0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
269 "movq 8(%0), %%mm4 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
270 "psubw %%mm2, %%mm0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
271 "psubw %%mm6, %%mm4 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
272 "movq %%mm0, (%0) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
273 "movq %%mm4, 8(%0) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
274 :: "r"(&dst[i]), "r"(&b[i])
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
275 : "memory"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
276 );
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
277 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
278 snow_horizontal_compose_lift_lead_out(i, dst, dst, b, width, w_r, 1, W_CM, W_CO, W_CS);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
279 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
280
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
281 { // Lift 2
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
282 IDWTELEM * const ref = b+w2 - 1;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
283
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
284 i = 1;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
285 b[0] = b[0] + (((2 * ref[1] + W_BO) + 4 * b[0]) >> W_BS);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
286 __asm__ volatile(
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
287 "psllw $15, %%mm7 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
288 "pcmpeqw %%mm6, %%mm6 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
289 "psrlw $13, %%mm6 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
290 "paddw %%mm7, %%mm6 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
291 ::);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
292 for(; i<w_l-7; i+=8){
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
293 __asm__ volatile(
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
294 "movq (%1), %%mm0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
295 "movq 8(%1), %%mm4 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
296 "movq 2(%1), %%mm1 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
297 "movq 10(%1), %%mm5 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
298 "paddw %%mm6, %%mm0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
299 "paddw %%mm6, %%mm4 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
300 "paddw %%mm7, %%mm1 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
301 "paddw %%mm7, %%mm5 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
302 "pavgw %%mm1, %%mm0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
303 "pavgw %%mm5, %%mm4 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
304 "psubw %%mm7, %%mm0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
305 "psubw %%mm7, %%mm4 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
306 "psraw $1, %%mm0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
307 "psraw $1, %%mm4 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
308 "movq (%0), %%mm1 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
309 "movq 8(%0), %%mm5 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
310 "paddw %%mm1, %%mm0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
311 "paddw %%mm5, %%mm4 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
312 "psraw $2, %%mm0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
313 "psraw $2, %%mm4 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
314 "paddw %%mm1, %%mm0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
315 "paddw %%mm5, %%mm4 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
316 "movq %%mm0, (%0) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
317 "movq %%mm4, 8(%0) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
318 :: "r"(&b[i]), "r"(&ref[i])
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
319 : "memory"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
320 );
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
321 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
322 snow_horizontal_compose_liftS_lead_out(i, b, b, ref, width, w_l);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
323 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
324
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
325 { // Lift 3
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
326 IDWTELEM * const src = b+w2;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
327 i = 0;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
328
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
329 for(; i<w_r-7; i+=8){
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
330 __asm__ volatile(
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
331 "movq 2(%1), %%mm2 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
332 "movq 10(%1), %%mm6 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
333 "paddw (%1), %%mm2 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
334 "paddw 8(%1), %%mm6 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
335 "movq (%0), %%mm0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
336 "movq 8(%0), %%mm4 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
337 "paddw %%mm2, %%mm0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
338 "paddw %%mm6, %%mm4 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
339 "psraw $1, %%mm2 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
340 "psraw $1, %%mm6 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
341 "paddw %%mm0, %%mm2 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
342 "paddw %%mm4, %%mm6 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
343 "movq %%mm2, (%2) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
344 "movq %%mm6, 8(%2) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
345 :: "r"(&src[i]), "r"(&b[i]), "r"(&temp[i])
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
346 : "memory"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
347 );
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
348 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
349 snow_horizontal_compose_lift_lead_out(i, temp, src, b, width, w_r, 1, -W_AM, W_AO+1, W_AS);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
350 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
351
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
352 {
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
353 snow_interleave_line_header(&i, width, b, temp);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
354
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
355 for (; (i & 0x1E) != 0x1E; i-=2){
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
356 b[i+1] = temp[i>>1];
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
357 b[i] = b[i>>1];
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
358 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
359 for (i-=30; i>=0; i-=32){
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
360 __asm__ volatile(
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
361 "movq (%1), %%mm0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
362 "movq 8(%1), %%mm2 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
363 "movq 16(%1), %%mm4 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
364 "movq 24(%1), %%mm6 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
365 "movq (%1), %%mm1 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
366 "movq 8(%1), %%mm3 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
367 "movq 16(%1), %%mm5 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
368 "movq 24(%1), %%mm7 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
369 "punpcklwd (%2), %%mm0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
370 "punpcklwd 8(%2), %%mm2 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
371 "punpcklwd 16(%2), %%mm4 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
372 "punpcklwd 24(%2), %%mm6 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
373 "movq %%mm0, (%0) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
374 "movq %%mm2, 16(%0) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
375 "movq %%mm4, 32(%0) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
376 "movq %%mm6, 48(%0) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
377 "punpckhwd (%2), %%mm1 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
378 "punpckhwd 8(%2), %%mm3 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
379 "punpckhwd 16(%2), %%mm5 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
380 "punpckhwd 24(%2), %%mm7 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
381 "movq %%mm1, 8(%0) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
382 "movq %%mm3, 24(%0) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
383 "movq %%mm5, 40(%0) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
384 "movq %%mm7, 56(%0) \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
385 :: "r"(&b[i]), "r"(&b[i>>1]), "r"(&temp[i>>1])
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
386 : "memory"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
387 );
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
388 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
389 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
390 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
391
8590
7a463923ecd1 Change semantic of CONFIG_*, HAVE_* and ARCH_*.
aurel
parents: 8430
diff changeset
392 #if HAVE_7REGS
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
393 #define snow_vertical_compose_sse2_load_add(op,r,t0,t1,t2,t3)\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
394 ""op" ("r",%%"REG_d"), %%"t0" \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
395 ""op" 16("r",%%"REG_d"), %%"t1" \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
396 ""op" 32("r",%%"REG_d"), %%"t2" \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
397 ""op" 48("r",%%"REG_d"), %%"t3" \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
398
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
399 #define snow_vertical_compose_sse2_load(r,t0,t1,t2,t3)\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
400 snow_vertical_compose_sse2_load_add("movdqa",r,t0,t1,t2,t3)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
401
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
402 #define snow_vertical_compose_sse2_add(r,t0,t1,t2,t3)\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
403 snow_vertical_compose_sse2_load_add("paddw",r,t0,t1,t2,t3)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
404
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
405 #define snow_vertical_compose_r2r_sub(s0,s1,s2,s3,t0,t1,t2,t3)\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
406 "psubw %%"s0", %%"t0" \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
407 "psubw %%"s1", %%"t1" \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
408 "psubw %%"s2", %%"t2" \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
409 "psubw %%"s3", %%"t3" \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
410
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
411 #define snow_vertical_compose_sse2_store(w,s0,s1,s2,s3)\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
412 "movdqa %%"s0", ("w",%%"REG_d") \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
413 "movdqa %%"s1", 16("w",%%"REG_d") \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
414 "movdqa %%"s2", 32("w",%%"REG_d") \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
415 "movdqa %%"s3", 48("w",%%"REG_d") \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
416
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
417 #define snow_vertical_compose_sra(n,t0,t1,t2,t3)\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
418 "psraw $"n", %%"t0" \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
419 "psraw $"n", %%"t1" \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
420 "psraw $"n", %%"t2" \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
421 "psraw $"n", %%"t3" \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
422
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
423 #define snow_vertical_compose_r2r_add(s0,s1,s2,s3,t0,t1,t2,t3)\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
424 "paddw %%"s0", %%"t0" \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
425 "paddw %%"s1", %%"t1" \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
426 "paddw %%"s2", %%"t2" \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
427 "paddw %%"s3", %%"t3" \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
428
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
429 #define snow_vertical_compose_r2r_pmulhw(s0,s1,s2,s3,t0,t1,t2,t3)\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
430 "pmulhw %%"s0", %%"t0" \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
431 "pmulhw %%"s1", %%"t1" \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
432 "pmulhw %%"s2", %%"t2" \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
433 "pmulhw %%"s3", %%"t3" \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
434
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
435 #define snow_vertical_compose_sse2_move(s0,s1,s2,s3,t0,t1,t2,t3)\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
436 "movdqa %%"s0", %%"t0" \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
437 "movdqa %%"s1", %%"t1" \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
438 "movdqa %%"s2", %%"t2" \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
439 "movdqa %%"s3", %%"t3" \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
440
11485
0f0cd6b5791f Separate DWT from snow and dsputil
mru
parents: 11373
diff changeset
441 static void ff_snow_vertical_compose97i_sse2(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width){
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
442 x86_reg i = width;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
443
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
444 while(i & 0x1F)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
445 {
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
446 i--;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
447 b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
448 b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
449 b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
450 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
451 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
452 i+=i;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
453
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
454 __asm__ volatile (
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
455 "jmp 2f \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
456 "1: \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
457 snow_vertical_compose_sse2_load("%4","xmm0","xmm2","xmm4","xmm6")
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
458 snow_vertical_compose_sse2_add("%6","xmm0","xmm2","xmm4","xmm6")
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
459
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
460
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
461 "pcmpeqw %%xmm0, %%xmm0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
462 "pcmpeqw %%xmm2, %%xmm2 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
463 "paddw %%xmm2, %%xmm2 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
464 "paddw %%xmm0, %%xmm2 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
465 "psllw $13, %%xmm2 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
466 snow_vertical_compose_r2r_add("xmm0","xmm0","xmm0","xmm0","xmm1","xmm3","xmm5","xmm7")
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
467 snow_vertical_compose_r2r_pmulhw("xmm2","xmm2","xmm2","xmm2","xmm1","xmm3","xmm5","xmm7")
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
468 snow_vertical_compose_sse2_add("%5","xmm1","xmm3","xmm5","xmm7")
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
469 snow_vertical_compose_sse2_store("%5","xmm1","xmm3","xmm5","xmm7")
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
470 snow_vertical_compose_sse2_load("%4","xmm0","xmm2","xmm4","xmm6")
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
471 snow_vertical_compose_sse2_add("%3","xmm1","xmm3","xmm5","xmm7")
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
472 snow_vertical_compose_r2r_sub("xmm1","xmm3","xmm5","xmm7","xmm0","xmm2","xmm4","xmm6")
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
473 snow_vertical_compose_sse2_store("%4","xmm0","xmm2","xmm4","xmm6")
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
474
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
475 "pcmpeqw %%xmm7, %%xmm7 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
476 "pcmpeqw %%xmm5, %%xmm5 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
477 "psllw $15, %%xmm7 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
478 "psrlw $13, %%xmm5 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
479 "paddw %%xmm7, %%xmm5 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
480 snow_vertical_compose_r2r_add("xmm5","xmm5","xmm5","xmm5","xmm0","xmm2","xmm4","xmm6")
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
481 "movq (%2,%%"REG_d"), %%xmm1 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
482 "movq 8(%2,%%"REG_d"), %%xmm3 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
483 "paddw %%xmm7, %%xmm1 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
484 "paddw %%xmm7, %%xmm3 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
485 "pavgw %%xmm1, %%xmm0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
486 "pavgw %%xmm3, %%xmm2 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
487 "movq 16(%2,%%"REG_d"), %%xmm1 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
488 "movq 24(%2,%%"REG_d"), %%xmm3 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
489 "paddw %%xmm7, %%xmm1 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
490 "paddw %%xmm7, %%xmm3 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
491 "pavgw %%xmm1, %%xmm4 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
492 "pavgw %%xmm3, %%xmm6 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
493 snow_vertical_compose_r2r_sub("xmm7","xmm7","xmm7","xmm7","xmm0","xmm2","xmm4","xmm6")
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
494 snow_vertical_compose_sra("1","xmm0","xmm2","xmm4","xmm6")
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
495 snow_vertical_compose_sse2_add("%3","xmm0","xmm2","xmm4","xmm6")
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
496
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
497 snow_vertical_compose_sra("2","xmm0","xmm2","xmm4","xmm6")
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
498 snow_vertical_compose_sse2_add("%3","xmm0","xmm2","xmm4","xmm6")
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
499 snow_vertical_compose_sse2_store("%3","xmm0","xmm2","xmm4","xmm6")
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
500 snow_vertical_compose_sse2_add("%1","xmm0","xmm2","xmm4","xmm6")
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
501 snow_vertical_compose_sse2_move("xmm0","xmm2","xmm4","xmm6","xmm1","xmm3","xmm5","xmm7")
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
502 snow_vertical_compose_sra("1","xmm0","xmm2","xmm4","xmm6")
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
503 snow_vertical_compose_r2r_add("xmm1","xmm3","xmm5","xmm7","xmm0","xmm2","xmm4","xmm6")
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
504 snow_vertical_compose_sse2_add("%2","xmm0","xmm2","xmm4","xmm6")
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
505 snow_vertical_compose_sse2_store("%2","xmm0","xmm2","xmm4","xmm6")
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
506
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
507 "2: \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
508 "sub $64, %%"REG_d" \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
509 "jge 1b \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
510 :"+d"(i)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
511 :"r"(b0),"r"(b1),"r"(b2),"r"(b3),"r"(b4),"r"(b5));
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
512 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
513
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
514 #define snow_vertical_compose_mmx_load_add(op,r,t0,t1,t2,t3)\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
515 ""op" ("r",%%"REG_d"), %%"t0" \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
516 ""op" 8("r",%%"REG_d"), %%"t1" \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
517 ""op" 16("r",%%"REG_d"), %%"t2" \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
518 ""op" 24("r",%%"REG_d"), %%"t3" \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
519
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
520 #define snow_vertical_compose_mmx_load(r,t0,t1,t2,t3)\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
521 snow_vertical_compose_mmx_load_add("movq",r,t0,t1,t2,t3)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
522
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
523 #define snow_vertical_compose_mmx_add(r,t0,t1,t2,t3)\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
524 snow_vertical_compose_mmx_load_add("paddw",r,t0,t1,t2,t3)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
525
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
526 #define snow_vertical_compose_mmx_store(w,s0,s1,s2,s3)\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
527 "movq %%"s0", ("w",%%"REG_d") \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
528 "movq %%"s1", 8("w",%%"REG_d") \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
529 "movq %%"s2", 16("w",%%"REG_d") \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
530 "movq %%"s3", 24("w",%%"REG_d") \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
531
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
532 #define snow_vertical_compose_mmx_move(s0,s1,s2,s3,t0,t1,t2,t3)\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
533 "movq %%"s0", %%"t0" \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
534 "movq %%"s1", %%"t1" \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
535 "movq %%"s2", %%"t2" \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
536 "movq %%"s3", %%"t3" \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
537
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
538
11485
0f0cd6b5791f Separate DWT from snow and dsputil
mru
parents: 11373
diff changeset
539 static void ff_snow_vertical_compose97i_mmx(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width){
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
540 x86_reg i = width;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
541 while(i & 15)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
542 {
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
543 i--;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
544 b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
545 b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
546 b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
547 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
548 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
549 i+=i;
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
550 __asm__ volatile(
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
551 "jmp 2f \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
552 "1: \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
553
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
554 snow_vertical_compose_mmx_load("%4","mm1","mm3","mm5","mm7")
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
555 snow_vertical_compose_mmx_add("%6","mm1","mm3","mm5","mm7")
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
556 "pcmpeqw %%mm0, %%mm0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
557 "pcmpeqw %%mm2, %%mm2 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
558 "paddw %%mm2, %%mm2 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
559 "paddw %%mm0, %%mm2 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
560 "psllw $13, %%mm2 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
561 snow_vertical_compose_r2r_add("mm0","mm0","mm0","mm0","mm1","mm3","mm5","mm7")
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
562 snow_vertical_compose_r2r_pmulhw("mm2","mm2","mm2","mm2","mm1","mm3","mm5","mm7")
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
563 snow_vertical_compose_mmx_add("%5","mm1","mm3","mm5","mm7")
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
564 snow_vertical_compose_mmx_store("%5","mm1","mm3","mm5","mm7")
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
565 snow_vertical_compose_mmx_load("%4","mm0","mm2","mm4","mm6")
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
566 snow_vertical_compose_mmx_add("%3","mm1","mm3","mm5","mm7")
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
567 snow_vertical_compose_r2r_sub("mm1","mm3","mm5","mm7","mm0","mm2","mm4","mm6")
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
568 snow_vertical_compose_mmx_store("%4","mm0","mm2","mm4","mm6")
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
569 "pcmpeqw %%mm7, %%mm7 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
570 "pcmpeqw %%mm5, %%mm5 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
571 "psllw $15, %%mm7 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
572 "psrlw $13, %%mm5 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
573 "paddw %%mm7, %%mm5 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
574 snow_vertical_compose_r2r_add("mm5","mm5","mm5","mm5","mm0","mm2","mm4","mm6")
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
575 "movq (%2,%%"REG_d"), %%mm1 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
576 "movq 8(%2,%%"REG_d"), %%mm3 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
577 "paddw %%mm7, %%mm1 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
578 "paddw %%mm7, %%mm3 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
579 "pavgw %%mm1, %%mm0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
580 "pavgw %%mm3, %%mm2 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
581 "movq 16(%2,%%"REG_d"), %%mm1 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
582 "movq 24(%2,%%"REG_d"), %%mm3 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
583 "paddw %%mm7, %%mm1 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
584 "paddw %%mm7, %%mm3 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
585 "pavgw %%mm1, %%mm4 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
586 "pavgw %%mm3, %%mm6 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
587 snow_vertical_compose_r2r_sub("mm7","mm7","mm7","mm7","mm0","mm2","mm4","mm6")
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
588 snow_vertical_compose_sra("1","mm0","mm2","mm4","mm6")
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
589 snow_vertical_compose_mmx_add("%3","mm0","mm2","mm4","mm6")
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
590
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
591 snow_vertical_compose_sra("2","mm0","mm2","mm4","mm6")
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
592 snow_vertical_compose_mmx_add("%3","mm0","mm2","mm4","mm6")
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
593 snow_vertical_compose_mmx_store("%3","mm0","mm2","mm4","mm6")
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
594 snow_vertical_compose_mmx_add("%1","mm0","mm2","mm4","mm6")
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
595 snow_vertical_compose_mmx_move("mm0","mm2","mm4","mm6","mm1","mm3","mm5","mm7")
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
596 snow_vertical_compose_sra("1","mm0","mm2","mm4","mm6")
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
597 snow_vertical_compose_r2r_add("mm1","mm3","mm5","mm7","mm0","mm2","mm4","mm6")
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
598 snow_vertical_compose_mmx_add("%2","mm0","mm2","mm4","mm6")
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
599 snow_vertical_compose_mmx_store("%2","mm0","mm2","mm4","mm6")
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
600
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
601 "2: \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
602 "sub $32, %%"REG_d" \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
603 "jge 1b \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
604 :"+d"(i)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
605 :"r"(b0),"r"(b1),"r"(b2),"r"(b3),"r"(b4),"r"(b5));
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
606 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
607 #endif //HAVE_7REGS
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
608
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
609 #define snow_inner_add_yblock_sse2_header \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
610 IDWTELEM * * dst_array = sb->line + src_y;\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
611 x86_reg tmp;\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
612 __asm__ volatile(\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
613 "mov %7, %%"REG_c" \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
614 "mov %6, %2 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
615 "mov %4, %%"REG_S" \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
616 "pxor %%xmm7, %%xmm7 \n\t" /* 0 */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
617 "pcmpeqd %%xmm3, %%xmm3 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
618 "psllw $15, %%xmm3 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
619 "psrlw $12, %%xmm3 \n\t" /* FRAC_BITS >> 1 */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
620 "1: \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
621 "mov %1, %%"REG_D" \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
622 "mov (%%"REG_D"), %%"REG_D" \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
623 "add %3, %%"REG_D" \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
624
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
625 #define snow_inner_add_yblock_sse2_start_8(out_reg1, out_reg2, ptr_offset, s_offset)\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
626 "mov "PTR_SIZE"*"ptr_offset"(%%"REG_a"), %%"REG_d"; \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
627 "movq (%%"REG_d"), %%"out_reg1" \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
628 "movq (%%"REG_d", %%"REG_c"), %%"out_reg2" \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
629 "punpcklbw %%xmm7, %%"out_reg1" \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
630 "punpcklbw %%xmm7, %%"out_reg2" \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
631 "movq "s_offset"(%%"REG_S"), %%xmm0 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
632 "movq "s_offset"+16(%%"REG_S"), %%xmm4 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
633 "punpcklbw %%xmm7, %%xmm0 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
634 "punpcklbw %%xmm7, %%xmm4 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
635 "pmullw %%xmm0, %%"out_reg1" \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
636 "pmullw %%xmm4, %%"out_reg2" \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
637
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
638 #define snow_inner_add_yblock_sse2_start_16(out_reg1, out_reg2, ptr_offset, s_offset)\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
639 "mov "PTR_SIZE"*"ptr_offset"(%%"REG_a"), %%"REG_d"; \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
640 "movq (%%"REG_d"), %%"out_reg1" \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
641 "movq 8(%%"REG_d"), %%"out_reg2" \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
642 "punpcklbw %%xmm7, %%"out_reg1" \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
643 "punpcklbw %%xmm7, %%"out_reg2" \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
644 "movq "s_offset"(%%"REG_S"), %%xmm0 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
645 "movq "s_offset"+8(%%"REG_S"), %%xmm4 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
646 "punpcklbw %%xmm7, %%xmm0 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
647 "punpcklbw %%xmm7, %%xmm4 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
648 "pmullw %%xmm0, %%"out_reg1" \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
649 "pmullw %%xmm4, %%"out_reg2" \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
650
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
651 #define snow_inner_add_yblock_sse2_accum_8(ptr_offset, s_offset) \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
652 snow_inner_add_yblock_sse2_start_8("xmm2", "xmm6", ptr_offset, s_offset)\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
653 "paddusw %%xmm2, %%xmm1 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
654 "paddusw %%xmm6, %%xmm5 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
655
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
656 #define snow_inner_add_yblock_sse2_accum_16(ptr_offset, s_offset) \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
657 snow_inner_add_yblock_sse2_start_16("xmm2", "xmm6", ptr_offset, s_offset)\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
658 "paddusw %%xmm2, %%xmm1 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
659 "paddusw %%xmm6, %%xmm5 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
660
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
661 #define snow_inner_add_yblock_sse2_end_common1\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
662 "add $32, %%"REG_S" \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
663 "add %%"REG_c", %0 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
664 "add %%"REG_c", "PTR_SIZE"*3(%%"REG_a");\n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
665 "add %%"REG_c", "PTR_SIZE"*2(%%"REG_a");\n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
666 "add %%"REG_c", "PTR_SIZE"*1(%%"REG_a");\n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
667 "add %%"REG_c", (%%"REG_a") \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
668
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
669 #define snow_inner_add_yblock_sse2_end_common2\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
670 "jnz 1b \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
671 :"+m"(dst8),"+m"(dst_array),"=&r"(tmp)\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
672 :\
9194
6bf01d174630 Remove useless casting in asm "m" operand.
cehoyos
parents: 8590
diff changeset
673 "rm"((x86_reg)(src_x<<1)),"m"(obmc),"a"(block),"m"(b_h),"m"(src_stride):\
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
674 "%"REG_c"","%"REG_S"","%"REG_D"","%"REG_d"");
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
675
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
676 #define snow_inner_add_yblock_sse2_end_8\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
677 "sal $1, %%"REG_c" \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
678 "add $"PTR_SIZE"*2, %1 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
679 snow_inner_add_yblock_sse2_end_common1\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
680 "sar $1, %%"REG_c" \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
681 "sub $2, %2 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
682 snow_inner_add_yblock_sse2_end_common2
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
683
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
684 #define snow_inner_add_yblock_sse2_end_16\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
685 "add $"PTR_SIZE"*1, %1 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
686 snow_inner_add_yblock_sse2_end_common1\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
687 "dec %2 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
688 snow_inner_add_yblock_sse2_end_common2
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
689
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
690 static void inner_add_yblock_bw_8_obmc_16_bh_even_sse2(const uint8_t *obmc, const x86_reg obmc_stride, uint8_t * * block, int b_w, x86_reg b_h,
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
691 int src_x, int src_y, x86_reg src_stride, slice_buffer * sb, int add, uint8_t * dst8){
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
692 snow_inner_add_yblock_sse2_header
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
693 snow_inner_add_yblock_sse2_start_8("xmm1", "xmm5", "3", "0")
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
694 snow_inner_add_yblock_sse2_accum_8("2", "8")
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
695 snow_inner_add_yblock_sse2_accum_8("1", "128")
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
696 snow_inner_add_yblock_sse2_accum_8("0", "136")
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
697
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
698 "mov %0, %%"REG_d" \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
699 "movdqa (%%"REG_D"), %%xmm0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
700 "movdqa %%xmm1, %%xmm2 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
701
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
702 "punpckhwd %%xmm7, %%xmm1 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
703 "punpcklwd %%xmm7, %%xmm2 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
704 "paddd %%xmm2, %%xmm0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
705 "movdqa 16(%%"REG_D"), %%xmm2 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
706 "paddd %%xmm1, %%xmm2 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
707 "paddd %%xmm3, %%xmm0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
708 "paddd %%xmm3, %%xmm2 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
709
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
710 "mov %1, %%"REG_D" \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
711 "mov "PTR_SIZE"(%%"REG_D"), %%"REG_D";\n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
712 "add %3, %%"REG_D" \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
713
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
714 "movdqa (%%"REG_D"), %%xmm4 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
715 "movdqa %%xmm5, %%xmm6 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
716 "punpckhwd %%xmm7, %%xmm5 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
717 "punpcklwd %%xmm7, %%xmm6 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
718 "paddd %%xmm6, %%xmm4 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
719 "movdqa 16(%%"REG_D"), %%xmm6 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
720 "paddd %%xmm5, %%xmm6 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
721 "paddd %%xmm3, %%xmm4 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
722 "paddd %%xmm3, %%xmm6 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
723
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
724 "psrad $8, %%xmm0 \n\t" /* FRAC_BITS. */
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
725 "psrad $8, %%xmm2 \n\t" /* FRAC_BITS. */
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
726 "packssdw %%xmm2, %%xmm0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
727 "packuswb %%xmm7, %%xmm0 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
728 "movq %%xmm0, (%%"REG_d") \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
729
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
730 "psrad $8, %%xmm4 \n\t" /* FRAC_BITS. */
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
731 "psrad $8, %%xmm6 \n\t" /* FRAC_BITS. */
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
732 "packssdw %%xmm6, %%xmm4 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
733 "packuswb %%xmm7, %%xmm4 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
734 "movq %%xmm4, (%%"REG_d",%%"REG_c");\n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
735 snow_inner_add_yblock_sse2_end_8
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
736 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
737
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
738 static void inner_add_yblock_bw_16_obmc_32_sse2(const uint8_t *obmc, const x86_reg obmc_stride, uint8_t * * block, int b_w, x86_reg b_h,
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
739 int src_x, int src_y, x86_reg src_stride, slice_buffer * sb, int add, uint8_t * dst8){
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
740 snow_inner_add_yblock_sse2_header
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
741 snow_inner_add_yblock_sse2_start_16("xmm1", "xmm5", "3", "0")
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
742 snow_inner_add_yblock_sse2_accum_16("2", "16")
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
743 snow_inner_add_yblock_sse2_accum_16("1", "512")
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
744 snow_inner_add_yblock_sse2_accum_16("0", "528")
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
745
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
746 "mov %0, %%"REG_d" \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
747 "psrlw $4, %%xmm1 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
748 "psrlw $4, %%xmm5 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
749 "paddw (%%"REG_D"), %%xmm1 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
750 "paddw 16(%%"REG_D"), %%xmm5 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
751 "paddw %%xmm3, %%xmm1 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
752 "paddw %%xmm3, %%xmm5 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
753 "psraw $4, %%xmm1 \n\t" /* FRAC_BITS. */
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
754 "psraw $4, %%xmm5 \n\t" /* FRAC_BITS. */
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
755 "packuswb %%xmm5, %%xmm1 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
756
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
757 "movdqu %%xmm1, (%%"REG_d") \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
758
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
759 snow_inner_add_yblock_sse2_end_16
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
760 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
761
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
762 #define snow_inner_add_yblock_mmx_header \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
763 IDWTELEM * * dst_array = sb->line + src_y;\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
764 x86_reg tmp;\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
765 __asm__ volatile(\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
766 "mov %7, %%"REG_c" \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
767 "mov %6, %2 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
768 "mov %4, %%"REG_S" \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
769 "pxor %%mm7, %%mm7 \n\t" /* 0 */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
770 "pcmpeqd %%mm3, %%mm3 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
771 "psllw $15, %%mm3 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
772 "psrlw $12, %%mm3 \n\t" /* FRAC_BITS >> 1 */\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
773 "1: \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
774 "mov %1, %%"REG_D" \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
775 "mov (%%"REG_D"), %%"REG_D" \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
776 "add %3, %%"REG_D" \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
777
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
778 #define snow_inner_add_yblock_mmx_start(out_reg1, out_reg2, ptr_offset, s_offset, d_offset)\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
779 "mov "PTR_SIZE"*"ptr_offset"(%%"REG_a"), %%"REG_d"; \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
780 "movd "d_offset"(%%"REG_d"), %%"out_reg1" \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
781 "movd "d_offset"+4(%%"REG_d"), %%"out_reg2" \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
782 "punpcklbw %%mm7, %%"out_reg1" \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
783 "punpcklbw %%mm7, %%"out_reg2" \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
784 "movd "s_offset"(%%"REG_S"), %%mm0 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
785 "movd "s_offset"+4(%%"REG_S"), %%mm4 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
786 "punpcklbw %%mm7, %%mm0 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
787 "punpcklbw %%mm7, %%mm4 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
788 "pmullw %%mm0, %%"out_reg1" \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
789 "pmullw %%mm4, %%"out_reg2" \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
790
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
791 #define snow_inner_add_yblock_mmx_accum(ptr_offset, s_offset, d_offset) \
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
792 snow_inner_add_yblock_mmx_start("mm2", "mm6", ptr_offset, s_offset, d_offset)\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
793 "paddusw %%mm2, %%mm1 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
794 "paddusw %%mm6, %%mm5 \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
795
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
796 #define snow_inner_add_yblock_mmx_mix(read_offset, write_offset)\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
797 "mov %0, %%"REG_d" \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
798 "psrlw $4, %%mm1 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
799 "psrlw $4, %%mm5 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
800 "paddw "read_offset"(%%"REG_D"), %%mm1 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
801 "paddw "read_offset"+8(%%"REG_D"), %%mm5 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
802 "paddw %%mm3, %%mm1 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
803 "paddw %%mm3, %%mm5 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
804 "psraw $4, %%mm1 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
805 "psraw $4, %%mm5 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
806 "packuswb %%mm5, %%mm1 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
807 "movq %%mm1, "write_offset"(%%"REG_d") \n\t"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
808
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
809 #define snow_inner_add_yblock_mmx_end(s_step)\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
810 "add $"s_step", %%"REG_S" \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
811 "add %%"REG_c", "PTR_SIZE"*3(%%"REG_a");\n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
812 "add %%"REG_c", "PTR_SIZE"*2(%%"REG_a");\n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
813 "add %%"REG_c", "PTR_SIZE"*1(%%"REG_a");\n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
814 "add %%"REG_c", (%%"REG_a") \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
815 "add $"PTR_SIZE"*1, %1 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
816 "add %%"REG_c", %0 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
817 "dec %2 \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
818 "jnz 1b \n\t"\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
819 :"+m"(dst8),"+m"(dst_array),"=&r"(tmp)\
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
820 :\
9194
6bf01d174630 Remove useless casting in asm "m" operand.
cehoyos
parents: 8590
diff changeset
821 "rm"((x86_reg)(src_x<<1)),"m"(obmc),"a"(block),"m"(b_h),"m"(src_stride):\
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
822 "%"REG_c"","%"REG_S"","%"REG_D"","%"REG_d"");
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
823
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
824 static void inner_add_yblock_bw_8_obmc_16_mmx(const uint8_t *obmc, const x86_reg obmc_stride, uint8_t * * block, int b_w, x86_reg b_h,
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
825 int src_x, int src_y, x86_reg src_stride, slice_buffer * sb, int add, uint8_t * dst8){
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
826 snow_inner_add_yblock_mmx_header
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
827 snow_inner_add_yblock_mmx_start("mm1", "mm5", "3", "0", "0")
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
828 snow_inner_add_yblock_mmx_accum("2", "8", "0")
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
829 snow_inner_add_yblock_mmx_accum("1", "128", "0")
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
830 snow_inner_add_yblock_mmx_accum("0", "136", "0")
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
831 snow_inner_add_yblock_mmx_mix("0", "0")
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
832 snow_inner_add_yblock_mmx_end("16")
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
833 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
834
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
835 static void inner_add_yblock_bw_16_obmc_32_mmx(const uint8_t *obmc, const x86_reg obmc_stride, uint8_t * * block, int b_w, x86_reg b_h,
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
836 int src_x, int src_y, x86_reg src_stride, slice_buffer * sb, int add, uint8_t * dst8){
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
837 snow_inner_add_yblock_mmx_header
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
838 snow_inner_add_yblock_mmx_start("mm1", "mm5", "3", "0", "0")
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
839 snow_inner_add_yblock_mmx_accum("2", "16", "0")
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
840 snow_inner_add_yblock_mmx_accum("1", "512", "0")
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
841 snow_inner_add_yblock_mmx_accum("0", "528", "0")
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
842 snow_inner_add_yblock_mmx_mix("0", "0")
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
843
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
844 snow_inner_add_yblock_mmx_start("mm1", "mm5", "3", "8", "8")
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
845 snow_inner_add_yblock_mmx_accum("2", "24", "8")
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
846 snow_inner_add_yblock_mmx_accum("1", "520", "8")
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
847 snow_inner_add_yblock_mmx_accum("0", "536", "8")
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
848 snow_inner_add_yblock_mmx_mix("16", "8")
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
849 snow_inner_add_yblock_mmx_end("32")
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
850 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
851
11485
0f0cd6b5791f Separate DWT from snow and dsputil
mru
parents: 11373
diff changeset
852 static void ff_snow_inner_add_yblock_sse2(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
853 int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
854
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
855 if (b_w == 16)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
856 inner_add_yblock_bw_16_obmc_32_sse2(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
857 else if (b_w == 8 && obmc_stride == 16) {
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
858 if (!(b_h & 1))
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
859 inner_add_yblock_bw_8_obmc_16_bh_even_sse2(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
860 else
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
861 inner_add_yblock_bw_8_obmc_16_mmx(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
862 } else
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
863 ff_snow_inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
864 }
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
865
11485
0f0cd6b5791f Separate DWT from snow and dsputil
mru
parents: 11373
diff changeset
866 static void ff_snow_inner_add_yblock_mmx(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
867 int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
868 if (b_w == 16)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
869 inner_add_yblock_bw_16_obmc_32_mmx(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
870 else if (b_w == 8 && obmc_stride == 16)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
871 inner_add_yblock_bw_8_obmc_16_mmx(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
872 else
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
873 ff_snow_inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
874 }
11485
0f0cd6b5791f Separate DWT from snow and dsputil
mru
parents: 11373
diff changeset
875
0f0cd6b5791f Separate DWT from snow and dsputil
mru
parents: 11373
diff changeset
876 void ff_dwt_init_x86(DWTContext *c)
0f0cd6b5791f Separate DWT from snow and dsputil
mru
parents: 11373
diff changeset
877 {
12475
9fef0a8ddd63 Move mm_support() from libavcodec to libavutil, make it a public
stefano
parents: 12456
diff changeset
878 int mm_flags = av_get_cpu_flags();
11485
0f0cd6b5791f Separate DWT from snow and dsputil
mru
parents: 11373
diff changeset
879
12456
a5ddb39627fd Rename FF_MM_ symbols related to CPU features flags as AV_CPU_FLAG_
stefano
parents: 12414
diff changeset
880 if (mm_flags & AV_CPU_FLAG_MMX) {
a5ddb39627fd Rename FF_MM_ symbols related to CPU features flags as AV_CPU_FLAG_
stefano
parents: 12414
diff changeset
881 if(mm_flags & AV_CPU_FLAG_SSE2 & 0){
11485
0f0cd6b5791f Separate DWT from snow and dsputil
mru
parents: 11373
diff changeset
882 c->horizontal_compose97i = ff_snow_horizontal_compose97i_sse2;
0f0cd6b5791f Separate DWT from snow and dsputil
mru
parents: 11373
diff changeset
883 #if HAVE_7REGS
0f0cd6b5791f Separate DWT from snow and dsputil
mru
parents: 11373
diff changeset
884 c->vertical_compose97i = ff_snow_vertical_compose97i_sse2;
0f0cd6b5791f Separate DWT from snow and dsputil
mru
parents: 11373
diff changeset
885 #endif
0f0cd6b5791f Separate DWT from snow and dsputil
mru
parents: 11373
diff changeset
886 c->inner_add_yblock = ff_snow_inner_add_yblock_sse2;
0f0cd6b5791f Separate DWT from snow and dsputil
mru
parents: 11373
diff changeset
887 }
0f0cd6b5791f Separate DWT from snow and dsputil
mru
parents: 11373
diff changeset
888 else{
12456
a5ddb39627fd Rename FF_MM_ symbols related to CPU features flags as AV_CPU_FLAG_
stefano
parents: 12414
diff changeset
889 if(mm_flags & AV_CPU_FLAG_MMX2){
11485
0f0cd6b5791f Separate DWT from snow and dsputil
mru
parents: 11373
diff changeset
890 c->horizontal_compose97i = ff_snow_horizontal_compose97i_mmx;
0f0cd6b5791f Separate DWT from snow and dsputil
mru
parents: 11373
diff changeset
891 #if HAVE_7REGS
0f0cd6b5791f Separate DWT from snow and dsputil
mru
parents: 11373
diff changeset
892 c->vertical_compose97i = ff_snow_vertical_compose97i_mmx;
0f0cd6b5791f Separate DWT from snow and dsputil
mru
parents: 11373
diff changeset
893 #endif
0f0cd6b5791f Separate DWT from snow and dsputil
mru
parents: 11373
diff changeset
894 }
0f0cd6b5791f Separate DWT from snow and dsputil
mru
parents: 11373
diff changeset
895 c->inner_add_yblock = ff_snow_inner_add_yblock_mmx;
0f0cd6b5791f Separate DWT from snow and dsputil
mru
parents: 11373
diff changeset
896 }
0f0cd6b5791f Separate DWT from snow and dsputil
mru
parents: 11373
diff changeset
897 }
0f0cd6b5791f Separate DWT from snow and dsputil
mru
parents: 11373
diff changeset
898 }