Mercurial > libavcodec.hg
comparison i386/mpegvideo_mmx_template.c @ 2979:bfabfdf9ce55 libavcodec
COSMETICS: tabs --> spaces, some prettyprinting
author | diego |
---|---|
date | Thu, 22 Dec 2005 01:10:11 +0000 |
parents | ef2149182f1c |
children | 0b546eab515d |
comparison
equal
deleted
inserted
replaced
2978:403183bbb505 | 2979:bfabfdf9ce55 |
---|---|
19 */ | 19 */ |
20 #undef SPREADW | 20 #undef SPREADW |
21 #undef PMAXW | 21 #undef PMAXW |
22 #ifdef HAVE_MMX2 | 22 #ifdef HAVE_MMX2 |
23 #define SPREADW(a) "pshufw $0, " #a ", " #a " \n\t" | 23 #define SPREADW(a) "pshufw $0, " #a ", " #a " \n\t" |
24 #define PMAXW(a,b) "pmaxsw " #a ", " #b " \n\t" | 24 #define PMAXW(a,b) "pmaxsw " #a ", " #b " \n\t" |
25 #define PMAX(a,b) \ | 25 #define PMAX(a,b) \ |
26 "pshufw $0x0E," #a ", " #b " \n\t"\ | 26 "pshufw $0x0E," #a ", " #b " \n\t"\ |
27 PMAXW(b, a)\ | 27 PMAXW(b, a)\ |
28 "pshufw $0x01," #a ", " #b " \n\t"\ | 28 "pshufw $0x01," #a ", " #b " \n\t"\ |
29 PMAXW(b, a) | 29 PMAXW(b, a) |
30 #else | 30 #else |
31 #define SPREADW(a) \ | 31 #define SPREADW(a) \ |
32 "punpcklwd " #a ", " #a " \n\t"\ | 32 "punpcklwd " #a ", " #a " \n\t"\ |
33 "punpcklwd " #a ", " #a " \n\t" | 33 "punpcklwd " #a ", " #a " \n\t" |
34 #define PMAXW(a,b) \ | 34 #define PMAXW(a,b) \ |
35 "psubusw " #a ", " #b " \n\t"\ | 35 "psubusw " #a ", " #b " \n\t"\ |
36 "paddw " #a ", " #b " \n\t" | 36 "paddw " #a ", " #b " \n\t" |
37 #define PMAX(a,b) \ | 37 #define PMAX(a,b) \ |
38 "movq " #a ", " #b " \n\t"\ | 38 "movq " #a ", " #b " \n\t"\ |
39 "psrlq $32, " #a " \n\t"\ | 39 "psrlq $32, " #a " \n\t"\ |
40 PMAXW(b, a)\ | 40 PMAXW(b, a)\ |
41 "movq " #a ", " #b " \n\t"\ | 41 "movq " #a ", " #b " \n\t"\ |
42 "psrlq $16, " #a " \n\t"\ | 42 "psrlq $16, " #a " \n\t"\ |
43 PMAXW(b, a) | 43 PMAXW(b, a) |
44 | 44 |
45 #endif | 45 #endif |
46 | 46 |
47 static int RENAME(dct_quantize)(MpegEncContext *s, | 47 static int RENAME(dct_quantize)(MpegEncContext *s, |
48 DCTELEM *block, int n, | 48 DCTELEM *block, int n, |
69 q = s->c_dc_scale; | 69 q = s->c_dc_scale; |
70 /* note: block[0] is assumed to be positive */ | 70 /* note: block[0] is assumed to be positive */ |
71 if (!s->h263_aic) { | 71 if (!s->h263_aic) { |
72 #if 1 | 72 #if 1 |
73 asm volatile ( | 73 asm volatile ( |
74 "mul %%ecx \n\t" | 74 "mul %%ecx \n\t" |
75 : "=d" (level), "=a"(dummy) | 75 : "=d" (level), "=a"(dummy) |
76 : "a" ((block[0]>>2) + q), "c" (inverse[q<<1]) | 76 : "a" ((block[0]>>2) + q), "c" (inverse[q<<1]) |
77 ); | 77 ); |
78 #else | 78 #else |
79 asm volatile ( | 79 asm volatile ( |
80 "xorl %%edx, %%edx \n\t" | 80 "xorl %%edx, %%edx \n\t" |
81 "divw %%cx \n\t" | 81 "divw %%cx \n\t" |
82 "movzwl %%ax, %%eax \n\t" | 82 "movzwl %%ax, %%eax \n\t" |
83 : "=a" (level) | 83 : "=a" (level) |
84 : "a" ((block[0]>>2) + q), "c" (q<<1) | 84 : "a" ((block[0]>>2) + q), "c" (q<<1) |
85 : "%edx" | 85 : "%edx" |
86 ); | 86 ); |
87 #endif | 87 #endif |
88 } else | 88 } else |
89 /* For AIC we skip quant/dequant of INTRADC */ | 89 /* For AIC we skip quant/dequant of INTRADC */ |
90 level = (block[0] + 4)>>3; | 90 level = (block[0] + 4)>>3; |
101 } | 101 } |
102 | 102 |
103 if((s->out_format == FMT_H263 || s->out_format == FMT_H261) && s->mpeg_quant==0){ | 103 if((s->out_format == FMT_H263 || s->out_format == FMT_H261) && s->mpeg_quant==0){ |
104 | 104 |
105 asm volatile( | 105 asm volatile( |
106 "movd %%"REG_a", %%mm3 \n\t" // last_non_zero_p1 | 106 "movd %%"REG_a", %%mm3 \n\t" // last_non_zero_p1 |
107 SPREADW(%%mm3) | 107 SPREADW(%%mm3) |
108 "pxor %%mm7, %%mm7 \n\t" // 0 | 108 "pxor %%mm7, %%mm7 \n\t" // 0 |
109 "pxor %%mm4, %%mm4 \n\t" // 0 | 109 "pxor %%mm4, %%mm4 \n\t" // 0 |
110 "movq (%2), %%mm5 \n\t" // qmat[0] | 110 "movq (%2), %%mm5 \n\t" // qmat[0] |
111 "pxor %%mm6, %%mm6 \n\t" | 111 "pxor %%mm6, %%mm6 \n\t" |
112 "psubw (%3), %%mm6 \n\t" // -bias[0] | 112 "psubw (%3), %%mm6 \n\t" // -bias[0] |
113 "mov $-128, %%"REG_a" \n\t" | 113 "mov $-128, %%"REG_a" \n\t" |
114 ".balign 16 \n\t" | 114 ".balign 16 \n\t" |
115 "1: \n\t" | 115 "1: \n\t" |
116 "pxor %%mm1, %%mm1 \n\t" // 0 | 116 "pxor %%mm1, %%mm1 \n\t" // 0 |
117 "movq (%1, %%"REG_a"), %%mm0 \n\t" // block[i] | 117 "movq (%1, %%"REG_a"), %%mm0 \n\t" // block[i] |
118 "pcmpgtw %%mm0, %%mm1 \n\t" // block[i] < 0 ? 0xFFFF : 0x0000 | 118 "pcmpgtw %%mm0, %%mm1 \n\t" // block[i] < 0 ? 0xFFFF : 0x0000 |
119 "pxor %%mm1, %%mm0 \n\t" | 119 "pxor %%mm1, %%mm0 \n\t" |
120 "psubw %%mm1, %%mm0 \n\t" // ABS(block[i]) | 120 "psubw %%mm1, %%mm0 \n\t" // ABS(block[i]) |
121 "psubusw %%mm6, %%mm0 \n\t" // ABS(block[i]) + bias[0] | 121 "psubusw %%mm6, %%mm0 \n\t" // ABS(block[i]) + bias[0] |
122 "pmulhw %%mm5, %%mm0 \n\t" // (ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16 | 122 "pmulhw %%mm5, %%mm0 \n\t" // (ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16 |
123 "por %%mm0, %%mm4 \n\t" | 123 "por %%mm0, %%mm4 \n\t" |
124 "pxor %%mm1, %%mm0 \n\t" | 124 "pxor %%mm1, %%mm0 \n\t" |
125 "psubw %%mm1, %%mm0 \n\t" // out=((ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16)*sign(block[i]) | 125 "psubw %%mm1, %%mm0 \n\t" // out=((ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16)*sign(block[i]) |
126 "movq %%mm0, (%5, %%"REG_a") \n\t" | 126 "movq %%mm0, (%5, %%"REG_a") \n\t" |
127 "pcmpeqw %%mm7, %%mm0 \n\t" // out==0 ? 0xFF : 0x00 | 127 "pcmpeqw %%mm7, %%mm0 \n\t" // out==0 ? 0xFF : 0x00 |
128 "movq (%4, %%"REG_a"), %%mm1 \n\t" | 128 "movq (%4, %%"REG_a"), %%mm1 \n\t" |
129 "movq %%mm7, (%1, %%"REG_a") \n\t" // 0 | 129 "movq %%mm7, (%1, %%"REG_a") \n\t" // 0 |
130 "pandn %%mm1, %%mm0 \n\t" | 130 "pandn %%mm1, %%mm0 \n\t" |
131 PMAXW(%%mm0, %%mm3) | 131 PMAXW(%%mm0, %%mm3) |
132 "add $8, %%"REG_a" \n\t" | 132 "add $8, %%"REG_a" \n\t" |
133 " js 1b \n\t" | 133 " js 1b \n\t" |
134 PMAX(%%mm3, %%mm0) | 134 PMAX(%%mm3, %%mm0) |
135 "movd %%mm3, %%"REG_a" \n\t" | 135 "movd %%mm3, %%"REG_a" \n\t" |
136 "movzb %%al, %%"REG_a" \n\t" // last_non_zero_p1 | 136 "movzb %%al, %%"REG_a" \n\t" // last_non_zero_p1 |
137 : "+a" (last_non_zero_p1) | 137 : "+a" (last_non_zero_p1) |
138 : "r" (block+64), "r" (qmat), "r" (bias), | 138 : "r" (block+64), "r" (qmat), "r" (bias), |
139 "r" (inv_zigzag_direct16+64), "r" (temp_block+64) | 139 "r" (inv_zigzag_direct16+64), "r" (temp_block+64) |
140 ); | 140 ); |
141 // note: the asm is split because gcc doesn't like that many operands ... | 141 // note: the asm is split because gcc doesn't like that many operands ... |
142 asm volatile( | 142 asm volatile( |
143 "movd %1, %%mm1 \n\t" // max_qcoeff | 143 "movd %1, %%mm1 \n\t" // max_qcoeff |
144 SPREADW(%%mm1) | 144 SPREADW(%%mm1) |
145 "psubusw %%mm1, %%mm4 \n\t" | 145 "psubusw %%mm1, %%mm4 \n\t" |
146 "packuswb %%mm4, %%mm4 \n\t" | 146 "packuswb %%mm4, %%mm4 \n\t" |
147 "movd %%mm4, %0 \n\t" // *overflow | 147 "movd %%mm4, %0 \n\t" // *overflow |
148 : "=g" (*overflow) | 148 : "=g" (*overflow) |
149 : "g" (s->max_qcoeff) | 149 : "g" (s->max_qcoeff) |
150 ); | 150 ); |
151 }else{ // not H.263/H.261, or mpeg_quant != 0 | 151 }else{ // not H.263/H.261, or mpeg_quant != 0 |
152 asm volatile( | 152 asm volatile( |
153 "movd %%"REG_a", %%mm3 \n\t" // last_non_zero_p1 | 153 "movd %%"REG_a", %%mm3 \n\t" // last_non_zero_p1 |
154 SPREADW(%%mm3) | 154 SPREADW(%%mm3) |
155 "pxor %%mm7, %%mm7 \n\t" // 0 | 155 "pxor %%mm7, %%mm7 \n\t" // 0 |
156 "pxor %%mm4, %%mm4 \n\t" // 0 | 156 "pxor %%mm4, %%mm4 \n\t" // 0 |
157 "mov $-128, %%"REG_a" \n\t" | 157 "mov $-128, %%"REG_a" \n\t" |
158 ".balign 16 \n\t" | 158 ".balign 16 \n\t" |
159 "1: \n\t" | 159 "1: \n\t" |
160 "pxor %%mm1, %%mm1 \n\t" // 0 | 160 "pxor %%mm1, %%mm1 \n\t" // 0 |
161 "movq (%1, %%"REG_a"), %%mm0 \n\t" // block[i] | 161 "movq (%1, %%"REG_a"), %%mm0 \n\t" // block[i] |
162 "pcmpgtw %%mm0, %%mm1 \n\t" // block[i] < 0 ? 0xFFFF : 0x0000 | 162 "pcmpgtw %%mm0, %%mm1 \n\t" // block[i] < 0 ? 0xFFFF : 0x0000 |
163 "pxor %%mm1, %%mm0 \n\t" | 163 "pxor %%mm1, %%mm0 \n\t" |
164 "psubw %%mm1, %%mm0 \n\t" // ABS(block[i]) | 164 "psubw %%mm1, %%mm0 \n\t" // ABS(block[i]) |
165 "movq (%3, %%"REG_a"), %%mm6 \n\t" // bias[i] | 165 "movq (%3, %%"REG_a"), %%mm6 \n\t" // bias[i] |
166 "paddusw %%mm6, %%mm0 \n\t" // ABS(block[i]) + bias[i] | 166 "paddusw %%mm6, %%mm0 \n\t" // ABS(block[i]) + bias[i] |
167 "movq (%2, %%"REG_a"), %%mm5 \n\t" // qmat[i] | 167 "movq (%2, %%"REG_a"), %%mm5 \n\t" // qmat[i] |
168 "pmulhw %%mm5, %%mm0 \n\t" // (ABS(block[i])*qmat[i] + bias[i]*qmat[i])>>16 | 168 "pmulhw %%mm5, %%mm0 \n\t" // (ABS(block[i])*qmat[i] + bias[i]*qmat[i])>>16 |
169 "por %%mm0, %%mm4 \n\t" | 169 "por %%mm0, %%mm4 \n\t" |
170 "pxor %%mm1, %%mm0 \n\t" | 170 "pxor %%mm1, %%mm0 \n\t" |
171 "psubw %%mm1, %%mm0 \n\t" // out=((ABS(block[i])*qmat[i] + bias[i]*qmat[i])>>16)*sign(block[i]) | 171 "psubw %%mm1, %%mm0 \n\t" // out=((ABS(block[i])*qmat[i] + bias[i]*qmat[i])>>16)*sign(block[i]) |
172 "movq %%mm0, (%5, %%"REG_a") \n\t" | 172 "movq %%mm0, (%5, %%"REG_a") \n\t" |
173 "pcmpeqw %%mm7, %%mm0 \n\t" // out==0 ? 0xFF : 0x00 | 173 "pcmpeqw %%mm7, %%mm0 \n\t" // out==0 ? 0xFF : 0x00 |
174 "movq (%4, %%"REG_a"), %%mm1 \n\t" | 174 "movq (%4, %%"REG_a"), %%mm1 \n\t" |
175 "movq %%mm7, (%1, %%"REG_a") \n\t" // 0 | 175 "movq %%mm7, (%1, %%"REG_a") \n\t" // 0 |
176 "pandn %%mm1, %%mm0 \n\t" | 176 "pandn %%mm1, %%mm0 \n\t" |
177 PMAXW(%%mm0, %%mm3) | 177 PMAXW(%%mm0, %%mm3) |
178 "add $8, %%"REG_a" \n\t" | 178 "add $8, %%"REG_a" \n\t" |
179 " js 1b \n\t" | 179 " js 1b \n\t" |
180 PMAX(%%mm3, %%mm0) | 180 PMAX(%%mm3, %%mm0) |
181 "movd %%mm3, %%"REG_a" \n\t" | 181 "movd %%mm3, %%"REG_a" \n\t" |
182 "movzb %%al, %%"REG_a" \n\t" // last_non_zero_p1 | 182 "movzb %%al, %%"REG_a" \n\t" // last_non_zero_p1 |
183 : "+a" (last_non_zero_p1) | 183 : "+a" (last_non_zero_p1) |
184 : "r" (block+64), "r" (qmat+64), "r" (bias+64), | 184 : "r" (block+64), "r" (qmat+64), "r" (bias+64), |
185 "r" (inv_zigzag_direct16+64), "r" (temp_block+64) | 185 "r" (inv_zigzag_direct16+64), "r" (temp_block+64) |
186 ); | 186 ); |
187 // note: the asm is split because gcc doesn't like that many operands ... | 187 // note: the asm is split because gcc doesn't like that many operands ... |
188 asm volatile( | 188 asm volatile( |
189 "movd %1, %%mm1 \n\t" // max_qcoeff | 189 "movd %1, %%mm1 \n\t" // max_qcoeff |
190 SPREADW(%%mm1) | 190 SPREADW(%%mm1) |
191 "psubusw %%mm1, %%mm4 \n\t" | 191 "psubusw %%mm1, %%mm4 \n\t" |
192 "packuswb %%mm4, %%mm4 \n\t" | 192 "packuswb %%mm4, %%mm4 \n\t" |
193 "movd %%mm4, %0 \n\t" // *overflow | 193 "movd %%mm4, %0 \n\t" // *overflow |
194 : "=g" (*overflow) | 194 : "=g" (*overflow) |
195 : "g" (s->max_qcoeff) | 195 : "g" (s->max_qcoeff) |
196 ); | 196 ); |
197 } | 197 } |
198 | 198 |