Mercurial > libavcodec.hg
comparison i386/dsputil_mmx.c @ 446:efe0c0d40577 libavcodec
* reenabled original xy2 put routine - rounding error is really bad with
the new code
* added PAVGBP macros for parallel processing to save a few more cycles on
Celerons
author | kabi |
---|---|
date | Wed, 29 May 2002 19:57:21 +0000 |
parents | 62c01dbdc1e0 |
children | e8c8ca9106aa |
comparison
equal
deleted
inserted
replaced
445:62c01dbdc1e0 | 446:efe0c0d40577 |
---|---|
105 "pxor " #rega ", " #regb " \n\t"\ | 105 "pxor " #rega ", " #regb " \n\t"\ |
106 "pand %%mm7, " #regb " \n\t"\ | 106 "pand %%mm7, " #regb " \n\t"\ |
107 "psrlq $1, " #regb " \n\t"\ | 107 "psrlq $1, " #regb " \n\t"\ |
108 "psubb " #regb ", " #regr " \n\t" | 108 "psubb " #regb ", " #regr " \n\t" |
109 | 109 |
110 #define PAVGBP_MMX_NO_RND(rega, regb, regr, regc, regd, regp) \ | |
111 "movq " #rega ", " #regr " \n\t"\ | |
112 "movq " #regc ", " #regp " \n\t"\ | |
113 "pand " #regb ", " #regr " \n\t"\ | |
114 "pand " #regd ", " #regp " \n\t"\ | |
115 "pxor " #rega ", " #regb " \n\t"\ | |
116 "pxor " #regc ", " #regd " \n\t"\ | |
117 "pand %%mm7, " #regb " \n\t"\ | |
118 "pand %%mm7, " #regd " \n\t"\ | |
119 "psrlq $1, " #regb " \n\t"\ | |
120 "psrlq $1, " #regd " \n\t"\ | |
121 "paddb " #regb ", " #regr " \n\t"\ | |
122 "paddb " #regd ", " #regp " \n\t" | |
123 | |
124 #define PAVGBP_MMX(rega, regb, regr, regc, regd, regp) \ | |
125 "movq " #rega ", " #regr " \n\t"\ | |
126 "movq " #regc ", " #regp " \n\t"\ | |
127 "por " #regb ", " #regr " \n\t"\ | |
128 "por " #regd ", " #regp " \n\t"\ | |
129 "pxor " #rega ", " #regb " \n\t"\ | |
130 "pxor " #regc ", " #regd " \n\t"\ | |
131 "pand %%mm7, " #regb " \n\t"\ | |
132 "pand %%mm7, " #regd " \n\t"\ | |
133 "psrlq $1, " #regd " \n\t"\ | |
134 "psrlq $1, " #regb " \n\t"\ | |
135 "psubb " #regb ", " #regr " \n\t"\ | |
136 "psubb " #regd ", " #regp " \n\t" | |
137 | |
110 /***********************************/ | 138 /***********************************/ |
111 /* MMX no rounding */ | 139 /* MMX no rounding */ |
112 #define DEF(x, y) x ## _no_rnd_ ## y ##_mmx | 140 #define DEF(x, y) x ## _no_rnd_ ## y ##_mmx |
113 | 141 |
114 #define PAVGB(a, b) PAVGB_MMX_NO_RND(a, b, %%mm6) | 142 #define PAVGB(a, b) PAVGB_MMX_NO_RND(a, b, %%mm6) |
115 #define PAVGBR(a, b, c) PAVGB_MMX_NO_RND(a, b, c) | 143 #define PAVGBR(a, b, c) PAVGB_MMX_NO_RND(a, b, c) |
144 #define PAVGBP(a, b, c, d, e, f) PAVGBP_MMX_NO_RND(a, b, c, d, e, f) | |
116 #include "dsputil_mmx_rnd.h" | 145 #include "dsputil_mmx_rnd.h" |
117 | 146 |
118 #undef DEF | 147 #undef DEF |
119 #undef PAVGB | 148 #undef PAVGB |
120 #undef PAVGBR | 149 #undef PAVGBR |
150 #undef PAVGBP | |
121 /***********************************/ | 151 /***********************************/ |
122 /* MMX rounding */ | 152 /* MMX rounding */ |
123 | 153 |
124 #define DEF(x, y) x ## _ ## y ##_mmx | 154 #define DEF(x, y) x ## _ ## y ##_mmx |
125 | 155 |
126 #define PAVGB(a, b) PAVGB_MMX(a, b, %%mm6) | 156 #define PAVGB(a, b) PAVGB_MMX(a, b, %%mm6) |
127 #define PAVGBR(a, b, c) PAVGB_MMX(a, b, c) | 157 #define PAVGBR(a, b, c) PAVGB_MMX(a, b, c) |
158 #define PAVGBP(a, b, c, d, e, f) PAVGBP_MMX(a, b, c, d, e, f) | |
128 #include "dsputil_mmx_rnd.h" | 159 #include "dsputil_mmx_rnd.h" |
129 | 160 |
130 #undef DEF | 161 #undef DEF |
131 #undef PAVGB | 162 #undef PAVGB |
132 #undef PAVGBR | 163 #undef PAVGBR |
164 #undef PAVGBP | |
133 | 165 |
134 /***********************************/ | 166 /***********************************/ |
135 /* 3Dnow specific */ | 167 /* 3Dnow specific */ |
136 | 168 |
137 #define DEF(x) x ## _3dnow | 169 #define DEF(x) x ## _3dnow |
337 : "r"(line_size) | 369 : "r"(line_size) |
338 : "%eax", "memory" | 370 : "%eax", "memory" |
339 ); | 371 ); |
340 } | 372 } |
341 | 373 |
342 #if 0 | 374 #if 1 |
343 static void put_pixels_xy2_mmx(UINT8 *block, const UINT8 *pixels, int line_size, int h) | 375 static void put_pixels_xy2_mmx(UINT8 *block, const UINT8 *pixels, int line_size, int h) |
344 { | 376 { |
345 UINT8 *p; | 377 UINT8 *p; |
346 const UINT8 *pix; | 378 const UINT8 *pix; |
347 p = block; | 379 p = block; |