comparison x86/h264dsp_mmx.c @ 12437:b242eb86ea9a libavcodec

Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1 fate failures on Win64.
author rbultje
date Mon, 30 Aug 2010 16:31:04 +0000
parents fe78a4548d12
children 33ecda76b2f2
comparison
equal deleted inserted replaced
12436:d6d0a43848b4 12437:b242eb86ea9a
2103 #if HAVE_SSSE3 2103 #if HAVE_SSSE3
2104 H264_MC_816(H264_MC_H, ssse3) 2104 H264_MC_816(H264_MC_H, ssse3)
2105 H264_MC_816(H264_MC_HV, ssse3) 2105 H264_MC_816(H264_MC_HV, ssse3)
2106 #endif 2106 #endif
2107 2107
/*
 * Rounding-constant table passed by pointer to the generic chroma MC
 * functions. Each 64-bit entry repeats one 16-bit value in all four lanes:
 *   [0] 0x0020 (32)   [1] 0x0004 (4)
 *   [2] 0x001C (28)   [3] 0x0003 (3)
 * The *_rnd and mc4 wrappers in this file pass h264_rnd_reg (entries 0/1);
 * the VC-1 *_nornd wrappers pass h264_rnd_reg+2 (entries 2/3).
 */
2108 /* rnd interleaved with rnd div 8, use p+1 to access rnd div 8 */
2109 DECLARE_ALIGNED(8, static const uint64_t, h264_rnd_reg)[4] = {
2110 0x0020002000200020ULL, 0x0004000400040004ULL, 0x001C001C001C001CULL, 0x0003000300030003ULL
2111 };
2112
/*
 * Configure and include the MMX chroma MC template for the "put" variant.
 * H264_CHROMA_OP and H264_CHROMA_OP4 expand to nothing here, so no extra
 * instruction text is contributed by them. The *_TMPL macros supply the
 * function names and H264_CHROMA_MC8_MV0 names put_pixels8_mmx.
 * NOTE(review): how the template uses each macro is defined in
 * dsputil_h264_template_mmx.c, which is not visible here.
 */
2113 #define H264_CHROMA_OP(S,D)
2114 #define H264_CHROMA_OP4(S,D,T)
2115 #define H264_CHROMA_MC8_TMPL put_h264_chroma_generic_mc8_mmx
2116 #define H264_CHROMA_MC4_TMPL put_h264_chroma_generic_mc4_mmx
2117 #define H264_CHROMA_MC2_TMPL put_h264_chroma_mc2_mmx2
2118 #define H264_CHROMA_MC8_MV0 put_pixels8_mmx
2119 #include "dsputil_h264_template_mmx.c"
2120
/*
 * Six-argument wrapper around put_h264_chroma_generic_mc8_mmx: forwards
 * dst, src, stride, h, x and y unchanged and appends h264_rnd_reg
 * (table entries 0 and 1, lanes 0x0020 / 0x0004) as the rounding table.
 * NOTE(review): the generic function is presumably produced by the
 * dsputil_h264_template_mmx.c include via H264_CHROMA_MC8_TMPL - confirm.
 */
2121 static void put_h264_chroma_mc8_mmx_rnd(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y)
2122 {
2123 put_h264_chroma_generic_mc8_mmx(dst, src, stride, h, x, y, h264_rnd_reg);
2124 }
/*
 * VC-1 "no rounding" counterpart of the MMX put mc8 wrapper: same generic
 * function and arguments, but the table pointer is h264_rnd_reg+2, i.e.
 * entries 2 and 3 (lanes 0x001C / 0x0003) instead of entries 0 and 1.
 */
2125 static void put_vc1_chroma_mc8_mmx_nornd(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y)
2126 {
2127 put_h264_chroma_generic_mc8_mmx(dst, src, stride, h, x, y, h264_rnd_reg+2);
2128 }
/*
 * Six-argument wrapper around put_h264_chroma_generic_mc4_mmx: forwards all
 * arguments unchanged and appends h264_rnd_reg (entries 0 and 1).
 */
2129 static void put_h264_chroma_mc4_mmx(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y)
2130 {
2131 put_h264_chroma_generic_mc4_mmx(dst, src, stride, h, x, y, h264_rnd_reg);
2132 }
2133
/* Drop the "put" template configuration before the template is re-included. */
2134 #undef H264_CHROMA_OP
2135 #undef H264_CHROMA_OP4
2136 #undef H264_CHROMA_MC8_TMPL
2137 #undef H264_CHROMA_MC4_TMPL
2138 #undef H264_CHROMA_MC2_TMPL
2139 #undef H264_CHROMA_MC8_MV0
2140
/*
 * Configure and include the MMX chroma MC template for the "avg" variant
 * (MMX2 names). H264_CHROMA_OP(S,D) emits the asm text "pavgb S, D";
 * H264_CHROMA_OP4(S,D,T) emits "movd S, T" followed by "pavgb T, D".
 * H264_CHROMA_MC8_MV0 names avg_pixels8_mmx2.
 */
2141 #define H264_CHROMA_OP(S,D) "pavgb " #S ", " #D " \n\t"
2142 #define H264_CHROMA_OP4(S,D,T) "movd " #S ", " #T " \n\t"\
2143 "pavgb " #T ", " #D " \n\t"
2144 #define H264_CHROMA_MC8_TMPL avg_h264_chroma_generic_mc8_mmx2
2145 #define H264_CHROMA_MC4_TMPL avg_h264_chroma_generic_mc4_mmx2
2146 #define H264_CHROMA_MC2_TMPL avg_h264_chroma_mc2_mmx2
2147 #define H264_CHROMA_MC8_MV0 avg_pixels8_mmx2
2148 #include "dsputil_h264_template_mmx.c"
/*
 * Six-argument wrapper around avg_h264_chroma_generic_mc8_mmx2: forwards all
 * arguments unchanged and appends h264_rnd_reg (table entries 0 and 1,
 * lanes 0x0020 / 0x0004) as the rounding table.
 */
2149 static void avg_h264_chroma_mc8_mmx2_rnd(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y)
2150 {
2151 avg_h264_chroma_generic_mc8_mmx2(dst, src, stride, h, x, y, h264_rnd_reg);
2152 }
/*
 * VC-1 "no rounding" counterpart of the MMX2 avg mc8 wrapper: same generic
 * function and arguments, but the table pointer is h264_rnd_reg+2, i.e.
 * entries 2 and 3 (lanes 0x001C / 0x0003).
 */
2153 static void avg_vc1_chroma_mc8_mmx2_nornd(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y)
2154 {
2155 avg_h264_chroma_generic_mc8_mmx2(dst, src, stride, h, x, y, h264_rnd_reg+2);
2156 }
/*
 * Six-argument wrapper around avg_h264_chroma_generic_mc4_mmx2: forwards all
 * arguments unchanged and appends h264_rnd_reg (entries 0 and 1).
 */
2157 static void avg_h264_chroma_mc4_mmx2(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y)
2158 {
2159 avg_h264_chroma_generic_mc4_mmx2(dst, src, stride, h, x, y, h264_rnd_reg);
2160 }
/* Drop the MMX2 "avg" template configuration before the next include. */
2161 #undef H264_CHROMA_OP
2162 #undef H264_CHROMA_OP4
2163 #undef H264_CHROMA_MC8_TMPL
2164 #undef H264_CHROMA_MC4_TMPL
2165 #undef H264_CHROMA_MC2_TMPL
2166 #undef H264_CHROMA_MC8_MV0
2167
/*
 * Configure and include the MMX chroma MC template for the 3DNow! "avg"
 * variant. Same shape as the MMX2 configuration, but the emitted averaging
 * instruction text is "pavgusb" instead of "pavgb".
 * H264_CHROMA_MC2_TMPL is not defined for this instantiation.
 * NOTE(review): presumably the template only emits the mc2 function when
 * that macro is defined - confirm in dsputil_h264_template_mmx.c.
 */
2168 #define H264_CHROMA_OP(S,D) "pavgusb " #S ", " #D " \n\t"
2169 #define H264_CHROMA_OP4(S,D,T) "movd " #S ", " #T " \n\t"\
2170 "pavgusb " #T ", " #D " \n\t"
2171 #define H264_CHROMA_MC8_TMPL avg_h264_chroma_generic_mc8_3dnow
2172 #define H264_CHROMA_MC4_TMPL avg_h264_chroma_generic_mc4_3dnow
2173 #define H264_CHROMA_MC8_MV0 avg_pixels8_3dnow
2174 #include "dsputil_h264_template_mmx.c"
/*
 * Six-argument wrapper around avg_h264_chroma_generic_mc8_3dnow: forwards
 * all arguments unchanged and appends h264_rnd_reg (entries 0 and 1).
 * No *_nornd counterpart is defined for 3DNow! in this section.
 */
2175 static void avg_h264_chroma_mc8_3dnow_rnd(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y)
2176 {
2177 avg_h264_chroma_generic_mc8_3dnow(dst, src, stride, h, x, y, h264_rnd_reg);
2178 }
/*
 * Six-argument wrapper around avg_h264_chroma_generic_mc4_3dnow: forwards
 * all arguments unchanged and appends h264_rnd_reg (entries 0 and 1).
 */
2179 static void avg_h264_chroma_mc4_3dnow(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y)
2180 {
2181 avg_h264_chroma_generic_mc4_3dnow(dst, src, stride, h, x, y, h264_rnd_reg);
2182 }
/* Drop the 3DNow! template configuration. */
2183 #undef H264_CHROMA_OP
2184 #undef H264_CHROMA_OP4
2185 #undef H264_CHROMA_MC8_TMPL
2186 #undef H264_CHROMA_MC4_TMPL
2187 #undef H264_CHROMA_MC8_MV0
2188
/*
 * SSSE3 instantiations, compiled only when HAVE_SSSE3 is non-zero; the
 * matching #endif follows the SSSE3 avg wrappers further down.
 * "put" variant: AVG_OP(X) expands to nothing. The two #undefs repeat the
 * ones just above; #undef of a name that is not defined is permitted in C.
 * NOTE(review): how AVG_OP and the *_TMPL / *_MV0 names are consumed is
 * defined in dsputil_h264_template_ssse3.c, which is not visible here.
 */
2189 #if HAVE_SSSE3
2190 #define AVG_OP(X)
2191 #undef H264_CHROMA_MC8_TMPL
2192 #undef H264_CHROMA_MC4_TMPL
2193 #define H264_CHROMA_MC8_TMPL put_h264_chroma_mc8_ssse3
2194 #define H264_CHROMA_MC4_TMPL put_h264_chroma_mc4_ssse3
2195 #define H264_CHROMA_MC8_MV0 put_pixels8_mmx
2196 #include "dsputil_h264_template_ssse3.c"
/*
 * Six-argument wrapper around put_h264_chroma_mc8_ssse3: forwards all
 * arguments unchanged and passes the literal 1 as the seventh argument.
 * NOTE(review): the seventh argument presumably selects rounding (this is
 * the _rnd wrapper; the _nornd one passes 0) - confirm in the template.
 */
2197 static void put_h264_chroma_mc8_ssse3_rnd(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y)
2198 {
2199 put_h264_chroma_mc8_ssse3(dst, src, stride, h, x, y, 1);
2200 }
/*
 * VC-1 "no rounding" counterpart of the SSSE3 put mc8 wrapper: same callee
 * and arguments, but the seventh argument is the literal 0 instead of 1.
 */
2201 static void put_vc1_chroma_mc8_ssse3_nornd(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y)
2202 {
2203 put_h264_chroma_mc8_ssse3(dst, src, stride, h, x, y, 0);
2204 }
2205
/*
 * Switch the SSSE3 template from "put" to "avg": AVG_OP(X) now expands to
 * its argument X (it expanded to nothing for the put variant), the function
 * names become avg_*, and H264_CHROMA_MC8_MV0 names avg_pixels8_mmx2.
 */
2206 #undef AVG_OP
2207 #undef H264_CHROMA_MC8_TMPL
2208 #undef H264_CHROMA_MC4_TMPL
2209 #undef H264_CHROMA_MC8_MV0
2210 #define AVG_OP(X) X
2211 #define H264_CHROMA_MC8_TMPL avg_h264_chroma_mc8_ssse3
2212 #define H264_CHROMA_MC4_TMPL avg_h264_chroma_mc4_ssse3
2213 #define H264_CHROMA_MC8_MV0 avg_pixels8_mmx2
2214 #include "dsputil_h264_template_ssse3.c"
/*
 * Six-argument wrapper around avg_h264_chroma_mc8_ssse3: forwards all
 * arguments unchanged and passes the literal 1 as the seventh argument.
 * NOTE(review): the seventh argument presumably selects rounding - confirm
 * in dsputil_h264_template_ssse3.c.
 */
2215 static void avg_h264_chroma_mc8_ssse3_rnd(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y)
2216 {
2217 avg_h264_chroma_mc8_ssse3(dst, src, stride, h, x, y, 1);
2218 }
/*
 * VC-1 "no rounding" counterpart of the SSSE3 avg mc8 wrapper: same callee
 * and arguments, but the seventh argument is the literal 0 instead of 1.
 */
2219 static void avg_vc1_chroma_mc8_ssse3_nornd(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y)
2220 {
2221 avg_h264_chroma_mc8_ssse3(dst, src, stride, h, x, y, 0);
2222 }
/*
 * Remove the SSSE3 template configuration macros, then close the
 * "#if HAVE_SSSE3" region that encloses the SSSE3 chroma MC instantiations.
 */
2223 #undef AVG_OP
2224 #undef H264_CHROMA_MC8_TMPL
2225 #undef H264_CHROMA_MC4_TMPL
2226 #undef H264_CHROMA_MC8_MV0
2227 #endif
2228
2229 /***********************************/ 2108 /***********************************/
2230 /* weighted prediction */ 2109 /* weighted prediction */
2231 2110
2232 static inline void ff_h264_weight_WxH_mmx2(uint8_t *dst, int stride, int log2_denom, int weight, int offset, int w, int h) 2111 static inline void ff_h264_weight_WxH_mmx2(uint8_t *dst, int stride, int log2_denom, int weight, int offset, int w, int h)
2233 { 2112 {