Mercurial > libavcodec.hg
comparison x86/h264dsp_mmx.c @ 12437:b242eb86ea9a libavcodec
Move H264 chroma MC from inline asm to yasm. This fixes VP3/5/6 and VC-1
fate failures on Win64.
author | rbultje |
---|---|
date | Mon, 30 Aug 2010 16:31:04 +0000 |
parents | fe78a4548d12 |
children | 33ecda76b2f2 |
comparison
equal
deleted
inserted
replaced
12436:d6d0a43848b4 | 12437:b242eb86ea9a |
---|---|
2103 #if HAVE_SSSE3 | 2103 #if HAVE_SSSE3 |
2104 H264_MC_816(H264_MC_H, ssse3) | 2104 H264_MC_816(H264_MC_H, ssse3) |
2105 H264_MC_816(H264_MC_HV, ssse3) | 2105 H264_MC_816(H264_MC_HV, ssse3) |
2106 #endif | 2106 #endif |
2107 | 2107 |
2108 /* rnd interleaved with rnd div 8, use p+1 to access rnd div 8 */ | |
2109 DECLARE_ALIGNED(8, static const uint64_t, h264_rnd_reg)[4] = { | |
2110 0x0020002000200020ULL, 0x0004000400040004ULL, 0x001C001C001C001CULL, 0x0003000300030003ULL | |
2111 }; | |
2112 | |
2113 #define H264_CHROMA_OP(S,D) | |
2114 #define H264_CHROMA_OP4(S,D,T) | |
2115 #define H264_CHROMA_MC8_TMPL put_h264_chroma_generic_mc8_mmx | |
2116 #define H264_CHROMA_MC4_TMPL put_h264_chroma_generic_mc4_mmx | |
2117 #define H264_CHROMA_MC2_TMPL put_h264_chroma_mc2_mmx2 | |
2118 #define H264_CHROMA_MC8_MV0 put_pixels8_mmx | |
2119 #include "dsputil_h264_template_mmx.c" | |
2120 | |
2121 static void put_h264_chroma_mc8_mmx_rnd(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y) | |
2122 { | |
2123 put_h264_chroma_generic_mc8_mmx(dst, src, stride, h, x, y, h264_rnd_reg); | |
2124 } | |
2125 static void put_vc1_chroma_mc8_mmx_nornd(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y) | |
2126 { | |
2127 put_h264_chroma_generic_mc8_mmx(dst, src, stride, h, x, y, h264_rnd_reg+2); | |
2128 } | |
2129 static void put_h264_chroma_mc4_mmx(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y) | |
2130 { | |
2131 put_h264_chroma_generic_mc4_mmx(dst, src, stride, h, x, y, h264_rnd_reg); | |
2132 } | |
2133 | |
2134 #undef H264_CHROMA_OP | |
2135 #undef H264_CHROMA_OP4 | |
2136 #undef H264_CHROMA_MC8_TMPL | |
2137 #undef H264_CHROMA_MC4_TMPL | |
2138 #undef H264_CHROMA_MC2_TMPL | |
2139 #undef H264_CHROMA_MC8_MV0 | |
2140 | |
2141 #define H264_CHROMA_OP(S,D) "pavgb " #S ", " #D " \n\t" | |
2142 #define H264_CHROMA_OP4(S,D,T) "movd " #S ", " #T " \n\t"\ | |
2143 "pavgb " #T ", " #D " \n\t" | |
2144 #define H264_CHROMA_MC8_TMPL avg_h264_chroma_generic_mc8_mmx2 | |
2145 #define H264_CHROMA_MC4_TMPL avg_h264_chroma_generic_mc4_mmx2 | |
2146 #define H264_CHROMA_MC2_TMPL avg_h264_chroma_mc2_mmx2 | |
2147 #define H264_CHROMA_MC8_MV0 avg_pixels8_mmx2 | |
2148 #include "dsputil_h264_template_mmx.c" | |
2149 static void avg_h264_chroma_mc8_mmx2_rnd(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y) | |
2150 { | |
2151 avg_h264_chroma_generic_mc8_mmx2(dst, src, stride, h, x, y, h264_rnd_reg); | |
2152 } | |
2153 static void avg_vc1_chroma_mc8_mmx2_nornd(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y) | |
2154 { | |
2155 avg_h264_chroma_generic_mc8_mmx2(dst, src, stride, h, x, y, h264_rnd_reg+2); | |
2156 } | |
2157 static void avg_h264_chroma_mc4_mmx2(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y) | |
2158 { | |
2159 avg_h264_chroma_generic_mc4_mmx2(dst, src, stride, h, x, y, h264_rnd_reg); | |
2160 } | |
2161 #undef H264_CHROMA_OP | |
2162 #undef H264_CHROMA_OP4 | |
2163 #undef H264_CHROMA_MC8_TMPL | |
2164 #undef H264_CHROMA_MC4_TMPL | |
2165 #undef H264_CHROMA_MC2_TMPL | |
2166 #undef H264_CHROMA_MC8_MV0 | |
2167 | |
2168 #define H264_CHROMA_OP(S,D) "pavgusb " #S ", " #D " \n\t" | |
2169 #define H264_CHROMA_OP4(S,D,T) "movd " #S ", " #T " \n\t"\ | |
2170 "pavgusb " #T ", " #D " \n\t" | |
2171 #define H264_CHROMA_MC8_TMPL avg_h264_chroma_generic_mc8_3dnow | |
2172 #define H264_CHROMA_MC4_TMPL avg_h264_chroma_generic_mc4_3dnow | |
2173 #define H264_CHROMA_MC8_MV0 avg_pixels8_3dnow | |
2174 #include "dsputil_h264_template_mmx.c" | |
2175 static void avg_h264_chroma_mc8_3dnow_rnd(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y) | |
2176 { | |
2177 avg_h264_chroma_generic_mc8_3dnow(dst, src, stride, h, x, y, h264_rnd_reg); | |
2178 } | |
2179 static void avg_h264_chroma_mc4_3dnow(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y) | |
2180 { | |
2181 avg_h264_chroma_generic_mc4_3dnow(dst, src, stride, h, x, y, h264_rnd_reg); | |
2182 } | |
2183 #undef H264_CHROMA_OP | |
2184 #undef H264_CHROMA_OP4 | |
2185 #undef H264_CHROMA_MC8_TMPL | |
2186 #undef H264_CHROMA_MC4_TMPL | |
2187 #undef H264_CHROMA_MC8_MV0 | |
2188 | |
2189 #if HAVE_SSSE3 | |
2190 #define AVG_OP(X) | |
2191 #undef H264_CHROMA_MC8_TMPL | |
2192 #undef H264_CHROMA_MC4_TMPL | |
2193 #define H264_CHROMA_MC8_TMPL put_h264_chroma_mc8_ssse3 | |
2194 #define H264_CHROMA_MC4_TMPL put_h264_chroma_mc4_ssse3 | |
2195 #define H264_CHROMA_MC8_MV0 put_pixels8_mmx | |
2196 #include "dsputil_h264_template_ssse3.c" | |
2197 static void put_h264_chroma_mc8_ssse3_rnd(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y) | |
2198 { | |
2199 put_h264_chroma_mc8_ssse3(dst, src, stride, h, x, y, 1); | |
2200 } | |
2201 static void put_vc1_chroma_mc8_ssse3_nornd(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y) | |
2202 { | |
2203 put_h264_chroma_mc8_ssse3(dst, src, stride, h, x, y, 0); | |
2204 } | |
2205 | |
2206 #undef AVG_OP | |
2207 #undef H264_CHROMA_MC8_TMPL | |
2208 #undef H264_CHROMA_MC4_TMPL | |
2209 #undef H264_CHROMA_MC8_MV0 | |
2210 #define AVG_OP(X) X | |
2211 #define H264_CHROMA_MC8_TMPL avg_h264_chroma_mc8_ssse3 | |
2212 #define H264_CHROMA_MC4_TMPL avg_h264_chroma_mc4_ssse3 | |
2213 #define H264_CHROMA_MC8_MV0 avg_pixels8_mmx2 | |
2214 #include "dsputil_h264_template_ssse3.c" | |
2215 static void avg_h264_chroma_mc8_ssse3_rnd(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y) | |
2216 { | |
2217 avg_h264_chroma_mc8_ssse3(dst, src, stride, h, x, y, 1); | |
2218 } | |
2219 static void avg_vc1_chroma_mc8_ssse3_nornd(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y) | |
2220 { | |
2221 avg_h264_chroma_mc8_ssse3(dst, src, stride, h, x, y, 0); | |
2222 } | |
2223 #undef AVG_OP | |
2224 #undef H264_CHROMA_MC8_TMPL | |
2225 #undef H264_CHROMA_MC4_TMPL | |
2226 #undef H264_CHROMA_MC8_MV0 | |
2227 #endif | |
2228 | |
2229 /***********************************/ | 2108 /***********************************/ |
2230 /* weighted prediction */ | 2109 /* weighted prediction */ |
2231 | 2110 |
2232 static inline void ff_h264_weight_WxH_mmx2(uint8_t *dst, int stride, int log2_denom, int weight, int offset, int w, int h) | 2111 static inline void ff_h264_weight_WxH_mmx2(uint8_t *dst, int stride, int log2_denom, int weight, int offset, int w, int h) |
2233 { | 2112 { |