comparison x86/h264dsp_mmx.c @ 8519:cc64e1343397 libavcodec

Use H264 MMX chroma functions to accelerate RV40 decoding. Patch by Mathieu Velten (matmaul A gmail)
author cehoyos
date Sun, 04 Jan 2009 01:36:11 +0000
parents cea216e44ee3
children 7a463923ecd1
comparison
equal deleted inserted replaced
8518:f2c406b05158 8519:cc64e1343397
2082 #ifdef HAVE_SSSE3 2082 #ifdef HAVE_SSSE3
2083 H264_MC_816(H264_MC_H, ssse3) 2083 H264_MC_816(H264_MC_H, ssse3)
2084 H264_MC_816(H264_MC_HV, ssse3) 2084 H264_MC_816(H264_MC_HV, ssse3)
2085 #endif 2085 #endif
2086 2086
2087 /* rnd interleaved with rnd div 8, use p+1 to access rnd div 8 */
2088 DECLARE_ALIGNED_8(static const uint64_t, h264_rnd_reg[4]) = {
2089 0x0020002000200020ULL, 0x0004000400040004ULL, 0x001C001C001C001CULL, 0x0003000300030003ULL
2090 };
2087 2091
2088 #define H264_CHROMA_OP(S,D) 2092 #define H264_CHROMA_OP(S,D)
2089 #define H264_CHROMA_OP4(S,D,T) 2093 #define H264_CHROMA_OP4(S,D,T)
2090 #define H264_CHROMA_MC8_TMPL put_h264_chroma_mc8_mmx 2094 #define H264_CHROMA_MC8_TMPL put_h264_chroma_generic_mc8_mmx
2091 #define H264_CHROMA_MC4_TMPL put_h264_chroma_mc4_mmx 2095 #define H264_CHROMA_MC4_TMPL put_h264_chroma_generic_mc4_mmx
2092 #define H264_CHROMA_MC2_TMPL put_h264_chroma_mc2_mmx2 2096 #define H264_CHROMA_MC2_TMPL put_h264_chroma_mc2_mmx2
2093 #define H264_CHROMA_MC8_MV0 put_pixels8_mmx 2097 #define H264_CHROMA_MC8_MV0 put_pixels8_mmx
2094 #include "dsputil_h264_template_mmx.c" 2098 #include "dsputil_h264_template_mmx.c"
2095 2099
2096 static void put_h264_chroma_mc8_mmx_rnd(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y) 2100 static void put_h264_chroma_mc8_mmx_rnd(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y)
2097 { 2101 {
2098 put_h264_chroma_mc8_mmx(dst, src, stride, h, x, y, 1); 2102 put_h264_chroma_generic_mc8_mmx(dst, src, stride, h, x, y, h264_rnd_reg);
2099 } 2103 }
2100 static void put_h264_chroma_mc8_mmx_nornd(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y) 2104 static void put_h264_chroma_mc8_mmx_nornd(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y)
2101 { 2105 {
2102 put_h264_chroma_mc8_mmx(dst, src, stride, h, x, y, 0); 2106 put_h264_chroma_generic_mc8_mmx(dst, src, stride, h, x, y, h264_rnd_reg+2);
2107 }
2108 static void put_h264_chroma_mc4_mmx(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y)
2109 {
2110 put_h264_chroma_generic_mc4_mmx(dst, src, stride, h, x, y, h264_rnd_reg);
2103 } 2111 }
2104 2112
2105 #undef H264_CHROMA_OP 2113 #undef H264_CHROMA_OP
2106 #undef H264_CHROMA_OP4 2114 #undef H264_CHROMA_OP4
2107 #undef H264_CHROMA_MC8_TMPL 2115 #undef H264_CHROMA_MC8_TMPL
2110 #undef H264_CHROMA_MC8_MV0 2118 #undef H264_CHROMA_MC8_MV0
2111 2119
2112 #define H264_CHROMA_OP(S,D) "pavgb " #S ", " #D " \n\t" 2120 #define H264_CHROMA_OP(S,D) "pavgb " #S ", " #D " \n\t"
2113 #define H264_CHROMA_OP4(S,D,T) "movd " #S ", " #T " \n\t"\ 2121 #define H264_CHROMA_OP4(S,D,T) "movd " #S ", " #T " \n\t"\
2114 "pavgb " #T ", " #D " \n\t" 2122 "pavgb " #T ", " #D " \n\t"
2115 #define H264_CHROMA_MC8_TMPL avg_h264_chroma_mc8_mmx2 2123 #define H264_CHROMA_MC8_TMPL avg_h264_chroma_generic_mc8_mmx2
2116 #define H264_CHROMA_MC4_TMPL avg_h264_chroma_mc4_mmx2 2124 #define H264_CHROMA_MC4_TMPL avg_h264_chroma_generic_mc4_mmx2
2117 #define H264_CHROMA_MC2_TMPL avg_h264_chroma_mc2_mmx2 2125 #define H264_CHROMA_MC2_TMPL avg_h264_chroma_mc2_mmx2
2118 #define H264_CHROMA_MC8_MV0 avg_pixels8_mmx2 2126 #define H264_CHROMA_MC8_MV0 avg_pixels8_mmx2
2119 #include "dsputil_h264_template_mmx.c" 2127 #include "dsputil_h264_template_mmx.c"
2120 static void avg_h264_chroma_mc8_mmx2_rnd(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y) 2128 static void avg_h264_chroma_mc8_mmx2_rnd(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y)
2121 { 2129 {
2122 avg_h264_chroma_mc8_mmx2(dst, src, stride, h, x, y, 1); 2130 avg_h264_chroma_generic_mc8_mmx2(dst, src, stride, h, x, y, h264_rnd_reg);
2131 }
2132 static void avg_h264_chroma_mc4_mmx2(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y)
2133 {
2134 avg_h264_chroma_generic_mc4_mmx2(dst, src, stride, h, x, y, h264_rnd_reg);
2123 } 2135 }
2124 #undef H264_CHROMA_OP 2136 #undef H264_CHROMA_OP
2125 #undef H264_CHROMA_OP4 2137 #undef H264_CHROMA_OP4
2126 #undef H264_CHROMA_MC8_TMPL 2138 #undef H264_CHROMA_MC8_TMPL
2127 #undef H264_CHROMA_MC4_TMPL 2139 #undef H264_CHROMA_MC4_TMPL
2129 #undef H264_CHROMA_MC8_MV0 2141 #undef H264_CHROMA_MC8_MV0
2130 2142
2131 #define H264_CHROMA_OP(S,D) "pavgusb " #S ", " #D " \n\t" 2143 #define H264_CHROMA_OP(S,D) "pavgusb " #S ", " #D " \n\t"
2132 #define H264_CHROMA_OP4(S,D,T) "movd " #S ", " #T " \n\t"\ 2144 #define H264_CHROMA_OP4(S,D,T) "movd " #S ", " #T " \n\t"\
2133 "pavgusb " #T ", " #D " \n\t" 2145 "pavgusb " #T ", " #D " \n\t"
2134 #define H264_CHROMA_MC8_TMPL avg_h264_chroma_mc8_3dnow 2146 #define H264_CHROMA_MC8_TMPL avg_h264_chroma_generic_mc8_3dnow
2135 #define H264_CHROMA_MC4_TMPL avg_h264_chroma_mc4_3dnow 2147 #define H264_CHROMA_MC4_TMPL avg_h264_chroma_generic_mc4_3dnow
2136 #define H264_CHROMA_MC8_MV0 avg_pixels8_3dnow 2148 #define H264_CHROMA_MC8_MV0 avg_pixels8_3dnow
2137 #include "dsputil_h264_template_mmx.c" 2149 #include "dsputil_h264_template_mmx.c"
2138 static void avg_h264_chroma_mc8_3dnow_rnd(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y) 2150 static void avg_h264_chroma_mc8_3dnow_rnd(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y)
2139 { 2151 {
2140 avg_h264_chroma_mc8_3dnow(dst, src, stride, h, x, y, 1); 2152 avg_h264_chroma_generic_mc8_3dnow(dst, src, stride, h, x, y, h264_rnd_reg);
2153 }
2154 static void avg_h264_chroma_mc4_3dnow(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y)
2155 {
2156 avg_h264_chroma_generic_mc4_3dnow(dst, src, stride, h, x, y, h264_rnd_reg);
2141 } 2157 }
2142 #undef H264_CHROMA_OP 2158 #undef H264_CHROMA_OP
2143 #undef H264_CHROMA_OP4 2159 #undef H264_CHROMA_OP4
2144 #undef H264_CHROMA_MC8_TMPL 2160 #undef H264_CHROMA_MC8_TMPL
2145 #undef H264_CHROMA_MC4_TMPL 2161 #undef H264_CHROMA_MC4_TMPL