Mercurial > libavcodec.hg
comparison imgconvert.c @ 12316:b01d01738bc4 libavcodec
Convert deinterlacing MMX code to YASM
author | vitor |
---|---|
date | Sat, 31 Jul 2010 14:50:51 +0000 |
parents | 6c42dc376222 |
children | 0046ee0d227c |
comparison (legend for the side-by-side view below: equal, deleted, inserted, replaced)
12315:f903ba955ae7 | 12316:b01d01738bc4 |
---|---|
37 #include "libavutil/colorspace.h" | 37 #include "libavutil/colorspace.h" |
38 #include "libavutil/pixdesc.h" | 38 #include "libavutil/pixdesc.h" |
39 #include "libavcore/imgutils.h" | 39 #include "libavcore/imgutils.h" |
40 | 40 |
41 #if HAVE_MMX | 41 #if HAVE_MMX |
42 #include "x86/mmx.h" | |
43 #include "x86/dsputil_mmx.h" | 42 #include "x86/dsputil_mmx.h" |
44 #endif | 43 #endif |
45 | 44 |
46 #define xglue(x, y) x ## y | 45 #define xglue(x, y) x ## y |
47 #define glue(x, y) xglue(x, y) | 46 #define glue(x, y) xglue(x, y) |
52 #define FF_COLOR_YUV_JPEG 3 /**< YUV color space. 0 <= Y <= 255, 0 <= U, V <= 255 */ | 51 #define FF_COLOR_YUV_JPEG 3 /**< YUV color space. 0 <= Y <= 255, 0 <= U, V <= 255 */ |
53 | 52 |
54 #define FF_PIXEL_PLANAR 0 /**< each channel has one component in AVPicture */ | 53 #define FF_PIXEL_PLANAR 0 /**< each channel has one component in AVPicture */ |
55 #define FF_PIXEL_PACKED 1 /**< only one components containing all the channels */ | 54 #define FF_PIXEL_PACKED 1 /**< only one components containing all the channels */ |
56 #define FF_PIXEL_PALETTE 2 /**< one components containing indexes for a palette */ | 55 #define FF_PIXEL_PALETTE 2 /**< one components containing indexes for a palette */ |
56 | |
57 #if HAVE_MMX | |
58 #define deinterlace_line_inplace ff_deinterlace_line_inplace_mmx | |
59 #define deinterlace_line ff_deinterlace_line_mmx | |
60 #else | |
61 #define deinterlace_line_inplace deinterlace_line_inplace_c | |
62 #define deinterlace_line deinterlace_line_c | |
63 #endif | |
57 | 64 |
58 typedef struct PixFmtInfo { | 65 typedef struct PixFmtInfo { |
59 uint8_t nb_channels; /**< number of channels (including alpha) */ | 66 uint8_t nb_channels; /**< number of channels (including alpha) */ |
60 uint8_t color_type; /**< color type (see FF_COLOR_xxx constants) */ | 67 uint8_t color_type; /**< color type (see FF_COLOR_xxx constants) */ |
61 uint8_t pixel_type; /**< pixel storage type (see FF_PIXEL_xxx constants) */ | 68 uint8_t pixel_type; /**< pixel storage type (see FF_PIXEL_xxx constants) */ |
1117 break; | 1124 break; |
1118 } | 1125 } |
1119 return ret; | 1126 return ret; |
1120 } | 1127 } |
1121 | 1128 |
1122 #if HAVE_MMX | 1129 #if !HAVE_MMX |
1123 #define DEINT_INPLACE_LINE_LUM \ | |
1124 movd_m2r(lum_m4[0],mm0);\ | |
1125 movd_m2r(lum_m3[0],mm1);\ | |
1126 movd_m2r(lum_m2[0],mm2);\ | |
1127 movd_m2r(lum_m1[0],mm3);\ | |
1128 movd_m2r(lum[0],mm4);\ | |
1129 punpcklbw_r2r(mm7,mm0);\ | |
1130 movd_r2m(mm2,lum_m4[0]);\ | |
1131 punpcklbw_r2r(mm7,mm1);\ | |
1132 punpcklbw_r2r(mm7,mm2);\ | |
1133 punpcklbw_r2r(mm7,mm3);\ | |
1134 punpcklbw_r2r(mm7,mm4);\ | |
1135 paddw_r2r(mm3,mm1);\ | |
1136 psllw_i2r(1,mm2);\ | |
1137 paddw_r2r(mm4,mm0);\ | |
1138 psllw_i2r(2,mm1);\ | |
1139 paddw_r2r(mm6,mm2);\ | |
1140 paddw_r2r(mm2,mm1);\ | |
1141 psubusw_r2r(mm0,mm1);\ | |
1142 psrlw_i2r(3,mm1);\ | |
1143 packuswb_r2r(mm7,mm1);\ | |
1144 movd_r2m(mm1,lum_m2[0]); | |
1145 | |
1146 #define DEINT_LINE_LUM \ | |
1147 movd_m2r(lum_m4[0],mm0);\ | |
1148 movd_m2r(lum_m3[0],mm1);\ | |
1149 movd_m2r(lum_m2[0],mm2);\ | |
1150 movd_m2r(lum_m1[0],mm3);\ | |
1151 movd_m2r(lum[0],mm4);\ | |
1152 punpcklbw_r2r(mm7,mm0);\ | |
1153 punpcklbw_r2r(mm7,mm1);\ | |
1154 punpcklbw_r2r(mm7,mm2);\ | |
1155 punpcklbw_r2r(mm7,mm3);\ | |
1156 punpcklbw_r2r(mm7,mm4);\ | |
1157 paddw_r2r(mm3,mm1);\ | |
1158 psllw_i2r(1,mm2);\ | |
1159 paddw_r2r(mm4,mm0);\ | |
1160 psllw_i2r(2,mm1);\ | |
1161 paddw_r2r(mm6,mm2);\ | |
1162 paddw_r2r(mm2,mm1);\ | |
1163 psubusw_r2r(mm0,mm1);\ | |
1164 psrlw_i2r(3,mm1);\ | |
1165 packuswb_r2r(mm7,mm1);\ | |
1166 movd_r2m(mm1,dst[0]); | |
1167 #endif | |
1168 | |
1169 /* filter parameters: [-1 4 2 4 -1] // 8 */ | 1130 /* filter parameters: [-1 4 2 4 -1] // 8 */ |
1170 static void deinterlace_line(uint8_t *dst, | 1131 static void deinterlace_line_c(uint8_t *dst, |
1171 const uint8_t *lum_m4, const uint8_t *lum_m3, | 1132 const uint8_t *lum_m4, const uint8_t *lum_m3, |
1172 const uint8_t *lum_m2, const uint8_t *lum_m1, | 1133 const uint8_t *lum_m2, const uint8_t *lum_m1, |
1173 const uint8_t *lum, | 1134 const uint8_t *lum, |
1174 int size) | 1135 int size) |
1175 { | 1136 { |
1176 #if !HAVE_MMX | |
1177 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; | 1137 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; |
1178 int sum; | 1138 int sum; |
1179 | 1139 |
1180 for(;size > 0;size--) { | 1140 for(;size > 0;size--) { |
1181 sum = -lum_m4[0]; | 1141 sum = -lum_m4[0]; |
1189 lum_m2++; | 1149 lum_m2++; |
1190 lum_m1++; | 1150 lum_m1++; |
1191 lum++; | 1151 lum++; |
1192 dst++; | 1152 dst++; |
1193 } | 1153 } |
1194 #else | 1154 } |
1195 | 1155 |
1196 { | 1156 static void deinterlace_line_inplace_c(uint8_t *lum_m4, uint8_t *lum_m3, |
1197 pxor_r2r(mm7,mm7); | 1157 uint8_t *lum_m2, uint8_t *lum_m1, |
1198 movq_m2r(ff_pw_4,mm6); | 1158 uint8_t *lum, int size) |
1199 } | 1159 { |
1200 for (;size > 3; size-=4) { | |
1201 DEINT_LINE_LUM | |
1202 lum_m4+=4; | |
1203 lum_m3+=4; | |
1204 lum_m2+=4; | |
1205 lum_m1+=4; | |
1206 lum+=4; | |
1207 dst+=4; | |
1208 } | |
1209 #endif | |
1210 } | |
1211 static void deinterlace_line_inplace(uint8_t *lum_m4, uint8_t *lum_m3, uint8_t *lum_m2, uint8_t *lum_m1, uint8_t *lum, | |
1212 int size) | |
1213 { | |
1214 #if !HAVE_MMX | |
1215 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; | 1160 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; |
1216 int sum; | 1161 int sum; |
1217 | 1162 |
1218 for(;size > 0;size--) { | 1163 for(;size > 0;size--) { |
1219 sum = -lum_m4[0]; | 1164 sum = -lum_m4[0]; |
1227 lum_m3++; | 1172 lum_m3++; |
1228 lum_m2++; | 1173 lum_m2++; |
1229 lum_m1++; | 1174 lum_m1++; |
1230 lum++; | 1175 lum++; |
1231 } | 1176 } |
1232 #else | 1177 } |
1233 | |
1234 { | |
1235 pxor_r2r(mm7,mm7); | |
1236 movq_m2r(ff_pw_4,mm6); | |
1237 } | |
1238 for (;size > 3; size-=4) { | |
1239 DEINT_INPLACE_LINE_LUM | |
1240 lum_m4+=4; | |
1241 lum_m3+=4; | |
1242 lum_m2+=4; | |
1243 lum_m1+=4; | |
1244 lum+=4; | |
1245 } | |
1246 #endif | 1178 #endif |
1247 } | |
1248 | 1179 |
1249 /* deinterlacing : 2 temporal taps, 3 spatial taps linear filter. The | 1180 /* deinterlacing : 2 temporal taps, 3 spatial taps linear filter. The |
1250 top field is copied as is, but the bottom field is deinterlaced | 1181 top field is copied as is, but the bottom field is deinterlaced |
1251 against the top field. */ | 1182 against the top field. */ |
1252 static void deinterlace_bottom_field(uint8_t *dst, int dst_wrap, | 1183 static void deinterlace_bottom_field(uint8_t *dst, int dst_wrap, |