comparison imgconvert.c @ 12316:b01d01738bc4 libavcodec

Convert deinterlacing MMX code to YASM
author vitor
date Sat, 31 Jul 2010 14:50:51 +0000
parents 6c42dc376222
children 0046ee0d227c
comparison
equal deleted inserted replaced
12315:f903ba955ae7 12316:b01d01738bc4
37 #include "libavutil/colorspace.h" 37 #include "libavutil/colorspace.h"
38 #include "libavutil/pixdesc.h" 38 #include "libavutil/pixdesc.h"
39 #include "libavcore/imgutils.h" 39 #include "libavcore/imgutils.h"
40 40
41 #if HAVE_MMX 41 #if HAVE_MMX
42 #include "x86/mmx.h"
43 #include "x86/dsputil_mmx.h" 42 #include "x86/dsputil_mmx.h"
44 #endif 43 #endif
45 44
46 #define xglue(x, y) x ## y 45 #define xglue(x, y) x ## y
47 #define glue(x, y) xglue(x, y) 46 #define glue(x, y) xglue(x, y)
52 #define FF_COLOR_YUV_JPEG 3 /**< YUV color space. 0 <= Y <= 255, 0 <= U, V <= 255 */ 51 #define FF_COLOR_YUV_JPEG 3 /**< YUV color space. 0 <= Y <= 255, 0 <= U, V <= 255 */
53 52
54 #define FF_PIXEL_PLANAR 0 /**< each channel has one component in AVPicture */ 53 #define FF_PIXEL_PLANAR 0 /**< each channel has one component in AVPicture */
55 #define FF_PIXEL_PACKED 1 /**< only one component containing all the channels */ 54 #define FF_PIXEL_PACKED 1 /**< only one component containing all the channels */
56 #define FF_PIXEL_PALETTE 2 /**< one component containing indexes for a palette */ 55 #define FF_PIXEL_PALETTE 2 /**< one component containing indexes for a palette */
56
57 #if HAVE_MMX
58 #define deinterlace_line_inplace ff_deinterlace_line_inplace_mmx
59 #define deinterlace_line ff_deinterlace_line_mmx
60 #else
61 #define deinterlace_line_inplace deinterlace_line_inplace_c
62 #define deinterlace_line deinterlace_line_c
63 #endif
57 64
58 typedef struct PixFmtInfo { 65 typedef struct PixFmtInfo {
59 uint8_t nb_channels; /**< number of channels (including alpha) */ 66 uint8_t nb_channels; /**< number of channels (including alpha) */
60 uint8_t color_type; /**< color type (see FF_COLOR_xxx constants) */ 67 uint8_t color_type; /**< color type (see FF_COLOR_xxx constants) */
61 uint8_t pixel_type; /**< pixel storage type (see FF_PIXEL_xxx constants) */ 68 uint8_t pixel_type; /**< pixel storage type (see FF_PIXEL_xxx constants) */
1117 break; 1124 break;
1118 } 1125 }
1119 return ret; 1126 return ret;
1120 } 1127 }
1121 1128
1122 #if HAVE_MMX 1129 #if !HAVE_MMX
1123 #define DEINT_INPLACE_LINE_LUM \
1124 movd_m2r(lum_m4[0],mm0);\
1125 movd_m2r(lum_m3[0],mm1);\
1126 movd_m2r(lum_m2[0],mm2);\
1127 movd_m2r(lum_m1[0],mm3);\
1128 movd_m2r(lum[0],mm4);\
1129 punpcklbw_r2r(mm7,mm0);\
1130 movd_r2m(mm2,lum_m4[0]);\
1131 punpcklbw_r2r(mm7,mm1);\
1132 punpcklbw_r2r(mm7,mm2);\
1133 punpcklbw_r2r(mm7,mm3);\
1134 punpcklbw_r2r(mm7,mm4);\
1135 paddw_r2r(mm3,mm1);\
1136 psllw_i2r(1,mm2);\
1137 paddw_r2r(mm4,mm0);\
1138 psllw_i2r(2,mm1);\
1139 paddw_r2r(mm6,mm2);\
1140 paddw_r2r(mm2,mm1);\
1141 psubusw_r2r(mm0,mm1);\
1142 psrlw_i2r(3,mm1);\
1143 packuswb_r2r(mm7,mm1);\
1144 movd_r2m(mm1,lum_m2[0]);
1145
1146 #define DEINT_LINE_LUM \
1147 movd_m2r(lum_m4[0],mm0);\
1148 movd_m2r(lum_m3[0],mm1);\
1149 movd_m2r(lum_m2[0],mm2);\
1150 movd_m2r(lum_m1[0],mm3);\
1151 movd_m2r(lum[0],mm4);\
1152 punpcklbw_r2r(mm7,mm0);\
1153 punpcklbw_r2r(mm7,mm1);\
1154 punpcklbw_r2r(mm7,mm2);\
1155 punpcklbw_r2r(mm7,mm3);\
1156 punpcklbw_r2r(mm7,mm4);\
1157 paddw_r2r(mm3,mm1);\
1158 psllw_i2r(1,mm2);\
1159 paddw_r2r(mm4,mm0);\
1160 psllw_i2r(2,mm1);\
1161 paddw_r2r(mm6,mm2);\
1162 paddw_r2r(mm2,mm1);\
1163 psubusw_r2r(mm0,mm1);\
1164 psrlw_i2r(3,mm1);\
1165 packuswb_r2r(mm7,mm1);\
1166 movd_r2m(mm1,dst[0]);
1167 #endif
1168
1169 /* filter parameters: [-1 4 2 4 -1] // 8 */ 1130 /* filter parameters: [-1 4 2 4 -1] // 8 */
1170 static void deinterlace_line(uint8_t *dst, 1131 static void deinterlace_line_c(uint8_t *dst,
1171 const uint8_t *lum_m4, const uint8_t *lum_m3, 1132 const uint8_t *lum_m4, const uint8_t *lum_m3,
1172 const uint8_t *lum_m2, const uint8_t *lum_m1, 1133 const uint8_t *lum_m2, const uint8_t *lum_m1,
1173 const uint8_t *lum, 1134 const uint8_t *lum,
1174 int size) 1135 int size)
1175 { 1136 {
1176 #if !HAVE_MMX
1177 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; 1137 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
1178 int sum; 1138 int sum;
1179 1139
1180 for(;size > 0;size--) { 1140 for(;size > 0;size--) {
1181 sum = -lum_m4[0]; 1141 sum = -lum_m4[0];
1189 lum_m2++; 1149 lum_m2++;
1190 lum_m1++; 1150 lum_m1++;
1191 lum++; 1151 lum++;
1192 dst++; 1152 dst++;
1193 } 1153 }
1194 #else 1154 }
1195 1155
1196 { 1156 static void deinterlace_line_inplace_c(uint8_t *lum_m4, uint8_t *lum_m3,
1197 pxor_r2r(mm7,mm7); 1157 uint8_t *lum_m2, uint8_t *lum_m1,
1198 movq_m2r(ff_pw_4,mm6); 1158 uint8_t *lum, int size)
1199 } 1159 {
1200 for (;size > 3; size-=4) {
1201 DEINT_LINE_LUM
1202 lum_m4+=4;
1203 lum_m3+=4;
1204 lum_m2+=4;
1205 lum_m1+=4;
1206 lum+=4;
1207 dst+=4;
1208 }
1209 #endif
1210 }
1211 static void deinterlace_line_inplace(uint8_t *lum_m4, uint8_t *lum_m3, uint8_t *lum_m2, uint8_t *lum_m1, uint8_t *lum,
1212 int size)
1213 {
1214 #if !HAVE_MMX
1215 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; 1160 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
1216 int sum; 1161 int sum;
1217 1162
1218 for(;size > 0;size--) { 1163 for(;size > 0;size--) {
1219 sum = -lum_m4[0]; 1164 sum = -lum_m4[0];
1227 lum_m3++; 1172 lum_m3++;
1228 lum_m2++; 1173 lum_m2++;
1229 lum_m1++; 1174 lum_m1++;
1230 lum++; 1175 lum++;
1231 } 1176 }
1232 #else 1177 }
1233
1234 {
1235 pxor_r2r(mm7,mm7);
1236 movq_m2r(ff_pw_4,mm6);
1237 }
1238 for (;size > 3; size-=4) {
1239 DEINT_INPLACE_LINE_LUM
1240 lum_m4+=4;
1241 lum_m3+=4;
1242 lum_m2+=4;
1243 lum_m1+=4;
1244 lum+=4;
1245 }
1246 #endif 1178 #endif
1247 }
1248 1179
1249 /* deinterlacing : 2 temporal taps, 3 spatial taps linear filter. The 1180 /* deinterlacing : 2 temporal taps, 3 spatial taps linear filter. The
1250 top field is copied as is, but the bottom field is deinterlaced 1181 top field is copied as is, but the bottom field is deinterlaced
1251 against the top field. */ 1182 against the top field. */
1252 static void deinterlace_bottom_field(uint8_t *dst, int dst_wrap, 1183 static void deinterlace_bottom_field(uint8_t *dst, int dst_wrap,