comparison i386/dsputil_mmx.c @ 984:e162c09efbe7 libavcodec

qpel fix
author michaelni
date Thu, 09 Jan 2003 20:42:37 +0000
parents 274b518c4ecb
children 4dfe15ae0078
comparison
equal deleted inserted replaced
983:ca2a303ea039 984:e162c09efbe7
1083 static void OPNAME ## qpel8_mc00_ ## MMX (UINT8 *dst, UINT8 *src, int stride){\ 1083 static void OPNAME ## qpel8_mc00_ ## MMX (UINT8 *dst, UINT8 *src, int stride){\
1084 OPNAME ## pixels8_mmx(dst, src, stride, 8);\ 1084 OPNAME ## pixels8_mmx(dst, src, stride, 8);\
1085 }\ 1085 }\
1086 \ 1086 \
1087 static void OPNAME ## qpel8_mc10_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ 1087 static void OPNAME ## qpel8_mc10_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
1088 uint64_t temp[32];\ 1088 uint64_t temp[8];\
1089 uint8_t * const half= (uint8_t*)temp;\ 1089 uint8_t * const half= (uint8_t*)temp;\
1090 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(half, src, 8, stride, 8);\ 1090 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(half, src, 8, stride, 8);\
1091 OPNAME ## pixels8_l2_mmx(dst, src, half, stride, stride, 8);\ 1091 OPNAME ## pixels8_l2_mmx(dst, src, half, stride, stride, 8);\
1092 }\ 1092 }\
1093 \ 1093 \
1094 static void OPNAME ## qpel8_mc20_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ 1094 static void OPNAME ## qpel8_mc20_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
1095 OPNAME ## mpeg4_qpel8_h_lowpass_ ## MMX(dst, src, stride, stride, 8);\ 1095 OPNAME ## mpeg4_qpel8_h_lowpass_ ## MMX(dst, src, stride, stride, 8);\
1096 }\ 1096 }\
1097 \ 1097 \
1098 static void OPNAME ## qpel8_mc30_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ 1098 static void OPNAME ## qpel8_mc30_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
1099 uint64_t temp[32];\ 1099 uint64_t temp[8];\
1100 uint8_t * const half= (uint8_t*)temp;\ 1100 uint8_t * const half= (uint8_t*)temp;\
1101 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(half, src, 8, stride, 8);\ 1101 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(half, src, 8, stride, 8);\
1102 OPNAME ## pixels8_l2_mmx(dst, src+1, half, stride, stride, 8);\ 1102 OPNAME ## pixels8_l2_mmx(dst, src+1, half, stride, stride, 8);\
1103 }\ 1103 }\
1104 \ 1104 \
1105 static void OPNAME ## qpel8_mc01_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ 1105 static void OPNAME ## qpel8_mc01_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
1106 uint64_t temp[32];\ 1106 uint64_t temp[8];\
1107 uint8_t * const half= (uint8_t*)temp;\ 1107 uint8_t * const half= (uint8_t*)temp;\
1108 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(half, src, 8, stride);\ 1108 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(half, src, 8, stride);\
1109 OPNAME ## pixels8_l2_mmx(dst, src, half, stride, stride, 8);\ 1109 OPNAME ## pixels8_l2_mmx(dst, src, half, stride, stride, 8);\
1110 }\ 1110 }\
1111 \ 1111 \
1112 static void OPNAME ## qpel8_mc02_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ 1112 static void OPNAME ## qpel8_mc02_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
1113 OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, src, stride, stride);\ 1113 OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, src, stride, stride);\
1114 }\ 1114 }\
1115 \ 1115 \
1116 static void OPNAME ## qpel8_mc03_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ 1116 static void OPNAME ## qpel8_mc03_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
1117 uint64_t temp[32];\ 1117 uint64_t temp[8];\
1118 uint8_t * const half= (uint8_t*)temp;\ 1118 uint8_t * const half= (uint8_t*)temp;\
1119 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(half, src, 8, stride);\ 1119 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(half, src, 8, stride);\
1120 OPNAME ## pixels8_l2_mmx(dst, src+stride, half, stride, stride, 8);\ 1120 OPNAME ## pixels8_l2_mmx(dst, src+stride, half, stride, stride, 8);\
1121 }\ 1121 }\
1122 static void OPNAME ## qpel8_mc11_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ 1122 static void OPNAME ## qpel8_mc11_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
1123 uint64_t half[8*2 + 8*2 + 18*2];\ 1123 uint64_t half[8 + 9];\
1124 uint8_t * const halfH= ((uint8_t*)half) + 2*64 + 8;\ 1124 uint8_t * const halfH= ((uint8_t*)half) + 64;\
1125 uint8_t * const halfV= ((uint8_t*)half);\ 1125 uint8_t * const halfHV= ((uint8_t*)half);\
1126 uint8_t * const halfHV= ((uint8_t*)half) + 64;\
1127 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ 1126 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\
1128 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfV, src, 8, stride);\ 1127 put ## RND ## pixels8_l2_mmx(halfH, src, halfH, 8, stride, 9);\
1129 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ 1128 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
1130 OPNAME ## pixels8_l4_mmx(dst, src, (uint8_t*)half, stride, 8);\ 1129 OPNAME ## pixels8_l2_mmx(dst, halfH, halfHV, stride, 8, 8);\
1131 }\ 1130 }\
1132 static void OPNAME ## qpel8_mc31_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ 1131 static void OPNAME ## qpel8_mc31_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
1133 uint64_t half[8*2 + 8*2 + 18*2];\ 1132 uint64_t half[8 + 9];\
1134 uint8_t * const halfH= ((uint8_t*)half) + 2*64 + 8;\ 1133 uint8_t * const halfH= ((uint8_t*)half) + 64;\
1135 uint8_t * const halfV= ((uint8_t*)half);\ 1134 uint8_t * const halfHV= ((uint8_t*)half);\
1136 uint8_t * const halfHV= ((uint8_t*)half) + 64;\
1137 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ 1135 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\
1138 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfV, src+1, 8, stride);\ 1136 put ## RND ## pixels8_l2_mmx(halfH, src+1, halfH, 8, stride, 9);\
1139 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ 1137 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
1140 OPNAME ## pixels8_l4_mmx(dst, src+1, (uint8_t*)half, stride, 8);\ 1138 OPNAME ## pixels8_l2_mmx(dst, halfH, halfHV, stride, 8, 8);\
1141 }\ 1139 }\
1142 static void OPNAME ## qpel8_mc13_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ 1140 static void OPNAME ## qpel8_mc13_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
1143 uint64_t half[8*2 + 8*2 + 9*2];\ 1141 uint64_t half[8 + 9];\
1144 uint8_t * const halfH= ((uint8_t*)half) + 2*64;\ 1142 uint8_t * const halfH= ((uint8_t*)half) + 64;\
1145 uint8_t * const halfV= ((uint8_t*)half);\ 1143 uint8_t * const halfHV= ((uint8_t*)half);\
1146 uint8_t * const halfHV= ((uint8_t*)half) + 64;\
1147 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ 1144 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\
1148 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfV, src, 8, stride);\ 1145 put ## RND ## pixels8_l2_mmx(halfH, src, halfH, 8, stride, 9);\
1149 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ 1146 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
1150 OPNAME ## pixels8_l4_mmx(dst, src+stride, (uint8_t*)half, stride, 8);\ 1147 OPNAME ## pixels8_l2_mmx(dst, halfH+8, halfHV, stride, 8, 8);\
1151 }\ 1148 }\
1152 static void OPNAME ## qpel8_mc33_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ 1149 static void OPNAME ## qpel8_mc33_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
1153 uint64_t half[8*2 + 8*2 + 9*2];\ 1150 uint64_t half[8 + 9];\
1154 uint8_t * const halfH= ((uint8_t*)half) + 2*64;\ 1151 uint8_t * const halfH= ((uint8_t*)half) + 64;\
1155 uint8_t * const halfV= ((uint8_t*)half);\ 1152 uint8_t * const halfHV= ((uint8_t*)half);\
1156 uint8_t * const halfHV= ((uint8_t*)half) + 64;\ 1153 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\
1157 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src , 8, stride, 9);\ 1154 put ## RND ## pixels8_l2_mmx(halfH, src+1, halfH, 8, stride, 9);\
1158 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfV, src+1, 8, stride);\
1159 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ 1155 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
1160 OPNAME ## pixels8_l4_mmx(dst, src+stride+1, (uint8_t*)half, stride, 8);\ 1156 OPNAME ## pixels8_l2_mmx(dst, halfH+8, halfHV, stride, 8, 8);\
1161 }\ 1157 }\
1162 static void OPNAME ## qpel8_mc21_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ 1158 static void OPNAME ## qpel8_mc21_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
1163 uint64_t half[8*2 + 9*2];\ 1159 uint64_t half[8 + 9];\
1164 uint8_t * const halfH= ((uint8_t*)half) + 64;\ 1160 uint8_t * const halfH= ((uint8_t*)half) + 64;\
1165 uint8_t * const halfHV= ((uint8_t*)half);\ 1161 uint8_t * const halfHV= ((uint8_t*)half);\
1166 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ 1162 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\
1167 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ 1163 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
1168 OPNAME ## pixels8_l2_mmx(dst, halfH, halfHV, stride, 8, 8);\ 1164 OPNAME ## pixels8_l2_mmx(dst, halfH, halfHV, stride, 8, 8);\
1169 }\ 1165 }\
1170 static void OPNAME ## qpel8_mc23_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ 1166 static void OPNAME ## qpel8_mc23_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
1171 uint64_t half[8*2 + 9*2];\ 1167 uint64_t half[8 + 9];\
1172 uint8_t * const halfH= ((uint8_t*)half) + 64;\ 1168 uint8_t * const halfH= ((uint8_t*)half) + 64;\
1173 uint8_t * const halfHV= ((uint8_t*)half);\ 1169 uint8_t * const halfHV= ((uint8_t*)half);\
1174 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ 1170 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\
1175 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ 1171 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
1176 OPNAME ## pixels8_l2_mmx(dst, halfH+8, halfHV, stride, 8, 8);\ 1172 OPNAME ## pixels8_l2_mmx(dst, halfH+8, halfHV, stride, 8, 8);\
1177 }\ 1173 }\
1178 static void OPNAME ## qpel8_mc12_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ 1174 static void OPNAME ## qpel8_mc12_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
1179 uint64_t half[8*2 + 8*2 + 9*2];\ 1175 uint64_t half[8 + 9];\
1180 uint8_t * const halfH= ((uint8_t*)half) + 2*64;\ 1176 uint8_t * const halfH= ((uint8_t*)half);\
1181 uint8_t * const halfV= ((uint8_t*)half);\
1182 uint8_t * const halfHV= ((uint8_t*)half) + 64;\
1183 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ 1177 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\
1184 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfV, src, 8, stride);\ 1178 put ## RND ## pixels8_l2_mmx(halfH, src, halfH, 8, stride, 9);\
1185 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ 1179 OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, stride, 8);\
1186 OPNAME ## pixels8_l2_mmx(dst, halfV, halfHV, stride, 8, 8);\
1187 }\ 1180 }\
1188 static void OPNAME ## qpel8_mc32_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ 1181 static void OPNAME ## qpel8_mc32_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
1189 uint64_t half[8*2 + 8*2 + 9*2];\ 1182 uint64_t half[8 + 9];\
1190 uint8_t * const halfH= ((uint8_t*)half) + 2*64;\ 1183 uint8_t * const halfH= ((uint8_t*)half);\
1191 uint8_t * const halfV= ((uint8_t*)half);\
1192 uint8_t * const halfHV= ((uint8_t*)half) + 64;\
1193 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ 1184 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\
1194 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfV, src+1, 8, stride);\ 1185 put ## RND ## pixels8_l2_mmx(halfH, src+1, halfH, 8, stride, 9);\
1195 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ 1186 OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, stride, 8);\
1196 OPNAME ## pixels8_l2_mmx(dst, halfV, halfHV, stride, 8, 8);\
1197 }\ 1187 }\
1198 static void OPNAME ## qpel8_mc22_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ 1188 static void OPNAME ## qpel8_mc22_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
1199 uint64_t half[9*2];\ 1189 uint64_t half[9];\
1200 uint8_t * const halfH= ((uint8_t*)half);\ 1190 uint8_t * const halfH= ((uint8_t*)half);\
1201 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ 1191 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\
1202 OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, stride, 8);\ 1192 OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, stride, 8);\
1203 }\ 1193 }\
1204 static void OPNAME ## qpel16_mc00_ ## MMX (UINT8 *dst, UINT8 *src, int stride){\ 1194 static void OPNAME ## qpel16_mc00_ ## MMX (UINT8 *dst, UINT8 *src, int stride){\
1239 uint8_t * const half= (uint8_t*)temp;\ 1229 uint8_t * const half= (uint8_t*)temp;\
1240 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(half, src, 16, stride);\ 1230 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(half, src, 16, stride);\
1241 OPNAME ## pixels16_l2_mmx(dst, src+stride, half, stride, stride, 16);\ 1231 OPNAME ## pixels16_l2_mmx(dst, src+stride, half, stride, stride, 16);\
1242 }\ 1232 }\
1243 static void OPNAME ## qpel16_mc11_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ 1233 static void OPNAME ## qpel16_mc11_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
1244 uint64_t half[16*2 + 16*2 + 18*2];\ 1234 uint64_t half[16*2 + 17*2];\
1245 uint8_t * const halfH= ((uint8_t*)half) + 2*256 + 16;\ 1235 uint8_t * const halfH= ((uint8_t*)half) + 256;\
1246 uint8_t * const halfV= ((uint8_t*)half);\ 1236 uint8_t * const halfHV= ((uint8_t*)half);\
1247 uint8_t * const halfHV= ((uint8_t*)half) + 256;\
1248 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ 1237 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\
1249 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfV, src, 16, stride);\ 1238 put ## RND ## pixels16_l2_mmx(halfH, src, halfH, 16, stride, 17);\
1250 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\ 1239 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\
1251 OPNAME ## pixels16_l4_mmx(dst, src, (uint8_t*)half, stride, 16);\ 1240 OPNAME ## pixels16_l2_mmx(dst, halfH, halfHV, stride, 16, 16);\
1252 }\ 1241 }\
1253 static void OPNAME ## qpel16_mc31_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ 1242 static void OPNAME ## qpel16_mc31_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
1254 uint64_t half[16*2 + 16*2 + 18*2];\ 1243 uint64_t half[16*2 + 17*2];\
1255 uint8_t * const halfH= ((uint8_t*)half) + 2*256 + 16;\ 1244 uint8_t * const halfH= ((uint8_t*)half) + 256;\
1256 uint8_t * const halfV= ((uint8_t*)half);\ 1245 uint8_t * const halfHV= ((uint8_t*)half);\
1257 uint8_t * const halfHV= ((uint8_t*)half) + 256;\
1258 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ 1246 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\
1259 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfV, src+1, 16, stride);\ 1247 put ## RND ## pixels16_l2_mmx(halfH, src+1, halfH, 16, stride, 17);\
1260 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\ 1248 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\
1261 OPNAME ## pixels16_l4_mmx(dst, src+1, (uint8_t*)half, stride, 16);\ 1249 OPNAME ## pixels16_l2_mmx(dst, halfH, halfHV, stride, 16, 16);\
1262 }\ 1250 }\
1263 static void OPNAME ## qpel16_mc13_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ 1251 static void OPNAME ## qpel16_mc13_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
1264 uint64_t half[16*2 + 16*2 + 17*2];\ 1252 uint64_t half[16*2 + 17*2];\
1265 uint8_t * const halfH= ((uint8_t*)half) + 2*256;\ 1253 uint8_t * const halfH= ((uint8_t*)half) + 256;\
1266 uint8_t * const halfV= ((uint8_t*)half);\ 1254 uint8_t * const halfHV= ((uint8_t*)half);\
1267 uint8_t * const halfHV= ((uint8_t*)half) + 256;\
1268 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ 1255 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\
1269 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfV, src, 16, stride);\ 1256 put ## RND ## pixels16_l2_mmx(halfH, src, halfH, 16, stride, 17);\
1270 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\ 1257 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\
1271 OPNAME ## pixels16_l4_mmx(dst, src+stride, (uint8_t*)half, stride, 16);\ 1258 OPNAME ## pixels16_l2_mmx(dst, halfH+16, halfHV, stride, 16, 16);\
1272 }\ 1259 }\
1273 static void OPNAME ## qpel16_mc33_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ 1260 static void OPNAME ## qpel16_mc33_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
1274 uint64_t half[16*2 + 16*2 + 17*2];\ 1261 uint64_t half[16*2 + 17*2];\
1275 uint8_t * const halfH= ((uint8_t*)half) + 2*256;\ 1262 uint8_t * const halfH= ((uint8_t*)half) + 256;\
1276 uint8_t * const halfV= ((uint8_t*)half);\ 1263 uint8_t * const halfHV= ((uint8_t*)half);\
1277 uint8_t * const halfHV= ((uint8_t*)half) + 256;\ 1264 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\
1278 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src , 16, stride, 17);\ 1265 put ## RND ## pixels16_l2_mmx(halfH, src+1, halfH, 16, stride, 17);\
1279 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfV, src+1, 16, stride);\
1280 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\ 1266 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\
1281 OPNAME ## pixels16_l4_mmx(dst, src+stride+1, (uint8_t*)half, stride, 16);\ 1267 OPNAME ## pixels16_l2_mmx(dst, halfH+16, halfHV, stride, 16, 16);\
1282 }\ 1268 }\
1283 static void OPNAME ## qpel16_mc21_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ 1269 static void OPNAME ## qpel16_mc21_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
1284 uint64_t half[16*2 + 17*2];\ 1270 uint64_t half[16*2 + 17*2];\
1285 uint8_t * const halfH= ((uint8_t*)half) + 256;\ 1271 uint8_t * const halfH= ((uint8_t*)half) + 256;\
1286 uint8_t * const halfHV= ((uint8_t*)half);\ 1272 uint8_t * const halfHV= ((uint8_t*)half);\
1295 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ 1281 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\
1296 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\ 1282 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\
1297 OPNAME ## pixels16_l2_mmx(dst, halfH+16, halfHV, stride, 16, 16);\ 1283 OPNAME ## pixels16_l2_mmx(dst, halfH+16, halfHV, stride, 16, 16);\
1298 }\ 1284 }\
1299 static void OPNAME ## qpel16_mc12_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ 1285 static void OPNAME ## qpel16_mc12_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
1300 uint64_t half[16*2 + 16*2 + 17*2];\ 1286 uint64_t half[17*2];\
1301 uint8_t * const halfH= ((uint8_t*)half) + 2*256;\ 1287 uint8_t * const halfH= ((uint8_t*)half);\
1302 uint8_t * const halfV= ((uint8_t*)half);\
1303 uint8_t * const halfHV= ((uint8_t*)half) + 256;\
1304 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ 1288 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\
1305 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfV, src, 16, stride);\ 1289 put ## RND ## pixels16_l2_mmx(halfH, src, halfH, 16, stride, 17);\
1306 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\ 1290 OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, stride, 16);\
1307 OPNAME ## pixels16_l2_mmx(dst, halfV, halfHV, stride, 16, 16);\
1308 }\ 1291 }\
1309 static void OPNAME ## qpel16_mc32_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ 1292 static void OPNAME ## qpel16_mc32_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
1310 uint64_t half[16*2 + 16*2 + 17*2];\ 1293 uint64_t half[17*2];\
1311 uint8_t * const halfH= ((uint8_t*)half) + 2*256;\ 1294 uint8_t * const halfH= ((uint8_t*)half);\
1312 uint8_t * const halfV= ((uint8_t*)half);\
1313 uint8_t * const halfHV= ((uint8_t*)half) + 256;\
1314 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ 1295 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\
1315 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfV, src+1, 16, stride);\ 1296 put ## RND ## pixels16_l2_mmx(halfH, src+1, halfH, 16, stride, 17);\
1316 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\ 1297 OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, stride, 16);\
1317 OPNAME ## pixels16_l2_mmx(dst, halfV, halfHV, stride, 16, 16);\
1318 }\ 1298 }\
1319 static void OPNAME ## qpel16_mc22_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ 1299 static void OPNAME ## qpel16_mc22_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
1320 uint64_t half[17*2];\ 1300 uint64_t half[17*2];\
1321 uint8_t * const halfH= ((uint8_t*)half);\ 1301 uint8_t * const halfH= ((uint8_t*)half);\
1322 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ 1302 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\
1523 1503
1524 c->avg_pixels_tab[1][0] = avg_pixels8_3dnow; 1504 c->avg_pixels_tab[1][0] = avg_pixels8_3dnow;
1525 c->avg_pixels_tab[1][1] = avg_pixels8_x2_3dnow; 1505 c->avg_pixels_tab[1][1] = avg_pixels8_x2_3dnow;
1526 c->avg_pixels_tab[1][2] = avg_pixels8_y2_3dnow; 1506 c->avg_pixels_tab[1][2] = avg_pixels8_y2_3dnow;
1527 c->avg_pixels_tab[1][3] = avg_pixels8_xy2_3dnow; 1507 c->avg_pixels_tab[1][3] = avg_pixels8_xy2_3dnow;
1528 1508
1529 SET_QPEL_FUNC(qpel_pixels_tab[0][ 0], qpel16_mc00_3dnow) 1509 SET_QPEL_FUNC(qpel_pixels_tab[0][ 0], qpel16_mc00_3dnow)
1530 SET_QPEL_FUNC(qpel_pixels_tab[0][ 1], qpel16_mc10_3dnow) 1510 SET_QPEL_FUNC(qpel_pixels_tab[0][ 1], qpel16_mc10_3dnow)
1531 SET_QPEL_FUNC(qpel_pixels_tab[0][ 2], qpel16_mc20_3dnow) 1511 SET_QPEL_FUNC(qpel_pixels_tab[0][ 2], qpel16_mc20_3dnow)
1532 SET_QPEL_FUNC(qpel_pixels_tab[0][ 3], qpel16_mc30_3dnow) 1512 SET_QPEL_FUNC(qpel_pixels_tab[0][ 3], qpel16_mc30_3dnow)
1533 SET_QPEL_FUNC(qpel_pixels_tab[0][ 4], qpel16_mc01_3dnow) 1513 SET_QPEL_FUNC(qpel_pixels_tab[0][ 4], qpel16_mc01_3dnow)