Mercurial > libavcodec.hg
comparison i386/dsputil_mmx.c @ 984:e162c09efbe7 libavcodec
qpel fix
author | michaelni |
---|---|
date | Thu, 09 Jan 2003 20:42:37 +0000 |
parents | 274b518c4ecb |
children | 4dfe15ae0078 |
comparison legend: equal | deleted | inserted | replaced
983:ca2a303ea039 | 984:e162c09efbe7 |
---|---|
1083 static void OPNAME ## qpel8_mc00_ ## MMX (UINT8 *dst, UINT8 *src, int stride){\ | 1083 static void OPNAME ## qpel8_mc00_ ## MMX (UINT8 *dst, UINT8 *src, int stride){\ |
1084 OPNAME ## pixels8_mmx(dst, src, stride, 8);\ | 1084 OPNAME ## pixels8_mmx(dst, src, stride, 8);\ |
1085 }\ | 1085 }\ |
1086 \ | 1086 \ |
1087 static void OPNAME ## qpel8_mc10_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ | 1087 static void OPNAME ## qpel8_mc10_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ |
1088 uint64_t temp[32];\ | 1088 uint64_t temp[8];\ |
1089 uint8_t * const half= (uint8_t*)temp;\ | 1089 uint8_t * const half= (uint8_t*)temp;\ |
1090 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(half, src, 8, stride, 8);\ | 1090 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(half, src, 8, stride, 8);\ |
1091 OPNAME ## pixels8_l2_mmx(dst, src, half, stride, stride, 8);\ | 1091 OPNAME ## pixels8_l2_mmx(dst, src, half, stride, stride, 8);\ |
1092 }\ | 1092 }\ |
1093 \ | 1093 \ |
1094 static void OPNAME ## qpel8_mc20_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ | 1094 static void OPNAME ## qpel8_mc20_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ |
1095 OPNAME ## mpeg4_qpel8_h_lowpass_ ## MMX(dst, src, stride, stride, 8);\ | 1095 OPNAME ## mpeg4_qpel8_h_lowpass_ ## MMX(dst, src, stride, stride, 8);\ |
1096 }\ | 1096 }\ |
1097 \ | 1097 \ |
1098 static void OPNAME ## qpel8_mc30_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ | 1098 static void OPNAME ## qpel8_mc30_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ |
1099 uint64_t temp[32];\ | 1099 uint64_t temp[8];\ |
1100 uint8_t * const half= (uint8_t*)temp;\ | 1100 uint8_t * const half= (uint8_t*)temp;\ |
1101 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(half, src, 8, stride, 8);\ | 1101 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(half, src, 8, stride, 8);\ |
1102 OPNAME ## pixels8_l2_mmx(dst, src+1, half, stride, stride, 8);\ | 1102 OPNAME ## pixels8_l2_mmx(dst, src+1, half, stride, stride, 8);\ |
1103 }\ | 1103 }\ |
1104 \ | 1104 \ |
1105 static void OPNAME ## qpel8_mc01_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ | 1105 static void OPNAME ## qpel8_mc01_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ |
1106 uint64_t temp[32];\ | 1106 uint64_t temp[8];\ |
1107 uint8_t * const half= (uint8_t*)temp;\ | 1107 uint8_t * const half= (uint8_t*)temp;\ |
1108 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(half, src, 8, stride);\ | 1108 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(half, src, 8, stride);\ |
1109 OPNAME ## pixels8_l2_mmx(dst, src, half, stride, stride, 8);\ | 1109 OPNAME ## pixels8_l2_mmx(dst, src, half, stride, stride, 8);\ |
1110 }\ | 1110 }\ |
1111 \ | 1111 \ |
1112 static void OPNAME ## qpel8_mc02_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ | 1112 static void OPNAME ## qpel8_mc02_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ |
1113 OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, src, stride, stride);\ | 1113 OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, src, stride, stride);\ |
1114 }\ | 1114 }\ |
1115 \ | 1115 \ |
1116 static void OPNAME ## qpel8_mc03_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ | 1116 static void OPNAME ## qpel8_mc03_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ |
1117 uint64_t temp[32];\ | 1117 uint64_t temp[8];\ |
1118 uint8_t * const half= (uint8_t*)temp;\ | 1118 uint8_t * const half= (uint8_t*)temp;\ |
1119 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(half, src, 8, stride);\ | 1119 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(half, src, 8, stride);\ |
1120 OPNAME ## pixels8_l2_mmx(dst, src+stride, half, stride, stride, 8);\ | 1120 OPNAME ## pixels8_l2_mmx(dst, src+stride, half, stride, stride, 8);\ |
1121 }\ | 1121 }\ |
1122 static void OPNAME ## qpel8_mc11_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ | 1122 static void OPNAME ## qpel8_mc11_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ |
1123 uint64_t half[8*2 + 8*2 + 18*2];\ | 1123 uint64_t half[8 + 9];\ |
1124 uint8_t * const halfH= ((uint8_t*)half) + 2*64 + 8;\ | 1124 uint8_t * const halfH= ((uint8_t*)half) + 64;\ |
1125 uint8_t * const halfV= ((uint8_t*)half);\ | 1125 uint8_t * const halfHV= ((uint8_t*)half);\ |
1126 uint8_t * const halfHV= ((uint8_t*)half) + 64;\ | |
1127 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ | 1126 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ |
1128 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfV, src, 8, stride);\ | 1127 put ## RND ## pixels8_l2_mmx(halfH, src, halfH, 8, stride, 9);\ |
1129 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ | 1128 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ |
1130 OPNAME ## pixels8_l4_mmx(dst, src, (uint8_t*)half, stride, 8);\ | 1129 OPNAME ## pixels8_l2_mmx(dst, halfH, halfHV, stride, 8, 8);\ |
1131 }\ | 1130 }\ |
1132 static void OPNAME ## qpel8_mc31_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ | 1131 static void OPNAME ## qpel8_mc31_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ |
1133 uint64_t half[8*2 + 8*2 + 18*2];\ | 1132 uint64_t half[8 + 9];\ |
1134 uint8_t * const halfH= ((uint8_t*)half) + 2*64 + 8;\ | 1133 uint8_t * const halfH= ((uint8_t*)half) + 64;\ |
1135 uint8_t * const halfV= ((uint8_t*)half);\ | 1134 uint8_t * const halfHV= ((uint8_t*)half);\ |
1136 uint8_t * const halfHV= ((uint8_t*)half) + 64;\ | |
1137 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ | 1135 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ |
1138 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfV, src+1, 8, stride);\ | 1136 put ## RND ## pixels8_l2_mmx(halfH, src+1, halfH, 8, stride, 9);\ |
1139 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ | 1137 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ |
1140 OPNAME ## pixels8_l4_mmx(dst, src+1, (uint8_t*)half, stride, 8);\ | 1138 OPNAME ## pixels8_l2_mmx(dst, halfH, halfHV, stride, 8, 8);\ |
1141 }\ | 1139 }\ |
1142 static void OPNAME ## qpel8_mc13_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ | 1140 static void OPNAME ## qpel8_mc13_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ |
1143 uint64_t half[8*2 + 8*2 + 9*2];\ | 1141 uint64_t half[8 + 9];\ |
1144 uint8_t * const halfH= ((uint8_t*)half) + 2*64;\ | 1142 uint8_t * const halfH= ((uint8_t*)half) + 64;\ |
1145 uint8_t * const halfV= ((uint8_t*)half);\ | 1143 uint8_t * const halfHV= ((uint8_t*)half);\ |
1146 uint8_t * const halfHV= ((uint8_t*)half) + 64;\ | |
1147 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ | 1144 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ |
1148 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfV, src, 8, stride);\ | 1145 put ## RND ## pixels8_l2_mmx(halfH, src, halfH, 8, stride, 9);\ |
1149 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ | 1146 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ |
1150 OPNAME ## pixels8_l4_mmx(dst, src+stride, (uint8_t*)half, stride, 8);\ | 1147 OPNAME ## pixels8_l2_mmx(dst, halfH+8, halfHV, stride, 8, 8);\ |
1151 }\ | 1148 }\ |
1152 static void OPNAME ## qpel8_mc33_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ | 1149 static void OPNAME ## qpel8_mc33_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ |
1153 uint64_t half[8*2 + 8*2 + 9*2];\ | 1150 uint64_t half[8 + 9];\ |
1154 uint8_t * const halfH= ((uint8_t*)half) + 2*64;\ | 1151 uint8_t * const halfH= ((uint8_t*)half) + 64;\ |
1155 uint8_t * const halfV= ((uint8_t*)half);\ | 1152 uint8_t * const halfHV= ((uint8_t*)half);\ |
1156 uint8_t * const halfHV= ((uint8_t*)half) + 64;\ | 1153 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ |
1157 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src , 8, stride, 9);\ | 1154 put ## RND ## pixels8_l2_mmx(halfH, src+1, halfH, 8, stride, 9);\ |
1158 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfV, src+1, 8, stride);\ | |
1159 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ | 1155 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ |
1160 OPNAME ## pixels8_l4_mmx(dst, src+stride+1, (uint8_t*)half, stride, 8);\ | 1156 OPNAME ## pixels8_l2_mmx(dst, halfH+8, halfHV, stride, 8, 8);\ |
1161 }\ | 1157 }\ |
1162 static void OPNAME ## qpel8_mc21_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ | 1158 static void OPNAME ## qpel8_mc21_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ |
1163 uint64_t half[8*2 + 9*2];\ | 1159 uint64_t half[8 + 9];\ |
1164 uint8_t * const halfH= ((uint8_t*)half) + 64;\ | 1160 uint8_t * const halfH= ((uint8_t*)half) + 64;\ |
1165 uint8_t * const halfHV= ((uint8_t*)half);\ | 1161 uint8_t * const halfHV= ((uint8_t*)half);\ |
1166 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ | 1162 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ |
1167 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ | 1163 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ |
1168 OPNAME ## pixels8_l2_mmx(dst, halfH, halfHV, stride, 8, 8);\ | 1164 OPNAME ## pixels8_l2_mmx(dst, halfH, halfHV, stride, 8, 8);\ |
1169 }\ | 1165 }\ |
1170 static void OPNAME ## qpel8_mc23_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ | 1166 static void OPNAME ## qpel8_mc23_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ |
1171 uint64_t half[8*2 + 9*2];\ | 1167 uint64_t half[8 + 9];\ |
1172 uint8_t * const halfH= ((uint8_t*)half) + 64;\ | 1168 uint8_t * const halfH= ((uint8_t*)half) + 64;\ |
1173 uint8_t * const halfHV= ((uint8_t*)half);\ | 1169 uint8_t * const halfHV= ((uint8_t*)half);\ |
1174 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ | 1170 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ |
1175 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ | 1171 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ |
1176 OPNAME ## pixels8_l2_mmx(dst, halfH+8, halfHV, stride, 8, 8);\ | 1172 OPNAME ## pixels8_l2_mmx(dst, halfH+8, halfHV, stride, 8, 8);\ |
1177 }\ | 1173 }\ |
1178 static void OPNAME ## qpel8_mc12_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ | 1174 static void OPNAME ## qpel8_mc12_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ |
1179 uint64_t half[8*2 + 8*2 + 9*2];\ | 1175 uint64_t half[8 + 9];\ |
1180 uint8_t * const halfH= ((uint8_t*)half) + 2*64;\ | 1176 uint8_t * const halfH= ((uint8_t*)half);\ |
1181 uint8_t * const halfV= ((uint8_t*)half);\ | |
1182 uint8_t * const halfHV= ((uint8_t*)half) + 64;\ | |
1183 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ | 1177 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ |
1184 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfV, src, 8, stride);\ | 1178 put ## RND ## pixels8_l2_mmx(halfH, src, halfH, 8, stride, 9);\ |
1185 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ | 1179 OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, stride, 8);\ |
1186 OPNAME ## pixels8_l2_mmx(dst, halfV, halfHV, stride, 8, 8);\ | |
1187 }\ | 1180 }\ |
1188 static void OPNAME ## qpel8_mc32_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ | 1181 static void OPNAME ## qpel8_mc32_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ |
1189 uint64_t half[8*2 + 8*2 + 9*2];\ | 1182 uint64_t half[8 + 9];\ |
1190 uint8_t * const halfH= ((uint8_t*)half) + 2*64;\ | 1183 uint8_t * const halfH= ((uint8_t*)half);\ |
1191 uint8_t * const halfV= ((uint8_t*)half);\ | |
1192 uint8_t * const halfHV= ((uint8_t*)half) + 64;\ | |
1193 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ | 1184 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ |
1194 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfV, src+1, 8, stride);\ | 1185 put ## RND ## pixels8_l2_mmx(halfH, src+1, halfH, 8, stride, 9);\ |
1195 put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ | 1186 OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, stride, 8);\ |
1196 OPNAME ## pixels8_l2_mmx(dst, halfV, halfHV, stride, 8, 8);\ | |
1197 }\ | 1187 }\ |
1198 static void OPNAME ## qpel8_mc22_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ | 1188 static void OPNAME ## qpel8_mc22_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ |
1199 uint64_t half[9*2];\ | 1189 uint64_t half[9];\ |
1200 uint8_t * const halfH= ((uint8_t*)half);\ | 1190 uint8_t * const halfH= ((uint8_t*)half);\ |
1201 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ | 1191 put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ |
1202 OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, stride, 8);\ | 1192 OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, stride, 8);\ |
1203 }\ | 1193 }\ |
1204 static void OPNAME ## qpel16_mc00_ ## MMX (UINT8 *dst, UINT8 *src, int stride){\ | 1194 static void OPNAME ## qpel16_mc00_ ## MMX (UINT8 *dst, UINT8 *src, int stride){\ |
1239 uint8_t * const half= (uint8_t*)temp;\ | 1229 uint8_t * const half= (uint8_t*)temp;\ |
1240 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(half, src, 16, stride);\ | 1230 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(half, src, 16, stride);\ |
1241 OPNAME ## pixels16_l2_mmx(dst, src+stride, half, stride, stride, 16);\ | 1231 OPNAME ## pixels16_l2_mmx(dst, src+stride, half, stride, stride, 16);\ |
1242 }\ | 1232 }\ |
1243 static void OPNAME ## qpel16_mc11_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ | 1233 static void OPNAME ## qpel16_mc11_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ |
1244 uint64_t half[16*2 + 16*2 + 18*2];\ | 1234 uint64_t half[16*2 + 17*2];\ |
1245 uint8_t * const halfH= ((uint8_t*)half) + 2*256 + 16;\ | 1235 uint8_t * const halfH= ((uint8_t*)half) + 256;\ |
1246 uint8_t * const halfV= ((uint8_t*)half);\ | 1236 uint8_t * const halfHV= ((uint8_t*)half);\ |
1247 uint8_t * const halfHV= ((uint8_t*)half) + 256;\ | |
1248 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ | 1237 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ |
1249 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfV, src, 16, stride);\ | 1238 put ## RND ## pixels16_l2_mmx(halfH, src, halfH, 16, stride, 17);\ |
1250 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\ | 1239 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\ |
1251 OPNAME ## pixels16_l4_mmx(dst, src, (uint8_t*)half, stride, 16);\ | 1240 OPNAME ## pixels16_l2_mmx(dst, halfH, halfHV, stride, 16, 16);\ |
1252 }\ | 1241 }\ |
1253 static void OPNAME ## qpel16_mc31_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ | 1242 static void OPNAME ## qpel16_mc31_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ |
1254 uint64_t half[16*2 + 16*2 + 18*2];\ | 1243 uint64_t half[16*2 + 17*2];\ |
1255 uint8_t * const halfH= ((uint8_t*)half) + 2*256 + 16;\ | 1244 uint8_t * const halfH= ((uint8_t*)half) + 256;\ |
1256 uint8_t * const halfV= ((uint8_t*)half);\ | 1245 uint8_t * const halfHV= ((uint8_t*)half);\ |
1257 uint8_t * const halfHV= ((uint8_t*)half) + 256;\ | |
1258 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ | 1246 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ |
1259 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfV, src+1, 16, stride);\ | 1247 put ## RND ## pixels16_l2_mmx(halfH, src+1, halfH, 16, stride, 17);\ |
1260 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\ | 1248 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\ |
1261 OPNAME ## pixels16_l4_mmx(dst, src+1, (uint8_t*)half, stride, 16);\ | 1249 OPNAME ## pixels16_l2_mmx(dst, halfH, halfHV, stride, 16, 16);\ |
1262 }\ | 1250 }\ |
1263 static void OPNAME ## qpel16_mc13_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ | 1251 static void OPNAME ## qpel16_mc13_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ |
1264 uint64_t half[16*2 + 16*2 + 17*2];\ | 1252 uint64_t half[16*2 + 17*2];\ |
1265 uint8_t * const halfH= ((uint8_t*)half) + 2*256;\ | 1253 uint8_t * const halfH= ((uint8_t*)half) + 256;\ |
1266 uint8_t * const halfV= ((uint8_t*)half);\ | 1254 uint8_t * const halfHV= ((uint8_t*)half);\ |
1267 uint8_t * const halfHV= ((uint8_t*)half) + 256;\ | |
1268 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ | 1255 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ |
1269 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfV, src, 16, stride);\ | 1256 put ## RND ## pixels16_l2_mmx(halfH, src, halfH, 16, stride, 17);\ |
1270 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\ | 1257 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\ |
1271 OPNAME ## pixels16_l4_mmx(dst, src+stride, (uint8_t*)half, stride, 16);\ | 1258 OPNAME ## pixels16_l2_mmx(dst, halfH+16, halfHV, stride, 16, 16);\ |
1272 }\ | 1259 }\ |
1273 static void OPNAME ## qpel16_mc33_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ | 1260 static void OPNAME ## qpel16_mc33_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ |
1274 uint64_t half[16*2 + 16*2 + 17*2];\ | 1261 uint64_t half[16*2 + 17*2];\ |
1275 uint8_t * const halfH= ((uint8_t*)half) + 2*256;\ | 1262 uint8_t * const halfH= ((uint8_t*)half) + 256;\ |
1276 uint8_t * const halfV= ((uint8_t*)half);\ | 1263 uint8_t * const halfHV= ((uint8_t*)half);\ |
1277 uint8_t * const halfHV= ((uint8_t*)half) + 256;\ | 1264 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ |
1278 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src , 16, stride, 17);\ | 1265 put ## RND ## pixels16_l2_mmx(halfH, src+1, halfH, 16, stride, 17);\ |
1279 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfV, src+1, 16, stride);\ | |
1280 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\ | 1266 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\ |
1281 OPNAME ## pixels16_l4_mmx(dst, src+stride+1, (uint8_t*)half, stride, 16);\ | 1267 OPNAME ## pixels16_l2_mmx(dst, halfH+16, halfHV, stride, 16, 16);\ |
1282 }\ | 1268 }\ |
1283 static void OPNAME ## qpel16_mc21_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ | 1269 static void OPNAME ## qpel16_mc21_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ |
1284 uint64_t half[16*2 + 17*2];\ | 1270 uint64_t half[16*2 + 17*2];\ |
1285 uint8_t * const halfH= ((uint8_t*)half) + 256;\ | 1271 uint8_t * const halfH= ((uint8_t*)half) + 256;\ |
1286 uint8_t * const halfHV= ((uint8_t*)half);\ | 1272 uint8_t * const halfHV= ((uint8_t*)half);\ |
1295 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ | 1281 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ |
1296 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\ | 1282 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\ |
1297 OPNAME ## pixels16_l2_mmx(dst, halfH+16, halfHV, stride, 16, 16);\ | 1283 OPNAME ## pixels16_l2_mmx(dst, halfH+16, halfHV, stride, 16, 16);\ |
1298 }\ | 1284 }\ |
1299 static void OPNAME ## qpel16_mc12_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ | 1285 static void OPNAME ## qpel16_mc12_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ |
1300 uint64_t half[16*2 + 16*2 + 17*2];\ | 1286 uint64_t half[17*2];\ |
1301 uint8_t * const halfH= ((uint8_t*)half) + 2*256;\ | 1287 uint8_t * const halfH= ((uint8_t*)half);\ |
1302 uint8_t * const halfV= ((uint8_t*)half);\ | |
1303 uint8_t * const halfHV= ((uint8_t*)half) + 256;\ | |
1304 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ | 1288 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ |
1305 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfV, src, 16, stride);\ | 1289 put ## RND ## pixels16_l2_mmx(halfH, src, halfH, 16, stride, 17);\ |
1306 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\ | 1290 OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, stride, 16);\ |
1307 OPNAME ## pixels16_l2_mmx(dst, halfV, halfHV, stride, 16, 16);\ | |
1308 }\ | 1291 }\ |
1309 static void OPNAME ## qpel16_mc32_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ | 1292 static void OPNAME ## qpel16_mc32_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ |
1310 uint64_t half[16*2 + 16*2 + 17*2];\ | 1293 uint64_t half[17*2];\ |
1311 uint8_t * const halfH= ((uint8_t*)half) + 2*256;\ | 1294 uint8_t * const halfH= ((uint8_t*)half);\ |
1312 uint8_t * const halfV= ((uint8_t*)half);\ | |
1313 uint8_t * const halfHV= ((uint8_t*)half) + 256;\ | |
1314 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ | 1295 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ |
1315 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfV, src+1, 16, stride);\ | 1296 put ## RND ## pixels16_l2_mmx(halfH, src+1, halfH, 16, stride, 17);\ |
1316 put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\ | 1297 OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, stride, 16);\ |
1317 OPNAME ## pixels16_l2_mmx(dst, halfV, halfHV, stride, 16, 16);\ | |
1318 }\ | 1298 }\ |
1319 static void OPNAME ## qpel16_mc22_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ | 1299 static void OPNAME ## qpel16_mc22_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\ |
1320 uint64_t half[17*2];\ | 1300 uint64_t half[17*2];\ |
1321 uint8_t * const halfH= ((uint8_t*)half);\ | 1301 uint8_t * const halfH= ((uint8_t*)half);\ |
1322 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ | 1302 put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ |
1523 | 1503 |
1524 c->avg_pixels_tab[1][0] = avg_pixels8_3dnow; | 1504 c->avg_pixels_tab[1][0] = avg_pixels8_3dnow; |
1525 c->avg_pixels_tab[1][1] = avg_pixels8_x2_3dnow; | 1505 c->avg_pixels_tab[1][1] = avg_pixels8_x2_3dnow; |
1526 c->avg_pixels_tab[1][2] = avg_pixels8_y2_3dnow; | 1506 c->avg_pixels_tab[1][2] = avg_pixels8_y2_3dnow; |
1527 c->avg_pixels_tab[1][3] = avg_pixels8_xy2_3dnow; | 1507 c->avg_pixels_tab[1][3] = avg_pixels8_xy2_3dnow; |
1528 | 1508 |
1529 SET_QPEL_FUNC(qpel_pixels_tab[0][ 0], qpel16_mc00_3dnow) | 1509 SET_QPEL_FUNC(qpel_pixels_tab[0][ 0], qpel16_mc00_3dnow) |
1530 SET_QPEL_FUNC(qpel_pixels_tab[0][ 1], qpel16_mc10_3dnow) | 1510 SET_QPEL_FUNC(qpel_pixels_tab[0][ 1], qpel16_mc10_3dnow) |
1531 SET_QPEL_FUNC(qpel_pixels_tab[0][ 2], qpel16_mc20_3dnow) | 1511 SET_QPEL_FUNC(qpel_pixels_tab[0][ 2], qpel16_mc20_3dnow) |
1532 SET_QPEL_FUNC(qpel_pixels_tab[0][ 3], qpel16_mc30_3dnow) | 1512 SET_QPEL_FUNC(qpel_pixels_tab[0][ 3], qpel16_mc30_3dnow) |
1533 SET_QPEL_FUNC(qpel_pixels_tab[0][ 4], qpel16_mc01_3dnow) | 1513 SET_QPEL_FUNC(qpel_pixels_tab[0][ 4], qpel16_mc01_3dnow) |