comparison ppc/dsputil_altivec.c @ 1340:09b8fe0f0139 libavcodec

PPC fixes & clean-up; patch by Romain Dolbeau <dolbeau at irisa dot fr>
author michaelni
date Fri, 04 Jul 2003 09:39:05 +0000
parents f3152eb76f1a
children e8ff4783f188
comparison
legend: equal | deleted | inserted | replaced
1339:338a2f6e6402 1340:09b8fe0f0139
1084 register vector unsigned short 1084 register vector unsigned short
1085 pixelssum1, pixelssum2, temp3, 1085 pixelssum1, pixelssum2, temp3,
1086 pixelssum3, pixelssum4, temp4; 1086 pixelssum3, pixelssum4, temp4;
1087 register const vector unsigned char vczero = (const vector unsigned char)vec_splat_u8(0); 1087 register const vector unsigned char vczero = (const vector unsigned char)vec_splat_u8(0);
1088 register const vector unsigned short vctwo = (const vector unsigned short)vec_splat_u16(2); 1088 register const vector unsigned short vctwo = (const vector unsigned short)vec_splat_u16(2);
1089 1089
1090 POWERPC_TBL_START_COUNT(altivec_put_pixels16_xy2_num, 1);
1091
1090 temp1 = vec_ld(0, pixels); 1092 temp1 = vec_ld(0, pixels);
1091 temp2 = vec_ld(16, pixels); 1093 temp2 = vec_ld(16, pixels);
1092 pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels)); 1094 pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels));
1093 if ((((unsigned long)pixels) & 0x0000000F) == 0x0000000F) 1095 if ((((unsigned long)pixels) & 0x0000000F) == 0x0000000F)
1094 { 1096 {
1107 pixelssum3 = vec_add(pixelssum3, vctwo); 1109 pixelssum3 = vec_add(pixelssum3, vctwo);
1108 pixelssum1 = vec_add((vector unsigned short)pixelsv1, 1110 pixelssum1 = vec_add((vector unsigned short)pixelsv1,
1109 (vector unsigned short)pixelsv2); 1111 (vector unsigned short)pixelsv2);
1110 pixelssum1 = vec_add(pixelssum1, vctwo); 1112 pixelssum1 = vec_add(pixelssum1, vctwo);
1111 1113
1112 POWERPC_TBL_START_COUNT(altivec_put_pixels16_xy2_num, 1);
1113 for (i = 0; i < h ; i++) { 1114 for (i = 0; i < h ; i++) {
1114 blockv = vec_ld(0, block); 1115 blockv = vec_ld(0, block);
1115 1116
1116 temp1 = vec_ld(line_size, pixels); 1117 temp1 = vec_ld(line_size, pixels);
1117 temp2 = vec_ld(line_size + 16, pixels); 1118 temp2 = vec_ld(line_size + 16, pixels);
1205 pixelssum1, pixelssum2, temp3, 1206 pixelssum1, pixelssum2, temp3,
1206 pixelssum3, pixelssum4, temp4; 1207 pixelssum3, pixelssum4, temp4;
1207 register const vector unsigned char vczero = (const vector unsigned char)vec_splat_u8(0); 1208 register const vector unsigned char vczero = (const vector unsigned char)vec_splat_u8(0);
1208 register const vector unsigned short vcone = (const vector unsigned short)vec_splat_u16(1); 1209 register const vector unsigned short vcone = (const vector unsigned short)vec_splat_u16(1);
1209 register const vector unsigned short vctwo = (const vector unsigned short)vec_splat_u16(2); 1210 register const vector unsigned short vctwo = (const vector unsigned short)vec_splat_u16(2);
1210 1211
1212 POWERPC_TBL_START_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1);
1213
1211 temp1 = vec_ld(0, pixels); 1214 temp1 = vec_ld(0, pixels);
1212 temp2 = vec_ld(16, pixels); 1215 temp2 = vec_ld(16, pixels);
1213 pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels)); 1216 pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels));
1214 if ((((unsigned long)pixels) & 0x0000000F) == 0x0000000F) 1217 if ((((unsigned long)pixels) & 0x0000000F) == 0x0000000F)
1215 { 1218 {
1228 pixelssum3 = vec_add(pixelssum3, vcone); 1231 pixelssum3 = vec_add(pixelssum3, vcone);
1229 pixelssum1 = vec_add((vector unsigned short)pixelsv1, 1232 pixelssum1 = vec_add((vector unsigned short)pixelsv1,
1230 (vector unsigned short)pixelsv2); 1233 (vector unsigned short)pixelsv2);
1231 pixelssum1 = vec_add(pixelssum1, vcone); 1234 pixelssum1 = vec_add(pixelssum1, vcone);
1232 1235
1233 POWERPC_TBL_START_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1);
1234 for (i = 0; i < h ; i++) { 1236 for (i = 0; i < h ; i++) {
1235 blockv = vec_ld(0, block); 1237 blockv = vec_ld(0, block);
1236 1238
1237 temp1 = vec_ld(line_size, pixels); 1239 temp1 = vec_ld(line_size, pixels);
1238 temp2 = vec_ld(line_size + 16, pixels); 1240 temp2 = vec_ld(line_size + 16, pixels);