comparison postprocess_altivec_template.c @ 104:883d75fc0712 libpostproc

cosmetics: prettyprinting
author diego
date Sun, 23 Mar 2008 16:01:13 +0000
parents 53295ae2d45e
children 83d51d1fb580
comparison
equal deleted inserted replaced
103:53295ae2d45e 104:883d75fc0712
352 const vector signed char neg1 = vec_splat_s8(-1); 352 const vector signed char neg1 = vec_splat_s8(-1);
353 const vector unsigned char permHH = (const vector unsigned char)AVV(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 353 const vector unsigned char permHH = (const vector unsigned char)AVV(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
354 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F); 354 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F);
355 355
356 #define PACK_AND_STORE(i) \ 356 #define PACK_AND_STORE(i) \
357 { const vector unsigned char perms##i = \ 357 { const vector unsigned char perms##i = \
358 vec_lvsr(i * stride, src2); \ 358 vec_lvsr(i * stride, src2); \
359 const vector unsigned char vf##i = \ 359 const vector unsigned char vf##i = \
360 vec_packsu(vr##i, (vector signed short)zero); \ 360 vec_packsu(vr##i, (vector signed short)zero); \
361 const vector unsigned char vg##i = \ 361 const vector unsigned char vg##i = \
362 vec_perm(vf##i, vbT##i, permHH); \ 362 vec_perm(vf##i, vbT##i, permHH); \
370 vec_sel(vg2##i, vbB##i, mask##i); \ 370 vec_sel(vg2##i, vbB##i, mask##i); \
371 vec_st(svA##i, i * stride, src2); \ 371 vec_st(svA##i, i * stride, src2); \
372 vec_st(svB##i, i * stride + 16, src2);} 372 vec_st(svB##i, i * stride + 16, src2);}
373 373
374 #define PACK_AND_STORE_ALIGNED(i) \ 374 #define PACK_AND_STORE_ALIGNED(i) \
375 { const vector unsigned char vf##i = \ 375 { const vector unsigned char vf##i = \
376 vec_packsu(vr##i, (vector signed short)zero); \ 376 vec_packsu(vr##i, (vector signed short)zero); \
377 const vector unsigned char vg##i = \ 377 const vector unsigned char vg##i = \
378 vec_perm(vf##i, vbT##i, permHH); \ 378 vec_perm(vf##i, vbT##i, permHH); \
379 vec_st(vg##i, i * stride, src2);} 379 vec_st(vg##i, i * stride, src2);}
380 380
507 const vector signed char neg1 = vec_splat_s8(-1); 507 const vector signed char neg1 = vec_splat_s8(-1);
508 const vector unsigned char permHH = (const vector unsigned char)AVV(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 508 const vector unsigned char permHH = (const vector unsigned char)AVV(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
509 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F); 509 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F);
510 510
511 #define STORE(i) \ 511 #define STORE(i) \
512 { const vector unsigned char perms##i = \ 512 { const vector unsigned char perms##i = \
513 vec_lvsr(i * stride, src2); \ 513 vec_lvsr(i * stride, src2); \
514 const vector unsigned char vg##i = \ 514 const vector unsigned char vg##i = \
515 vec_perm(st##i, vbT##i, permHH); \ 515 vec_perm(st##i, vbT##i, permHH); \
516 const vector unsigned char mask##i = \ 516 const vector unsigned char mask##i = \
517 vec_perm((vector unsigned char)zero, (vector unsigned char)neg1, perms##i); \ 517 vec_perm((vector unsigned char)zero, (vector unsigned char)neg1, perms##i); \
542 const vector signed int zero = vec_splat_s32(0); 542 const vector signed int zero = vec_splat_s32(0);
543 vector unsigned char v_dt; 543 vector unsigned char v_dt;
544 dt[0] = deringThreshold; 544 dt[0] = deringThreshold;
545 v_dt = vec_splat(vec_ld(0, dt), 0); 545 v_dt = vec_splat(vec_ld(0, dt), 0);
546 546
547 #define LOAD_LINE(i) \ 547 #define LOAD_LINE(i) \
548 const vector unsigned char perm##i = \ 548 const vector unsigned char perm##i = \
549 vec_lvsl(i * stride, srcCopy); \ 549 vec_lvsl(i * stride, srcCopy); \
550 vector unsigned char sA##i = vec_ld(i * stride, srcCopy); \ 550 vector unsigned char sA##i = vec_ld(i * stride, srcCopy); \
551 vector unsigned char sB##i = vec_ld(i * stride + 16, srcCopy); \ 551 vector unsigned char sB##i = vec_ld(i * stride + 16, srcCopy); \
552 vector unsigned char src##i = vec_perm(sA##i, sB##i, perm##i) 552 vector unsigned char src##i = vec_perm(sA##i, sB##i, perm##i)
553 553
554 LOAD_LINE(0); 554 LOAD_LINE(0);
555 LOAD_LINE(1); 555 LOAD_LINE(1);
556 LOAD_LINE(2); 556 LOAD_LINE(2);
870 LOAD_LINE(tempBlurred, 6); 870 LOAD_LINE(tempBlurred, 6);
871 LOAD_LINE(tempBlurred, 7); 871 LOAD_LINE(tempBlurred, 7);
872 #undef LOAD_LINE 872 #undef LOAD_LINE
873 873
874 #define ACCUMULATE_DIFFS(i) \ 874 #define ACCUMULATE_DIFFS(i) \
875 vector signed short v_d##i = vec_sub(v_tempBlurredAss##i, \ 875 vector signed short v_d##i = vec_sub(v_tempBlurredAss##i, \
876 v_srcAss##i); \ 876 v_srcAss##i); \
877 v_dp = vec_msums(v_d##i, v_d##i, v_dp); \ 877 v_dp = vec_msums(v_d##i, v_d##i, v_dp); \
878 v_sysdp = vec_msums(v_d##i, vsint16_1, v_sysdp) 878 v_sysdp = vec_msums(v_d##i, vsint16_1, v_sysdp)
879 879
880 ACCUMULATE_DIFFS(0); 880 ACCUMULATE_DIFFS(0);
937 const vector signed short vsint16_4 = vec_splat_s16(4); 937 const vector signed short vsint16_4 = vec_splat_s16(4);
938 const vector unsigned short vuint16_3 = vec_splat_u16(3); 938 const vector unsigned short vuint16_3 = vec_splat_u16(3);
939 939
940 #define OP(i) \ 940 #define OP(i) \
941 const vector signed short v_temp##i = \ 941 const vector signed short v_temp##i = \
942 vec_mladd(v_tempBlurredAss##i, \ 942 vec_mladd(v_tempBlurredAss##i, \
943 vsint16_7, v_srcAss##i); \ 943 vsint16_7, v_srcAss##i); \
944 const vector signed short v_temp2##i = \ 944 const vector signed short v_temp2##i = \
945 vec_add(v_temp##i, vsint16_4); \ 945 vec_add(v_temp##i, vsint16_4); \
946 v_tempBlurredAss##i = vec_sr(v_temp2##i, vuint16_3) 946 v_tempBlurredAss##i = vec_sr(v_temp2##i, vuint16_3)
947 947
958 const vector signed short vsint16_3 = vec_splat_s16(3); 958 const vector signed short vsint16_3 = vec_splat_s16(3);
959 const vector signed short vsint16_2 = vec_splat_s16(2); 959 const vector signed short vsint16_2 = vec_splat_s16(2);
960 960
961 #define OP(i) \ 961 #define OP(i) \
962 const vector signed short v_temp##i = \ 962 const vector signed short v_temp##i = \
963 vec_mladd(v_tempBlurredAss##i, \ 963 vec_mladd(v_tempBlurredAss##i, \
964 vsint16_3, v_srcAss##i); \ 964 vsint16_3, v_srcAss##i); \
965 const vector signed short v_temp2##i = \ 965 const vector signed short v_temp2##i = \
966 vec_add(v_temp##i, vsint16_2); \ 966 vec_add(v_temp##i, vsint16_2); \
967 v_tempBlurredAss##i = vec_sr(v_temp2##i, (vector unsigned short)vsint16_2) 967 v_tempBlurredAss##i = vec_sr(v_temp2##i, (vector unsigned short)vsint16_2)
968 968
1053 vector unsigned char tempM = vec_mergeh(src6, zero); 1053 vector unsigned char tempM = vec_mergeh(src6, zero);
1054 vector unsigned char tempN = vec_mergel(src6, zero); 1054 vector unsigned char tempN = vec_mergel(src6, zero);
1055 vector unsigned char tempO = vec_mergeh(src7, zero); 1055 vector unsigned char tempO = vec_mergeh(src7, zero);
1056 vector unsigned char tempP = vec_mergel(src7, zero); 1056 vector unsigned char tempP = vec_mergel(src7, zero);
1057 1057
1058 vector unsigned char temp0 = vec_mergeh(tempA, tempI); 1058 vector unsigned char temp0 = vec_mergeh(tempA, tempI);
1059 vector unsigned char temp1 = vec_mergel(tempA, tempI); 1059 vector unsigned char temp1 = vec_mergel(tempA, tempI);
1060 vector unsigned char temp2 = vec_mergeh(tempB, tempJ); 1060 vector unsigned char temp2 = vec_mergeh(tempB, tempJ);
1061 vector unsigned char temp3 = vec_mergel(tempB, tempJ); 1061 vector unsigned char temp3 = vec_mergel(tempB, tempJ);
1062 vector unsigned char temp4 = vec_mergeh(tempC, tempK); 1062 vector unsigned char temp4 = vec_mergeh(tempC, tempK);
1063 vector unsigned char temp5 = vec_mergel(tempC, tempK); 1063 vector unsigned char temp5 = vec_mergel(tempC, tempK);
1064 vector unsigned char temp6 = vec_mergeh(tempD, tempL); 1064 vector unsigned char temp6 = vec_mergeh(tempD, tempL);
1065 vector unsigned char temp7 = vec_mergel(tempD, tempL); 1065 vector unsigned char temp7 = vec_mergel(tempD, tempL);
1066 vector unsigned char temp8 = vec_mergeh(tempE, tempM); 1066 vector unsigned char temp8 = vec_mergeh(tempE, tempM);
1067 vector unsigned char temp9 = vec_mergel(tempE, tempM); 1067 vector unsigned char temp9 = vec_mergel(tempE, tempM);
1068 vector unsigned char temp10 = vec_mergeh(tempF, tempN); 1068 vector unsigned char temp10 = vec_mergeh(tempF, tempN);
1069 vector unsigned char temp11 = vec_mergel(tempF, tempN); 1069 vector unsigned char temp11 = vec_mergel(tempF, tempN);
1070 vector unsigned char temp12 = vec_mergeh(tempG, tempO); 1070 vector unsigned char temp12 = vec_mergeh(tempG, tempO);
1071 vector unsigned char temp13 = vec_mergel(tempG, tempO); 1071 vector unsigned char temp13 = vec_mergel(tempG, tempO);
1072 vector unsigned char temp14 = vec_mergeh(tempH, tempP); 1072 vector unsigned char temp14 = vec_mergeh(tempH, tempP);
1087 tempM = vec_mergeh(temp6, temp14); 1087 tempM = vec_mergeh(temp6, temp14);
1088 tempN = vec_mergel(temp6, temp14); 1088 tempN = vec_mergel(temp6, temp14);
1089 tempO = vec_mergeh(temp7, temp15); 1089 tempO = vec_mergeh(temp7, temp15);
1090 tempP = vec_mergel(temp7, temp15); 1090 tempP = vec_mergel(temp7, temp15);
1091 1091
1092 temp0 = vec_mergeh(tempA, tempI); 1092 temp0 = vec_mergeh(tempA, tempI);
1093 temp1 = vec_mergel(tempA, tempI); 1093 temp1 = vec_mergel(tempA, tempI);
1094 temp2 = vec_mergeh(tempB, tempJ); 1094 temp2 = vec_mergeh(tempB, tempJ);
1095 temp3 = vec_mergel(tempB, tempJ); 1095 temp3 = vec_mergel(tempB, tempJ);
1096 temp4 = vec_mergeh(tempC, tempK); 1096 temp4 = vec_mergeh(tempC, tempK);
1097 temp5 = vec_mergel(tempC, tempK); 1097 temp5 = vec_mergel(tempC, tempK);
1098 temp6 = vec_mergeh(tempD, tempL); 1098 temp6 = vec_mergeh(tempD, tempL);
1099 temp7 = vec_mergel(tempD, tempL); 1099 temp7 = vec_mergel(tempD, tempL);
1100 temp8 = vec_mergeh(tempE, tempM); 1100 temp8 = vec_mergeh(tempE, tempM);
1101 temp9 = vec_mergel(tempE, tempM); 1101 temp9 = vec_mergel(tempE, tempM);
1102 temp10 = vec_mergeh(tempF, tempN); 1102 temp10 = vec_mergeh(tempF, tempN);
1103 temp11 = vec_mergel(tempF, tempN); 1103 temp11 = vec_mergel(tempF, tempN);
1104 temp12 = vec_mergeh(tempG, tempO); 1104 temp12 = vec_mergeh(tempG, tempO);
1105 temp13 = vec_mergel(tempG, tempO); 1105 temp13 = vec_mergel(tempG, tempO);
1106 temp14 = vec_mergeh(tempH, tempP); 1106 temp14 = vec_mergeh(tempH, tempP);
1107 temp15 = vec_mergel(tempH, tempP); 1107 temp15 = vec_mergel(tempH, tempP);
1108 1108
1109 vec_st(temp0, 0, dst); 1109 vec_st(temp0, 0, dst);
1110 vec_st(temp1, 16, dst); 1110 vec_st(temp1, 16, dst);
1111 vec_st(temp2, 32, dst); 1111 vec_st(temp2, 32, dst);
1112 vec_st(temp3, 48, dst); 1112 vec_st(temp3, 48, dst);
1113 vec_st(temp4, 64, dst); 1113 vec_st(temp4, 64, dst);
1114 vec_st(temp5, 80, dst); 1114 vec_st(temp5, 80, dst);
1115 vec_st(temp6, 96, dst); 1115 vec_st(temp6, 96, dst);
1116 vec_st(temp7, 112, dst); 1116 vec_st(temp7, 112, dst);
1117 vec_st(temp8, 128, dst); 1117 vec_st(temp8, 128, dst);
1118 vec_st(temp9, 144, dst); 1118 vec_st(temp9, 144, dst);
1119 vec_st(temp10, 160, dst); 1119 vec_st(temp10, 160, dst);
1120 vec_st(temp11, 176, dst); 1120 vec_st(temp11, 176, dst);
1121 vec_st(temp12, 192, dst); 1121 vec_st(temp12, 192, dst);
1122 vec_st(temp13, 208, dst); 1122 vec_st(temp13, 208, dst);
1123 vec_st(temp14, 224, dst); 1123 vec_st(temp14, 224, dst);