Mercurial > libpostproc.hg
comparison postprocess_altivec_template.c @ 104:883d75fc0712 libpostproc
cosmetics: prettyprinting
author | diego |
---|---|
date | Sun, 23 Mar 2008 16:01:13 +0000 |
parents | 53295ae2d45e |
children | 83d51d1fb580 |
comparison
equal
deleted
inserted
replaced
103:53295ae2d45e | 104:883d75fc0712 |
---|---|
352 const vector signed char neg1 = vec_splat_s8(-1); | 352 const vector signed char neg1 = vec_splat_s8(-1); |
353 const vector unsigned char permHH = (const vector unsigned char)AVV(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, | 353 const vector unsigned char permHH = (const vector unsigned char)AVV(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, |
354 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F); | 354 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F); |
355 | 355 |
356 #define PACK_AND_STORE(i) \ | 356 #define PACK_AND_STORE(i) \ |
357 { const vector unsigned char perms##i = \ | 357 { const vector unsigned char perms##i = \ |
358 vec_lvsr(i * stride, src2); \ | 358 vec_lvsr(i * stride, src2); \ |
359 const vector unsigned char vf##i = \ | 359 const vector unsigned char vf##i = \ |
360 vec_packsu(vr##i, (vector signed short)zero); \ | 360 vec_packsu(vr##i, (vector signed short)zero); \ |
361 const vector unsigned char vg##i = \ | 361 const vector unsigned char vg##i = \ |
362 vec_perm(vf##i, vbT##i, permHH); \ | 362 vec_perm(vf##i, vbT##i, permHH); \ |
370 vec_sel(vg2##i, vbB##i, mask##i); \ | 370 vec_sel(vg2##i, vbB##i, mask##i); \ |
371 vec_st(svA##i, i * stride, src2); \ | 371 vec_st(svA##i, i * stride, src2); \ |
372 vec_st(svB##i, i * stride + 16, src2);} | 372 vec_st(svB##i, i * stride + 16, src2);} |
373 | 373 |
374 #define PACK_AND_STORE_ALIGNED(i) \ | 374 #define PACK_AND_STORE_ALIGNED(i) \ |
375 { const vector unsigned char vf##i = \ | 375 { const vector unsigned char vf##i = \ |
376 vec_packsu(vr##i, (vector signed short)zero); \ | 376 vec_packsu(vr##i, (vector signed short)zero); \ |
377 const vector unsigned char vg##i = \ | 377 const vector unsigned char vg##i = \ |
378 vec_perm(vf##i, vbT##i, permHH); \ | 378 vec_perm(vf##i, vbT##i, permHH); \ |
379 vec_st(vg##i, i * stride, src2);} | 379 vec_st(vg##i, i * stride, src2);} |
380 | 380 |
507 const vector signed char neg1 = vec_splat_s8(-1); | 507 const vector signed char neg1 = vec_splat_s8(-1); |
508 const vector unsigned char permHH = (const vector unsigned char)AVV(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, | 508 const vector unsigned char permHH = (const vector unsigned char)AVV(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, |
509 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F); | 509 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F); |
510 | 510 |
511 #define STORE(i) \ | 511 #define STORE(i) \ |
512 { const vector unsigned char perms##i = \ | 512 { const vector unsigned char perms##i = \ |
513 vec_lvsr(i * stride, src2); \ | 513 vec_lvsr(i * stride, src2); \ |
514 const vector unsigned char vg##i = \ | 514 const vector unsigned char vg##i = \ |
515 vec_perm(st##i, vbT##i, permHH); \ | 515 vec_perm(st##i, vbT##i, permHH); \ |
516 const vector unsigned char mask##i = \ | 516 const vector unsigned char mask##i = \ |
517 vec_perm((vector unsigned char)zero, (vector unsigned char)neg1, perms##i); \ | 517 vec_perm((vector unsigned char)zero, (vector unsigned char)neg1, perms##i); \ |
542 const vector signed int zero = vec_splat_s32(0); | 542 const vector signed int zero = vec_splat_s32(0); |
543 vector unsigned char v_dt; | 543 vector unsigned char v_dt; |
544 dt[0] = deringThreshold; | 544 dt[0] = deringThreshold; |
545 v_dt = vec_splat(vec_ld(0, dt), 0); | 545 v_dt = vec_splat(vec_ld(0, dt), 0); |
546 | 546 |
547 #define LOAD_LINE(i) \ | 547 #define LOAD_LINE(i) \ |
548 const vector unsigned char perm##i = \ | 548 const vector unsigned char perm##i = \ |
549 vec_lvsl(i * stride, srcCopy); \ | 549 vec_lvsl(i * stride, srcCopy); \ |
550 vector unsigned char sA##i = vec_ld(i * stride, srcCopy); \ | 550 vector unsigned char sA##i = vec_ld(i * stride, srcCopy); \ |
551 vector unsigned char sB##i = vec_ld(i * stride + 16, srcCopy); \ | 551 vector unsigned char sB##i = vec_ld(i * stride + 16, srcCopy); \ |
552 vector unsigned char src##i = vec_perm(sA##i, sB##i, perm##i) | 552 vector unsigned char src##i = vec_perm(sA##i, sB##i, perm##i) |
553 | 553 |
554 LOAD_LINE(0); | 554 LOAD_LINE(0); |
555 LOAD_LINE(1); | 555 LOAD_LINE(1); |
556 LOAD_LINE(2); | 556 LOAD_LINE(2); |
870 LOAD_LINE(tempBlurred, 6); | 870 LOAD_LINE(tempBlurred, 6); |
871 LOAD_LINE(tempBlurred, 7); | 871 LOAD_LINE(tempBlurred, 7); |
872 #undef LOAD_LINE | 872 #undef LOAD_LINE |
873 | 873 |
874 #define ACCUMULATE_DIFFS(i) \ | 874 #define ACCUMULATE_DIFFS(i) \ |
875 vector signed short v_d##i = vec_sub(v_tempBlurredAss##i, \ | 875 vector signed short v_d##i = vec_sub(v_tempBlurredAss##i, \ |
876 v_srcAss##i); \ | 876 v_srcAss##i); \ |
877 v_dp = vec_msums(v_d##i, v_d##i, v_dp); \ | 877 v_dp = vec_msums(v_d##i, v_d##i, v_dp); \ |
878 v_sysdp = vec_msums(v_d##i, vsint16_1, v_sysdp) | 878 v_sysdp = vec_msums(v_d##i, vsint16_1, v_sysdp) |
879 | 879 |
880 ACCUMULATE_DIFFS(0); | 880 ACCUMULATE_DIFFS(0); |
937 const vector signed short vsint16_4 = vec_splat_s16(4); | 937 const vector signed short vsint16_4 = vec_splat_s16(4); |
938 const vector unsigned short vuint16_3 = vec_splat_u16(3); | 938 const vector unsigned short vuint16_3 = vec_splat_u16(3); |
939 | 939 |
940 #define OP(i) \ | 940 #define OP(i) \ |
941 const vector signed short v_temp##i = \ | 941 const vector signed short v_temp##i = \ |
942 vec_mladd(v_tempBlurredAss##i, \ | 942 vec_mladd(v_tempBlurredAss##i, \ |
943 vsint16_7, v_srcAss##i); \ | 943 vsint16_7, v_srcAss##i); \ |
944 const vector signed short v_temp2##i = \ | 944 const vector signed short v_temp2##i = \ |
945 vec_add(v_temp##i, vsint16_4); \ | 945 vec_add(v_temp##i, vsint16_4); \ |
946 v_tempBlurredAss##i = vec_sr(v_temp2##i, vuint16_3) | 946 v_tempBlurredAss##i = vec_sr(v_temp2##i, vuint16_3) |
947 | 947 |
958 const vector signed short vsint16_3 = vec_splat_s16(3); | 958 const vector signed short vsint16_3 = vec_splat_s16(3); |
959 const vector signed short vsint16_2 = vec_splat_s16(2); | 959 const vector signed short vsint16_2 = vec_splat_s16(2); |
960 | 960 |
961 #define OP(i) \ | 961 #define OP(i) \ |
962 const vector signed short v_temp##i = \ | 962 const vector signed short v_temp##i = \ |
963 vec_mladd(v_tempBlurredAss##i, \ | 963 vec_mladd(v_tempBlurredAss##i, \ |
964 vsint16_3, v_srcAss##i); \ | 964 vsint16_3, v_srcAss##i); \ |
965 const vector signed short v_temp2##i = \ | 965 const vector signed short v_temp2##i = \ |
966 vec_add(v_temp##i, vsint16_2); \ | 966 vec_add(v_temp##i, vsint16_2); \ |
967 v_tempBlurredAss##i = vec_sr(v_temp2##i, (vector unsigned short)vsint16_2) | 967 v_tempBlurredAss##i = vec_sr(v_temp2##i, (vector unsigned short)vsint16_2) |
968 | 968 |
1053 vector unsigned char tempM = vec_mergeh(src6, zero); | 1053 vector unsigned char tempM = vec_mergeh(src6, zero); |
1054 vector unsigned char tempN = vec_mergel(src6, zero); | 1054 vector unsigned char tempN = vec_mergel(src6, zero); |
1055 vector unsigned char tempO = vec_mergeh(src7, zero); | 1055 vector unsigned char tempO = vec_mergeh(src7, zero); |
1056 vector unsigned char tempP = vec_mergel(src7, zero); | 1056 vector unsigned char tempP = vec_mergel(src7, zero); |
1057 | 1057 |
1058 vector unsigned char temp0 = vec_mergeh(tempA, tempI); | 1058 vector unsigned char temp0 = vec_mergeh(tempA, tempI); |
1059 vector unsigned char temp1 = vec_mergel(tempA, tempI); | 1059 vector unsigned char temp1 = vec_mergel(tempA, tempI); |
1060 vector unsigned char temp2 = vec_mergeh(tempB, tempJ); | 1060 vector unsigned char temp2 = vec_mergeh(tempB, tempJ); |
1061 vector unsigned char temp3 = vec_mergel(tempB, tempJ); | 1061 vector unsigned char temp3 = vec_mergel(tempB, tempJ); |
1062 vector unsigned char temp4 = vec_mergeh(tempC, tempK); | 1062 vector unsigned char temp4 = vec_mergeh(tempC, tempK); |
1063 vector unsigned char temp5 = vec_mergel(tempC, tempK); | 1063 vector unsigned char temp5 = vec_mergel(tempC, tempK); |
1064 vector unsigned char temp6 = vec_mergeh(tempD, tempL); | 1064 vector unsigned char temp6 = vec_mergeh(tempD, tempL); |
1065 vector unsigned char temp7 = vec_mergel(tempD, tempL); | 1065 vector unsigned char temp7 = vec_mergel(tempD, tempL); |
1066 vector unsigned char temp8 = vec_mergeh(tempE, tempM); | 1066 vector unsigned char temp8 = vec_mergeh(tempE, tempM); |
1067 vector unsigned char temp9 = vec_mergel(tempE, tempM); | 1067 vector unsigned char temp9 = vec_mergel(tempE, tempM); |
1068 vector unsigned char temp10 = vec_mergeh(tempF, tempN); | 1068 vector unsigned char temp10 = vec_mergeh(tempF, tempN); |
1069 vector unsigned char temp11 = vec_mergel(tempF, tempN); | 1069 vector unsigned char temp11 = vec_mergel(tempF, tempN); |
1070 vector unsigned char temp12 = vec_mergeh(tempG, tempO); | 1070 vector unsigned char temp12 = vec_mergeh(tempG, tempO); |
1071 vector unsigned char temp13 = vec_mergel(tempG, tempO); | 1071 vector unsigned char temp13 = vec_mergel(tempG, tempO); |
1072 vector unsigned char temp14 = vec_mergeh(tempH, tempP); | 1072 vector unsigned char temp14 = vec_mergeh(tempH, tempP); |
1087 tempM = vec_mergeh(temp6, temp14); | 1087 tempM = vec_mergeh(temp6, temp14); |
1088 tempN = vec_mergel(temp6, temp14); | 1088 tempN = vec_mergel(temp6, temp14); |
1089 tempO = vec_mergeh(temp7, temp15); | 1089 tempO = vec_mergeh(temp7, temp15); |
1090 tempP = vec_mergel(temp7, temp15); | 1090 tempP = vec_mergel(temp7, temp15); |
1091 | 1091 |
1092 temp0 = vec_mergeh(tempA, tempI); | 1092 temp0 = vec_mergeh(tempA, tempI); |
1093 temp1 = vec_mergel(tempA, tempI); | 1093 temp1 = vec_mergel(tempA, tempI); |
1094 temp2 = vec_mergeh(tempB, tempJ); | 1094 temp2 = vec_mergeh(tempB, tempJ); |
1095 temp3 = vec_mergel(tempB, tempJ); | 1095 temp3 = vec_mergel(tempB, tempJ); |
1096 temp4 = vec_mergeh(tempC, tempK); | 1096 temp4 = vec_mergeh(tempC, tempK); |
1097 temp5 = vec_mergel(tempC, tempK); | 1097 temp5 = vec_mergel(tempC, tempK); |
1098 temp6 = vec_mergeh(tempD, tempL); | 1098 temp6 = vec_mergeh(tempD, tempL); |
1099 temp7 = vec_mergel(tempD, tempL); | 1099 temp7 = vec_mergel(tempD, tempL); |
1100 temp8 = vec_mergeh(tempE, tempM); | 1100 temp8 = vec_mergeh(tempE, tempM); |
1101 temp9 = vec_mergel(tempE, tempM); | 1101 temp9 = vec_mergel(tempE, tempM); |
1102 temp10 = vec_mergeh(tempF, tempN); | 1102 temp10 = vec_mergeh(tempF, tempN); |
1103 temp11 = vec_mergel(tempF, tempN); | 1103 temp11 = vec_mergel(tempF, tempN); |
1104 temp12 = vec_mergeh(tempG, tempO); | 1104 temp12 = vec_mergeh(tempG, tempO); |
1105 temp13 = vec_mergel(tempG, tempO); | 1105 temp13 = vec_mergel(tempG, tempO); |
1106 temp14 = vec_mergeh(tempH, tempP); | 1106 temp14 = vec_mergeh(tempH, tempP); |
1107 temp15 = vec_mergel(tempH, tempP); | 1107 temp15 = vec_mergel(tempH, tempP); |
1108 | 1108 |
1109 vec_st(temp0, 0, dst); | 1109 vec_st(temp0, 0, dst); |
1110 vec_st(temp1, 16, dst); | 1110 vec_st(temp1, 16, dst); |
1111 vec_st(temp2, 32, dst); | 1111 vec_st(temp2, 32, dst); |
1112 vec_st(temp3, 48, dst); | 1112 vec_st(temp3, 48, dst); |
1113 vec_st(temp4, 64, dst); | 1113 vec_st(temp4, 64, dst); |
1114 vec_st(temp5, 80, dst); | 1114 vec_st(temp5, 80, dst); |
1115 vec_st(temp6, 96, dst); | 1115 vec_st(temp6, 96, dst); |
1116 vec_st(temp7, 112, dst); | 1116 vec_st(temp7, 112, dst); |
1117 vec_st(temp8, 128, dst); | 1117 vec_st(temp8, 128, dst); |
1118 vec_st(temp9, 144, dst); | 1118 vec_st(temp9, 144, dst); |
1119 vec_st(temp10, 160, dst); | 1119 vec_st(temp10, 160, dst); |
1120 vec_st(temp11, 176, dst); | 1120 vec_st(temp11, 176, dst); |
1121 vec_st(temp12, 192, dst); | 1121 vec_st(temp12, 192, dst); |
1122 vec_st(temp13, 208, dst); | 1122 vec_st(temp13, 208, dst); |
1123 vec_st(temp14, 224, dst); | 1123 vec_st(temp14, 224, dst); |