comparison ppc/dsputil_h264_template_altivec.c @ 2967:ef2149182f1c libavcodec

COSMETICS: Remove all trailing whitespace.
author diego
date Sat, 17 Dec 2005 18:14:38 +0000
parents b0102ea621dd
children 0b546eab515d
comparison
equal deleted inserted replaced
2966:564788471dd4 2967:ef2149182f1c
45 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F); 45 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F);
46 } 46 }
47 47
48 register int loadSecond = (((unsigned long)src) % 16) <= 7 ? 0 : 1; 48 register int loadSecond = (((unsigned long)src) % 16) <= 7 ? 0 : 1;
49 register int reallyBadAlign = (((unsigned long)src) % 16) == 15 ? 1 : 0; 49 register int reallyBadAlign = (((unsigned long)src) % 16) == 15 ? 1 : 0;
50 50
51 vector unsigned char vsrcAuc; 51 vector unsigned char vsrcAuc;
52 vector unsigned char vsrcBuc; 52 vector unsigned char vsrcBuc;
53 vector unsigned char vsrcperm0; 53 vector unsigned char vsrcperm0;
54 vector unsigned char vsrcperm1; 54 vector unsigned char vsrcperm1;
55 vsrcAuc = vec_ld(0, src); 55 vsrcAuc = vec_ld(0, src);
56 if (loadSecond) 56 if (loadSecond)
57 vsrcBuc = vec_ld(16, src); 57 vsrcBuc = vec_ld(16, src);
58 vsrcperm0 = vec_lvsl(0, src); 58 vsrcperm0 = vec_lvsl(0, src);
59 vsrcperm1 = vec_lvsl(1, src); 59 vsrcperm1 = vec_lvsl(1, src);
60 60
61 vector unsigned char vsrc0uc; 61 vector unsigned char vsrc0uc;
62 vector unsigned char vsrc1uc; 62 vector unsigned char vsrc1uc;
63 vsrc0uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm0); 63 vsrc0uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm0);
64 if (reallyBadAlign) 64 if (reallyBadAlign)
65 vsrc1uc = vsrcBuc; 65 vsrc1uc = vsrcBuc;
66 else 66 else
67 vsrc1uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm1); 67 vsrc1uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm1);
68 68
69 vector signed short vsrc0ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero, (vector unsigned char)vsrc0uc); 69 vector signed short vsrc0ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero, (vector unsigned char)vsrc0uc);
70 vector signed short vsrc1ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero, (vector unsigned char)vsrc1uc); 70 vector signed short vsrc1ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero, (vector unsigned char)vsrc1uc);
71 71
72 if (!loadSecond) {// -> !reallyBadAlign 72 if (!loadSecond) {// -> !reallyBadAlign
73 for (i = 0 ; i < h ; i++) { 73 for (i = 0 ; i < h ; i++) {
74 vector unsigned char vsrcCuc; 74 vector unsigned char vsrcCuc;
75 vsrcCuc = vec_ld(stride + 0, src); 75 vsrcCuc = vec_ld(stride + 0, src);
76 76
77 vector unsigned char vsrc2uc; 77 vector unsigned char vsrc2uc;
78 vector unsigned char vsrc3uc; 78 vector unsigned char vsrc3uc;
79 vsrc2uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm0); 79 vsrc2uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm0);
80 vsrc3uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm1); 80 vsrc3uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm1);
81 81
82 vector signed short vsrc2ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero, (vector unsigned char)vsrc2uc); 82 vector signed short vsrc2ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero, (vector unsigned char)vsrc2uc);
83 vector signed short vsrc3ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero, (vector unsigned char)vsrc3uc); 83 vector signed short vsrc3ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero, (vector unsigned char)vsrc3uc);
84 84
85 vector signed short psum; 85 vector signed short psum;
86 86
87 psum = vec_mladd(vA, vsrc0ssH, vec_splat_s16(0)); 87 psum = vec_mladd(vA, vsrc0ssH, vec_splat_s16(0));
88 psum = vec_mladd(vB, vsrc1ssH, psum); 88 psum = vec_mladd(vB, vsrc1ssH, psum);
89 psum = vec_mladd(vC, vsrc2ssH, psum); 89 psum = vec_mladd(vC, vsrc2ssH, psum);
90 psum = vec_mladd(vD, vsrc3ssH, psum); 90 psum = vec_mladd(vD, vsrc3ssH, psum);
91 psum = vec_add(v32ss, psum); 91 psum = vec_add(v32ss, psum);
92 psum = vec_sra(psum, v6us); 92 psum = vec_sra(psum, v6us);
93 93
94 vector unsigned char vdst = vec_ld(0, dst); 94 vector unsigned char vdst = vec_ld(0, dst);
95 vector unsigned char ppsum = (vector unsigned char)vec_packsu(psum, psum); 95 vector unsigned char ppsum = (vector unsigned char)vec_packsu(psum, psum);
96 96
97 vector unsigned char vfdst = vec_perm(vdst, ppsum, fperm); 97 vector unsigned char vfdst = vec_perm(vdst, ppsum, fperm);
98 vector unsigned char fsum; 98 vector unsigned char fsum;
99 99
100 OP_U8_ALTIVEC(fsum, vfdst, vdst); 100 OP_U8_ALTIVEC(fsum, vfdst, vdst);
101 101
102 vec_st(fsum, 0, dst); 102 vec_st(fsum, 0, dst);
103 103
104 vsrc0ssH = vsrc2ssH; 104 vsrc0ssH = vsrc2ssH;
105 vsrc1ssH = vsrc3ssH; 105 vsrc1ssH = vsrc3ssH;
106 106
107 dst += stride; 107 dst += stride;
108 src += stride; 108 src += stride;
109 } 109 }
110 } else { 110 } else {
111 for (i = 0 ; i < h ; i++) { 111 for (i = 0 ; i < h ; i++) {
112 vector unsigned char vsrcCuc; 112 vector unsigned char vsrcCuc;
113 vector unsigned char vsrcDuc; 113 vector unsigned char vsrcDuc;
114 vsrcCuc = vec_ld(stride + 0, src); 114 vsrcCuc = vec_ld(stride + 0, src);
115 vsrcDuc = vec_ld(stride + 16, src); 115 vsrcDuc = vec_ld(stride + 16, src);
116 116
117 vector unsigned char vsrc2uc; 117 vector unsigned char vsrc2uc;
118 vector unsigned char vsrc3uc; 118 vector unsigned char vsrc3uc;
119 vsrc2uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm0); 119 vsrc2uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm0);
120 if (reallyBadAlign) 120 if (reallyBadAlign)
121 vsrc3uc = vsrcDuc; 121 vsrc3uc = vsrcDuc;
122 else 122 else
123 vsrc3uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm1); 123 vsrc3uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm1);
124 124
125 vector signed short vsrc2ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero, (vector unsigned char)vsrc2uc); 125 vector signed short vsrc2ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero, (vector unsigned char)vsrc2uc);
126 vector signed short vsrc3ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero, (vector unsigned char)vsrc3uc); 126 vector signed short vsrc3ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero, (vector unsigned char)vsrc3uc);
127 127
128 vector signed short psum; 128 vector signed short psum;
129 129
130 psum = vec_mladd(vA, vsrc0ssH, vec_splat_s16(0)); 130 psum = vec_mladd(vA, vsrc0ssH, vec_splat_s16(0));
131 psum = vec_mladd(vB, vsrc1ssH, psum); 131 psum = vec_mladd(vB, vsrc1ssH, psum);
132 psum = vec_mladd(vC, vsrc2ssH, psum); 132 psum = vec_mladd(vC, vsrc2ssH, psum);
133 psum = vec_mladd(vD, vsrc3ssH, psum); 133 psum = vec_mladd(vD, vsrc3ssH, psum);
134 psum = vec_add(v32ss, psum); 134 psum = vec_add(v32ss, psum);
135 psum = vec_sr(psum, v6us); 135 psum = vec_sr(psum, v6us);
136 136
137 vector unsigned char vdst = vec_ld(0, dst); 137 vector unsigned char vdst = vec_ld(0, dst);
138 vector unsigned char ppsum = (vector unsigned char)vec_pack(psum, psum); 138 vector unsigned char ppsum = (vector unsigned char)vec_pack(psum, psum);
139 139
140 vector unsigned char vfdst = vec_perm(vdst, ppsum, fperm); 140 vector unsigned char vfdst = vec_perm(vdst, ppsum, fperm);
141 vector unsigned char fsum; 141 vector unsigned char fsum;
142 142
143 OP_U8_ALTIVEC(fsum, vfdst, vdst); 143 OP_U8_ALTIVEC(fsum, vfdst, vdst);
144 144
145 vec_st(fsum, 0, dst); 145 vec_st(fsum, 0, dst);
146 146
147 vsrc0ssH = vsrc2ssH; 147 vsrc0ssH = vsrc2ssH;
148 vsrc1ssH = vsrc3ssH; 148 vsrc1ssH = vsrc3ssH;
149 149
150 dst += stride; 150 dst += stride;
151 src += stride; 151 src += stride;
152 } 152 }
153 } 153 }
154 POWERPC_PERF_STOP_COUNT(PREFIX_h264_chroma_mc8_num, 1); 154 POWERPC_PERF_STOP_COUNT(PREFIX_h264_chroma_mc8_num, 1);
157 /* this code assumes stride % 16 == 0 */ 157 /* this code assumes stride % 16 == 0 */
158 static void PREFIX_h264_qpel16_h_lowpass_altivec(uint8_t * dst, uint8_t * src, int dstStride, int srcStride) { 158 static void PREFIX_h264_qpel16_h_lowpass_altivec(uint8_t * dst, uint8_t * src, int dstStride, int srcStride) {
159 POWERPC_PERF_DECLARE(PREFIX_h264_qpel16_h_lowpass_num, 1); 159 POWERPC_PERF_DECLARE(PREFIX_h264_qpel16_h_lowpass_num, 1);
160 POWERPC_PERF_START_COUNT(PREFIX_h264_qpel16_h_lowpass_num, 1); 160 POWERPC_PERF_START_COUNT(PREFIX_h264_qpel16_h_lowpass_num, 1);
161 register int i; 161 register int i;
162 162
163 const vector signed int vzero = vec_splat_s32(0); 163 const vector signed int vzero = vec_splat_s32(0);
164 const vector unsigned char permM2 = vec_lvsl(-2, src); 164 const vector unsigned char permM2 = vec_lvsl(-2, src);
165 const vector unsigned char permM1 = vec_lvsl(-1, src); 165 const vector unsigned char permM1 = vec_lvsl(-1, src);
166 const vector unsigned char permP0 = vec_lvsl(+0, src); 166 const vector unsigned char permP0 = vec_lvsl(+0, src);
167 const vector unsigned char permP1 = vec_lvsl(+1, src); 167 const vector unsigned char permP1 = vec_lvsl(+1, src);
256 const vector signed short sum1B = vec_adds(srcP0B, srcP1B); 256 const vector signed short sum1B = vec_adds(srcP0B, srcP1B);
257 const vector signed short sum2A = vec_adds(srcM1A, srcP2A); 257 const vector signed short sum2A = vec_adds(srcM1A, srcP2A);
258 const vector signed short sum2B = vec_adds(srcM1B, srcP2B); 258 const vector signed short sum2B = vec_adds(srcM1B, srcP2B);
259 const vector signed short sum3A = vec_adds(srcM2A, srcP3A); 259 const vector signed short sum3A = vec_adds(srcM2A, srcP3A);
260 const vector signed short sum3B = vec_adds(srcM2B, srcP3B); 260 const vector signed short sum3B = vec_adds(srcM2B, srcP3B);
261 261
262 const vector signed short pp1A = vec_mladd(sum1A, v20ss, v16ss); 262 const vector signed short pp1A = vec_mladd(sum1A, v20ss, v16ss);
263 const vector signed short pp1B = vec_mladd(sum1B, v20ss, v16ss); 263 const vector signed short pp1B = vec_mladd(sum1B, v20ss, v16ss);
264 264
265 const vector signed short pp2A = vec_mladd(sum2A, v5ss, (vector signed short)vzero); 265 const vector signed short pp2A = vec_mladd(sum2A, v5ss, (vector signed short)vzero);
266 const vector signed short pp2B = vec_mladd(sum2B, v5ss, (vector signed short)vzero); 266 const vector signed short pp2B = vec_mladd(sum2B, v5ss, (vector signed short)vzero);
267 267
268 const vector signed short pp3A = vec_add(sum3A, pp1A); 268 const vector signed short pp3A = vec_add(sum3A, pp1A);
269 const vector signed short pp3B = vec_add(sum3B, pp1B); 269 const vector signed short pp3B = vec_add(sum3B, pp1B);
270 270
271 const vector signed short psumA = vec_sub(pp3A, pp2A); 271 const vector signed short psumA = vec_sub(pp3A, pp2A);
272 const vector signed short psumB = vec_sub(pp3B, pp2B); 272 const vector signed short psumB = vec_sub(pp3B, pp2B);
298 298
299 /* this code assumes stride % 16 == 0 */ 299 /* this code assumes stride % 16 == 0 */
300 static void PREFIX_h264_qpel16_v_lowpass_altivec(uint8_t * dst, uint8_t * src, int dstStride, int srcStride) { 300 static void PREFIX_h264_qpel16_v_lowpass_altivec(uint8_t * dst, uint8_t * src, int dstStride, int srcStride) {
301 POWERPC_PERF_DECLARE(PREFIX_h264_qpel16_v_lowpass_num, 1); 301 POWERPC_PERF_DECLARE(PREFIX_h264_qpel16_v_lowpass_num, 1);
302 POWERPC_PERF_START_COUNT(PREFIX_h264_qpel16_v_lowpass_num, 1); 302 POWERPC_PERF_START_COUNT(PREFIX_h264_qpel16_v_lowpass_num, 1);
303 303
304 register int i; 304 register int i;
305 305
306 const vector signed int vzero = vec_splat_s32(0); 306 const vector signed int vzero = vec_splat_s32(0);
307 const vector unsigned char perm = vec_lvsl(0, src); 307 const vector unsigned char perm = vec_lvsl(0, src);
308 const vector signed short v20ss = (const vector signed short)AVV(20); 308 const vector signed short v20ss = (const vector signed short)AVV(20);
310 const vector signed short v5ss = vec_splat_s16(5); 310 const vector signed short v5ss = vec_splat_s16(5);
311 const vector signed short v16ss = (const vector signed short)AVV(16); 311 const vector signed short v16ss = (const vector signed short)AVV(16);
312 const vector unsigned char dstperm = vec_lvsr(0, dst); 312 const vector unsigned char dstperm = vec_lvsr(0, dst);
313 const vector unsigned char neg1 = (const vector unsigned char)vec_splat_s8(-1); 313 const vector unsigned char neg1 = (const vector unsigned char)vec_splat_s8(-1);
314 const vector unsigned char dstmask = vec_perm((const vector unsigned char)vzero, neg1, dstperm); 314 const vector unsigned char dstmask = vec_perm((const vector unsigned char)vzero, neg1, dstperm);
315 315
316 uint8_t *srcbis = src - (srcStride * 2); 316 uint8_t *srcbis = src - (srcStride * 2);
317 317
318 const vector unsigned char srcM2a = vec_ld(0, srcbis); 318 const vector unsigned char srcM2a = vec_ld(0, srcbis);
319 const vector unsigned char srcM2b = vec_ld(16, srcbis); 319 const vector unsigned char srcM2b = vec_ld(16, srcbis);
320 const vector unsigned char srcM2 = vec_perm(srcM2a, srcM2b, perm); 320 const vector unsigned char srcM2 = vec_perm(srcM2a, srcM2b, perm);
370 srcP0ssB = srcP1ssB; 370 srcP0ssB = srcP1ssB;
371 srcP1ssA = srcP2ssA; 371 srcP1ssA = srcP2ssA;
372 srcP1ssB = srcP2ssB; 372 srcP1ssB = srcP2ssB;
373 srcP2ssA = srcP3ssA; 373 srcP2ssA = srcP3ssA;
374 srcP2ssB = srcP3ssB; 374 srcP2ssB = srcP3ssB;
375 375
376 const vector signed short pp1A = vec_mladd(sum1A, v20ss, v16ss); 376 const vector signed short pp1A = vec_mladd(sum1A, v20ss, v16ss);
377 const vector signed short pp1B = vec_mladd(sum1B, v20ss, v16ss); 377 const vector signed short pp1B = vec_mladd(sum1B, v20ss, v16ss);
378 378
379 const vector signed short pp2A = vec_mladd(sum2A, v5ss, (vector signed short)vzero); 379 const vector signed short pp2A = vec_mladd(sum2A, v5ss, (vector signed short)vzero);
380 const vector signed short pp2B = vec_mladd(sum2B, v5ss, (vector signed short)vzero); 380 const vector signed short pp2B = vec_mladd(sum2B, v5ss, (vector signed short)vzero);
381 381
382 const vector signed short pp3A = vec_add(sum3A, pp1A); 382 const vector signed short pp3A = vec_add(sum3A, pp1A);
383 const vector signed short pp3B = vec_add(sum3B, pp1B); 383 const vector signed short pp3B = vec_add(sum3B, pp1B);
384 384
385 const vector signed short psumA = vec_sub(pp3A, pp2A); 385 const vector signed short psumA = vec_sub(pp3A, pp2A);
386 const vector signed short psumB = vec_sub(pp3B, pp2B); 386 const vector signed short psumB = vec_sub(pp3B, pp2B);
511 const vector signed short sum1B = vec_adds(srcP0B, srcP1B); 511 const vector signed short sum1B = vec_adds(srcP0B, srcP1B);
512 const vector signed short sum2A = vec_adds(srcM1A, srcP2A); 512 const vector signed short sum2A = vec_adds(srcM1A, srcP2A);
513 const vector signed short sum2B = vec_adds(srcM1B, srcP2B); 513 const vector signed short sum2B = vec_adds(srcM1B, srcP2B);
514 const vector signed short sum3A = vec_adds(srcM2A, srcP3A); 514 const vector signed short sum3A = vec_adds(srcM2A, srcP3A);
515 const vector signed short sum3B = vec_adds(srcM2B, srcP3B); 515 const vector signed short sum3B = vec_adds(srcM2B, srcP3B);
516 516
517 const vector signed short pp1A = vec_mladd(sum1A, v20ss, sum3A); 517 const vector signed short pp1A = vec_mladd(sum1A, v20ss, sum3A);
518 const vector signed short pp1B = vec_mladd(sum1B, v20ss, sum3B); 518 const vector signed short pp1B = vec_mladd(sum1B, v20ss, sum3B);
519 519
520 const vector signed short pp2A = vec_mladd(sum2A, v5ss, (vector signed short)vzero); 520 const vector signed short pp2A = vec_mladd(sum2A, v5ss, (vector signed short)vzero);
521 const vector signed short pp2B = vec_mladd(sum2B, v5ss, (vector signed short)vzero); 521 const vector signed short pp2B = vec_mladd(sum2B, v5ss, (vector signed short)vzero);
523 const vector signed short psumA = vec_sub(pp1A, pp2A); 523 const vector signed short psumA = vec_sub(pp1A, pp2A);
524 const vector signed short psumB = vec_sub(pp1B, pp2B); 524 const vector signed short psumB = vec_sub(pp1B, pp2B);
525 525
526 vec_st(psumA, 0, tmp); 526 vec_st(psumA, 0, tmp);
527 vec_st(psumB, 16, tmp); 527 vec_st(psumB, 16, tmp);
528 528
529 src += srcStride; 529 src += srcStride;
530 tmp += tmpStride; /* int16_t*, and stride is 16, so it's OK here */ 530 tmp += tmpStride; /* int16_t*, and stride is 16, so it's OK here */
531 } 531 }
532 532
533 const vector unsigned char dstperm = vec_lvsr(0, dst); 533 const vector unsigned char dstperm = vec_lvsr(0, dst);
534 const vector unsigned char neg1 = (const vector unsigned char)vec_splat_s8(-1); 534 const vector unsigned char neg1 = (const vector unsigned char)vec_splat_s8(-1);
535 const vector unsigned char dstmask = vec_perm((const vector unsigned char)vzero, neg1, dstperm); 535 const vector unsigned char dstmask = vec_perm((const vector unsigned char)vzero, neg1, dstperm);
536 const vector unsigned char mperm = (const vector unsigned char) 536 const vector unsigned char mperm = (const vector unsigned char)
537 AVV(0x00, 0x08, 0x01, 0x09, 0x02, 0x0A, 0x03, 0x0B, 537 AVV(0x00, 0x08, 0x01, 0x09, 0x02, 0x0A, 0x03, 0x0B,
538 0x04, 0x0C, 0x05, 0x0D, 0x06, 0x0E, 0x07, 0x0F); 538 0x04, 0x0C, 0x05, 0x0D, 0x06, 0x0E, 0x07, 0x0F);
539 539
540 int16_t *tmpbis = tmp - (tmpStride * 21); 540 int16_t *tmpbis = tmp - (tmpStride * 21);
541 541
542 vector signed short tmpM2ssA = vec_ld(0, tmpbis); 542 vector signed short tmpM2ssA = vec_ld(0, tmpbis);
543 vector signed short tmpM2ssB = vec_ld(16, tmpbis); 543 vector signed short tmpM2ssB = vec_ld(16, tmpbis);
544 tmpbis += tmpStride; 544 tmpbis += tmpStride;
605 605
606 const vector signed int sumAe = vec_add(pp1cAe, pp32Ae); 606 const vector signed int sumAe = vec_add(pp1cAe, pp32Ae);
607 const vector signed int sumAo = vec_add(pp1cAo, pp32Ao); 607 const vector signed int sumAo = vec_add(pp1cAo, pp32Ao);
608 const vector signed int sumBe = vec_add(pp1cBe, pp32Be); 608 const vector signed int sumBe = vec_add(pp1cBe, pp32Be);
609 const vector signed int sumBo = vec_add(pp1cBo, pp32Bo); 609 const vector signed int sumBo = vec_add(pp1cBo, pp32Bo);
610 610
611 const vector signed int ssumAe = vec_sra(sumAe, v10ui); 611 const vector signed int ssumAe = vec_sra(sumAe, v10ui);
612 const vector signed int ssumAo = vec_sra(sumAo, v10ui); 612 const vector signed int ssumAo = vec_sra(sumAo, v10ui);
613 const vector signed int ssumBe = vec_sra(sumBe, v10ui); 613 const vector signed int ssumBe = vec_sra(sumBe, v10ui);
614 const vector signed int ssumBo = vec_sra(sumBo, v10ui); 614 const vector signed int ssumBo = vec_sra(sumBo, v10ui);
615 615