comparison ppc/mpegvideo_altivec.c @ 1033:b4172ff70d27 libavcodec

AltiVec on non-Darwin systems, patch by Romain Dolbeau
author bellard
date Sun, 26 Jan 2003 22:29:47 +0000
parents 35cf2f4a0f8c
children f59c3f66363b
comparison
equal deleted inserted replaced
1032:8f440ca8e0b0 1033:b4172ff70d27
88 vec = vec_ld(0, _load_addr); \ 88 vec = vec_ld(0, _load_addr); \
89 vec = vec_perm(vec, vec, _perm_vec); \ 89 vec = vec_perm(vec, vec, _perm_vec); \
90 vec = vec_splat(vec, 0); \ 90 vec = vec_splat(vec, 0); \
91 } 91 }
92 92
93
94 #ifdef CONFIG_DARWIN
95 #define FOUROF(a) (a)
96 #else
97 // slower, for dumb non-apple GCC
98 #define FOUROF(a) {a,a,a,a}
99 #endif
93 int dct_quantize_altivec(MpegEncContext* s, 100 int dct_quantize_altivec(MpegEncContext* s,
94 DCTELEM* data, int n, 101 DCTELEM* data, int n,
95 int qscale, int* overflow) 102 int qscale, int* overflow)
96 { 103 {
97 int lastNonZero; 104 int lastNonZero;
98 vector float row0, row1, row2, row3, row4, row5, row6, row7; 105 vector float row0, row1, row2, row3, row4, row5, row6, row7;
99 vector float alt0, alt1, alt2, alt3, alt4, alt5, alt6, alt7; 106 vector float alt0, alt1, alt2, alt3, alt4, alt5, alt6, alt7;
100 const vector float zero = (const vector float)(0.0f); 107 const vector float zero = (const vector float)FOUROF(0.);
101 108
102 // Load the data into the row/alt vectors 109 // Load the data into the row/alt vectors
103 { 110 {
104 vector signed short data0, data1, data2, data3, data4, data5, data6, data7; 111 vector signed short data0, data1, data2, data3, data4, data5, data6, data7;
105 112
139 // The following block could exist as a separate altivec dct 146 // The following block could exist as a separate altivec dct
140 // function. However, if we put it inline, the DCT data can remain 147 // function. However, if we put it inline, the DCT data can remain
141 // in the vector local variables, as floats, which we'll use during the 148 // in the vector local variables, as floats, which we'll use during the
142 // quantize step... 149 // quantize step...
143 { 150 {
144 const vector float vec_0_298631336 = (vector float)(0.298631336f); 151 const vector float vec_0_298631336 = (vector float)FOUROF(0.298631336f);
145 const vector float vec_0_390180644 = (vector float)(-0.390180644f); 152 const vector float vec_0_390180644 = (vector float)FOUROF(-0.390180644f);
146 const vector float vec_0_541196100 = (vector float)(0.541196100f); 153 const vector float vec_0_541196100 = (vector float)FOUROF(0.541196100f);
147 const vector float vec_0_765366865 = (vector float)(0.765366865f); 154 const vector float vec_0_765366865 = (vector float)FOUROF(0.765366865f);
148 const vector float vec_0_899976223 = (vector float)(-0.899976223f); 155 const vector float vec_0_899976223 = (vector float)FOUROF(-0.899976223f);
149 const vector float vec_1_175875602 = (vector float)(1.175875602f); 156 const vector float vec_1_175875602 = (vector float)FOUROF(1.175875602f);
150 const vector float vec_1_501321110 = (vector float)(1.501321110f); 157 const vector float vec_1_501321110 = (vector float)FOUROF(1.501321110f);
151 const vector float vec_1_847759065 = (vector float)(-1.847759065f); 158 const vector float vec_1_847759065 = (vector float)FOUROF(-1.847759065f);
152 const vector float vec_1_961570560 = (vector float)(-1.961570560f); 159 const vector float vec_1_961570560 = (vector float)FOUROF(-1.961570560f);
153 const vector float vec_2_053119869 = (vector float)(2.053119869f); 160 const vector float vec_2_053119869 = (vector float)FOUROF(2.053119869f);
154 const vector float vec_2_562915447 = (vector float)(-2.562915447f); 161 const vector float vec_2_562915447 = (vector float)FOUROF(-2.562915447f);
155 const vector float vec_3_072711026 = (vector float)(3.072711026f); 162 const vector float vec_3_072711026 = (vector float)FOUROF(3.072711026f);
156 163
157 164
158 int whichPass, whichHalf; 165 int whichPass, whichHalf;
159 166
160 for(whichPass = 1; whichPass<=2; whichPass++) 167 for(whichPass = 1; whichPass<=2; whichPass++)
304 311
305 // Load the bias vector (We add 0.5 to the bias so that we're 312 // Load the bias vector (We add 0.5 to the bias so that we're
306 // rounding when we convert to int, instead of flooring.) 313 // rounding when we convert to int, instead of flooring.)
307 { 314 {
308 vector signed int biasInt; 315 vector signed int biasInt;
309 const vector float negOneFloat = (vector float)(-1.0f); 316 const vector float negOneFloat = (vector float)FOUROF(-1.0f);
310 LOAD4(biasInt, biasAddr); 317 LOAD4(biasInt, biasAddr);
311 bias = vec_ctf(biasInt, QUANT_BIAS_SHIFT); 318 bias = vec_ctf(biasInt, QUANT_BIAS_SHIFT);
312 negBias = vec_madd(bias, negOneFloat, zero); 319 negBias = vec_madd(bias, negOneFloat, zero);
313 } 320 }
314 321
501 s->intra_scantable.scantable, lastNonZero); 508 s->intra_scantable.scantable, lastNonZero);
502 } 509 }
503 510
504 return lastNonZero; 511 return lastNonZero;
505 } 512 }
513 #undef FOUROF
506 514
507 /* 515 /*
508 AltiVec version of dct_unquantize_h263 516 AltiVec version of dct_unquantize_h263
509 this code assumes `block' is 16-byte aligned 517 this code assumes `block' is 16-byte aligned
510 */ 518 */
549 block[i] = level; 557 block[i] = level;
550 } 558 }
551 } 559 }
552 #else /* ALTIVEC_USE_REFERENCE_C_CODE */ 560 #else /* ALTIVEC_USE_REFERENCE_C_CODE */
553 { 561 {
554 register const vector short vczero = (const vector short)(0); 562 register const vector short vczero = (const vector short)vec_splat_s16(0);
555 short __attribute__ ((aligned(16))) qmul8[] = 563 short __attribute__ ((aligned(16))) qmul8[] =
556 { 564 {
557 qmul, qmul, qmul, qmul, 565 qmul, qmul, qmul, qmul,
558 qmul, qmul, qmul, qmul 566 qmul, qmul, qmul, qmul
559 }; 567 };