comparison ppc/mpegvideo_altivec.c @ 1001:95cbffdc98a9 libavcodec

dct_unquantize_h263_altivec by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
author michaelni
date Sun, 12 Jan 2003 13:29:24 +0000
parents edc10966b081
children 3b7cc8e4b83f
comparison
equal deleted inserted replaced
1000:3e1f39583bee 1001:95cbffdc98a9
501 } 501 }
502 502
503 return lastNonZero; 503 return lastNonZero;
504 } 504 }
505 505
506 /*
507 AltiVec version of dct_unquantize_h263
508 this code assumes `block' is 16 bytes-aligned
509 */
510 void dct_unquantize_h263_altivec(MpegEncContext *s,
511 DCTELEM *block, int n, int qscale)
512 {
513 int i, level, qmul, qadd;
514 int nCoeffs;
515
516 assert(s->block_last_index[n]>=0);
517
518 qadd = (qscale - 1) | 1;
519 qmul = qscale << 1;
520
521 if (s->mb_intra) {
522 if (!s->h263_aic) {
523 if (n < 4)
524 block[0] = block[0] * s->y_dc_scale;
525 else
526 block[0] = block[0] * s->c_dc_scale;
527 }else
528 qadd = 0;
529 i = 1;
530 nCoeffs= 63; //does not allways use zigzag table
531 } else {
532 i = 0;
533 nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ];
534 }
535
536 #if 0
537 for(;i<=nCoeffs;i++) {
538 level = block[i];
539 if (level) {
540 if (level < 0) {
541 level = level * qmul - qadd;
542 } else {
543 level = level * qmul + qadd;
544 }
545 block[i] = level;
546 }
547 }
548 #else
549 {
550 register const vector short vczero = (const vector short)(0);
551 short __attribute__ ((aligned(16))) qmul8[] =
552 {
553 qmul, qmul, qmul, qmul,
554 qmul, qmul, qmul, qmul
555 };
556 short __attribute__ ((aligned(16))) qadd8[] =
557 {
558 qadd, qadd, qadd, qadd,
559 qadd, qadd, qadd, qadd
560 };
561 short __attribute__ ((aligned(16))) nqadd8[] =
562 {
563 -qadd, -qadd, -qadd, -qadd,
564 -qadd, -qadd, -qadd, -qadd
565 };
566 register vector short blockv, qmulv, qaddv, nqaddv, temp1;
567 register vector bool short blockv_null, blockv_neg;
568 register short backup_0 = block[0];
569 register int j = 0;
570
571 qmulv = vec_ld(0, qmul8);
572 qaddv = vec_ld(0, qadd8);
573 nqaddv = vec_ld(0, nqadd8);
574
575 // first make sure block[j] is 16 bytes-aligned
576 for(j = 0; (j <= nCoeffs) && ((((unsigned long)block) + (j << 1)) & 0x0000000F) ; j++) {
577 level = block[j];
578 if (level) {
579 if (level < 0) {
580 level = level * qmul - qadd;
581 } else {
582 level = level * qmul + qadd;
583 }
584 block[j] = level;
585 }
586 }
587
588 // vectorize all the 16 bytes-aligned blocks
589 // of 8 elements
590 for(; (j + 7) <= nCoeffs ; j+=8)
591 {
592 blockv = vec_ld(j << 1, block);
593 blockv_neg = vec_cmplt(blockv, vczero);
594 blockv_null = vec_cmpeq(blockv, vczero);
595 // choose between +qadd or -qadd as the third operand
596 temp1 = vec_sel(qaddv, nqaddv, blockv_neg);
597 // multiply & add (block{i,i+7} * qmul [+-] qadd)
598 temp1 = vec_mladd(blockv, qmulv, temp1);
599 // put 0 where block[{i,i+7} used to have 0
600 blockv = vec_sel(temp1, blockv, blockv_null);
601 vec_st(blockv, j << 1, block);
602 }
603
604 // if nCoeffs isn't a multiple of 8, finish the job
605 // using good old scalar units.
606 // (we could do it using a truncated vector,
607 // but I'm not sure it's worth the hassle)
608 for(; j <= nCoeffs ; j++) {
609 level = block[j];
610 if (level) {
611 if (level < 0) {
612 level = level * qmul - qadd;
613 } else {
614 level = level * qmul + qadd;
615 }
616 block[j] = level;
617 }
618 }
619
620 if (i == 1)
621 { // cheat. this avoid special-casing the first iteration
622 block[0] = backup_0;
623 }
624 }
625 #endif
626 }