Mercurial > libavcodec.hg
comparison ppc/mpegvideo_altivec.c @ 1001:95cbffdc98a9 libavcodec
dct_unquantize_h263_altivec by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
author | michaelni |
---|---|
date | Sun, 12 Jan 2003 13:29:24 +0000 |
parents | edc10966b081 |
children | 3b7cc8e4b83f |
comparison
equal
deleted
inserted
replaced
1000:3e1f39583bee | 1001:95cbffdc98a9 |
---|---|
501 } | 501 } |
502 | 502 |
503 return lastNonZero; | 503 return lastNonZero; |
504 } | 504 } |
505 | 505 |
506 /* | |
507 AltiVec version of dct_unquantize_h263: rescales the coefficients of `block' in place. With qmul = 2 * qscale and qadd = (qscale - 1) | 1, every nonzero coefficient c becomes c * qmul + qadd when c > 0 and c * qmul - qadd when c < 0; zeros stay zero. Non-intra blocks cover indices 0..raster_end[block_last_index[n]]; intra blocks cover indices 1..63, and their block[0] is instead multiplied by y_dc_scale (n < 4) or c_dc_scale, unless s->h263_aic is set, in which case block[0] is left as is and qadd is forced to 0. | |
508 this code is written for a 16 bytes-aligned `block'; a scalar prologue nevertheless handles any leading elements until a 16-byte boundary is reached, and a scalar epilogue handles what is left after the last full group of 8 | |
509 */ | |
510 void dct_unquantize_h263_altivec(MpegEncContext *s, | |
511 DCTELEM *block, int n, int qscale) | |
512 { | |
513 int i, level, qmul, qadd; | |
514 int nCoeffs; | |
515 | |
516 assert(s->block_last_index[n]>=0); | |
517 | |
518 qadd = (qscale - 1) | 1; | |
519 qmul = qscale << 1; | |
520 | |
521 if (s->mb_intra) { | |
522 if (!s->h263_aic) { | |
523 if (n < 4) | |
524 block[0] = block[0] * s->y_dc_scale; | |
525 else | |
526 block[0] = block[0] * s->c_dc_scale; | |
527 }else | |
528 qadd = 0; | |
529 i = 1; | |
530 nCoeffs= 63; // does not always use the zigzag table, hence the fixed upper index 63 | |
531 } else { | |
532 i = 0; | |
533 nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]; | |
534 } | |
535 | |
536 #if 0 /* plain scalar version of the loop, compiled out */ | |
537 for(;i<=nCoeffs;i++) { | |
538 level = block[i]; | |
539 if (level) { | |
540 if (level < 0) { | |
541 level = level * qmul - qadd; | |
542 } else { | |
543 level = level * qmul + qadd; | |
544 } | |
545 block[i] = level; | |
546 } | |
547 } | |
548 #else | |
549 { | |
550 register const vector short vczero = (const vector short)(0); | |
551 short __attribute__ ((aligned(16))) qmul8[] = | |
552 { | |
553 qmul, qmul, qmul, qmul, | |
554 qmul, qmul, qmul, qmul | |
555 }; | |
556 short __attribute__ ((aligned(16))) qadd8[] = | |
557 { | |
558 qadd, qadd, qadd, qadd, | |
559 qadd, qadd, qadd, qadd | |
560 }; | |
561 short __attribute__ ((aligned(16))) nqadd8[] = | |
562 { | |
563 -qadd, -qadd, -qadd, -qadd, | |
564 -qadd, -qadd, -qadd, -qadd | |
565 }; | |
566 register vector short blockv, qmulv, qaddv, nqaddv, temp1; | |
567 register vector bool short blockv_null, blockv_neg; | |
568 register short backup_0 = block[0]; /* saved (after any DC scaling above) because the loops below start at index 0 */ | |
569 register int j = 0; | |
570 | |
571 qmulv = vec_ld(0, qmul8); | |
572 qaddv = vec_ld(0, qadd8); | |
573 nqaddv = vec_ld(0, nqadd8); | |
574 | |
575 // scalar prologue: process elements one by one until &block[j] is 16 bytes-aligned (or j passes nCoeffs) | |
576 for(j = 0; (j <= nCoeffs) && ((((unsigned long)block) + (j << 1)) & 0x0000000F) ; j++) { | |
577 level = block[j]; | |
578 if (level) { | |
579 if (level < 0) { | |
580 level = level * qmul - qadd; | |
581 } else { | |
582 level = level * qmul + qadd; | |
583 } | |
584 block[j] = level; | |
585 } | |
586 } | |
587 | |
588 // vector loop: handle every remaining full 16 bytes-aligned group | |
589 // of 8 elements with one load, one multiply-add and one store | |
590 for(; (j + 7) <= nCoeffs ; j+=8) | |
591 { | |
592 blockv = vec_ld(j << 1, block); | |
593 blockv_neg = vec_cmplt(blockv, vczero); | |
594 blockv_null = vec_cmpeq(blockv, vczero); | |
595 // per element, choose -qadd where the coefficient is negative, +qadd otherwise, as the third operand | |
596 temp1 = vec_sel(qaddv, nqaddv, blockv_neg); | |
597 // multiply & add (block[j..j+7] * qmul [+-] qadd) | |
598 temp1 = vec_mladd(blockv, qmulv, temp1); | |
599 // put 0 back where block[j..j+7] used to have 0 | |
600 blockv = vec_sel(temp1, blockv, blockv_null); | |
601 vec_st(blockv, j << 1, block); | |
602 } | |
603 | |
604 // if fewer than 8 coefficients remain up to nCoeffs, finish the job | |
605 // using good old scalar units. | |
606 // (we could do it using a truncated vector, | |
607 // but I'm not sure it's worth the hassle) | |
608 for(; j <= nCoeffs ; j++) { | |
609 level = block[j]; | |
610 if (level) { | |
611 if (level < 0) { | |
612 level = level * qmul - qadd; | |
613 } else { | |
614 level = level * qmul + qadd; | |
615 } | |
616 block[j] = level; | |
617 } | |
618 } | |
619 | |
620 if (i == 1) | |
621 { // cheat: in the intra case the loops above also rescaled block[0]; restoring the saved value avoids special-casing the first iteration | |
622 block[0] = backup_0; | |
623 } | |
624 } | |
625 #endif | |
626 } |