view bfin/mpegvideo_bfin.c @ 5775:38f220befc32 libavcodec

Replace the "signed short" typecast with "int16_t", as suggested by Mans Rullgard. The value on the right-hand side is a 16-bit signed quantity, and we cannot know for sure that short is in fact 16 bits wide, even though this is virtually always the case.
author voroshil
date Thu, 04 Oct 2007 15:13:42 +0000
parents 757efe20a3c8
children f7cbb7733146
line wrap: on
line source

/*
 * BlackFin MPEGVIDEO OPTIMIZATIONS
 *
 * Copyright (C) 2007 Marc Hoffman <mmh@pleasantst.com>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "dsputil.h"
#include "mpegvideo.h"
#include "avcodec.h"
#include "dsputil_bfin.h"


/*
 * Forward DCT routine defined elsewhere (not in the visible part of this
 * file).  dct_quantize_bfin() calls it on the coefficient block before
 * denoising and quantization; it returns nothing, so any result must be
 * delivered through `block`.
 * NOTE(review): attribute_l1_text presumably places the routine in BlackFin
 * L1 instruction memory -- confirm against dsputil_bfin.h.
 */
extern void ff_bfin_fdct (DCTELEM *block) attribute_l1_text;


/**
 * Transform and quantize one coefficient block with BlackFin inline assembly.
 *
 * Steps, in order: forward DCT (ff_bfin_fdct), optional denoising
 * (s->denoise_dct, only when s->dct_error_sum is set), intra DC scaling,
 * vectorised quantization of all 64 coefficients, a backwards scan for the
 * last non-zero coefficient and, if the IDCT requires it, a permutation of
 * the non-zero coefficients.
 *
 * @param s        encoder context; supplies the scan table, the 16-bit
 *                 quantization matrices/biases, DC scales and max_qcoeff
 * @param block    64 coefficients, transformed and quantized in place
 * @param n        block number; values below 4 select s->y_dc_scale,
 *                 others s->c_dc_scale (intra blocks without h263_aic only)
 * @param qscale   index into s->q_intra_matrix16 / s->q_inter_matrix16
 * @param overflow set to non-zero when the OR of all quantized magnitudes
 *                 exceeds s->max_qcoeff, i.e. an overflow might have happened
 * @return scan-order index of the last non-zero coefficient found by the
 *         scan; when the scan finds none, 0 for intra and -1 for inter blocks
 */
static int dct_quantize_bfin (MpegEncContext *s,
                              DCTELEM *block, int n,
                              int qscale, int *overflow)
{
    int last_non_zero, q, start_i;
    const short *qmat;
    short *bias;
    const uint8_t *scantable= s->intra_scantable.scantable;
    /* Scratch pointers for the asm blocks.  The templates post-increment /
     * post-decrement them, so they must be read-write operands and must not
     * alias the C variables that are still needed afterwards. */
    DCTELEM       *coef_ptr;
    const short   *qmat_ptr;
    const uint8_t *scan_ptr;
    short dc = 0;   /* only meaningful for intra blocks (start_i == 1) */
    int   max=0;    /* OR-accumulator of the quantized magnitudes */

    PROF("fdct",0);
    ff_bfin_fdct (block);
    EPROF();

    PROF("denoise",1);
    if(s->dct_error_sum)
        s->denoise_dct(s, block);
    EPROF();

    PROF("quant-init",2);
    if (s->mb_intra) {
        if (!s->h263_aic) {
            if (n < 4)
                q = s->y_dc_scale;
            else
                q = s->c_dc_scale;
            q = q << 3;
        } else
            /* For AIC we skip quant/dequant of INTRADC */
            q = 1 << 3;

        /* note: block[0] is assumed to be positive */
        dc = block[0] = (block[0] + (q >> 1)) / q;
        start_i = 1;
        last_non_zero = 0;
        bias = s->q_intra_matrix16[qscale][1];
        qmat = s->q_intra_matrix16[qscale][0];

    } else {
        start_i = 0;
        last_non_zero = -1;
        bias = s->q_inter_matrix16[qscale][1];
        qmat = s->q_inter_matrix16[qscale][0];

    }
    EPROF();

    PROF("quantize",4);

    /* Reference C for the asm loop below (two coefficients per iteration): */
    /*  for(i=start_i; i<64; i++) {                           */
    /*      sign     = (block[i]>>15)|1;                      */
    /*      level    = ((abs(block[i])+bias[0])*qmat[i])>>16; */
    /*      if (level < 0) level = 0;                         */
    /*      max     |= level;                                 */
    /*      level    = level * sign;                          */
    /*      block[i] = level;                                 */
    /*  } */

    coef_ptr = block;
    qmat_ptr = qmat;

    /*
     * Operand notes:
     *  - %0 (max) is read by "%0=%0|r1" before it is ever written, so it has
     *    to be a read-write operand; "&" keeps it out of the registers that
     *    hold the other inputs, which are re-read on every iteration.
     *  - %1/%2 are advanced by the loads, hence "+b" on scratch copies.
     *  - "memory": the loop stores into block[] through I2 and reads block[]
     *    and qmat[] through pointers the compiler cannot see through.
     * NOTE(review): there is one load before the loop plus one per each of the
     * 32 iterations, so the final "r1=[%1++]" appears to read one 32-bit word
     * past coefficient 63 -- confirm that this over-read is harmless.
     */
    asm volatile
        ("i2=%1;\n\t"
         "r1=[%1++];                                                         \n\t"
         "r0=r1>>>15 (v);                                                    \n\t"
         "lsetup (0f,1f) lc0=%3;                                             \n\t"
         "0:   r0=r0|%4;                                                     \n\t"
         "     r1=abs r1 (v)                                    || r2=[%2++];\n\t"
         "     r1=r1+|+%5;                                                   \n\t"
         "     r1=max(r1,%6) (v);                                            \n\t"
         "     r1.h=(a1 =r1.h*r2.h), r1.l=(a0 =r1.l*r2.l) (tfu);             \n\t"
         "     %0=%0|r1;                                                     \n\t"
         "     r0.h=(a1 =r1.h*r0.h), r0.l=(a0 =r1.l*r0.l) (is)  || r1=[%1++];\n\t"
         "1:   r0=r1>>>15 (v)                                   || [i2++]=r0;\n\t"
         "r1=%0>>16;                                                         \n\t"
         "%0=%0|r1;                                                          \n\t"
         "%0.h=0;                                                            \n\t"
         : "+&d" (max), "+b" (coef_ptr), "+b" (qmat_ptr)
         : "a" (32), "d" (0x00010001), "d" (bias[0]*0x10001), "d" (0)
         : "R0","R1","R2", "I2", "memory");

    /* The loop above also quantized block[0]; restore the separately scaled
     * intra DC value. */
    if (start_i == 1) block[0] = dc;

    EPROF();


    PROF("zzscan",5);

    scan_ptr = scantable + 63;

    /*
     * Walk the scan table backwards and stop at the first non-zero
     * coefficient.  %1 is decremented by the byte loads, hence "+a" on a
     * scratch copy; "memory" makes sure the block[0] store above is visible.
     * NOTE(review): the hardware loop runs 63 times and reports lc0, so it
     * seems to test scan positions 63..1 only; position 0 is apparently never
     * examined, which would matter for inter blocks (start_i == 0) -- confirm.
     */
    asm volatile
        ("r0=b[%1--] (x);         \n\t"
         "lsetup (0f,1f) lc0=%3;  \n\t"     /*    for(i=63; i>=start_i; i--) { */
         "0: p0=r0;               \n\t"     /*        j = scantable[i];        */
         "   p0=%2+(p0<<1);       \n\t"     /*        if (block[j]) {          */
         "   r0=w[p0];            \n\t"     /*           last_non_zero = i;    */
         "   cc=r0==0;            \n\t"     /*           break;                */
         "   if !cc jump 2f;      \n\t"     /*        }                        */
         "1: r0=b[%1--] (x);      \n\t"     /*    }                            */
         "   %0=%4;               \n\t"
         "   jump 3f;             \n\t"
         "2: %0=lc0;              \n\t"
         "3:\n\t"

         : "=d" (last_non_zero), "+a" (scan_ptr)
         : "a" (block), "a" (63), "d" (last_non_zero)
         : "P0","R0", "memory");

    EPROF();

    *overflow= s->max_qcoeff < max; //overflow might have happened

    bfprof();

    /* The IDCT may expect the coefficients in a permuted order; only the
     * coefficients up to last_non_zero are permuted. */
    if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
        ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);

    return last_non_zero;
}

/**
 * Hook the BlackFin implementation into the encoder context.
 *
 * Only the dct_quantize function pointer is replaced; every other field
 * of the context is left as the caller set it up.
 *
 * @param s encoder context to patch
 */
void MPV_common_init_bfin(MpegEncContext *s)
{
    /* Route DCT + quantization through the BlackFin asm version. */
    s->dct_quantize = dct_quantize_bfin;
}