view synth_filter.c @ 12454:f4355cd85faa libavcodec

Port latest x264 deblock asm (before they moved to using NV12 as internal format), LGPL'ed with permission from Jason and Loren. This includes mmx2 code, so remove inline asm from h264dsp_mmx.c accordingly.
author rbultje
date Fri, 03 Sep 2010 16:52:46 +0000
parents 18f17f44de37
children
line wrap: on
line source

/*
 * copyright (c) 2008 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "fft.h"
#include "synth_filter.h"

static void synth_filter_float(FFTContext *imdct,
                           float *synth_buf_ptr, int *synth_buf_offset,
                           float synth_buf2[32], const float window[512],
                           float out[32], const float in[32], float scale, float bias)
{
    float *synth_buf= synth_buf_ptr + *synth_buf_offset;
    int i, j;

    ff_imdct_half(imdct, synth_buf, in);

    for (i = 0; i < 16; i++){
        float a= synth_buf2[i     ];
        float b= synth_buf2[i + 16];
        float c= 0;
        float d= 0;
        for (j = 0; j < 512 - *synth_buf_offset; j += 64){
            a += window[i + j     ]*(-synth_buf[15 - i + j      ]);
            b += window[i + j + 16]*( synth_buf[     i + j      ]);
            c += window[i + j + 32]*( synth_buf[16 + i + j      ]);
            d += window[i + j + 48]*( synth_buf[31 - i + j      ]);
        }
        for (     ; j < 512; j += 64){
            a += window[i + j     ]*(-synth_buf[15 - i + j - 512]);
            b += window[i + j + 16]*( synth_buf[     i + j - 512]);
            c += window[i + j + 32]*( synth_buf[16 + i + j - 512]);
            d += window[i + j + 48]*( synth_buf[31 - i + j - 512]);
        }
        out[i     ] = a*scale + bias;
        out[i + 16] = b*scale + bias;
        synth_buf2[i     ] = c;
        synth_buf2[i + 16] = d;
    }
    *synth_buf_offset= (*synth_buf_offset - 32)&511;
}

av_cold void ff_synth_filter_init(SynthFilterContext *c)
{
    c->synth_filter_float = synth_filter_float;

    if (ARCH_ARM) ff_synth_filter_init_arm(c);
}