annotate dcadsp.c @ 11989:176c5deb6756 libavcodec

Optimize split MC, so we don't always do 4x4 blocks of 4x4 pixels each, but we apply them as 16x8/8x16/8x8 subblocks where possible. Since this allows us to use width=8/16 instead of width=4 MC functions, we can now take more advantage of SSE2/SSSE3 optimizations, leading to a total speedup for splitMV filter of about 10%.
author rbultje
date Mon, 28 Jun 2010 13:50:55 +0000
parents 8d3539d6ba3d
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
11617
bb17732c00ef DCA: break out lfe_interpolation_fir() inner loops to a function
mru
parents:
diff changeset
1 /*
bb17732c00ef DCA: break out lfe_interpolation_fir() inner loops to a function
mru
parents:
diff changeset
2 * Copyright (c) 2004 Gildas Bazin
bb17732c00ef DCA: break out lfe_interpolation_fir() inner loops to a function
mru
parents:
diff changeset
3 * Copyright (c) 2010 Mans Rullgard <mans@mansr.com>
bb17732c00ef DCA: break out lfe_interpolation_fir() inner loops to a function
mru
parents:
diff changeset
4 *
bb17732c00ef DCA: break out lfe_interpolation_fir() inner loops to a function
mru
parents:
diff changeset
5 * This file is part of FFmpeg.
bb17732c00ef DCA: break out lfe_interpolation_fir() inner loops to a function
mru
parents:
diff changeset
6 *
bb17732c00ef DCA: break out lfe_interpolation_fir() inner loops to a function
mru
parents:
diff changeset
7 * FFmpeg is free software; you can redistribute it and/or
bb17732c00ef DCA: break out lfe_interpolation_fir() inner loops to a function
mru
parents:
diff changeset
8 * modify it under the terms of the GNU Lesser General Public
bb17732c00ef DCA: break out lfe_interpolation_fir() inner loops to a function
mru
parents:
diff changeset
9 * License as published by the Free Software Foundation; either
bb17732c00ef DCA: break out lfe_interpolation_fir() inner loops to a function
mru
parents:
diff changeset
10 * version 2.1 of the License, or (at your option) any later version.
bb17732c00ef DCA: break out lfe_interpolation_fir() inner loops to a function
mru
parents:
diff changeset
11 *
bb17732c00ef DCA: break out lfe_interpolation_fir() inner loops to a function
mru
parents:
diff changeset
12 * FFmpeg is distributed in the hope that it will be useful,
bb17732c00ef DCA: break out lfe_interpolation_fir() inner loops to a function
mru
parents:
diff changeset
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
bb17732c00ef DCA: break out lfe_interpolation_fir() inner loops to a function
mru
parents:
diff changeset
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
bb17732c00ef DCA: break out lfe_interpolation_fir() inner loops to a function
mru
parents:
diff changeset
15 * Lesser General Public License for more details.
bb17732c00ef DCA: break out lfe_interpolation_fir() inner loops to a function
mru
parents:
diff changeset
16 *
bb17732c00ef DCA: break out lfe_interpolation_fir() inner loops to a function
mru
parents:
diff changeset
17 * You should have received a copy of the GNU Lesser General Public
bb17732c00ef DCA: break out lfe_interpolation_fir() inner loops to a function
mru
parents:
diff changeset
18 * License along with FFmpeg; if not, write to the Free Software
bb17732c00ef DCA: break out lfe_interpolation_fir() inner loops to a function
mru
parents:
diff changeset
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
bb17732c00ef DCA: break out lfe_interpolation_fir() inner loops to a function
mru
parents:
diff changeset
20 */
bb17732c00ef DCA: break out lfe_interpolation_fir() inner loops to a function
mru
parents:
diff changeset
21
11619
8d3539d6ba3d DCA: ARM/NEON optimised lfe_fir
mru
parents: 11617
diff changeset
22 #include "config.h"
11617
bb17732c00ef DCA: break out lfe_interpolation_fir() inner loops to a function
mru
parents:
diff changeset
23 #include "dcadsp.h"
bb17732c00ef DCA: break out lfe_interpolation_fir() inner loops to a function
mru
parents:
diff changeset
24
bb17732c00ef DCA: break out lfe_interpolation_fir() inner loops to a function
mru
parents:
diff changeset
/**
 * Interpolating FIR for one decimated input sample (portable C version).
 *
 * Writes 2 * decifactor output samples: the first decifactor values go to
 * out[0 .. decifactor-1] and the second decifactor values go to
 * out[decifactor .. 2*decifactor-1].
 *
 * @param out        destination buffer, at least 2 * decifactor floats
 * @param in         pointer to the most recent input sample; the filter
 *                   reads in[0], in[-1], ... in[-(256 / decifactor - 1)]
 * @param coefs      filter coefficient table; 256 entries are consumed,
 *                   walked forward from coefs[0] for the first half of the
 *                   output and backward from coefs[255] for the second half
 * @param decifactor interpolation factor; nothing is written when it is
 *                   not positive
 * @param scale      gain applied to every accumulated sum
 * @param bias       offset added to every output after scaling
 */
static void dca_lfe_fir_c(float *out, const float *in, const float *coefs,
                          int decifactor, float scale, float bias)
{
    float *out2;
    const float *cf0 = coefs;
    const float *cf1 = coefs + 256;
    int taps;
    int j, k;

    /* No output is produced for a non-positive factor; returning early also
     * keeps the tap-count division below well defined. */
    if (decifactor <= 0)
        return;

    out2 = out + decifactor;
    taps = 256 / decifactor;

    /* One decimated sample generates 2*decifactor interpolated ones */
    for (k = 0; k < decifactor; k++) {
        float v0 = 0.0f;
        float v1 = 0.0f;

        /* The coefficient cursors are NOT reset between outputs: each output
         * consumes the next `taps` entries, cf0 moving up and cf1 moving
         * down through the same table. */
        for (j = 0; j < taps; j++) {
            float s = in[-j];
            v0 += s * *cf0++;
            v1 += s * *--cf1;
        }
        *out++  = (v0 * scale) + bias;
        *out2++ = (v1 * scale) + bias;
    }
}
bb17732c00ef DCA: break out lfe_interpolation_fir() inner loops to a function
mru
parents:
diff changeset
46
bb17732c00ef DCA: break out lfe_interpolation_fir() inner loops to a function
mru
parents:
diff changeset
47 void ff_dcadsp_init(DCADSPContext *s)
bb17732c00ef DCA: break out lfe_interpolation_fir() inner loops to a function
mru
parents:
diff changeset
48 {
bb17732c00ef DCA: break out lfe_interpolation_fir() inner loops to a function
mru
parents:
diff changeset
49 s->lfe_fir = dca_lfe_fir_c;
11619
8d3539d6ba3d DCA: ARM/NEON optimised lfe_fir
mru
parents: 11617
diff changeset
50 if (ARCH_ARM) ff_dcadsp_init_arm(s);
11617
bb17732c00ef DCA: break out lfe_interpolation_fir() inner loops to a function
mru
parents:
diff changeset
51 }