Mercurial > libavcodec.hg
annotate dcadsp.c @ 11989:176c5deb6756 libavcodec
Optimize split MC, so we don't always do 4x4 blocks of 4x4 pixels each, but
we apply them as 16x8/8x16/8x8 subblocks where possible. Since this allows
us to use width=8/16 instead of width=4 MC functions, we can now take more
advantage of SSE2/SSSE3 optimizations, leading to a total speedup for splitMV
filter of about 10%.
author | rbultje |
---|---|
date | Mon, 28 Jun 2010 13:50:55 +0000 |
parents | 8d3539d6ba3d |
children |
rev | line source |
---|---|
11617
bb17732c00ef
DCA: break out lfe_interpolation_fir() inner loops to a function
mru
parents:
diff
changeset
|
1 /* |
bb17732c00ef
DCA: break out lfe_interpolation_fir() inner loops to a function
mru
parents:
diff
changeset
|
2 * Copyright (c) 2004 Gildas Bazin |
bb17732c00ef
DCA: break out lfe_interpolation_fir() inner loops to a function
mru
parents:
diff
changeset
|
3 * Copyright (c) 2010 Mans Rullgard <mans@mansr.com> |
bb17732c00ef
DCA: break out lfe_interpolation_fir() inner loops to a function
mru
parents:
diff
changeset
|
4 * |
bb17732c00ef
DCA: break out lfe_interpolation_fir() inner loops to a function
mru
parents:
diff
changeset
|
5 * This file is part of FFmpeg. |
bb17732c00ef
DCA: break out lfe_interpolation_fir() inner loops to a function
mru
parents:
diff
changeset
|
6 * |
bb17732c00ef
DCA: break out lfe_interpolation_fir() inner loops to a function
mru
parents:
diff
changeset
|
7 * FFmpeg is free software; you can redistribute it and/or |
bb17732c00ef
DCA: break out lfe_interpolation_fir() inner loops to a function
mru
parents:
diff
changeset
|
8 * modify it under the terms of the GNU Lesser General Public |
bb17732c00ef
DCA: break out lfe_interpolation_fir() inner loops to a function
mru
parents:
diff
changeset
|
9 * License as published by the Free Software Foundation; either |
bb17732c00ef
DCA: break out lfe_interpolation_fir() inner loops to a function
mru
parents:
diff
changeset
|
10 * version 2.1 of the License, or (at your option) any later version. |
bb17732c00ef
DCA: break out lfe_interpolation_fir() inner loops to a function
mru
parents:
diff
changeset
|
11 * |
bb17732c00ef
DCA: break out lfe_interpolation_fir() inner loops to a function
mru
parents:
diff
changeset
|
12 * FFmpeg is distributed in the hope that it will be useful, |
bb17732c00ef
DCA: break out lfe_interpolation_fir() inner loops to a function
mru
parents:
diff
changeset
|
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
bb17732c00ef
DCA: break out lfe_interpolation_fir() inner loops to a function
mru
parents:
diff
changeset
|
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
bb17732c00ef
DCA: break out lfe_interpolation_fir() inner loops to a function
mru
parents:
diff
changeset
|
15 * Lesser General Public License for more details. |
bb17732c00ef
DCA: break out lfe_interpolation_fir() inner loops to a function
mru
parents:
diff
changeset
|
16 * |
bb17732c00ef
DCA: break out lfe_interpolation_fir() inner loops to a function
mru
parents:
diff
changeset
|
17 * You should have received a copy of the GNU Lesser General Public |
bb17732c00ef
DCA: break out lfe_interpolation_fir() inner loops to a function
mru
parents:
diff
changeset
|
18 * License along with FFmpeg; if not, write to the Free Software |
bb17732c00ef
DCA: break out lfe_interpolation_fir() inner loops to a function
mru
parents:
diff
changeset
|
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
bb17732c00ef
DCA: break out lfe_interpolation_fir() inner loops to a function
mru
parents:
diff
changeset
|
20 */ |
bb17732c00ef
DCA: break out lfe_interpolation_fir() inner loops to a function
mru
parents:
diff
changeset
|
21 |
11619 | 22 #include "config.h" |
11617
bb17732c00ef
DCA: break out lfe_interpolation_fir() inner loops to a function
mru
parents:
diff
changeset
|
23 #include "dcadsp.h" |
bb17732c00ef
DCA: break out lfe_interpolation_fir() inner loops to a function
mru
parents:
diff
changeset
|
24 |
bb17732c00ef
DCA: break out lfe_interpolation_fir() inner loops to a function
mru
parents:
diff
changeset
|
/**
 * Interpolation FIR used for the DCA LFE channel (portable C version).
 *
 * One decimated input sample generates 2*decifactor interpolated output
 * samples: out[0 .. decifactor-1] are filtered with the coefficient table
 * read forwards from coefs[0], and out[decifactor .. 2*decifactor-1] with
 * the same table read backwards from coefs[255].
 *
 * @param out        destination, 2*decifactor floats are written
 * @param in         points at the newest input sample; the samples
 *                   in[0], in[-1], ..., in[-(256/decifactor - 1)] are read
 * @param coefs      coefficient table; entries 0..255 may be read
 * @param decifactor decimation factor; nothing is written when it is <= 0
 * @param scale      gain applied to each filtered value
 * @param bias       offset added after scaling
 */
static void dca_lfe_fir_c(float *out, const float *in, const float *coefs,
                          int decifactor, float scale, float bias)
{
    int taps;     /* input samples consumed per output sample        */
    int pos = 0;  /* running coefficient index, carried across phases */
    int phase, t;

    /* Neither loop of the filter would run: leave the output untouched. */
    if (decifactor <= 0)
        return;

    taps = 256 / decifactor;

    for (phase = 0; phase < decifactor; phase++) {
        float acc_fwd = 0.0f;
        float acc_rev = 0.0f;

        for (t = 0; t < taps; t++, pos++) {
            const float sample = in[-t];

            acc_fwd += sample * coefs[pos];
            /* mirrored tap: the table is walked from its far end */
            acc_rev += sample * coefs[255 - pos];
        }

        out[phase]              = acc_fwd * scale + bias;
        out[decifactor + phase] = acc_rev * scale + bias;
    }
}
bb17732c00ef
DCA: break out lfe_interpolation_fir() inner loops to a function
mru
parents:
diff
changeset
|
46 |
bb17732c00ef
DCA: break out lfe_interpolation_fir() inner loops to a function
mru
parents:
diff
changeset
|
47 void ff_dcadsp_init(DCADSPContext *s) |
bb17732c00ef
DCA: break out lfe_interpolation_fir() inner loops to a function
mru
parents:
diff
changeset
|
48 { |
bb17732c00ef
DCA: break out lfe_interpolation_fir() inner loops to a function
mru
parents:
diff
changeset
|
49 s->lfe_fir = dca_lfe_fir_c; |
11619 | 50 if (ARCH_ARM) ff_dcadsp_init_arm(s); |
11617
bb17732c00ef
DCA: break out lfe_interpolation_fir() inner loops to a function
mru
parents:
diff
changeset
|
51 } |