annotate vp56dsp.c @ 12197:fbf4d5b1b664 libavcodec

Remove FF_MM_SSE2/3 flags for CPUs where this is generally not faster than regular MMX code. Examples of this are the Core1 CPU. Instead, set a new flag, FF_MM_SSE2/3SLOW, which can be checked for particular SSE2/3 functions that have been checked specifically on such CPUs and are actually faster than their MMX counterparts. In addition, use this flag to enable particular VP8 and LPC SSE2 functions that are faster than their MMX counterparts. Based on a patch by Loren Merritt <lorenm AT u washington edu>.
author rbultje
date Mon, 19 Jul 2010 22:38:23 +0000
parents 1c6d78234e67
children 9f06475db098
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
11665
85ee3d14b906 VP56: move vp56_edge_filter to new VP56DSPContext
mru
parents:
diff changeset
1 /*
85ee3d14b906 VP56: move vp56_edge_filter to new VP56DSPContext
mru
parents:
diff changeset
2 * Copyright (c) 2006 Aurelien Jacobs <aurel@gnuage.org>
85ee3d14b906 VP56: move vp56_edge_filter to new VP56DSPContext
mru
parents:
diff changeset
3 * Copyright (c) 2010 Mans Rullgard <mans@mansr.com>
85ee3d14b906 VP56: move vp56_edge_filter to new VP56DSPContext
mru
parents:
diff changeset
4 *
85ee3d14b906 VP56: move vp56_edge_filter to new VP56DSPContext
mru
parents:
diff changeset
5 * This file is part of FFmpeg.
85ee3d14b906 VP56: move vp56_edge_filter to new VP56DSPContext
mru
parents:
diff changeset
6 *
85ee3d14b906 VP56: move vp56_edge_filter to new VP56DSPContext
mru
parents:
diff changeset
7 * FFmpeg is free software; you can redistribute it and/or
85ee3d14b906 VP56: move vp56_edge_filter to new VP56DSPContext
mru
parents:
diff changeset
8 * modify it under the terms of the GNU Lesser General Public
85ee3d14b906 VP56: move vp56_edge_filter to new VP56DSPContext
mru
parents:
diff changeset
9 * License as published by the Free Software Foundation; either
85ee3d14b906 VP56: move vp56_edge_filter to new VP56DSPContext
mru
parents:
diff changeset
10 * version 2.1 of the License, or (at your option) any later version.
85ee3d14b906 VP56: move vp56_edge_filter to new VP56DSPContext
mru
parents:
diff changeset
11 *
85ee3d14b906 VP56: move vp56_edge_filter to new VP56DSPContext
mru
parents:
diff changeset
12 * FFmpeg is distributed in the hope that it will be useful,
85ee3d14b906 VP56: move vp56_edge_filter to new VP56DSPContext
mru
parents:
diff changeset
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
85ee3d14b906 VP56: move vp56_edge_filter to new VP56DSPContext
mru
parents:
diff changeset
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
85ee3d14b906 VP56: move vp56_edge_filter to new VP56DSPContext
mru
parents:
diff changeset
15 * Lesser General Public License for more details.
85ee3d14b906 VP56: move vp56_edge_filter to new VP56DSPContext
mru
parents:
diff changeset
16 *
85ee3d14b906 VP56: move vp56_edge_filter to new VP56DSPContext
mru
parents:
diff changeset
17 * You should have received a copy of the GNU Lesser General Public
85ee3d14b906 VP56: move vp56_edge_filter to new VP56DSPContext
mru
parents:
diff changeset
18 * License along with FFmpeg; if not, write to the Free Software
85ee3d14b906 VP56: move vp56_edge_filter to new VP56DSPContext
mru
parents:
diff changeset
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
85ee3d14b906 VP56: move vp56_edge_filter to new VP56DSPContext
mru
parents:
diff changeset
20 */
85ee3d14b906 VP56: move vp56_edge_filter to new VP56DSPContext
mru
parents:
diff changeset
21
85ee3d14b906 VP56: move vp56_edge_filter to new VP56DSPContext
mru
parents:
diff changeset
22 #include <stdint.h>
85ee3d14b906 VP56: move vp56_edge_filter to new VP56DSPContext
mru
parents:
diff changeset
23 #include "avcodec.h"
85ee3d14b906 VP56: move vp56_edge_filter to new VP56DSPContext
mru
parents:
diff changeset
24 #include "vp56dsp.h"
85ee3d14b906 VP56: move vp56_edge_filter to new VP56DSPContext
mru
parents:
diff changeset
25
85ee3d14b906 VP56: move vp56_edge_filter to new VP56DSPContext
mru
parents:
diff changeset
/**
 * VP5 adjustment of a raw edge-filter correction value.
 *
 * Gives results very similar to the vp6 version, except in a few cases.
 *
 * Written without branches. For t >= 0 the result is:
 *   - v itself                 when |v| <= t,
 *   - sign(v) * (2*t - |v|)    when t < |v| < 2*t,
 *   - 0                        when |v| >= 2*t.
 *
 * The sign masks are built with "x >> 31", which assumes a 32-bit int and
 * an arithmetic right shift of negative values.
 *
 * @param v raw correction value (may be negative)
 * @param t filter threshold
 * @return the adjusted correction, carrying the sign of v
 */
static int vp5_adjust(int v, int t)
{
    int s2, s1 = v >> 31;   /* s1: all ones if v is negative, else 0 */
    v ^= s1;                /* these two steps compute v = |v| */
    v -= s1;
    v *= v < 2*t;           /* zero the magnitude once it reaches 2*t */
    v -= t;
    s2 = v >> 31;           /* s2: sign mask of (magnitude - t) */
    v ^= s2;                /* these two steps compute |magnitude - t| */
    v -= s2;
    v  = t - v;             /* triangle shape: t - |magnitude - t| */
    v += s1;                /* these two steps restore the sign of the input */
    v ^= s1;
    return v;
}
85ee3d14b906 VP56: move vp56_edge_filter to new VP56DSPContext
mru
parents:
diff changeset
42
85ee3d14b906 VP56: move vp56_edge_filter to new VP56DSPContext
mru
parents:
diff changeset
/**
 * VP6 adjustment of a raw edge-filter correction value.
 *
 * For t >= 1 the result is:
 *   - sign(v) * (2*t - |v|)    when t < |v| < 2*t,
 *   - v unchanged              otherwise.
 *
 * The sign mask is built with "v >> 31", which assumes a 32-bit int and an
 * arithmetic right shift of negative values.
 *
 * @param v raw correction value (may be negative)
 * @param t filter threshold
 * @return the adjusted correction, carrying the sign of v
 */
static int vp6_adjust(int v, int t)
{
    int V = v, s = v >> 31; /* s: all ones if v is negative, else 0 */
    V ^= s;                 /* these two steps compute V = |v| */
    V -= s;
    /* Single unsigned comparison standing in for the two-sided test
     * "!(t < V && V < 2*t)": a negative V-t-1 wraps to a huge value. */
    if ((unsigned)(V - t - 1) >= (unsigned)(t - 1))
        return v;
    V = 2*t - V;
    V += s;                 /* these two steps restore the sign of the input */
    V ^= s;
    return V;
}
85ee3d14b906 VP56: move vp56_edge_filter to new VP56DSPContext
mru
parents:
diff changeset
55
85ee3d14b906 VP56: move vp56_edge_filter to new VP56DSPContext
mru
parents:
diff changeset
56
85ee3d14b906 VP56: move vp56_edge_filter to new VP56DSPContext
mru
parents:
diff changeset
/*
 * Define static void <pfx>_edge_filter_<suf>(uint8_t *yuv, int stride, int t).
 *
 * The generated function visits 12 positions, stepping yuv by line_inc
 * after each one. At every position it reads the four samples at offsets
 * -2*pix_inc, -pix_inc, 0 and +pix_inc, derives a correction v from them,
 * passes it through <pfx>_adjust(v, t), then adds it to the sample at
 * -pix_inc and subtracts it from the sample at 0. Both stores go through
 * av_clip_uint8().
 *
 * pix_inc and line_inc are expanded inside the function body, so they may
 * refer to its "stride" parameter. They are parenthesized in the expansion
 * so that expression arguments keep their intended precedence.
 */
#define VP56_EDGE_FILTER(pfx, suf, pix_inc, line_inc)                       \
static void pfx##_edge_filter_##suf(uint8_t *yuv, int stride, int t)        \
{                                                                           \
    int pix2_inc = 2 * (pix_inc);                                           \
    int i, v;                                                               \
                                                                            \
    for (i=0; i<12; i++) {                                                  \
        v = (yuv[-pix2_inc] + 3*(yuv[0]-yuv[-(pix_inc)]) - yuv[(pix_inc)]   \
             + 4) >> 3;                                                     \
        v = pfx##_adjust(v, t);                                             \
        yuv[-(pix_inc)] = av_clip_uint8(yuv[-(pix_inc)] + v);               \
        yuv[0] = av_clip_uint8(yuv[0] - v);                                 \
        yuv += (line_inc);                                                  \
    }                                                                       \
}

/* "hor": filtered samples are adjacent in memory, the loop steps by stride.
 * "ver": filtered samples are stride apart, the loop steps by one sample. */
VP56_EDGE_FILTER(vp5, hor, 1, stride)
VP56_EDGE_FILTER(vp5, ver, stride, 1)
VP56_EDGE_FILTER(vp6, hor, 1, stride)
VP56_EDGE_FILTER(vp6, ver, stride, 1)
85ee3d14b906 VP56: move vp56_edge_filter to new VP56DSPContext
mru
parents:
diff changeset
76
85ee3d14b906 VP56: move vp56_edge_filter to new VP56DSPContext
mru
parents:
diff changeset
77 void ff_vp56dsp_init(VP56DSPContext *s, enum CodecID codec)
85ee3d14b906 VP56: move vp56_edge_filter to new VP56DSPContext
mru
parents:
diff changeset
78 {
85ee3d14b906 VP56: move vp56_edge_filter to new VP56DSPContext
mru
parents:
diff changeset
79 if (codec == CODEC_ID_VP5) {
85ee3d14b906 VP56: move vp56_edge_filter to new VP56DSPContext
mru
parents:
diff changeset
80 s->edge_filter_hor = vp5_edge_filter_hor;
85ee3d14b906 VP56: move vp56_edge_filter to new VP56DSPContext
mru
parents:
diff changeset
81 s->edge_filter_ver = vp5_edge_filter_ver;
85ee3d14b906 VP56: move vp56_edge_filter to new VP56DSPContext
mru
parents:
diff changeset
82 } else {
85ee3d14b906 VP56: move vp56_edge_filter to new VP56DSPContext
mru
parents:
diff changeset
83 s->edge_filter_hor = vp6_edge_filter_hor;
85ee3d14b906 VP56: move vp56_edge_filter to new VP56DSPContext
mru
parents:
diff changeset
84 s->edge_filter_ver = vp6_edge_filter_ver;
85ee3d14b906 VP56: move vp56_edge_filter to new VP56DSPContext
mru
parents:
diff changeset
85 }
11666
1c6d78234e67 ARM: NEON optimised VP6 edge filter
mru
parents: 11665
diff changeset
86
1c6d78234e67 ARM: NEON optimised VP6 edge filter
mru
parents: 11665
diff changeset
87 if (ARCH_ARM) ff_vp56dsp_init_arm(s, codec);
11665
85ee3d14b906 VP56: move vp56_edge_filter to new VP56DSPContext
mru
parents:
diff changeset
88 }