annotate x86/mpegaudiodec_mmx.c @ 12530:63edd10ad4bc libavcodec tip

Try to fix crashes introduced by r25218. r25218 made assumptions about the existence of past reference frames that weren't necessarily true.
author darkshikari
date Tue, 28 Sep 2010 09:06:22 +0000
parents 9fef0a8ddd63
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
11939
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
1 /*
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
2 * MMX optimized MP3 decoding functions
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
3 * Copyright (c) 2010 Vitor Sessak
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
4 *
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
5 * This file is part of FFmpeg.
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
6 *
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
7 * FFmpeg is free software; you can redistribute it and/or
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
8 * modify it under the terms of the GNU Lesser General Public
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
9 * License as published by the Free Software Foundation; either
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
10 * version 2.1 of the License, or (at your option) any later version.
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
11 *
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
12 * FFmpeg is distributed in the hope that it will be useful,
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
15 * Lesser General Public License for more details.
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
16 *
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
17 * You should have received a copy of the GNU Lesser General Public
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
18 * License along with FFmpeg; if not, write to the Free Software
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
20 */
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
21
12475
9fef0a8ddd63 Move mm_support() from libavcodec to libavutil, make it a public
stefano
parents: 12456
diff changeset
22 #include "libavutil/cpu.h"
11939
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
23 #include "libavutil/x86_cpu.h"
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
24
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
25 #define CONFIG_FLOAT 1
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
26 #include "libavcodec/mpegaudio.h"
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
27
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
/* Multiply-accumulate: rt += ra * rb. */
#define MACS(rt, ra, rb) ((rt) += (ra) * (rb))
/* Multiply-subtract: rt -= ra * rb. */
#define MLSS(rt, ra, rb) ((rt) -= (ra) * (rb))

/*
 * Apply op(sum, w[k * 64], p[k * 64]) for k = 0..7, i.e. combine eight
 * coefficient/sample pairs that lie 64 elements apart into sum.
 * op is expected to be MACS or MLSS.
 *
 * Wrapped in do { } while (0) so that "SUM8(...);" behaves as a single
 * statement, including as the body of an unbraced if/else.
 */
#define SUM8(op, sum, w, p)                 \
do {                                        \
    op(sum, (w)[0 * 64], (p)[0 * 64]);      \
    op(sum, (w)[1 * 64], (p)[1 * 64]);      \
    op(sum, (w)[2 * 64], (p)[2 * 64]);      \
    op(sum, (w)[3 * 64], (p)[3 * 64]);      \
    op(sum, (w)[4 * 64], (p)[4 * 64]);      \
    op(sum, (w)[5 * 64], (p)[5 * 64]);      \
    op(sum, (w)[6 * 64], (p)[6 * 64]);      \
    op(sum, (w)[7 * 64], (p)[7 * 64]);      \
} while (0)
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
42
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
43 static void apply_window(const float *buf, const float *win1,
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
44 const float *win2, float *sum1, float *sum2, int len)
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
45 {
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
46 x86_reg count = - 4*len;
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
47 const float *win1a = win1+len;
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
48 const float *win2a = win2+len;
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
49 const float *bufa = buf+len;
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
50 float *sum1a = sum1+len;
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
51 float *sum2a = sum2+len;
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
52
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
53
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
54 #define MULT(a, b) \
11941
9e12316c508a Fix asm constraints in apply_window()
vitor
parents: 11939
diff changeset
55 "movaps " #a "(%1,%0), %%xmm1 \n\t" \
9e12316c508a Fix asm constraints in apply_window()
vitor
parents: 11939
diff changeset
56 "movaps " #a "(%3,%0), %%xmm2 \n\t" \
11939
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
57 "mulps %%xmm2, %%xmm1 \n\t" \
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
58 "subps %%xmm1, %%xmm0 \n\t" \
11941
9e12316c508a Fix asm constraints in apply_window()
vitor
parents: 11939
diff changeset
59 "mulps " #b "(%2,%0), %%xmm2 \n\t" \
11939
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
60 "subps %%xmm2, %%xmm4 \n\t" \
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
61
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
62 __asm__ volatile(
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
63 "1: \n\t"
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
64 "xorps %%xmm0, %%xmm0 \n\t"
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
65 "xorps %%xmm4, %%xmm4 \n\t"
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
66
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
67 MULT( 0, 0)
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
68 MULT( 256, 64)
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
69 MULT( 512, 128)
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
70 MULT( 768, 192)
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
71 MULT(1024, 256)
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
72 MULT(1280, 320)
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
73 MULT(1536, 384)
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
74 MULT(1792, 448)
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
75
11941
9e12316c508a Fix asm constraints in apply_window()
vitor
parents: 11939
diff changeset
76 "movaps %%xmm0, (%4,%0) \n\t"
9e12316c508a Fix asm constraints in apply_window()
vitor
parents: 11939
diff changeset
77 "movaps %%xmm4, (%5,%0) \n\t"
11942
ce4b71c171aa Fix compilation on x64.
vitor
parents: 11941
diff changeset
78 "add $16, %0 \n\t"
11939
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
79 "jl 1b \n\t"
11941
9e12316c508a Fix asm constraints in apply_window()
vitor
parents: 11939
diff changeset
80 :"+&r"(count)
9e12316c508a Fix asm constraints in apply_window()
vitor
parents: 11939
diff changeset
81 :"r"(win1a), "r"(win2a), "r"(bufa), "r"(sum1a), "r"(sum2a)
11939
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
82 );
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
83
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
84 #undef MULT
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
85 }
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
86
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
87 static void apply_window_mp3(float *in, float *win, int *unused, float *out,
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
88 int incr)
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
89 {
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
90 LOCAL_ALIGNED_16(float, suma, [17]);
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
91 LOCAL_ALIGNED_16(float, sumb, [17]);
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
92 LOCAL_ALIGNED_16(float, sumc, [17]);
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
93 LOCAL_ALIGNED_16(float, sumd, [17]);
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
94
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
95 float sum;
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
96
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
97 /* copy to avoid wrap */
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
98 memcpy(in + 512, in, 32 * sizeof(*in));
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
99
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
100 apply_window(in + 16, win , win + 512, suma, sumc, 16);
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
101 apply_window(in + 32, win + 48, win + 640, sumb, sumd, 16);
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
102
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
103 SUM8(MACS, suma[0], win + 32, in + 48);
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
104
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
105 sumc[ 0] = 0;
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
106 sumb[16] = 0;
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
107 sumd[16] = 0;
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
108
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
109 #define SUMS(suma, sumb, sumc, sumd, out1, out2) \
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
110 "movups " #sumd "(%4), %%xmm0 \n\t" \
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
111 "shufps $0x1b, %%xmm0, %%xmm0 \n\t" \
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
112 "subps " #suma "(%1), %%xmm0 \n\t" \
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
113 "movaps %%xmm0," #out1 "(%0) \n\t" \
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
114 \
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
115 "movups " #sumc "(%3), %%xmm0 \n\t" \
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
116 "shufps $0x1b, %%xmm0, %%xmm0 \n\t" \
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
117 "addps " #sumb "(%2), %%xmm0 \n\t" \
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
118 "movaps %%xmm0," #out2 "(%0) \n\t"
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
119
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
120 if (incr == 1) {
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
121 __asm__ volatile(
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
122 SUMS( 0, 48, 4, 52, 0, 112)
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
123 SUMS(16, 32, 20, 36, 16, 96)
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
124 SUMS(32, 16, 36, 20, 32, 80)
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
125 SUMS(48, 0, 52, 4, 48, 64)
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
126
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
127 :"+&r"(out)
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
128 :"r"(&suma[0]), "r"(&sumb[0]), "r"(&sumc[0]), "r"(&sumd[0])
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
129 :"memory"
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
130 );
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
131 out += 16*incr;
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
132 } else {
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
133 int j;
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
134 float *out2 = out + 32 * incr;
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
135 out[0 ] = -suma[ 0];
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
136 out += incr;
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
137 out2 -= incr;
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
138 for(j=1;j<16;j++) {
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
139 *out = -suma[ j] + sumd[16-j];
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
140 *out2 = sumb[16-j] + sumc[ j];
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
141 out += incr;
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
142 out2 -= incr;
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
143 }
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
144 }
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
145
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
146 sum = 0;
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
147 SUM8(MLSS, sum, win + 16 + 32, in + 32);
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
148 *out = sum;
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
149 }
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
150
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
151 void ff_mpegaudiodec_init_mmx(MPADecodeContext *s)
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
152 {
12475
9fef0a8ddd63 Move mm_support() from libavcodec to libavutil, make it a public
stefano
parents: 12456
diff changeset
153 int mm_flags = av_get_cpu_flags();
11939
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
154
12456
a5ddb39627fd Rename FF_MM_ symbols related to CPU features flags as AV_CPU_FLAG_
stefano
parents: 12414
diff changeset
155 if (mm_flags & AV_CPU_FLAG_SSE2) {
11939
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
156 s->apply_window_mp3 = apply_window_mp3;
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
157 }
ef338bd70180 SSE-optimized MP3 floating point windowing functions
vitor
parents:
diff changeset
158 }