annotate i386/flacdsp_mmx.c @ 8043:a591c3736fd8 libavcodec

Revert r15653. Was "Copy pts for each raw encoded frame." It causes problems as timestamps management when video sync is zero needs rework in ffmpeg.c.
author benoit
date Thu, 23 Oct 2008 07:30:16 +0000
parents eebc7209c47f
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
986e461dc072 Initial revision
glantau
parents:
diff changeset
1 /*
6030
fb99890ee609 move FLAC mmx dsp to its own file
aurel
parents: 6009
diff changeset
2 * MMX optimized FLAC DSP utils
fb99890ee609 move FLAC mmx dsp to its own file
aurel
parents: 6009
diff changeset
3 * Copyright (c) 2007 Loren Merritt
0
986e461dc072 Initial revision
glantau
parents:
diff changeset
4 *
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3932
diff changeset
5 * This file is part of FFmpeg.
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3932
diff changeset
6 *
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3932
diff changeset
7 * FFmpeg is free software; you can redistribute it and/or
429
718a22dc121f license/copyright change
glantau
parents: 422
diff changeset
8 * modify it under the terms of the GNU Lesser General Public
718a22dc121f license/copyright change
glantau
parents: 422
diff changeset
9 * License as published by the Free Software Foundation; either
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3932
diff changeset
10 * version 2.1 of the License, or (at your option) any later version.
0
986e461dc072 Initial revision
glantau
parents:
diff changeset
11 *
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3932
diff changeset
12 * FFmpeg is distributed in the hope that it will be useful,
0
986e461dc072 Initial revision
glantau
parents:
diff changeset
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
429
718a22dc121f license/copyright change
glantau
parents: 422
diff changeset
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
718a22dc121f license/copyright change
glantau
parents: 422
diff changeset
15 * Lesser General Public License for more details.
0
986e461dc072 Initial revision
glantau
parents:
diff changeset
16 *
429
718a22dc121f license/copyright change
glantau
parents: 422
diff changeset
17 * You should have received a copy of the GNU Lesser General Public
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3932
diff changeset
18 * License along with FFmpeg; if not, write to the Free Software
3036
0b546eab515d Update licensing information: The FSF changed postal address.
diego
parents: 2979
diff changeset
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
0
986e461dc072 Initial revision
glantau
parents:
diff changeset
20 */
986e461dc072 Initial revision
glantau
parents:
diff changeset
21
6763
f7cbb7733146 Use full path for #includes from another directory.
diego
parents: 6755
diff changeset
22 #include "libavutil/x86_cpu.h"
5946
55251379b5b1 make ff_p* vars extern so that they can be used in various *_mmx.c files
aurel
parents: 5933
diff changeset
23 #include "dsputil_mmx.h"
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 2940
diff changeset
24
5737
efa3c1f9259a sse2 version of compute_autocorr().
lorenm
parents: 5602
diff changeset
25 static void apply_welch_window_sse2(const int32_t *data, int len, double *w_data)
efa3c1f9259a sse2 version of compute_autocorr().
lorenm
parents: 5602
diff changeset
26 {
efa3c1f9259a sse2 version of compute_autocorr().
lorenm
parents: 5602
diff changeset
27 double c = 2.0 / (len-1.0);
efa3c1f9259a sse2 version of compute_autocorr().
lorenm
parents: 5602
diff changeset
28 int n2 = len>>1;
6755
33896780c612 Do not misuse long as the size of a register in x86.
ramiro
parents: 6339
diff changeset
29 x86_reg i = -n2*sizeof(int32_t);
33896780c612 Do not misuse long as the size of a register in x86.
ramiro
parents: 6339
diff changeset
30 x86_reg j = n2*sizeof(int32_t);
8031
eebc7209c47f Convert asm keyword into __asm__.
flameeyes
parents: 7157
diff changeset
31 __asm__ volatile(
7157
b76ee34ea184 Realign newlines.
astrange
parents: 7156
diff changeset
32 "movsd %0, %%xmm7 \n\t"
b76ee34ea184 Realign newlines.
astrange
parents: 7156
diff changeset
33 "movapd "MANGLE(ff_pd_1)", %%xmm6 \n\t"
b76ee34ea184 Realign newlines.
astrange
parents: 7156
diff changeset
34 "movapd "MANGLE(ff_pd_2)", %%xmm5 \n\t"
b76ee34ea184 Realign newlines.
astrange
parents: 7156
diff changeset
35 "movlhps %%xmm7, %%xmm7 \n\t"
b76ee34ea184 Realign newlines.
astrange
parents: 7156
diff changeset
36 "subpd %%xmm5, %%xmm7 \n\t"
b76ee34ea184 Realign newlines.
astrange
parents: 7156
diff changeset
37 "addsd %%xmm6, %%xmm7 \n\t"
7156
30e3a8465436 Use MANGLE() instead of memory operands to read globals.
astrange
parents: 6763
diff changeset
38 ::"m"(c)
5737
efa3c1f9259a sse2 version of compute_autocorr().
lorenm
parents: 5602
diff changeset
39 );
6339
0ea2b97aa9f6 use fewer registers in apply_welch_window_sse2
lorenm
parents: 6030
diff changeset
40 #define WELCH(MOVPD, offset)\
8031
eebc7209c47f Convert asm keyword into __asm__.
flameeyes
parents: 7157
diff changeset
41 __asm__ volatile(\
7157
b76ee34ea184 Realign newlines.
astrange
parents: 7156
diff changeset
42 "1: \n\t"\
b76ee34ea184 Realign newlines.
astrange
parents: 7156
diff changeset
43 "movapd %%xmm7, %%xmm1 \n\t"\
b76ee34ea184 Realign newlines.
astrange
parents: 7156
diff changeset
44 "mulpd %%xmm1, %%xmm1 \n\t"\
b76ee34ea184 Realign newlines.
astrange
parents: 7156
diff changeset
45 "movapd %%xmm6, %%xmm0 \n\t"\
b76ee34ea184 Realign newlines.
astrange
parents: 7156
diff changeset
46 "subpd %%xmm1, %%xmm0 \n\t"\
b76ee34ea184 Realign newlines.
astrange
parents: 7156
diff changeset
47 "pshufd $0x4e, %%xmm0, %%xmm1 \n\t"\
b76ee34ea184 Realign newlines.
astrange
parents: 7156
diff changeset
48 "cvtpi2pd (%3,%0), %%xmm2 \n\t"\
b76ee34ea184 Realign newlines.
astrange
parents: 7156
diff changeset
49 "cvtpi2pd "#offset"*4(%3,%1), %%xmm3 \n\t"\
b76ee34ea184 Realign newlines.
astrange
parents: 7156
diff changeset
50 "mulpd %%xmm0, %%xmm2 \n\t"\
b76ee34ea184 Realign newlines.
astrange
parents: 7156
diff changeset
51 "mulpd %%xmm1, %%xmm3 \n\t"\
b76ee34ea184 Realign newlines.
astrange
parents: 7156
diff changeset
52 "movapd %%xmm2, (%2,%0,2) \n\t"\
6339
0ea2b97aa9f6 use fewer registers in apply_welch_window_sse2
lorenm
parents: 6030
diff changeset
53 MOVPD" %%xmm3, "#offset"*8(%2,%1,2) \n\t"\
7157
b76ee34ea184 Realign newlines.
astrange
parents: 7156
diff changeset
54 "subpd %%xmm5, %%xmm7 \n\t"\
b76ee34ea184 Realign newlines.
astrange
parents: 7156
diff changeset
55 "sub $8, %1 \n\t"\
b76ee34ea184 Realign newlines.
astrange
parents: 7156
diff changeset
56 "add $8, %0 \n\t"\
b76ee34ea184 Realign newlines.
astrange
parents: 7156
diff changeset
57 "jl 1b \n\t"\
5737
efa3c1f9259a sse2 version of compute_autocorr().
lorenm
parents: 5602
diff changeset
58 :"+&r"(i), "+&r"(j)\
6339
0ea2b97aa9f6 use fewer registers in apply_welch_window_sse2
lorenm
parents: 6030
diff changeset
59 :"r"(w_data+n2), "r"(data+n2)\
5737
efa3c1f9259a sse2 version of compute_autocorr().
lorenm
parents: 5602
diff changeset
60 );
efa3c1f9259a sse2 version of compute_autocorr().
lorenm
parents: 5602
diff changeset
61 if(len&1)
6339
0ea2b97aa9f6 use fewer registers in apply_welch_window_sse2
lorenm
parents: 6030
diff changeset
62 WELCH("movupd", -1)
5737
efa3c1f9259a sse2 version of compute_autocorr().
lorenm
parents: 5602
diff changeset
63 else
6339
0ea2b97aa9f6 use fewer registers in apply_welch_window_sse2
lorenm
parents: 6030
diff changeset
64 WELCH("movapd", -2)
5737
efa3c1f9259a sse2 version of compute_autocorr().
lorenm
parents: 5602
diff changeset
65 #undef WELCH
efa3c1f9259a sse2 version of compute_autocorr().
lorenm
parents: 5602
diff changeset
66 }
efa3c1f9259a sse2 version of compute_autocorr().
lorenm
parents: 5602
diff changeset
67
6030
fb99890ee609 move FLAC mmx dsp to its own file
aurel
parents: 6009
diff changeset
68 void ff_flac_compute_autocorr_sse2(const int32_t *data, int len, int lag,
fb99890ee609 move FLAC mmx dsp to its own file
aurel
parents: 6009
diff changeset
69 double *autoc)
5737
efa3c1f9259a sse2 version of compute_autocorr().
lorenm
parents: 5602
diff changeset
70 {
efa3c1f9259a sse2 version of compute_autocorr().
lorenm
parents: 5602
diff changeset
71 double tmp[len + lag + 2];
efa3c1f9259a sse2 version of compute_autocorr().
lorenm
parents: 5602
diff changeset
72 double *data1 = tmp + lag;
efa3c1f9259a sse2 version of compute_autocorr().
lorenm
parents: 5602
diff changeset
73 int j;
efa3c1f9259a sse2 version of compute_autocorr().
lorenm
parents: 5602
diff changeset
74
6755
33896780c612 Do not misuse long as the size of a register in x86.
ramiro
parents: 6339
diff changeset
75 if((x86_reg)data1 & 15)
5737
efa3c1f9259a sse2 version of compute_autocorr().
lorenm
parents: 5602
diff changeset
76 data1++;
efa3c1f9259a sse2 version of compute_autocorr().
lorenm
parents: 5602
diff changeset
77
efa3c1f9259a sse2 version of compute_autocorr().
lorenm
parents: 5602
diff changeset
78 apply_welch_window_sse2(data, len, data1);
efa3c1f9259a sse2 version of compute_autocorr().
lorenm
parents: 5602
diff changeset
79
efa3c1f9259a sse2 version of compute_autocorr().
lorenm
parents: 5602
diff changeset
80 for(j=0; j<lag; j++)
efa3c1f9259a sse2 version of compute_autocorr().
lorenm
parents: 5602
diff changeset
81 data1[j-lag]= 0.0;
efa3c1f9259a sse2 version of compute_autocorr().
lorenm
parents: 5602
diff changeset
82 data1[len] = 0.0;
efa3c1f9259a sse2 version of compute_autocorr().
lorenm
parents: 5602
diff changeset
83
efa3c1f9259a sse2 version of compute_autocorr().
lorenm
parents: 5602
diff changeset
84 for(j=0; j<lag; j+=2){
6755
33896780c612 Do not misuse long as the size of a register in x86.
ramiro
parents: 6339
diff changeset
85 x86_reg i = -len*sizeof(double);
5737
efa3c1f9259a sse2 version of compute_autocorr().
lorenm
parents: 5602
diff changeset
86 if(j == lag-2) {
8031
eebc7209c47f Convert asm keyword into __asm__.
flameeyes
parents: 7157
diff changeset
87 __asm__ volatile(
7157
b76ee34ea184 Realign newlines.
astrange
parents: 7156
diff changeset
88 "movsd "MANGLE(ff_pd_1)", %%xmm0 \n\t"
b76ee34ea184 Realign newlines.
astrange
parents: 7156
diff changeset
89 "movsd "MANGLE(ff_pd_1)", %%xmm1 \n\t"
b76ee34ea184 Realign newlines.
astrange
parents: 7156
diff changeset
90 "movsd "MANGLE(ff_pd_1)", %%xmm2 \n\t"
b76ee34ea184 Realign newlines.
astrange
parents: 7156
diff changeset
91 "1: \n\t"
b76ee34ea184 Realign newlines.
astrange
parents: 7156
diff changeset
92 "movapd (%4,%0), %%xmm3 \n\t"
b76ee34ea184 Realign newlines.
astrange
parents: 7156
diff changeset
93 "movupd -8(%5,%0), %%xmm4 \n\t"
b76ee34ea184 Realign newlines.
astrange
parents: 7156
diff changeset
94 "movapd (%5,%0), %%xmm5 \n\t"
b76ee34ea184 Realign newlines.
astrange
parents: 7156
diff changeset
95 "mulpd %%xmm3, %%xmm4 \n\t"
b76ee34ea184 Realign newlines.
astrange
parents: 7156
diff changeset
96 "mulpd %%xmm3, %%xmm5 \n\t"
b76ee34ea184 Realign newlines.
astrange
parents: 7156
diff changeset
97 "mulpd -16(%5,%0), %%xmm3 \n\t"
b76ee34ea184 Realign newlines.
astrange
parents: 7156
diff changeset
98 "addpd %%xmm4, %%xmm1 \n\t"
b76ee34ea184 Realign newlines.
astrange
parents: 7156
diff changeset
99 "addpd %%xmm5, %%xmm0 \n\t"
b76ee34ea184 Realign newlines.
astrange
parents: 7156
diff changeset
100 "addpd %%xmm3, %%xmm2 \n\t"
b76ee34ea184 Realign newlines.
astrange
parents: 7156
diff changeset
101 "add $16, %0 \n\t"
b76ee34ea184 Realign newlines.
astrange
parents: 7156
diff changeset
102 "jl 1b \n\t"
b76ee34ea184 Realign newlines.
astrange
parents: 7156
diff changeset
103 "movhlps %%xmm0, %%xmm3 \n\t"
b76ee34ea184 Realign newlines.
astrange
parents: 7156
diff changeset
104 "movhlps %%xmm1, %%xmm4 \n\t"
b76ee34ea184 Realign newlines.
astrange
parents: 7156
diff changeset
105 "movhlps %%xmm2, %%xmm5 \n\t"
b76ee34ea184 Realign newlines.
astrange
parents: 7156
diff changeset
106 "addsd %%xmm3, %%xmm0 \n\t"
b76ee34ea184 Realign newlines.
astrange
parents: 7156
diff changeset
107 "addsd %%xmm4, %%xmm1 \n\t"
b76ee34ea184 Realign newlines.
astrange
parents: 7156
diff changeset
108 "addsd %%xmm5, %%xmm2 \n\t"
b76ee34ea184 Realign newlines.
astrange
parents: 7156
diff changeset
109 "movsd %%xmm0, %1 \n\t"
b76ee34ea184 Realign newlines.
astrange
parents: 7156
diff changeset
110 "movsd %%xmm1, %2 \n\t"
b76ee34ea184 Realign newlines.
astrange
parents: 7156
diff changeset
111 "movsd %%xmm2, %3 \n\t"
5737
efa3c1f9259a sse2 version of compute_autocorr().
lorenm
parents: 5602
diff changeset
112 :"+&r"(i), "=m"(autoc[j]), "=m"(autoc[j+1]), "=m"(autoc[j+2])
7156
30e3a8465436 Use MANGLE() instead of memory operands to read globals.
astrange
parents: 6763
diff changeset
113 :"r"(data1+len), "r"(data1+len-j)
5737
efa3c1f9259a sse2 version of compute_autocorr().
lorenm
parents: 5602
diff changeset
114 );
efa3c1f9259a sse2 version of compute_autocorr().
lorenm
parents: 5602
diff changeset
115 } else {
8031
eebc7209c47f Convert asm keyword into __asm__.
flameeyes
parents: 7157
diff changeset
116 __asm__ volatile(
7157
b76ee34ea184 Realign newlines.
astrange
parents: 7156
diff changeset
117 "movsd "MANGLE(ff_pd_1)", %%xmm0 \n\t"
b76ee34ea184 Realign newlines.
astrange
parents: 7156
diff changeset
118 "movsd "MANGLE(ff_pd_1)", %%xmm1 \n\t"
b76ee34ea184 Realign newlines.
astrange
parents: 7156
diff changeset
119 "1: \n\t"
b76ee34ea184 Realign newlines.
astrange
parents: 7156
diff changeset
120 "movapd (%3,%0), %%xmm3 \n\t"
b76ee34ea184 Realign newlines.
astrange
parents: 7156
diff changeset
121 "movupd -8(%4,%0), %%xmm4 \n\t"
b76ee34ea184 Realign newlines.
astrange
parents: 7156
diff changeset
122 "mulpd %%xmm3, %%xmm4 \n\t"
b76ee34ea184 Realign newlines.
astrange
parents: 7156
diff changeset
123 "mulpd (%4,%0), %%xmm3 \n\t"
b76ee34ea184 Realign newlines.
astrange
parents: 7156
diff changeset
124 "addpd %%xmm4, %%xmm1 \n\t"
b76ee34ea184 Realign newlines.
astrange
parents: 7156
diff changeset
125 "addpd %%xmm3, %%xmm0 \n\t"
b76ee34ea184 Realign newlines.
astrange
parents: 7156
diff changeset
126 "add $16, %0 \n\t"
b76ee34ea184 Realign newlines.
astrange
parents: 7156
diff changeset
127 "jl 1b \n\t"
b76ee34ea184 Realign newlines.
astrange
parents: 7156
diff changeset
128 "movhlps %%xmm0, %%xmm3 \n\t"
b76ee34ea184 Realign newlines.
astrange
parents: 7156
diff changeset
129 "movhlps %%xmm1, %%xmm4 \n\t"
b76ee34ea184 Realign newlines.
astrange
parents: 7156
diff changeset
130 "addsd %%xmm3, %%xmm0 \n\t"
b76ee34ea184 Realign newlines.
astrange
parents: 7156
diff changeset
131 "addsd %%xmm4, %%xmm1 \n\t"
b76ee34ea184 Realign newlines.
astrange
parents: 7156
diff changeset
132 "movsd %%xmm0, %1 \n\t"
b76ee34ea184 Realign newlines.
astrange
parents: 7156
diff changeset
133 "movsd %%xmm1, %2 \n\t"
5737
efa3c1f9259a sse2 version of compute_autocorr().
lorenm
parents: 5602
diff changeset
134 :"+&r"(i), "=m"(autoc[j]), "=m"(autoc[j+1])
7156
30e3a8465436 Use MANGLE() instead of memory operands to read globals.
astrange
parents: 6763
diff changeset
135 :"r"(data1+len), "r"(data1+len-j)
5737
efa3c1f9259a sse2 version of compute_autocorr().
lorenm
parents: 5602
diff changeset
136 );
efa3c1f9259a sse2 version of compute_autocorr().
lorenm
parents: 5602
diff changeset
137 }
efa3c1f9259a sse2 version of compute_autocorr().
lorenm
parents: 5602
diff changeset
138 }
efa3c1f9259a sse2 version of compute_autocorr().
lorenm
parents: 5602
diff changeset
139 }