Mercurial > mplayer.hg
annotate libao2/fir.h @ 4725:534ef9323eca
MMX part rewritten and 16 tap filter added for better sound qualty
author | anders |
---|---|
date | Sat, 16 Feb 2002 13:08:14 +0000 |
parents | 99dc749591e2 |
children | c2bb05709676 |
rev | line source |
---|---|
3631 | 1 /*============================================================================= |
2 // | |
4049 | 3 // This software has been released under the terms of the GNU Public |
4 // license. See http://www.gnu.org/copyleft/gpl.html for details. | |
3631 | 5 // |
6 // Copyright 2001 Anders Johansson ajh@atri.curtin.edu.au | |
7 // | |
8 //============================================================================= | |
9 */ | |
10 | |
11 #ifndef __FIR_H__ | |
12 #define __FIR_H__ | |
13 | |
4725
534ef9323eca
MMX part rewritten and 16 tap filter added for better sound qualty
anders
parents:
4535
diff
changeset
|
14 /* Fixed-point 16-bit FIR filter. The filter is implemented |
534ef9323eca
MMX part rewritten and 16 tap filter added for better sound qualty
anders
parents:
4535
diff
changeset
|
15 both in C and MMX assembly. The filter consists of one macro |
534ef9323eca
MMX part rewritten and 16 tap filter added for better sound qualty
anders
parents:
4535
diff
changeset
|
16 UPDATE_QUE and one inline function firn. The macro can be used for |
534ef9323eca
MMX part rewritten and 16 tap filter added for better sound qualty
anders
parents:
4535
diff
changeset
|
17 adding new data to the circular buffer used by the filter firn. |
534ef9323eca
MMX part rewritten and 16 tap filter added for better sound qualty
anders
parents:
4535
diff
changeset
|
18 Limitations: max length of n = 16*4 and n must be multiple of 4 (pad |
534ef9323eca
MMX part rewritten and 16 tap filter added for better sound qualty
anders
parents:
4535
diff
changeset
|
19 filter with zeros for other lengths). Sometimes it works with filters |
534ef9323eca
MMX part rewritten and 16 tap filter added for better sound qualty
anders
parents:
4535
diff
changeset
|
20 longer than 4*16 (the problem is overshoot and the accumulated energy |
534ef9323eca
MMX part rewritten and 16 tap filter added for better sound qualty
anders
parents:
4535
diff
changeset
|
21 in the filter taps). */ |
3631 | 22 |
4725
534ef9323eca
MMX part rewritten and 16 tap filter added for better sound qualty
anders
parents:
4535
diff
changeset
|
23 #ifdef HAVE_MMX |
534ef9323eca
MMX part rewritten and 16 tap filter added for better sound qualty
anders
parents:
4535
diff
changeset
|
/* Fixed-point FIR kernel, MMX implementation.
 *
 * Multiplies the 16-bit samples in x with the 16-bit taps in w, four taps
 * per loop pass.  pmaddwd yields two 32-bit pair sums per pass; each pair
 * sum is shifted right by 16 bits (arithmetic shift, so the sign is kept)
 * and added to a two-lane accumulator held in mm1.  The epilogue adds the
 * two lanes together and returns the result.
 *
 *   x  input samples
 *   w  filter taps
 *   n  number of taps; only whole groups of four, counted from the start
 *      of the arrays, are processed (a trailing n%4 remainder is ignored)
 *
 * Returns the accumulated, down-shifted sum of products.
 *
 * Declared static so that every translation unit including this header
 * gets its own definition; a plain C99/C11 `inline` provides no external
 * definition and fails to link whenever the call is not inlined.
 *
 * NOTE(review): mm0/mm1 carry state across the three asm statements and
 * are not declared to the compiler; this relies on the compiler not
 * generating MMX code of its own in between.
 */
static inline int32_t firn(int16_t* x, int16_t* w, int16_t n)
{
  int32_t y; // Output

  // Prologue: clear the accumulator lanes in mm1
  __asm__ __volatile__("pxor %mm1, %mm1\n\t");

  // Main loop: consume four samples and four taps per pass
  while((n-=4)>=0){
    __asm__ __volatile__(
      "movq      (%1), %%mm0   \n\t" // Load x[0..3]
      "pmaddwd   (%0), %%mm0   \n\t" // Two pair sums: x0*w0+x1*w1, x2*w2+x3*w3
      "psrad     $16,  %%mm0   \n\t" // Signed >>16 of each pair sum
      "paddd     %%mm0, %%mm1  \n\t" // Accumulate both lanes
      :
      : "r" (w), "r" (x)
      : "memory");                   // x and w are read through the pointers
    w+=4; x+=4;
  }

  // Epilogue: fold the high lane onto the low lane and fetch the result
  __asm__ __volatile__(
    "movq      %%mm1, %%mm0  \n\t"
    "punpckhdq %%mm1, %%mm0  \n\t"   // Low dword of mm0 = high lane of mm1
    "paddd     %%mm0, %%mm1  \n\t"   // Low dword of mm1 = lane0 + lane1
    "movd      %%mm1, %0     \n\t"   // y = low dword
    "emms                    \n\t"   // Leave MMX state so x87 code can run
    : "=r" (y));
  return y;
}
3631 | 49 |
4725
534ef9323eca
MMX part rewritten and 16 tap filter added for better sound qualty
anders
parents:
4535
diff
changeset
|
50 #else /* HAVE_MMX */ |
534ef9323eca
MMX part rewritten and 16 tap filter added for better sound qualty
anders
parents:
4535
diff
changeset
|
51 |
534ef9323eca
MMX part rewritten and 16 tap filter added for better sound qualty
anders
parents:
4535
diff
changeset
|
/* Fixed-point FIR kernel, portable C implementation.
 *
 * Walks the taps in groups of four, starting with the group that ends at
 * index n-1 and moving towards index 0.  For every group the four 16x16-bit
 * products are summed, the sum is shifted right by 16 bits and the result
 * is added to the accumulator.
 *
 *   x  input samples
 *   w  filter taps
 *   n  number of taps; must be a multiple of 4, otherwise the lowest n%4
 *      elements are never visited
 *
 * Returns the accumulated, down-shifted sum of products (0 when n < 4).
 *
 * Declared static so that every translation unit including this header
 * gets its own definition; a plain C99/C11 `inline` provides no external
 * definition and fails to link whenever the call is not inlined.
 *
 * NOTE(review): the sum of four products is formed in `int`; four products
 * of full-scale magnitude can exceed its range, so callers have to keep
 * the tap energy bounded.  The >> on a negative sum is
 * implementation-defined (arithmetic shift on the usual compilers).
 */
static inline int32_t firn(int16_t* x, int16_t* w, int16_t n)
{
  int32_t y=0;
  while((n-=4) >=0){
    y+=(w[n]*x[n]+w[n+1]*x[n+1]+w[n+2]*x[n+2]+w[n+3]*x[n+3]) >> 16;
  }
  return y;
}
534ef9323eca
MMX part rewritten and 16 tap filter added for better sound qualty
anders
parents:
4535
diff
changeset
|
60 |
534ef9323eca
MMX part rewritten and 16 tap filter added for better sound qualty
anders
parents:
4535
diff
changeset
|
61 #endif /* HAVE_MMX */ |
534ef9323eca
MMX part rewritten and 16 tap filter added for better sound qualty
anders
parents:
4535
diff
changeset
|
62 |
534ef9323eca
MMX part rewritten and 16 tap filter added for better sound qualty
anders
parents:
4535
diff
changeset
|
// Macro to add new data to the circular queue.
//
//   ind  pointer to the new sample (dereferenced once)
//   xq   queue storage; every sample is written twice, at xid and at
//        xid+L, so the queue needs room for 2*L elements
//   xid  queue index lvalue; it is stepped back by one and wrapped with
//        the mask (L-1), which only wraps correctly when L is a power of 2
//
// L has to be defined by the including file.  The index is computed from
// (xid)-1 instead of --xid so that xid is modified only once per
// expression (modifying it twice is undefined behaviour), and the two
// statements are wrapped in braces so they stay together under an
// unbraced if/for.
#define UPDATE_QUE(ind,xq,xid) { \
  (xid)=((xid)-1)&(L-1); \
  (xq)[(xid)]=(xq)[(xid)+L]=*(ind); \
}
534ef9323eca
MMX part rewritten and 16 tap filter added for better sound qualty
anders
parents:
4535
diff
changeset
|
67 |
#ifdef L8
// FIR(x,w,y): run the 8 tap filter over samples x with taps w and store
// the 16-bit result in *y.  Both variants expand to a brace block and all
// arguments are parenthesized, so expressions such as &buf[i] can be
// passed directly.
#ifdef HAVE_MMX
#define FIR(x,w,y) { *(y)=(int16_t)firn((x),(w),8); }
#else /* HAVE_MMX */
// Unrolled loop to speed up execution: two groups of four taps, each
// summed and shifted right by 16 bits before being added together.
// The temporaries carry a fir_ prefix so they cannot collide with the
// names of the macro arguments.
#define FIR(x,w,y){ \
  int16_t fir_lo_ = ((w)[0]*(x)[0]+(w)[1]*(x)[1]+(w)[2]*(x)[2]+(w)[3]*(x)[3]) >> 16; \
  int16_t fir_hi_ = ((w)[4]*(x)[4]+(w)[5]*(x)[5]+(w)[6]*(x)[6]+(w)[7]*(x)[7]) >> 16; \
  (y)[0] = fir_lo_+fir_hi_; \
}
#endif /* HAVE_MMX */
#endif /* L8 */
3631 | 80 |
4725
534ef9323eca
MMX part rewritten and 16 tap filter added for better sound qualty
anders
parents:
4535
diff
changeset
|
#ifdef L16
// FIR(x,w,y): run the 16 tap filter over samples x with taps w via firn
// and store the result, truncated to 16 bits, in *y.  Expands to a brace
// block with parenthesized arguments so expressions such as &buf[i] can
// be passed directly and the macro works with or without a trailing ';'.
#define FIR(x,w,y) { *(y)=(int16_t)firn((x),(w),16); }
#endif /* L16 */
3631 | 84 |
85 #endif /* __FIR_H__ */ |