changeset 4725:534ef9323eca

MMX part rewritten and 16 tap filter added for better sound quality
author anders
date Sat, 16 Feb 2002 13:08:14 +0000
parents 798fdbcef121
children 9ff121145b20
files libao2/filter.h libao2/fir.h libao2/pl_resample.c
diffstat 3 files changed, 203 insertions(+), 125 deletions(-) [+]
line wrap: on
line diff
--- a/libao2/filter.h	Sat Feb 16 13:06:45 2002 +0000
+++ b/libao2/filter.h	Sat Feb 16 13:08:14 2002 +0000
@@ -208,3 +208,134 @@
 -5, 53, -279, 32759, 289, -55, 6, 0, \
 -2, 18, -94, 32767, 95, -18, 2, 0, \
 }
+
+#define W16 {\
+0, 0, -1, 3, -9, 21, -47, 119,32764, -118, 47, -21, 9, -3, 1, 0, \
+0, 0, -3, 10, -27, 65, -143, 361,32757, -352, 141, -63, 27, -10, 3, 0, \
+0, 1, -5, 17, -46, 108, -240, 607,32743, -581, 233, -105, 44, -16, 4, -1, \
+0, 1, -6, 23, -65, 153, -338, 857,32722, -805, 324, -146, 61, -22, 6, -1, \
+0, 1, -8, 30, -84, 197, -438, 1112,32695, -1025, 413, -187, 78, -28, 8, -1, \
+0, 2, -10, 37, -103, 243, -538, 1370,32661, -1241, 502, -226, 95, -34, 9, -2, \
+0, 2, -12, 44, -122, 288, -639, 1632,32620, -1452, 588, -265, 111, -40, 11, -2, \
+0, 3, -14, 52, -142, 334, -740, 1898,32572, -1659, 674, -304, 127, -45, 12, -2, \
+0, 3, -17, 59, -162, 381, -843, 2168,32517, -1861, 758, -342, 143, -51, 14, -2, \
+0, 3, -19, 66, -182, 427, -947, 2442,32455, -2058, 840, -379, 158, -56, 15, -2, \
+0, 4, -21, 74, -202, 474, -1051, 2720,32387, -2251, 921, -415, 173, -61, 16, -3, \
+0, 4, -23, 81, -222, 521, -1156, 3001,32312, -2440, 1000, -451, 188, -66, 18, -3, \
+0, 5, -25, 89, -243, 569, -1261, 3285,32230, -2623, 1077, -486, 202, -71, 19, -3, \
+0, 5, -28, 97, -263, 617, -1367, 3574,32142, -2802, 1153, -520, 216, -76, 20, -3, \
+0, 5, -30, 104, -284, 665, -1474, 3865,32047, -2976, 1228, -553, 230, -81, 21, -3, \
+0, 6, -32, 112, -305, 713, -1581, 4160,31945, -3145, 1300, -586, 243, -85, 22, -4, \
+0, 6, -34, 120, -326, 761, -1688, 4458,31837, -3310, 1371, -617, 256, -90, 24, -4, \
+0, 7, -37, 128, -347, 810, -1796, 4760,31722, -3470, 1440, -648, 269, -94, 25, -4, \
+0, 7, -39, 136, -368, 858, -1903, 5064,31601, -3625, 1507, -679, 281, -98, 26, -4, \
+0, 8, -42, 144, -389, 907, -2011, 5371,31474, -3775, 1573, -708, 293, -102, 27, -4, \
+-1, 8, -44, 152, -410, 955, -2119, 5682,31340, -3921, 1637, -737, 305, -106, 27, -4, \
+-1, 9, -47, 160, -431, 1004, -2228, 5995,31200, -4061, 1699, -764, 316, -109, 28, -4, \
+-1, 9, -49, 168, -453, 1052, -2336, 6311,31053, -4197, 1759, -791, 326, -113, 29, -4, \
+-1, 10, -52, 176, -474, 1101, -2444, 6629,30901, -4329, 1817, -817, 337, -116, 30, -4, \
+-1, 10, -54, 185, -495, 1149, -2552, 6951,30742, -4455, 1873, -842, 347, -119, 31, -5, \
+-1, 11, -57, 193, -516, 1197, -2659, 7274,30577, -4576, 1928, -866, 356, -123, 31, -5, \
+-1, 12, -59, 201, -537, 1245, -2767, 7600,30406, -4693, 1980, -890, 366, -125, 32, -5, \
+-1, 12, -62, 209, -558, 1293, -2874, 7928,30229, -4805, 2031, -912, 374, -128, 33, -5, \
+-1, 13, -64, 217, -579, 1341, -2980, 8259,30047, -4912, 2080, -934, 383, -131, 33, -5, \
+-1, 13, -67, 225, -600, 1388, -3086, 8591,29858, -5015, 2126, -955, 391, -133, 34, -5, \
+-1, 14, -69, 234, -621, 1435, -3192, 8925,29664, -5113, 2171, -974, 399, -136, 34, -5, \
+-1, 14, -72, 242, -642, 1482, -3297, 9261,29464, -5206, 2214, -993, 406, -138, 35, -5, \
+-1, 15, -75, 250, -662, 1528, -3401, 9599,29259, -5294, 2255, -1012, 413, -140, 35, -5, \
+-1, 15, -77, 258, -683, 1574, -3504, 9939,29048, -5378, 2294, -1029, 419, -142, 36, -5, \
+-1, 16, -80, 266, -703, 1619, -3607, 10280,28831, -5457, 2332, -1045, 426, -144, 36, -5, \
+-1, 17, -82, 274, -723, 1664, -3708, 10622,28610, -5531, 2367, -1060, 431, -146, 36, -5, \
+-1, 17, -85, 282, -743, 1709, -3808, 10966,28383, -5601, 2400, -1075, 437, -147, 37, -5, \
+-1, 18, -87, 290, -762, 1753, -3908, 11311,28151, -5666, 2431, -1089, 442, -149, 37, -5, \
+-1, 18, -90, 297, -782, 1796, -4006, 11657,27914, -5727, 2461, -1101, 447, -150, 37, -5, \
+-2, 19, -93, 305, -801, 1839, -4103, 12004,27672, -5783, 2488, -1113, 451, -151, 37, -5, \
+-2, 20, -95, 313, -820, 1881, -4198, 12352,27425, -5835, 2514, -1124, 455, -152, 37, -5, \
+-2, 20, -98, 320, -838, 1922, -4293, 12701,27173, -5882, 2538, -1134, 458, -153, 37, -5, \
+-2, 21, -100, 328, -857, 1963, -4385, 13050,26917, -5925, 2559, -1144, 462, -154, 37, -5, \
+-2, 21, -103, 335, -875, 2003, -4476, 13399,26656, -5964, 2579, -1152, 465, -155, 37, -5, \
+-2, 22, -105, 342, -892, 2042, -4566, 13749,26390, -5998, 2597, -1160, 467, -155, 38, -5, \
+-2, 23, -107, 349, -910, 2080, -4654, 14100,26121, -6028, 2613, -1166, 469, -156, 37, -5, \
+-2, 23, -110, 356, -927, 2117, -4740, 14450,25847, -6054, 2628, -1172, 471, -156, 37, -5, \
+-2, 24, -112, 363, -943, 2154, -4824, 14801,25568, -6075, 2640, -1177, 473, -156, 37, -5, \
+-2, 24, -114, 370, -959, 2189, -4906, 15151,25286, -6093, 2651, -1182, 474, -156, 37, -5, \
+-2, 25, -117, 377, -975, 2224, -4986, 15501,25000, -6106, 2659, -1185, 474, -156, 37, -5, \
+-2, 26, -119, 383, -991, 2257, -5064, 15851,24710, -6115, 2666, -1187, 475, -156, 37, -5, \
+-2, 26, -121, 389, -1005, 2290, -5140, 16200,24416, -6121, 2672, -1189, 475, -156, 37, -4, \
+-3, 27, -123, 395, -1020, 2321, -5214, 16549,24118, -6122, 2675, -1190, 475, -155, 37, -4, \
+-3, 27, -125, 401, -1034, 2352, -5285, 16897,23817, -6120, 2677, -1190, 474, -155, 36, -4, \
+-3, 28, -128, 407, -1047, 2381, -5354, 17244,23513, -6114, 2677, -1190, 474, -154, 36, -4, \
+-3, 28, -130, 413, -1060, 2409, -5421, 17590,23205, -6104, 2675, -1188, 472, -154, 36, -4, \
+-3, 29, -132, 418, -1073, 2435, -5485, 17935,22895, -6090, 2672, -1186, 471, -153, 35, -4, \
+-3, 29, -133, 423, -1085, 2461, -5546, 18279,22581, -6073, 2666, -1183, 469, -152, 35, -4, \
+-3, 30, -135, 428, -1096, 2485, -5605, 18621,22264, -6052, 2660, -1180, 467, -151, 35, -4, \
+-3, 31, -137, 433, -1107, 2508, -5660, 18962,21945, -6027, 2651, -1176, 465, -150, 34, -4, \
+-3, 31, -139, 437, -1117, 2530, -5713, 19301,21622, -5999, 2641, -1171, 463, -149, 34, -4, \
+-3, 31, -140, 442, -1126, 2550, -5764, 19639,21297, -5968, 2630, -1165, 460, -148, 34, -4, \
+-3, 32, -142, 446, -1135, 2569, -5811, 19975,20970, -5934, 2617, -1159, 457, -146, 33, -4, \
+-4, 32, -144, 450, -1144, 2586, -5855, 20309,20641, -5896, 2602, -1151, 453, -145, 33, -4, \
+-4, 33, -145, 453, -1151, 2602, -5896, 20641,20309, -5855, 2586, -1144, 450, -144, 32, -4, \
+-4, 33, -146, 457, -1159, 2617, -5934, 20970,19975, -5811, 2569, -1135, 446, -142, 32, -3, \
+-4, 34, -148, 460, -1165, 2630, -5968, 21297,19639, -5764, 2550, -1126, 442, -140, 31, -3, \
+-4, 34, -149, 463, -1171, 2641, -5999, 21622,19301, -5713, 2530, -1117, 437, -139, 31, -3, \
+-4, 34, -150, 465, -1176, 2651, -6027, 21945,18962, -5660, 2508, -1107, 433, -137, 31, -3, \
+-4, 35, -151, 467, -1180, 2660, -6052, 22264,18621, -5605, 2485, -1096, 428, -135, 30, -3, \
+-4, 35, -152, 469, -1183, 2666, -6073, 22581,18279, -5546, 2461, -1085, 423, -133, 29, -3, \
+-4, 35, -153, 471, -1186, 2672, -6090, 22895,17935, -5485, 2435, -1073, 418, -132, 29, -3, \
+-4, 36, -154, 472, -1188, 2675, -6104, 23205,17590, -5421, 2409, -1060, 413, -130, 28, -3, \
+-4, 36, -154, 474, -1190, 2677, -6114, 23513,17244, -5354, 2381, -1047, 407, -128, 28, -3, \
+-4, 36, -155, 474, -1190, 2677, -6120, 23817,16897, -5285, 2352, -1034, 401, -125, 27, -3, \
+-4, 37, -155, 475, -1190, 2675, -6122, 24118,16549, -5214, 2321, -1020, 395, -123, 27, -3, \
+-4, 37, -156, 475, -1189, 2672, -6121, 24416,16200, -5140, 2290, -1005, 389, -121, 26, -2, \
+-5, 37, -156, 475, -1187, 2666, -6115, 24710,15851, -5064, 2257, -991, 383, -119, 26, -2, \
+-5, 37, -156, 474, -1185, 2659, -6106, 25000,15501, -4986, 2224, -975, 377, -117, 25, -2, \
+-5, 37, -156, 474, -1182, 2651, -6093, 25286,15151, -4906, 2189, -959, 370, -114, 24, -2, \
+-5, 37, -156, 473, -1177, 2640, -6075, 25568,14801, -4824, 2154, -943, 363, -112, 24, -2, \
+-5, 37, -156, 471, -1172, 2628, -6054, 25847,14450, -4740, 2117, -927, 356, -110, 23, -2, \
+-5, 37, -156, 469, -1166, 2613, -6028, 26121,14100, -4654, 2080, -910, 349, -107, 23, -2, \
+-5, 38, -155, 467, -1160, 2597, -5998, 26390,13749, -4566, 2042, -892, 342, -105, 22, -2, \
+-5, 37, -155, 465, -1152, 2579, -5964, 26656,13399, -4476, 2003, -875, 335, -103, 21, -2, \
+-5, 37, -154, 462, -1144, 2559, -5925, 26917,13050, -4385, 1963, -857, 328, -100, 21, -2, \
+-5, 37, -153, 458, -1134, 2538, -5882, 27173,12701, -4293, 1922, -838, 320, -98, 20, -2, \
+-5, 37, -152, 455, -1124, 2514, -5835, 27425,12352, -4198, 1881, -820, 313, -95, 20, -2, \
+-5, 37, -151, 451, -1113, 2488, -5783, 27672,12004, -4103, 1839, -801, 305, -93, 19, -2, \
+-5, 37, -150, 447, -1101, 2461, -5727, 27914,11657, -4006, 1796, -782, 297, -90, 18, -1, \
+-5, 37, -149, 442, -1089, 2431, -5666, 28151,11311, -3908, 1753, -762, 290, -87, 18, -1, \
+-5, 37, -147, 437, -1075, 2400, -5601, 28383,10966, -3808, 1709, -743, 282, -85, 17, -1, \
+-5, 36, -146, 431, -1060, 2367, -5531, 28610,10622, -3708, 1664, -723, 274, -82, 17, -1, \
+-5, 36, -144, 426, -1045, 2332, -5457, 28831,10280, -3607, 1619, -703, 266, -80, 16, -1, \
+-5, 36, -142, 419, -1029, 2294, -5378, 29048,9939, -3504, 1574, -683, 258, -77, 15, -1, \
+-5, 35, -140, 413, -1012, 2255, -5294, 29259,9599, -3401, 1528, -662, 250, -75, 15, -1, \
+-5, 35, -138, 406, -993, 2214, -5206, 29464,9261, -3297, 1482, -642, 242, -72, 14, -1, \
+-5, 34, -136, 399, -974, 2171, -5113, 29664,8925, -3192, 1435, -621, 234, -69, 14, -1, \
+-5, 34, -133, 391, -955, 2126, -5015, 29858,8591, -3086, 1388, -600, 225, -67, 13, -1, \
+-5, 33, -131, 383, -934, 2080, -4912, 30047,8259, -2980, 1341, -579, 217, -64, 13, -1, \
+-5, 33, -128, 374, -912, 2031, -4805, 30229,7928, -2874, 1293, -558, 209, -62, 12, -1, \
+-5, 32, -125, 366, -890, 1980, -4693, 30406,7600, -2767, 1245, -537, 201, -59, 12, -1, \
+-5, 31, -123, 356, -866, 1928, -4576, 30577,7274, -2659, 1197, -516, 193, -57, 11, -1, \
+-5, 31, -119, 347, -842, 1873, -4455, 30742,6951, -2552, 1149, -495, 185, -54, 10, -1, \
+-4, 30, -116, 337, -817, 1817, -4329, 30901,6629, -2444, 1101, -474, 176, -52, 10, -1, \
+-4, 29, -113, 326, -791, 1759, -4197, 31053,6311, -2336, 1052, -453, 168, -49, 9, -1, \
+-4, 28, -109, 316, -764, 1699, -4061, 31200,5995, -2228, 1004, -431, 160, -47, 9, -1, \
+-4, 27, -106, 305, -737, 1637, -3921, 31340,5682, -2119, 955, -410, 152, -44, 8, -1, \
+-4, 27, -102, 293, -708, 1573, -3775, 31474,5371, -2011, 907, -389, 144, -42, 8, 0, \
+-4, 26, -98, 281, -679, 1507, -3625, 31601,5064, -1903, 858, -368, 136, -39, 7, 0, \
+-4, 25, -94, 269, -648, 1440, -3470, 31722,4760, -1796, 810, -347, 128, -37, 7, 0, \
+-4, 24, -90, 256, -617, 1371, -3310, 31837,4458, -1688, 761, -326, 120, -34, 6, 0, \
+-4, 22, -85, 243, -586, 1300, -3145, 31945,4160, -1581, 713, -305, 112, -32, 6, 0, \
+-3, 21, -81, 230, -553, 1228, -2976, 32047,3865, -1474, 665, -284, 104, -30, 5, 0, \
+-3, 20, -76, 216, -520, 1153, -2802, 32142,3574, -1367, 617, -263, 97, -28, 5, 0, \
+-3, 19, -71, 202, -486, 1077, -2623, 32230,3285, -1261, 569, -243, 89, -25, 5, 0, \
+-3, 18, -66, 188, -451, 1000, -2440, 32312,3001, -1156, 521, -222, 81, -23, 4, 0, \
+-3, 16, -61, 173, -415, 921, -2251, 32387,2720, -1051, 474, -202, 74, -21, 4, 0, \
+-2, 15, -56, 158, -379, 840, -2058, 32455,2442, -947, 427, -182, 66, -19, 3, 0, \
+-2, 14, -51, 143, -342, 758, -1861, 32517,2168, -843, 381, -162, 59, -17, 3, 0, \
+-2, 12, -45, 127, -304, 674, -1659, 32572,1898, -740, 334, -142, 52, -14, 3, 0, \
+-2, 11, -40, 111, -265, 588, -1452, 32620,1632, -639, 288, -122, 44, -12, 2, 0, \
+-2, 9, -34, 95, -226, 502, -1241, 32661,1370, -538, 243, -103, 37, -10, 2, 0, \
+-1, 8, -28, 78, -187, 413, -1025, 32695,1112, -438, 197, -84, 30, -8, 1, 0, \
+-1, 6, -22, 61, -146, 324, -805, 32722,857, -338, 153, -65, 23, -6, 1, 0, \
+-1, 4, -16, 44, -105, 233, -581, 32743,607, -240, 108, -46, 17, -5, 1, 0, \
+0, 3, -10, 27, -63, 141, -352, 32757,361, -143, 65, -27, 10, -3, 0, 0, \
+0, 1, -3, 9, -21, 47, -118, 32764,119, -47, 21, -9, 3, -1, 0, 0, \
+}
--- a/libao2/fir.h	Sat Feb 16 13:06:45 2002 +0000
+++ b/libao2/fir.h	Sat Feb 16 13:08:14 2002 +0000
@@ -11,123 +11,75 @@
 #ifndef __FIR_H__
 #define __FIR_H__
 
-/* 4, 8 and 16 tap FIR filters implemented using SSE instructions 
-   int16_t* x Input data
-   int16_t* y Output value
-   int16_t* w Filter weights 
-   
-   C function
-   for(int i = 0 ; i < L ; i++)
-     *y += w[i]*x[i];
-*/
-
-#ifdef HAVE_SSE
+/* Fixed-point 16 bit FIR filter. The filter is implemented
+both in C and MMX assembly. The filter consists of one macro
+UPDATE_QUE and one inline function firn. The macro can be used for
+adding new data to the circular buffer used by the filter firn.
+Limitations: max length of n = 16*4 and n must be a multiple of 4 (pad
+the filter with zeros for other lengths). Sometimes it works with filters
+longer than 4*16 (the problem is overshoot and the accumulated energy
+in the filter taps). */
 
-// This block should be MMX only compatible, but it isn't...
-#ifdef L4
-#define LOAD_QUE(x) \
-        __asm __volatile("movq %0, %%mm2\n\t" \
-                         :                    \
-                         :"m"((x)[0])         \
-                         :"memory");
-#define SAVE_QUE(x) \
-        __asm __volatile("movq %%mm2, %0\n\t" \
-                         "emms          \n\t" \
-                         :"=m"(x[0])          \
-                         :                    \
-                         :"memory");
-#define UPDATE_QUE(in) \
-        __asm __volatile("psllq   $16,   %%mm2\n\t"    \
-                         "pinsrw  $0,    %0,%%mm2\n\t" \
-                          :                            \
-                          :"m" ((in)[0])               \
-                          :"memory");                  
-#define FIR(x,w,y) \
-        __asm __volatile("movq	  %%mm2, %%mm0\n\t" \
-                         "pmaddwd %1,    %%mm0\n\t" \
-                         "movq    %%mm0, %%mm1\n\t" \
-                         "psrlq   $32, 	 %%mm1\n\t" \
-                         "paddd   %%mm0, %%mm1\n\t" \
-                         "movd    %%mm1, %%esi\n\t" \
-                         "shrl    $16,   %%esi\n\t" \
-                         "movw    %%si,  %0\n\t"    \
-			 : "=m" ((y)[0])            \
-			 : "m" ((w)[0])             \
-			 : "memory", "%esi"); 
-#endif /* L4 */
+#ifdef HAVE_MMX
+inline int32_t firn(int16_t* x, int16_t* w, int16_t n) // MMX version: multiply-accumulate of n samples x[] with n weights w[], 4 taps per pass
+{
+  register int32_t y; // Output; written only by the movd in the epilogue
+  // Prologue
+  asm volatile(" pxor %mm1, %mm1;\n" ); // Zero mm1, the two-lane 32-bit accumulator yt
+  // Main loop: one pass per group of 4 taps; leftover taps are skipped when n is not a multiple of 4
+  while((n-=4)>=0){
+    asm volatile(
+	" movq 		(%1),	%%mm0;\n"  // Load four 16-bit samples x[0..3]
+	" pmaddwd	(%0),	%%mm0;\n"  // mm0 = {x0*w0+x1*w1, x2*w2+x3*w3} as two 32-bit sums
+	" psrld	      	$16,	%%mm0;\n"  // Shift each 32-bit sum right by 16 (psrld is a logical shift: negative sums are not sign-extended)
+	" paddd	 	%%mm0,	%%mm1;\n"  // Accumulate both lanes into yt (mm1)
+	:: "r" (w), "r" (x)); // %0 = w, %1 = x; NOTE(review): no clobbers are declared for mm0/mm1 or memory
+    w+=4; x+=4; // Advance both pointers to the next group of 4 taps
+  }
+  // Epilogue
+  asm volatile(
+	" movq        	%%mm1, 	%%mm0;\n"  // Copy the accumulator yt into mm0
+	" punpckhdq   	%%mm1, 	%%mm0;\n"  // Bring the high lane of yt down into the low lane of mm0
+	" paddd       	%%mm0, 	%%mm1;\n"  // Low lane of mm1 = low lane + high lane of yt
+	" movd        	%%mm1, 	%0   ;\n"  // y = low 32 bits of mm1
+	" emms                       ;\n"  // Leave MMX state so x87 floating point can be used again
+	: "=&r" (y));
+  return y;
+}
 
-// It is possible to make the 8 bit filter a lot faster by using the
-// 128 bit registers, feel free to optimize.
+#else /* HAVE_MMX */
+
+// Portable C fallback for firn: sums w[i]*x[i] over the taps in groups of 4, starting from the last group
+inline int32_t firn(int16_t* x, int16_t* w, int16_t n)
+{
+  register int32_t y=0; // Running sum of the shifted group sums
+  while((n-=4) >=0) // n becomes the start index of the current group; leftover taps are skipped when n is not a multiple of 4
+    y+=w[n]*x[n]+w[n+1]*x[n+1]+w[n+2]*x[n+2]+w[n+3]*x[n+3] >> 16; // + binds tighter than >>: the four products are added first, then the group sum is shifted right by 16
+  return y;
+}
+
+#endif /* HAVE_MMX */
+
+// Push the sample *(ind) into the circular queue xq and leave xid at its index; expands to two statements, so do not use it as an unbraced if/else body
+#define UPDATE_QUE(ind,xq,xid) \
+  xid=(--xid)&(L-1);      /* step xid back by one and wrap with the mask L-1 (a modulo only when L is a power of two); NOTE(review): xid is modified twice in this expression */ \
+  xq[xid]=xq[xid+L]=*(ind); /* store the sample at xid and again at xid+L, so xq needs at least 2*L entries */
+
 #ifdef L8
-#define LOAD_QUE(x) \
-        __asm __volatile("movq %0, %%mm5\n\t" \
-                         "movq %1, %%mm4\n\t" \
-                         :                    \
-                         :"m"((x)[0]),        \
-                          "m"((x)[4])         \
-                         :"memory");
-#define SAVE_QUE(x) \
-        __asm __volatile("movq %%mm5, %0\n\t" \
-                         "movq %%mm4, %1\n\t" \
-                         "emms          \n\t" \
-                         :"=m"((x)[0]),       \
-                          "=m"((x)[4])        \
-                         :                    \
-                         :"memory");
-
-// Below operation could replace line 2 to 5 in macro below but can
-// not cause of compiler bug ???
-// "pextrw $3, %%mm5,%%eax\n\t"
-#define UPDATE_QUE(in) \
-        __asm __volatile("psllq    $16,   %%mm4\n\t"        \
-                         "movq	   %%mm5, %%mm0\n\t" 	    \
-                         "psrlq    $48,   %%mm0\n\t"        \
-                         "movd     %%mm0, %%eax\n\t"        \
-			 "pinsrw   $0,    %%eax,%%mm4\n\t"  \
-                         "psllq    $16,   %%mm5\n\t"        \
-                         "pinsrw   $0,    %0,%%mm5\n\t"     \
-                          :                                 \
-                          :"m" ((in)[0])                    \
-                          :"memory", "%eax");                  
-#define FIR(x,w,y) \
-        __asm __volatile("movq	  %%mm5, %%mm0\n\t" \
-                         "pmaddwd %1,    %%mm0\n\t" \
-                         "movq	  %%mm4, %%mm1\n\t" \
-                         "pmaddwd %2,    %%mm1\n\t" \
-                         "paddd   %%mm1, %%mm0\n\t" \
-                         "movq    %%mm0, %%mm1\n\t" \
-                         "psrlq   $32, 	 %%mm1\n\t" \
-                         "paddd   %%mm0, %%mm1\n\t" \
-                         "movd    %%mm1, %%esi\n\t" \
-                         "shrl    $16,   %%esi\n\t" \
-                         "movw    %%si,  %0\n\t"    \
-			 : "=m" ((y)[0])            \
-			 : "m" ((w)[0]),            \
-			   "m" ((w)[4])             \
-			 : "memory", "%esi"); 
-#endif /* L8 */
-
-#else /* HAVE_SSE */
-
-#define LOAD_QUE(x)
-#define SAVE_QUE(x)
-#define UPDATE_QUE(inm) \
-  xi=(--xi)&(L-1);     \
-  x[xi]=x[xi+L]=*(inm);
-
-#ifdef L4
-#define FIR(x,w,y) \
-        y[0]=(w[0]*x[0]+w[1]*x[1]+w[2]*x[2]+w[3]*x[3]) >> 16;
-#else
+#ifdef HAVE_MMX
+#define FIR(x,w,y) *y=(int16_t)firn(x,w,8);
+#else /* HAVE_MMX */
+// Unrolled loop to speed up execution 
 #define FIR(x,w,y){ \
   int16_t a = (w[0]*x[0]+w[1]*x[1]+w[2]*x[2]+w[3]*x[3]) >> 16; \
   int16_t b = (w[4]*x[4]+w[5]*x[5]+w[6]*x[6]+w[7]*x[7]) >> 16; \
   y[0]      = a+b; \
 }
-#endif /* L4 */
+#endif /* HAVE_MMX */
+#endif /* L8 */
 
-#endif /* HAVE_SSE */
+#ifdef L16
+#define FIR(x,w,y) *y=(int16_t)firn(x,w,16); /* 16-tap case: run firn (MMX or C, whichever was compiled) and store its result in *y truncated to int16_t */
+#endif /* L16 */
 
 #endif /* __FIR_H__ */
-
-
--- a/libao2/pl_resample.c	Sat Feb 16 13:06:45 2002 +0000
+++ b/libao2/pl_resample.c	Sat Feb 16 13:08:14 2002 +0000
@@ -40,22 +40,22 @@
 #define max(a,b)   (((a) > (b)) ? (a) : (b))
 
 /* Below definition selects the length of each poly phase component.
-   Valid definitions are L4 and L8, where the number denotes the
+   Valid definitions are L8 and L16, where the number denotes the
    length of the filter. This definition affects the computational
    complexity (see play()), the performance (see filter.h) and the
-   memory usage. For now the filterlenght is choosen to 4 and without
-   assembly optimization if no SSE is present.
+   memory usage. The filter length is chosen to be 8 if the machine is
+   slow and 16 if the machine is fast and has MMX.
 */
 
-// #ifdef HAVE_SSE
+#if !defined(HAVE_SSE) && !defined(HAVE_3DNOW) //This machine is slow
 #define L8    	1	// Filter bank type
 #define W 	W8	// Filter bank parameters
 #define L   	8	// Filter length
-// #else	
-// #define L4	1
-// #define W 	W4
-// #define L   	4
-// #endif
+#else	// Fat machine
+#define L16	1
+#define W 	W16
+#define L   	16
+#endif
 
 #define CH  6	// Max number of channels
 #define UP  128  /* Up sampling factor. Increasing this value will
@@ -188,14 +188,12 @@
 
     wi = pwi; xi = pxi;
 
-    LOAD_QUE(x);
     while(in < end){
       register uint16_t	i = inc;
       if(wi<level) i++;
 
-      UPDATE_QUE(in);
+      UPDATE_QUE(in,x,xi);
       in+=nch;
-      
       while(i--){
 	// Run the FIR filter
 	FIR((&x[xi]),(&w[wi*L]),out);
@@ -204,7 +202,6 @@
 	wi=(wi+dn)%up;
       }
     }
-    SAVE_QUE(x);
   }
 
   // Save values that needs to be kept for next time
@@ -243,10 +240,9 @@
     register int16_t* 	end   = in+ao_plugin_data.len/2;
     i = pi; wi = pwi; xi = pxi;
 
-    LOAD_QUE(x);
     while(in < end){
 
-      UPDATE_QUE(in);
+      UPDATE_QUE(in,x,xi);
       in+=nch;
       
       if(!--i){
@@ -262,7 +258,6 @@
 	if(wi<level) i++;
       }
     }
-    SAVE_QUE(x);
   }
   // Save values that needs to be kept for next time
   pwi = wi;