comparison i386/dsputil_mmx_rnd.h @ 448:e8c8ca9106aa libavcodec

* removed MANGLE from macros for setting constants * using MOVQ_WONE/MOVQ_BFE as two instructions instead of a static memory value access, as it's always faster * PAVGB_MMX macro now uses mm6, so mm7 is unmodified * replaced original pixels_xy2_mmx with a new, faster and equivalent implementation * replaced usage of mm7 for anything other than the ZERO constant in the _rnd & _avg files with mm6
author kabi
date Thu, 30 May 2002 15:14:56 +0000
parents 810f726ee3cc
children b94e82d31b06
comparison
equal deleted inserted replaced
447:810f726ee3cc 448:e8c8ca9106aa
22 */ 22 */
23 23
24 // put_pixels 24 // put_pixels
25 static void DEF(put, pixels_x2)(UINT8 *block, const UINT8 *pixels, int line_size, int h) 25 static void DEF(put, pixels_x2)(UINT8 *block, const UINT8 *pixels, int line_size, int h)
26 { 26 {
27 MOVQ_BFE(mm6);
27 __asm __volatile( 28 __asm __volatile(
28 MOVQ_BFE(%%mm7)
29 "lea (%3, %3), %%eax \n\t" 29 "lea (%3, %3), %%eax \n\t"
30 ".balign 8 \n\t" 30 ".balign 8 \n\t"
31 "1: \n\t" 31 "1: \n\t"
32 "movq (%1), %%mm0 \n\t" 32 "movq (%1), %%mm0 \n\t"
33 "movq 1(%1), %%mm1 \n\t" 33 "movq 1(%1), %%mm1 \n\t"
34 "movq (%1, %3), %%mm2 \n\t" 34 "movq (%1, %3), %%mm2 \n\t"
35 "movq 1(%1, %3), %%mm3 \n\t" 35 "movq 1(%1, %3), %%mm3 \n\t"
36 PAVGBP(%%mm0, %%mm1, %%mm5, %%mm2, %%mm3, %%mm6) 36 PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
37 "movq %%mm5, (%2) \n\t" 37 "movq %%mm4, (%2) \n\t"
38 "movq %%mm6, (%2, %3) \n\t" 38 "movq %%mm5, (%2, %3) \n\t"
39 "addl %%eax, %1 \n\t" 39 "addl %%eax, %1 \n\t"
40 "addl %%eax, %2 \n\t" 40 "addl %%eax, %2 \n\t"
41 "movq (%1), %%mm0 \n\t" 41 "movq (%1), %%mm0 \n\t"
42 "movq 1(%1), %%mm1 \n\t" 42 "movq 1(%1), %%mm1 \n\t"
43 "movq (%1, %3), %%mm2 \n\t" 43 "movq (%1, %3), %%mm2 \n\t"
44 "movq 1(%1, %3), %%mm3 \n\t" 44 "movq 1(%1, %3), %%mm3 \n\t"
45 PAVGBP(%%mm0, %%mm1, %%mm5, %%mm2, %%mm3, %%mm6) 45 PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
46 "movq %%mm5, (%2) \n\t" 46 "movq %%mm4, (%2) \n\t"
47 "movq %%mm6, (%2, %3) \n\t" 47 "movq %%mm5, (%2, %3) \n\t"
48 "addl %%eax, %1 \n\t" 48 "addl %%eax, %1 \n\t"
49 "addl %%eax, %2 \n\t" 49 "addl %%eax, %2 \n\t"
50 "subl $4, %0 \n\t" 50 "subl $4, %0 \n\t"
51 "jnz 1b \n\t" 51 "jnz 1b \n\t"
52 :"+g"(h), "+S"(pixels), "+D"(block) 52 :"+g"(h), "+S"(pixels), "+D"(block)
54 :"eax", "memory"); 54 :"eax", "memory");
55 } 55 }
56 56
57 static void DEF(put, pixels_y2)(UINT8 *block, const UINT8 *pixels, int line_size, int h) 57 static void DEF(put, pixels_y2)(UINT8 *block, const UINT8 *pixels, int line_size, int h)
58 { 58 {
59 __asm __volatile( 59 MOVQ_BFE(mm6);
60 MOVQ_BFE(%%mm7) 60 __asm __volatile(
61 "lea (%3, %3), %%eax \n\t" 61 "lea (%3, %3), %%eax \n\t"
62 "movq (%1), %%mm0 \n\t" 62 "movq (%1), %%mm0 \n\t"
63 ".balign 8 \n\t" 63 ".balign 8 \n\t"
64 "1: \n\t" 64 "1: \n\t"
65 "movq (%1, %3), %%mm1 \n\t" 65 "movq (%1, %3), %%mm1 \n\t"
66 "movq (%1, %%eax),%%mm2 \n\t" 66 "movq (%1, %%eax),%%mm2 \n\t"
67 PAVGBP(%%mm1, %%mm0, %%mm5, %%mm2, %%mm1, %%mm6) 67 PAVGBP(%%mm1, %%mm0, %%mm4, %%mm2, %%mm1, %%mm5)
68 "movq %%mm5, (%2) \n\t" 68 "movq %%mm4, (%2) \n\t"
69 "movq %%mm6, (%2, %3) \n\t" 69 "movq %%mm5, (%2, %3) \n\t"
70 "addl %%eax, %1 \n\t" 70 "addl %%eax, %1 \n\t"
71 "addl %%eax, %2 \n\t" 71 "addl %%eax, %2 \n\t"
72 "movq (%1, %3), %%mm1 \n\t" 72 "movq (%1, %3), %%mm1 \n\t"
73 "movq (%1, %%eax),%%mm0 \n\t" 73 "movq (%1, %%eax),%%mm0 \n\t"
74 PAVGBP(%%mm1, %%mm2, %%mm5, %%mm0, %%mm1, %%mm6) 74 PAVGBP(%%mm1, %%mm2, %%mm4, %%mm0, %%mm1, %%mm5)
75 "movq %%mm5, (%2) \n\t" 75 "movq %%mm4, (%2) \n\t"
76 "movq %%mm6, (%2, %3) \n\t" 76 "movq %%mm5, (%2, %3) \n\t"
77 "addl %%eax, %1 \n\t" 77 "addl %%eax, %1 \n\t"
78 "addl %%eax, %2 \n\t" 78 "addl %%eax, %2 \n\t"
79 "subl $4, %0 \n\t" 79 "subl $4, %0 \n\t"
80 "jnz 1b \n\t" 80 "jnz 1b \n\t"
81 :"+g"(h), "+S"(pixels), "+D"(block) 81 :"+g"(h), "+S"(pixels), "+D"(block)
82 :"r"(line_size) 82 :"r"(line_size)
83 :"eax", "memory"); 83 :"eax", "memory");
84 } 84 }
85 85
86 // ((a + b)/2 + (c + d)/2)/2 86 static void DEF(put, pixels_xy2)(UINT8 *block, const UINT8 *pixels, int line_size, int h)
87 // not sure if this is properly replacing original code
88 // - ok it's really unsable at this moment -> disabled
89 static void DEF(put, disabled_pixels_xy2)(UINT8 *block, const UINT8 *pixels, int line_size, int h)
90 { 87 {
88 MOVQ_ZERO(mm7);
89 SET_RND(mm6); // =2 for rnd and =1 for no_rnd version
91 __asm __volatile( 90 __asm __volatile(
92 MOVQ_BFE(%%mm7)
93 "lea (%3, %3), %%eax \n\t"
94 "movq (%1), %%mm0 \n\t"
95 "movq (%1), %%mm0 \n\t" 91 "movq (%1), %%mm0 \n\t"
96 "movq 1(%1), %%mm1 \n\t" 92 "movq 1(%1), %%mm4 \n\t"
97 ".balign 8 \n\t" 93 "movq %%mm0, %%mm1 \n\t"
94 "movq %%mm4, %%mm5 \n\t"
95 "punpcklbw %%mm7, %%mm0 \n\t"
96 "punpcklbw %%mm7, %%mm4 \n\t"
97 "punpckhbw %%mm7, %%mm1 \n\t"
98 "punpckhbw %%mm7, %%mm5 \n\t"
99 "paddusw %%mm0, %%mm4 \n\t"
100 "paddusw %%mm1, %%mm5 \n\t"
101 "xorl %%eax, %%eax \n\t"
102 "addl %3, %1 \n\t"
103 ".balign 4 \n\t"
98 "1: \n\t" 104 "1: \n\t"
99 "movq (%1, %3), %%mm2 \n\t" 105 "movq (%1, %%eax), %%mm0 \n\t"
100 "movq 1(%1, %3), %%mm3 \n\t" 106 "movq 1(%1, %%eax), %%mm2 \n\t"
101 PAVGBP(%%mm2, %%mm0, %%mm4, %%mm3, %%mm1, %%mm5) 107 "movq %%mm0, %%mm1 \n\t"
102 //PAVGBR(%%mm2, %%mm0, %%mm4) 108 "movq %%mm2, %%mm3 \n\t"
103 //PAVGBR(%%mm3, %%mm1, %%mm5) 109 "punpcklbw %%mm7, %%mm0 \n\t"
104 PAVGB(%%mm4, %%mm5) 110 "punpcklbw %%mm7, %%mm2 \n\t"
105 "movq %%mm6, (%2) \n\t" 111 "punpckhbw %%mm7, %%mm1 \n\t"
112 "punpckhbw %%mm7, %%mm3 \n\t"
113 "paddusw %%mm2, %%mm0 \n\t"
114 "paddusw %%mm3, %%mm1 \n\t"
115 "paddusw %%mm6, %%mm4 \n\t"
116 "paddusw %%mm6, %%mm5 \n\t"
117 "paddusw %%mm0, %%mm4 \n\t"
118 "paddusw %%mm1, %%mm5 \n\t"
119 "psrlw $2, %%mm4 \n\t"
120 "psrlw $2, %%mm5 \n\t"
121 "packuswb %%mm5, %%mm4 \n\t"
122 "movq %%mm4, (%2, %%eax) \n\t"
123 "addl %3, %%eax \n\t"
106 124
107 "movq (%1, %%eax), %%mm0 \n\t" 125 "movq (%1, %%eax), %%mm2 \n\t" // 0 <-> 2 1 <-> 3
108 "movq 1(%1, %%eax), %%mm1 \n\t" 126 "movq 1(%1, %%eax), %%mm4 \n\t"
109 PAVGBP(%%mm0, %%mm2, %%mm4, %%mm1, %%mm3, %%mm5) 127 "movq %%mm2, %%mm3 \n\t"
110 //PAVGBR(%%mm0, %%mm2, %%mm4) 128 "movq %%mm4, %%mm5 \n\t"
111 //PAVGBR(%%mm1, %%mm3, %%mm5) 129 "punpcklbw %%mm7, %%mm2 \n\t"
112 PAVGB(%%mm4, %%mm5) 130 "punpcklbw %%mm7, %%mm4 \n\t"
113 "movq %%mm6, (%2, %3) \n\t" 131 "punpckhbw %%mm7, %%mm3 \n\t"
114 "addl %%eax, %1 \n\t" 132 "punpckhbw %%mm7, %%mm5 \n\t"
115 "addl %%eax, %2 \n\t" 133 "paddusw %%mm2, %%mm4 \n\t"
134 "paddusw %%mm3, %%mm5 \n\t"
135 "paddusw %%mm6, %%mm0 \n\t"
136 "paddusw %%mm6, %%mm1 \n\t"
137 "paddusw %%mm4, %%mm0 \n\t"
138 "paddusw %%mm5, %%mm1 \n\t"
139 "psrlw $2, %%mm0 \n\t"
140 "psrlw $2, %%mm1 \n\t"
141 "packuswb %%mm1, %%mm0 \n\t"
142 "movq %%mm0, (%2, %%eax) \n\t"
143 "addl %3, %%eax \n\t"
116 144
117 "subl $2, %0 \n\t" 145 "subl $2, %0 \n\t"
118
119 "jnz 1b \n\t" 146 "jnz 1b \n\t"
120 :"+g"(h), "+S"(pixels), "+D"(block) 147 :"+g"(h), "+S"(pixels)
121 :"r"(line_size) 148 :"D"(block), "r"(line_size)
122 :"eax", "memory"); 149 :"eax", "memory");
123 } 150 }
124 151
125 // avg_pixels 152 // avg_pixels
126