Mercurial > libavcodec.hg
annotate i386/dsputil_mmx_rnd.h @ 4166:eced83504436 libavcodec
mp3 header (de)compression bitstream filter
This will make mp3 frames 4 bytes smaller. It will not give you binary-identical mp3 files, but it will give you mp3 files which decode to binary-identical output.
this will only work in containers providing at least packet size, sample_rate and number of channels
Bug reports about mp3 files for which this fails are welcome.
And this is experimental (don't expect compatibility, and don't even expect to be able to decompress what you compressed; hell, don't even expect this to work without editing the source a little).
author | michael |
---|---|
date | Fri, 10 Nov 2006 01:41:53 +0000 |
parents | c8c591fe26f8 |
children | 28ebdd244a07 |
rev | line source |
---|---|
445
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
diff
changeset
|
1 /* |
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
diff
changeset
|
2 * DSP utils mmx functions are compiled twice for rnd/no_rnd |
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
diff
changeset
|
3 * Copyright (c) 2000, 2001 Fabrice Bellard. |
1739
07a484280a82
copyright year update of the files i touched and remembered, things look annoyingly unmaintained otherwise
michael
parents:
1064
diff
changeset
|
4 * Copyright (c) 2003-2004 Michael Niedermayer <michaelni@gmx.at> |
445
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
diff
changeset
|
5 * |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3576
diff
changeset
|
6 * This file is part of FFmpeg. |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3576
diff
changeset
|
7 * |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3576
diff
changeset
|
8 * FFmpeg is free software; you can redistribute it and/or |
445
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
diff
changeset
|
9 * modify it under the terms of the GNU Lesser General Public |
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
diff
changeset
|
10 * License as published by the Free Software Foundation; either |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3576
diff
changeset
|
11 * version 2.1 of the License, or (at your option) any later version. |
445
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
diff
changeset
|
12 * |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3576
diff
changeset
|
13 * FFmpeg is distributed in the hope that it will be useful, |
445
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
diff
changeset
|
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
diff
changeset
|
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
diff
changeset
|
16 * Lesser General Public License for more details. |
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
diff
changeset
|
17 * |
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
diff
changeset
|
18 * You should have received a copy of the GNU Lesser General Public |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3576
diff
changeset
|
19 * License along with FFmpeg; if not, write to the Free Software |
3036
0b546eab515d
Update licensing information: The FSF changed postal address.
diego
parents:
2979
diff
changeset
|
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
445
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
diff
changeset
|
21 * |
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
diff
changeset
|
22 * MMX optimization by Nick Kurshev <nickols_k@mail.ru> |
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
diff
changeset
|
23 * mostly rewritten by Michael Niedermayer <michaelni@gmx.at> |
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
diff
changeset
|
24 * and improved by Zdenek Kabelac <kabi@users.sf.net> |
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
diff
changeset
|
25 */ |
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
diff
changeset
|
26 |
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
diff
changeset
|
27 // put_pixels |
/*
 * put_pixels8_x2: fills an 8-byte-wide destination where each output row is
 * the PAVGBP combination of the source row read at offset 0 and the same row
 * read at offset 1 (i.e. each byte with its right-hand neighbour).
 *
 * block     - destination; advanced by 2*line_size after every row pair
 * pixels    - source; bytes 0..8 of each row are read
 * line_size - byte stride used for both source and destination
 * h         - row count; one pass of the loop handles 4 rows and the loop
 *             only exits when the counter reaches exactly 0, so h has to be
 *             a positive multiple of 4
 *
 * DEF, MOVQ_BFE, PAVGBP, REG_a and ASMALIGN come from outside this file.
 * NOTE(review): MOVQ_BFE(mm6) presumably preloads the constant PAVGBP
 * relies on -- confirm against the macro definitions.
 */
1064 | 28 static void DEF(put, pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
445
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
diff
changeset
|
29 { |
448 | 30 MOVQ_BFE(mm6); |
445
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
diff
changeset
|
31 __asm __volatile( |
// REG_a = 2 * line_size: the pointer step after each pair of rows
2979 | 32 "lea (%3, %3), %%"REG_a" \n\t" |
3576
f7125bf10892
Support for MacIntel, last part: balign directives
gpoirier
parents:
3036
diff
changeset
|
33 ASMALIGN(3) |
2979 | 34 "1: \n\t" |
35 "movq (%1), %%mm0 \n\t" | |
36 "movq 1(%1), %%mm1 \n\t" | |
37 "movq (%1, %3), %%mm2 \n\t" | |
38 "movq 1(%1, %3), %%mm3 \n\t" | |
39 PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) | |
40 "movq %%mm4, (%2) \n\t" | |
41 "movq %%mm5, (%2, %3) \n\t" | |
42 "add %%"REG_a", %1 \n\t" | |
43 "add %%"REG_a", %2 \n\t" | |
// second row pair of this pass (same sequence as above)
44 "movq (%1), %%mm0 \n\t" | |
45 "movq 1(%1), %%mm1 \n\t" | |
46 "movq (%1, %3), %%mm2 \n\t" | |
47 "movq 1(%1, %3), %%mm3 \n\t" | |
48 PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) | |
49 "movq %%mm4, (%2) \n\t" | |
50 "movq %%mm5, (%2, %3) \n\t" | |
51 "add %%"REG_a", %1 \n\t" | |
52 "add %%"REG_a", %2 \n\t" | |
53 "subl $4, %0 \n\t" | |
54 "jnz 1b \n\t" | |
55 :"+g"(h), "+S"(pixels), "+D"(block) | |
56 :"r"((long)line_size) | |
57 :REG_a, "memory"); | |
445
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
diff
changeset
|
58 } |
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
diff
changeset
|
59 |
2864
95bac7109ff0
Kill some compiler warnings. Compiled code verified identical after changes.
mru
parents:
2293
diff
changeset
|
/*
 * put_pixels8_l2: dst row = PAVGB/PAVGBP combination of one 8-byte row from
 * src1 and one 8-byte row from src2.
 *
 * dst,  dstStride  - destination and its row stride
 * src1, src1Stride - first source and its row stride
 * src2             - second source, read as back-to-back 8-byte rows
 *                    (it is advanced by 8 bytes per row, no stride is used)
 * h                - row count
 *
 * When h is odd, one row is processed up front and h is decremented.
 * Execution then falls into the main loop unconditionally; that loop handles
 * 4 rows per pass and only exits when the counter is exactly 0, so the count
 * left after the optional odd row must be a positive multiple of 4.
 *
 * In PIC builds h is a memory operand ("+m") instead of living in ebx ("+b").
 */
60 static void attribute_unused DEF(put, pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
954 | 61 { |
62 MOVQ_BFE(mm6); | |
63 __asm __volatile( | |
// odd h: handle a single row first, then continue with the unrolled loop
2979 | 64 "testl $1, %0 \n\t" |
65 " jz 1f \n\t" | |
66 "movq (%1), %%mm0 \n\t" | |
67 "movq (%2), %%mm1 \n\t" | |
68 "add %4, %1 \n\t" | |
69 "add $8, %2 \n\t" | |
70 PAVGB(%%mm0, %%mm1, %%mm4, %%mm6) | |
71 "movq %%mm4, (%3) \n\t" | |
72 "add %5, %3 \n\t" | |
73 "decl %0 \n\t" | |
3576
f7125bf10892
Support for MacIntel, last part: balign directives
gpoirier
parents:
3036
diff
changeset
|
74 ASMALIGN(3) |
2979 | 75 "1: \n\t" |
76 "movq (%1), %%mm0 \n\t" | |
77 "movq (%2), %%mm1 \n\t" | |
78 "add %4, %1 \n\t" | |
79 "movq (%1), %%mm2 \n\t" | |
80 "movq 8(%2), %%mm3 \n\t" | |
81 "add %4, %1 \n\t" | |
82 PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) | |
83 "movq %%mm4, (%3) \n\t" | |
84 "add %5, %3 \n\t" | |
85 "movq %%mm5, (%3) \n\t" | |
86 "add %5, %3 \n\t" | |
87 "movq (%1), %%mm0 \n\t" | |
88 "movq 16(%2), %%mm1 \n\t" | |
89 "add %4, %1 \n\t" | |
90 "movq (%1), %%mm2 \n\t" | |
91 "movq 24(%2), %%mm3 \n\t" | |
92 "add %4, %1 \n\t" | |
// four 8-byte src2 rows were consumed in this pass
93 "add $32, %2 \n\t" | |
94 PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) | |
95 "movq %%mm4, (%3) \n\t" | |
96 "add %5, %3 \n\t" | |
97 "movq %%mm5, (%3) \n\t" | |
98 "add %5, %3 \n\t" | |
99 "subl $4, %0 \n\t" | |
100 "jnz 1b \n\t" | |
967 | 101 #ifdef PIC //Note "+bm" and "+mb" are buggy too (with gcc 3.2.2 at least) and cant be used |
102 :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst) | |
103 #else | |
104 :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst) | |
105 #endif | |
2979 | 106 :"S"((long)src1Stride), "D"((long)dstStride) |
107 :"memory"); | |
954 | 108 } |
109 | |
/*
 * put_pixels16_x2: 16-byte-wide counterpart of the 8-wide x2 routine. Each
 * output row is the PAVGBP combination of the source row read at offset 0
 * and at offset 1, done as two 8-byte halves (offsets 0/1 and 8/9).
 *
 * block     - destination; advanced by 2*line_size after every row pair
 * pixels    - source; bytes 0..16 of each row are read
 * line_size - byte stride used for both source and destination
 * h         - row count; 4 rows are handled per pass and the loop only exits
 *             when the counter reaches exactly 0, so h has to be a positive
 *             multiple of 4
 */
1064 | 110 static void DEF(put, pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
651 | 111 { |
112 MOVQ_BFE(mm6); | |
113 __asm __volatile( | |
// REG_a = 2 * line_size: the pointer step after each pair of rows
2979 | 114 "lea (%3, %3), %%"REG_a" \n\t" |
3576
f7125bf10892
Support for MacIntel, last part: balign directives
gpoirier
parents:
3036
diff
changeset
|
115 ASMALIGN(3) |
2979 | 116 "1: \n\t" |
117 "movq (%1), %%mm0 \n\t" | |
118 "movq 1(%1), %%mm1 \n\t" | |
119 "movq (%1, %3), %%mm2 \n\t" | |
120 "movq 1(%1, %3), %%mm3 \n\t" | |
121 PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) | |
122 "movq %%mm4, (%2) \n\t" | |
123 "movq %%mm5, (%2, %3) \n\t" | |
// right 8-byte half of the same two rows
124 "movq 8(%1), %%mm0 \n\t" | |
125 "movq 9(%1), %%mm1 \n\t" | |
126 "movq 8(%1, %3), %%mm2 \n\t" | |
127 "movq 9(%1, %3), %%mm3 \n\t" | |
128 PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) | |
129 "movq %%mm4, 8(%2) \n\t" | |
130 "movq %%mm5, 8(%2, %3) \n\t" | |
131 "add %%"REG_a", %1 \n\t" | |
132 "add %%"REG_a", %2 \n\t" | |
133 "movq (%1), %%mm0 \n\t" | |
134 "movq 1(%1), %%mm1 \n\t" | |
135 "movq (%1, %3), %%mm2 \n\t" | |
136 "movq 1(%1, %3), %%mm3 \n\t" | |
137 PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) | |
138 "movq %%mm4, (%2) \n\t" | |
139 "movq %%mm5, (%2, %3) \n\t" | |
140 "movq 8(%1), %%mm0 \n\t" | |
141 "movq 9(%1), %%mm1 \n\t" | |
142 "movq 8(%1, %3), %%mm2 \n\t" | |
143 "movq 9(%1, %3), %%mm3 \n\t" | |
144 PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) | |
145 "movq %%mm4, 8(%2) \n\t" | |
146 "movq %%mm5, 8(%2, %3) \n\t" | |
147 "add %%"REG_a", %1 \n\t" | |
148 "add %%"REG_a", %2 \n\t" | |
149 "subl $4, %0 \n\t" | |
150 "jnz 1b \n\t" | |
151 :"+g"(h), "+S"(pixels), "+D"(block) | |
152 :"r"((long)line_size) | |
153 :REG_a, "memory"); | |
651 | 154 } |
155 | |
2864
95bac7109ff0
Kill some compiler warnings. Compiled code verified identical after changes.
mru
parents:
2293
diff
changeset
|
/*
 * put_pixels16_l2: dst row = PAVGBP combination of one 16-byte row from src1
 * and one 16-byte row from src2, handled as two 8-byte halves.
 *
 * dst,  dstStride  - destination and its row stride
 * src1, src1Stride - first source and its row stride
 * src2             - second source, read as back-to-back 16-byte rows
 *                    (advanced by 16 bytes per row, no stride is used)
 * h                - row count
 *
 * When h is odd, one row is processed up front and h is decremented.
 * Execution then falls into the main loop unconditionally; that loop handles
 * 2 rows per pass and only exits when the counter is exactly 0, so at least
 * two rows must remain after the optional odd row (h == 1 would enter the
 * loop with a zero counter).
 *
 * In PIC builds h is a memory operand ("+m") instead of living in ebx ("+b").
 */
156 static void attribute_unused DEF(put, pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
954 | 157 { |
158 MOVQ_BFE(mm6); | |
159 __asm __volatile( | |
// odd h: handle a single row first, then continue with the unrolled loop
2979 | 160 "testl $1, %0 \n\t" |
161 " jz 1f \n\t" | |
162 "movq (%1), %%mm0 \n\t" | |
163 "movq (%2), %%mm1 \n\t" | |
164 "movq 8(%1), %%mm2 \n\t" | |
165 "movq 8(%2), %%mm3 \n\t" | |
166 "add %4, %1 \n\t" | |
167 "add $16, %2 \n\t" | |
168 PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) | |
169 "movq %%mm4, (%3) \n\t" | |
170 "movq %%mm5, 8(%3) \n\t" | |
171 "add %5, %3 \n\t" | |
172 "decl %0 \n\t" | |
3576
f7125bf10892
Support for MacIntel, last part: balign directives
gpoirier
parents:
3036
diff
changeset
|
173 ASMALIGN(3) |
2979 | 174 "1: \n\t" |
175 "movq (%1), %%mm0 \n\t" | |
176 "movq (%2), %%mm1 \n\t" | |
177 "movq 8(%1), %%mm2 \n\t" | |
178 "movq 8(%2), %%mm3 \n\t" | |
179 "add %4, %1 \n\t" | |
180 PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) | |
181 "movq %%mm4, (%3) \n\t" | |
182 "movq %%mm5, 8(%3) \n\t" | |
183 "add %5, %3 \n\t" | |
// second row of this pass: src2 is read at offsets 16 and 24
184 "movq (%1), %%mm0 \n\t" | |
185 "movq 16(%2), %%mm1 \n\t" | |
186 "movq 8(%1), %%mm2 \n\t" | |
187 "movq 24(%2), %%mm3 \n\t" | |
188 "add %4, %1 \n\t" | |
189 PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) | |
190 "movq %%mm4, (%3) \n\t" | |
191 "movq %%mm5, 8(%3) \n\t" | |
192 "add %5, %3 \n\t" | |
193 "add $32, %2 \n\t" | |
194 "subl $2, %0 \n\t" | |
195 "jnz 1b \n\t" | |
967 | 196 #ifdef PIC //Note "+bm" and "+mb" are buggy too (with gcc 3.2.2 at least) and cant be used |
2979 | 197 :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst) |
967 | 198 #else |
2979 | 199 :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst) |
967 | 200 #endif |
2979 | 201 :"S"((long)src1Stride), "D"((long)dstStride) |
202 :"memory"); | |
954 | 203 } |
204 | |
/*
 * put_pixels8_y2: fills an 8-byte-wide destination where each output row is
 * the PAVGBP combination of a source row and the source row below it.
 *
 * block     - destination; advanced by 2*line_size after every row pair
 * pixels    - source; rows 0..h are read (one more row than is written)
 * line_size - byte stride used for both source and destination
 * h         - row count; 4 rows are handled per pass and the loop only exits
 *             when the counter reaches exactly 0, so h has to be a positive
 *             multiple of 4
 *
 * The most recently loaded row is carried over in a register (mm0 and mm2
 * alternate in that role) so each source row is loaded only once.
 */
1064 | 205 static void DEF(put, pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
445
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
diff
changeset
|
206 { |
448 | 207 MOVQ_BFE(mm6); |
208 __asm __volatile( | |
// REG_a = 2 * line_size; mm0 = first source row, loaded once before the loop
2979 | 209 "lea (%3, %3), %%"REG_a" \n\t" |
210 "movq (%1), %%mm0 \n\t" | |
3576
f7125bf10892
Support for MacIntel, last part: balign directives
gpoirier
parents:
3036
diff
changeset
|
211 ASMALIGN(3) |
2979 | 212 "1: \n\t" |
213 "movq (%1, %3), %%mm1 \n\t" | |
214 "movq (%1, %%"REG_a"),%%mm2 \n\t" | |
215 PAVGBP(%%mm1, %%mm0, %%mm4, %%mm2, %%mm1, %%mm5) | |
216 "movq %%mm4, (%2) \n\t" | |
217 "movq %%mm5, (%2, %3) \n\t" | |
218 "add %%"REG_a", %1 \n\t" | |
219 "add %%"REG_a", %2 \n\t" | |
// second half: mm2 now holds the carried-over row, mm0 receives the new one
220 "movq (%1, %3), %%mm1 \n\t" | |
221 "movq (%1, %%"REG_a"),%%mm0 \n\t" | |
222 PAVGBP(%%mm1, %%mm2, %%mm4, %%mm0, %%mm1, %%mm5) | |
223 "movq %%mm4, (%2) \n\t" | |
224 "movq %%mm5, (%2, %3) \n\t" | |
225 "add %%"REG_a", %1 \n\t" | |
226 "add %%"REG_a", %2 \n\t" | |
227 "subl $4, %0 \n\t" | |
228 "jnz 1b \n\t" | |
229 :"+g"(h), "+S"(pixels), "+D"(block) | |
230 :"r"((long)line_size) | |
231 :REG_a, "memory"); | |
445
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
diff
changeset
|
232 } |
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
diff
changeset
|
233 |
/*
 * put_pixels8_xy2: 8-byte-wide 2x2 filter. Each output byte is
 *     (a + b + c + d + mm6) >> 2
 * where a,b are a source byte and its right-hand neighbour and c,d are the
 * same two positions one row below; the arithmetic is done on 16-bit words
 * (bytes are widened against mm7) and packed back with packuswb.
 *
 * block     - destination; passed as an input-only operand because rows are
 *             addressed as block + REG_a rather than by moving the pointer
 * pixels    - source; rows 0..h are read, bytes 0..8 of each
 * line_size - byte stride used for both source and destination
 * h         - row count; 2 rows are handled per pass and the loop only exits
 *             when the counter reaches exactly 0, so h has to be a positive
 *             even number
 *
 * NOTE(review): MOVQ_ZERO and SET_RND are defined outside this file;
 * MOVQ_ZERO(mm7) presumably clears mm7 -- confirm.
 */
1064 | 234 static void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
445
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
diff
changeset
|
235 { |
448 | 236 MOVQ_ZERO(mm7); |
237 SET_RND(mm6); // =2 for rnd and =1 for no_rnd version | |
445
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
diff
changeset
|
238 __asm __volatile( |
// prologue: mm4/mm5 = word sums (low/high 4 pixels) of row 0 and its right neighbour
2979 | 239 "movq (%1), %%mm0 \n\t" |
240 "movq 1(%1), %%mm4 \n\t" | |
241 "movq %%mm0, %%mm1 \n\t" | |
242 "movq %%mm4, %%mm5 \n\t" | |
243 "punpcklbw %%mm7, %%mm0 \n\t" | |
244 "punpcklbw %%mm7, %%mm4 \n\t" | |
245 "punpckhbw %%mm7, %%mm1 \n\t" | |
246 "punpckhbw %%mm7, %%mm5 \n\t" | |
247 "paddusw %%mm0, %%mm4 \n\t" | |
248 "paddusw %%mm1, %%mm5 \n\t" | |
// REG_a = running row offset (starts at 0); pixels is moved to row 1
249 "xor %%"REG_a", %%"REG_a" \n\t" | |
250 "add %3, %1 \n\t" | |
3576
f7125bf10892
Support for MacIntel, last part: balign directives
gpoirier
parents:
3036
diff
changeset
|
251 ASMALIGN(3) |
2979 | 252 "1: \n\t" |
253 "movq (%1, %%"REG_a"), %%mm0 \n\t" | |
254 "movq 1(%1, %%"REG_a"), %%mm2 \n\t" | |
255 "movq %%mm0, %%mm1 \n\t" | |
256 "movq %%mm2, %%mm3 \n\t" | |
257 "punpcklbw %%mm7, %%mm0 \n\t" | |
258 "punpcklbw %%mm7, %%mm2 \n\t" | |
259 "punpckhbw %%mm7, %%mm1 \n\t" | |
260 "punpckhbw %%mm7, %%mm3 \n\t" | |
261 "paddusw %%mm2, %%mm0 \n\t" | |
262 "paddusw %%mm3, %%mm1 \n\t" | |
263 "paddusw %%mm6, %%mm4 \n\t" | |
264 "paddusw %%mm6, %%mm5 \n\t" | |
265 "paddusw %%mm0, %%mm4 \n\t" | |
266 "paddusw %%mm1, %%mm5 \n\t" | |
267 "psrlw $2, %%mm4 \n\t" | |
268 "psrlw $2, %%mm5 \n\t" | |
269 "packuswb %%mm5, %%mm4 \n\t" | |
270 "movq %%mm4, (%2, %%"REG_a") \n\t" | |
271 "add %3, %%"REG_a" \n\t" | |
445
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
diff
changeset
|
272 |
// second row of the pass: same steps with the roles of mm0/mm1 and mm4/mm5 swapped
2979 | 273 "movq (%1, %%"REG_a"), %%mm2 \n\t" // 0 <-> 2 1 <-> 3 |
274 "movq 1(%1, %%"REG_a"), %%mm4 \n\t" | |
275 "movq %%mm2, %%mm3 \n\t" | |
276 "movq %%mm4, %%mm5 \n\t" | |
277 "punpcklbw %%mm7, %%mm2 \n\t" | |
278 "punpcklbw %%mm7, %%mm4 \n\t" | |
279 "punpckhbw %%mm7, %%mm3 \n\t" | |
280 "punpckhbw %%mm7, %%mm5 \n\t" | |
281 "paddusw %%mm2, %%mm4 \n\t" | |
282 "paddusw %%mm3, %%mm5 \n\t" | |
283 "paddusw %%mm6, %%mm0 \n\t" | |
284 "paddusw %%mm6, %%mm1 \n\t" | |
285 "paddusw %%mm4, %%mm0 \n\t" | |
286 "paddusw %%mm5, %%mm1 \n\t" | |
287 "psrlw $2, %%mm0 \n\t" | |
288 "psrlw $2, %%mm1 \n\t" | |
289 "packuswb %%mm1, %%mm0 \n\t" | |
290 "movq %%mm0, (%2, %%"REG_a") \n\t" | |
291 "add %3, %%"REG_a" \n\t" | |
445
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
diff
changeset
|
292 |
2979 | 293 "subl $2, %0 \n\t" |
294 "jnz 1b \n\t" | |
295 :"+g"(h), "+S"(pixels) | |
296 :"D"(block), "r"((long)line_size) | |
297 :REG_a, "memory"); | |
445
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
diff
changeset
|
298 } |
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
diff
changeset
|
299 |
62c01dbdc1e0
* code with new PAVGB for MMX only CPU splited into separate file
kabi
parents:
diff
changeset
|
300 // avg_pixels |
2864
95bac7109ff0
Kill some compiler warnings. Compiled code verified identical after changes.
mru
parents:
2293
diff
changeset
|
/*
 * avg_pixels4: for each row, replaces 4 destination bytes with the PAVGB
 * combination of those bytes and 4 source bytes (movd load / movd store).
 *
 * block     - destination, read and written; stepped by line_size per row
 * pixels    - source; stepped by line_size per row
 * line_size - byte stride used for both pointers
 * h         - row count; the do/while runs the body before testing --h, so
 *             h must be at least 1
 *
 * One asm statement is issued per row; mm6 is set once by MOVQ_BFE before
 * the loop and is expected to stay intact across iterations.
 */
301 static void attribute_unused DEF(avg, pixels4)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
2209 | 302 { |
303 MOVQ_BFE(mm6); | |
304 JUMPALIGN(); | |
305 do { | |
2979 | 306 __asm __volatile( |
307 "movd %0, %%mm0 \n\t" | |
308 "movd %1, %%mm1 \n\t" | |
309 PAVGB(%%mm0, %%mm1, %%mm2, %%mm6) | |
310 "movd %%mm2, %0 \n\t" | |
311 :"+m"(*block) | |
312 :"m"(*pixels) | |
313 :"memory"); | |
314 pixels += line_size; | |
315 block += line_size; | |
2209 | 316 } |
317 while (--h); | |
318 } | |
319 | |
470
b94e82d31b06
* implemented remaing avg_ pixel functions (these are not used offen)
kabi
parents:
448
diff
changeset
|
/*
 * avg_pixels8: for each row, replaces 8 destination bytes with the PAVGB
 * combination of those bytes and 8 source bytes.
 *
 * block     - destination, read and written; stepped by line_size per row
 * pixels    - source; stepped by line_size per row
 * line_size - byte stride used for both pointers
 * h         - row count; the do/while runs the body before testing --h, so
 *             h must be at least 1
 */
320 // in case more speed is needed - unrolling would certainly help |
1064 | 321 static void DEF(avg, pixels8)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
470
b94e82d31b06
* implemented remaing avg_ pixel functions (these are not used offen)
kabi
parents:
448
diff
changeset
|
322 { |
b94e82d31b06
* implemented remaing avg_ pixel functions (these are not used offen)
kabi
parents:
448
diff
changeset
|
323 MOVQ_BFE(mm6); |
b94e82d31b06
* implemented remaing avg_ pixel functions (these are not used offen)
kabi
parents:
448
diff
changeset
|
324 JUMPALIGN(); |
b94e82d31b06
* implemented remaing avg_ pixel functions (these are not used offen)
kabi
parents:
448
diff
changeset
|
325 do { |
2979 | 326 __asm __volatile( |
327 "movq %0, %%mm0 \n\t" | |
328 "movq %1, %%mm1 \n\t" | |
329 PAVGB(%%mm0, %%mm1, %%mm2, %%mm6) | |
330 "movq %%mm2, %0 \n\t" | |
331 :"+m"(*block) | |
332 :"m"(*pixels) | |
333 :"memory"); | |
334 pixels += line_size; | |
335 block += line_size; | |
470
b94e82d31b06
* implemented remaing avg_ pixel functions (these are not used offen)
kabi
parents:
448
diff
changeset
|
336 } |
b94e82d31b06
* implemented remaing avg_ pixel functions (these are not used offen)
kabi
parents:
448
diff
changeset
|
337 while (--h); |
b94e82d31b06
* implemented remaing avg_ pixel functions (these are not used offen)
kabi
parents:
448
diff
changeset
|
338 } |
b94e82d31b06
* implemented remaing avg_ pixel functions (these are not used offen)
kabi
parents:
448
diff
changeset
|
339 |
/*
 * avg_pixels16: for each row, replaces 16 destination bytes with the PAVGB
 * combination of those bytes and 16 source bytes, as two 8-byte halves.
 *
 * block     - destination, read and written; stepped by line_size per row
 * pixels    - source; stepped by line_size per row
 * line_size - byte stride used for both pointers
 * h         - row count; the do/while runs the body before testing --h, so
 *             h must be at least 1
 *
 * The second half is addressed by writing "8%0" / "8%1", i.e. the text "8"
 * is placed directly in front of whatever the compiler prints for the
 * memory operand.
 * NOTE(review): that only yields "operand + 8" if the operand is printed
 * without a displacement of its own, e.g. "(%reg)" -- confirm this holds
 * for all call sites / compilers in use.
 */
1064 | 340 static void DEF(avg, pixels16)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
651 | 341 { |
342 MOVQ_BFE(mm6); | |
343 JUMPALIGN(); | |
344 do { | |
2979 | 345 __asm __volatile( |
346 "movq %0, %%mm0 \n\t" | |
347 "movq %1, %%mm1 \n\t" | |
348 PAVGB(%%mm0, %%mm1, %%mm2, %%mm6) | |
349 "movq %%mm2, %0 \n\t" | |
350 "movq 8%0, %%mm0 \n\t" | |
351 "movq 8%1, %%mm1 \n\t" | |
352 PAVGB(%%mm0, %%mm1, %%mm2, %%mm6) | |
353 "movq %%mm2, 8%0 \n\t" | |
354 :"+m"(*block) | |
355 :"m"(*pixels) | |
356 :"memory"); | |
357 pixels += line_size; | |
358 block += line_size; | |
651 | 359 } |
360 while (--h); | |
361 } | |
362 | |
/*
 * avg_pixels8_x2: for each row, first combines the 8 source bytes at offset
 * 0 with the 8 source bytes at offset 1 (PAVGB -> mm2), then combines that
 * result with the existing 8 destination bytes (PAVGB -> mm0) and stores it.
 *
 * block     - destination, read and written; stepped by line_size per row
 * pixels    - source; bytes 0..8 of each row are read
 * line_size - byte stride used for both pointers
 * h         - row count; the do/while runs the body before testing --h, so
 *             h must be at least 1
 *
 * "1%1" places the text "1" directly in front of the printed memory operand.
 * NOTE(review): that only means "operand + 1" if the operand is printed
 * without a displacement of its own -- confirm.
 */
1064 | 363 static void DEF(avg, pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
470
b94e82d31b06
* implemented remaing avg_ pixel functions (these are not used offen)
kabi
parents:
448
diff
changeset
|
364 { |
b94e82d31b06
* implemented remaing avg_ pixel functions (these are not used offen)
kabi
parents:
448
diff
changeset
|
365 MOVQ_BFE(mm6); |
b94e82d31b06
* implemented remaing avg_ pixel functions (these are not used offen)
kabi
parents:
448
diff
changeset
|
366 JUMPALIGN(); |
b94e82d31b06
* implemented remaing avg_ pixel functions (these are not used offen)
kabi
parents:
448
diff
changeset
|
367 do { |
2979 | 368 __asm __volatile( |
369 "movq %1, %%mm0 \n\t" | |
370 "movq 1%1, %%mm1 \n\t" | |
371 "movq %0, %%mm3 \n\t" | |
372 PAVGB(%%mm0, %%mm1, %%mm2, %%mm6) | |
373 PAVGB(%%mm3, %%mm2, %%mm0, %%mm6) | |
374 "movq %%mm0, %0 \n\t" | |
375 :"+m"(*block) | |
376 :"m"(*pixels) | |
377 :"memory"); | |
378 pixels += line_size; | |
379 block += line_size; | |
470
b94e82d31b06
* implemented remaing avg_ pixel functions (these are not used offen)
kabi
parents:
448
diff
changeset
|
380 } while (--h); |
b94e82d31b06
* implemented remaing avg_ pixel functions (these are not used offen)
kabi
parents:
448
diff
changeset
|
381 } |
b94e82d31b06
* implemented remaing avg_ pixel functions (these are not used offen)
kabi
parents:
448
diff
changeset
|
382 |
2024
f65d87bfdd5a
some of the warning fixes by (Michael Roitzsch <mroi at users dot sourceforge dot net>)
michael
parents:
1739
diff
changeset
|
/*
 * avg_pixels8_l2: for each row, combines 8 bytes of src1 with 8 bytes of
 * src2 (PAVGB -> mm2), then combines that with the existing 8 destination
 * bytes (PAVGB -> mm0) and stores the result to dst.
 *
 * dst,  dstStride  - destination (read and written) and its row stride
 * src1, src1Stride - first source and its row stride
 * src2             - second source, read as back-to-back 8-byte rows
 *                    (advanced by 8 per row)
 * h                - row count; the do/while runs the body before testing
 *                    --h, so h must be at least 1
 */
383 static __attribute__((unused)) void DEF(avg, pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
954 | 384 { |
385 MOVQ_BFE(mm6); | |
386 JUMPALIGN(); | |
387 do { | |
2979 | 388 __asm __volatile( |
389 "movq %1, %%mm0 \n\t" | |
390 "movq %2, %%mm1 \n\t" | |
391 "movq %0, %%mm3 \n\t" | |
392 PAVGB(%%mm0, %%mm1, %%mm2, %%mm6) | |
393 PAVGB(%%mm3, %%mm2, %%mm0, %%mm6) | |
394 "movq %%mm0, %0 \n\t" | |
395 :"+m"(*dst) | |
396 :"m"(*src1), "m"(*src2) | |
397 :"memory"); | |
398 dst += dstStride; | |
954 | 399 src1 += src1Stride; |
400 src2 += 8; | |
401 } while (--h); | |
402 } | |
403 | |
/*
 * avg_pixels16_x2: 16-byte-wide version of the x2 averaging routine. For
 * each 8-byte half of a row it combines the source bytes at offset k with
 * those at offset k+1 (PAVGB -> mm2), then combines that with the existing
 * destination bytes (PAVGB -> mm0) and stores the result.
 *
 * block     - destination, read and written; stepped by line_size per row
 * pixels    - source; bytes 0..16 of each row are read
 * line_size - byte stride used for both pointers
 * h         - row count; the do/while runs the body before testing --h, so
 *             h must be at least 1
 *
 * "1%1", "8%1", "9%1" and "8%0" place the number directly in front of the
 * printed memory operand.
 * NOTE(review): that only means "operand + N" if the operand is printed
 * without a displacement of its own -- confirm.
 */
1064 | 404 static void DEF(avg, pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
651 | 405 { |
406 MOVQ_BFE(mm6); | |
407 JUMPALIGN(); | |
408 do { | |
2979 | 409 __asm __volatile( |
410 "movq %1, %%mm0 \n\t" | |
411 "movq 1%1, %%mm1 \n\t" | |
412 "movq %0, %%mm3 \n\t" | |
413 PAVGB(%%mm0, %%mm1, %%mm2, %%mm6) | |
414 PAVGB(%%mm3, %%mm2, %%mm0, %%mm6) | |
415 "movq %%mm0, %0 \n\t" | |
416 "movq 8%1, %%mm0 \n\t" | |
417 "movq 9%1, %%mm1 \n\t" | |
418 "movq 8%0, %%mm3 \n\t" | |
419 PAVGB(%%mm0, %%mm1, %%mm2, %%mm6) | |
420 PAVGB(%%mm3, %%mm2, %%mm0, %%mm6) | |
421 "movq %%mm0, 8%0 \n\t" | |
422 :"+m"(*block) | |
423 :"m"(*pixels) | |
424 :"memory"); | |
425 pixels += line_size; | |
426 block += line_size; | |
651 | 427 } while (--h); |
428 } | |
429 | |
2024
f65d87bfdd5a
some of the warning fixes by (Michael Roitzsch <mroi at users dot sourceforge dot net>)
michael
parents:
1739
diff
changeset
|
/*
 * avg_pixels16_l2: for each row and each 8-byte half, combines src1 with
 * src2 (PAVGB -> mm2), then combines that with the existing destination
 * bytes (PAVGB -> mm0) and stores the result to dst.
 *
 * dst,  dstStride  - destination (read and written) and its row stride
 * src1, src1Stride - first source and its row stride
 * src2             - second source, read as back-to-back 16-byte rows
 *                    (advanced by 16 per row)
 * h                - row count; the do/while runs the body before testing
 *                    --h, so h must be at least 1
 *
 * "8%0", "8%1" and "8%2" place the text "8" directly in front of the printed
 * memory operand.
 * NOTE(review): that only means "operand + 8" if the operand is printed
 * without a displacement of its own -- confirm.
 */
430 static __attribute__((unused)) void DEF(avg, pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
954 | 431 { |
432 MOVQ_BFE(mm6); | |
433 JUMPALIGN(); | |
434 do { | |
2979 | 435 __asm __volatile( |
436 "movq %1, %%mm0 \n\t" | |
437 "movq %2, %%mm1 \n\t" | |
438 "movq %0, %%mm3 \n\t" | |
439 PAVGB(%%mm0, %%mm1, %%mm2, %%mm6) | |
440 PAVGB(%%mm3, %%mm2, %%mm0, %%mm6) | |
441 "movq %%mm0, %0 \n\t" | |
442 "movq 8%1, %%mm0 \n\t" | |
443 "movq 8%2, %%mm1 \n\t" | |
444 "movq 8%0, %%mm3 \n\t" | |
445 PAVGB(%%mm0, %%mm1, %%mm2, %%mm6) | |
446 PAVGB(%%mm3, %%mm2, %%mm0, %%mm6) | |
447 "movq %%mm0, 8%0 \n\t" | |
448 :"+m"(*dst) | |
449 :"m"(*src1), "m"(*src2) | |
450 :"memory"); | |
451 dst += dstStride; | |
954 | 452 src1 += src1Stride; |
453 src2 += 16; | |
454 } while (--h); | |
455 } | |
456 | |
/*
 * avg_pixels8_y2: 8-byte-wide routine that first combines each source row
 * with the source row below it (PAVGBP), then combines that result with the
 * bytes already in the destination (PAVGB) before storing.
 *
 * block     - destination, read and written; advanced by 2*line_size after
 *             every row pair
 * pixels    - source; rows 0..h are read (one more row than is written)
 * line_size - byte stride used for both source and destination
 * h         - row count; 4 rows are handled per pass and the loop only exits
 *             when the counter reaches exactly 0, so h has to be a positive
 *             multiple of 4
 *
 * The most recently loaded source row is carried between the two halves of
 * the loop body (in mm2, then in mm0), so each source row is loaded once.
 */
1064 | 457 static void DEF(avg, pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
470
b94e82d31b06
* implemented remaing avg_ pixel functions (these are not used offen)
kabi
parents:
448
diff
changeset
|
458 { |
b94e82d31b06
* implemented remaing avg_ pixel functions (these are not used offen)
kabi
parents:
448
diff
changeset
|
459 MOVQ_BFE(mm6); |
b94e82d31b06
* implemented remaing avg_ pixel functions (these are not used offen)
kabi
parents:
448
diff
changeset
|
460 __asm __volatile( |
// REG_a = 2 * line_size; mm0 = first source row, loaded once before the loop
2979 | 461 "lea (%3, %3), %%"REG_a" \n\t" |
462 "movq (%1), %%mm0 \n\t" | |
3576
f7125bf10892
Support for MacIntel, last part: balign directives
gpoirier
parents:
3036
diff
changeset
|
463 ASMALIGN(3) |
2979 | 464 "1: \n\t" |
465 "movq (%1, %3), %%mm1 \n\t" | |
466 "movq (%1, %%"REG_a"), %%mm2 \n\t" | |
467 PAVGBP(%%mm1, %%mm0, %%mm4, %%mm2, %%mm1, %%mm5) | |
468 "movq (%2), %%mm3 \n\t" | |
469 PAVGB(%%mm3, %%mm4, %%mm0, %%mm6) | |
470 "movq (%2, %3), %%mm3 \n\t" | |
471 PAVGB(%%mm3, %%mm5, %%mm1, %%mm6) | |
472 "movq %%mm0, (%2) \n\t" | |
473 "movq %%mm1, (%2, %3) \n\t" | |
474 "add %%"REG_a", %1 \n\t" | |
475 "add %%"REG_a", %2 \n\t" | |
470
b94e82d31b06
* implemented remaing avg_ pixel functions (these are not used offen)
kabi
parents:
448
diff
changeset
|
476 |
// second half: mm2 holds the carried-over row, mm0 receives the new one
2979 | 477 "movq (%1, %3), %%mm1 \n\t" |
478 "movq (%1, %%"REG_a"), %%mm0 \n\t" | |
479 PAVGBP(%%mm1, %%mm2, %%mm4, %%mm0, %%mm1, %%mm5) | |
480 "movq (%2), %%mm3 \n\t" | |
481 PAVGB(%%mm3, %%mm4, %%mm2, %%mm6) | |
482 "movq (%2, %3), %%mm3 \n\t" | |
483 PAVGB(%%mm3, %%mm5, %%mm1, %%mm6) | |
484 "movq %%mm2, (%2) \n\t" | |
485 "movq %%mm1, (%2, %3) \n\t" | |
486 "add %%"REG_a", %1 \n\t" | |
487 "add %%"REG_a", %2 \n\t" | |
470
b94e82d31b06
* implemented remaing avg_ pixel functions (these are not used offen)
kabi
parents:
448
diff
changeset
|
488 |
2979 | 489 "subl $4, %0 \n\t" |
490 "jnz 1b \n\t" | |
491 :"+g"(h), "+S"(pixels), "+D"(block) | |
492 :"r"((long)line_size) | |
493 :REG_a, "memory"); | |
470
b94e82d31b06
* implemented remaing avg_ pixel functions (these are not used offen)
kabi
parents:
448
diff
changeset
|
494 } |
b94e82d31b06
* implemented remaing avg_ pixel functions (these are not used offen)
kabi
parents:
448
diff
changeset
|
495 |
b94e82d31b06
* implemented remaing avg_ pixel functions (these are not used offen)
kabi
parents:
448
diff
changeset
|
496 // this routine is 'slightly' suboptimal but mostly unused |
/*
 * avg_pixels8_xy2: 8-byte-wide 2x2 filter followed by a blend with the
 * destination. For every output byte it computes
 *     (a + b + c + d + mm6) >> 2
 * from a source byte, its right-hand neighbour and the same two positions
 * one row below (16-bit arithmetic, bytes widened against mm7), packs the
 * result back to bytes and then combines it with the existing destination
 * bytes via PAVGB before storing.
 *
 * block     - destination, read and written; passed as an input-only
 *             operand because rows are addressed as block + REG_a
 * pixels    - source; rows 0..h are read, bytes 0..8 of each
 * line_size - byte stride used for both source and destination
 * h         - row count; 2 rows are handled per pass and the loop only exits
 *             when the counter reaches exactly 0, so h has to be a positive
 *             even number
 *
 * mm6 is occupied by the SET_RND value here, so the fourth PAVGB argument is
 * built on the fly in mm2: pcmpeqd sets every bit, and paddb mm2,mm2 then
 * leaves 0xFE in every byte.
 */
1064 | 497 static void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
470
b94e82d31b06
* implemented remaing avg_ pixel functions (these are not used offen)
kabi
parents:
448
diff
changeset
|
498 { |
b94e82d31b06
* implemented remaing avg_ pixel functions (these are not used offen)
kabi
parents:
448
diff
changeset
|
499 MOVQ_ZERO(mm7); |
b94e82d31b06
* implemented remaing avg_ pixel functions (these are not used offen)
kabi
parents:
448
diff
changeset
|
500 SET_RND(mm6); // =2 for rnd and =1 for no_rnd version |
b94e82d31b06
* implemented remaing avg_ pixel functions (these are not used offen)
kabi
parents:
448
diff
changeset
|
501 __asm __volatile( |
// prologue: mm4/mm5 = word sums (low/high 4 pixels) of row 0 and its right neighbour
2979 | 502 "movq (%1), %%mm0 \n\t" |
503 "movq 1(%1), %%mm4 \n\t" | |
504 "movq %%mm0, %%mm1 \n\t" | |
505 "movq %%mm4, %%mm5 \n\t" | |
506 "punpcklbw %%mm7, %%mm0 \n\t" | |
507 "punpcklbw %%mm7, %%mm4 \n\t" | |
508 "punpckhbw %%mm7, %%mm1 \n\t" | |
509 "punpckhbw %%mm7, %%mm5 \n\t" | |
510 "paddusw %%mm0, %%mm4 \n\t" | |
511 "paddusw %%mm1, %%mm5 \n\t" | |
// REG_a = running row offset (starts at 0); pixels is moved to row 1
512 "xor %%"REG_a", %%"REG_a" \n\t" | |
513 "add %3, %1 \n\t" | |
3576
f7125bf10892
Support for MacIntel, last part: balign directives
gpoirier
parents:
3036
diff
changeset
|
514 ASMALIGN(3) |
2979 | 515 "1: \n\t" |
516 "movq (%1, %%"REG_a"), %%mm0 \n\t" | |
517 "movq 1(%1, %%"REG_a"), %%mm2 \n\t" | |
518 "movq %%mm0, %%mm1 \n\t" | |
519 "movq %%mm2, %%mm3 \n\t" | |
520 "punpcklbw %%mm7, %%mm0 \n\t" | |
521 "punpcklbw %%mm7, %%mm2 \n\t" | |
522 "punpckhbw %%mm7, %%mm1 \n\t" | |
523 "punpckhbw %%mm7, %%mm3 \n\t" | |
524 "paddusw %%mm2, %%mm0 \n\t" | |
525 "paddusw %%mm3, %%mm1 \n\t" | |
526 "paddusw %%mm6, %%mm4 \n\t" | |
527 "paddusw %%mm6, %%mm5 \n\t" | |
528 "paddusw %%mm0, %%mm4 \n\t" | |
529 "paddusw %%mm1, %%mm5 \n\t" | |
530 "psrlw $2, %%mm4 \n\t" | |
531 "psrlw $2, %%mm5 \n\t" | |
532 "movq (%2, %%"REG_a"), %%mm3 \n\t" | |
533 "packuswb %%mm5, %%mm4 \n\t" | |
534 "pcmpeqd %%mm2, %%mm2 \n\t" | |
535 "paddb %%mm2, %%mm2 \n\t" | |
536 PAVGB(%%mm3, %%mm4, %%mm5, %%mm2) | |
537 "movq %%mm5, (%2, %%"REG_a") \n\t" | |
538 "add %3, %%"REG_a" \n\t" | |
470
b94e82d31b06
* implemented remaing avg_ pixel functions (these are not used offen)
kabi
parents:
448
diff
changeset
|
539 |
// second row of the pass: same steps with the roles of mm0/mm1 and mm4/mm5 swapped
2979 | 540 "movq (%1, %%"REG_a"), %%mm2 \n\t" // 0 <-> 2 1 <-> 3 |
541 "movq 1(%1, %%"REG_a"), %%mm4 \n\t" | |
542 "movq %%mm2, %%mm3 \n\t" | |
543 "movq %%mm4, %%mm5 \n\t" | |
544 "punpcklbw %%mm7, %%mm2 \n\t" | |
545 "punpcklbw %%mm7, %%mm4 \n\t" | |
546 "punpckhbw %%mm7, %%mm3 \n\t" | |
547 "punpckhbw %%mm7, %%mm5 \n\t" | |
548 "paddusw %%mm2, %%mm4 \n\t" | |
549 "paddusw %%mm3, %%mm5 \n\t" | |
550 "paddusw %%mm6, %%mm0 \n\t" | |
551 "paddusw %%mm6, %%mm1 \n\t" | |
552 "paddusw %%mm4, %%mm0 \n\t" | |
553 "paddusw %%mm5, %%mm1 \n\t" | |
554 "psrlw $2, %%mm0 \n\t" | |
555 "psrlw $2, %%mm1 \n\t" | |
556 "movq (%2, %%"REG_a"), %%mm3 \n\t" | |
557 "packuswb %%mm1, %%mm0 \n\t" | |
558 "pcmpeqd %%mm2, %%mm2 \n\t" | |
559 "paddb %%mm2, %%mm2 \n\t" | |
560 PAVGB(%%mm3, %%mm0, %%mm1, %%mm2) | |
561 "movq %%mm1, (%2, %%"REG_a") \n\t" | |
562 "add %3, %%"REG_a" \n\t" | |
470
b94e82d31b06
* implemented remaing avg_ pixel functions (these are not used offen)
kabi
parents:
448
diff
changeset
|
563 |
2979 | 564 "subl $2, %0 \n\t" |
565 "jnz 1b \n\t" | |
566 :"+g"(h), "+S"(pixels) | |
567 :"D"(block), "r"((long)line_size) | |
568 :REG_a, "memory"); | |
470
b94e82d31b06
* implemented remaing avg_ pixel functions (these are not used offen)
kabi
parents:
448
diff
changeset
|
569 } |
651 | 570 |
571 //FIXME optimize | |
1064 | 572 static void DEF(put, pixels16_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){ |
651 | 573 DEF(put, pixels8_y2)(block , pixels , line_size, h); |
574 DEF(put, pixels8_y2)(block+8, pixels+8, line_size, h); | |
575 } | |
576 | |
1064 | 577 static void DEF(put, pixels16_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){ |
651 | 578 DEF(put, pixels8_xy2)(block , pixels , line_size, h); |
579 DEF(put, pixels8_xy2)(block+8, pixels+8, line_size, h); | |
580 } | |
581 | |
1064 | 582 static void DEF(avg, pixels16_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){ |
651 | 583 DEF(avg, pixels8_y2)(block , pixels , line_size, h); |
584 DEF(avg, pixels8_y2)(block+8, pixels+8, line_size, h); | |
585 } | |
586 | |
1064 | 587 static void DEF(avg, pixels16_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){ |
651 | 588 DEF(avg, pixels8_xy2)(block , pixels , line_size, h); |
589 DEF(avg, pixels8_xy2)(block+8, pixels+8, line_size, h); | |
590 } | |
591 | |
592 |