Mercurial > mplayer.hg
annotate libmpcodecs/native/rtjpegn.c @ 31597:1eb8dc8f96fa
Make subdelay handling work the same way for all subtitle types and also allow
changing subtitle delay to work better with vobsubs.
This probably breaks vobsub behaviour with timestamp wrapping though.
author | reimar |
---|---|
date | Sat, 10 Jul 2010 12:53:05 +0000 |
parents | 0f1b5b68af32 |
children |
rev | line source |
---|---|
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
1 /* |
3802 | 2 RTjpeg (C) Justin Schoeman 1998 (justin@suntiger.ee.up.ac.za) |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
3 |
3802 | 4 With modifications by: |
5 (c) 1998, 1999 by Joerg Walter <trouble@moes.pmnet.uni-oldenburg.de> | |
6 and | |
7 (c) 1999 by Wim Taymans <wim.taymans@tvd.be> | |
8 | |
9 This program is free software; you can redistribute it and/or modify | |
10 it under the terms of the GNU General Public License as published by | |
11 the Free Software Foundation; either version 2 of the License, or | |
12 (at your option) any later version. | |
13 | |
14 This program is distributed in the hope that it will be useful, | |
15 but WITHOUT ANY WARRANTY; without even the implied warranty of | |
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
17 GNU General Public License for more details. | |
18 | |
19 You should have received a copy of the GNU General Public License | |
20 along with this program; if not, write to the Free Software | |
21977
cea0eb833758
Fix FSF address and otherwise broken license headers.
diego
parents:
21507
diff
changeset
|
21 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
3802 | 22 */ |
23 | |
24 #include <stdio.h> | |
25 #include <stdlib.h> | |
26 #include <string.h> | |
3805 | 27 |
28 #include "config.h" | |
29 | |
21507
fa99b3d31d13
Hack around libavutil/bswap.h compilation problems due to always_inline undefined.
reimar
parents:
21372
diff
changeset
|
30 #include "mpbswap.h" |
26304
5f526e8e3988
Rename RTJPEG files so that filenames consist of lowercase name only.
diego
parents:
26280
diff
changeset
|
31 #include "rtjpegn.h" |
3802 | 32 |
28298
a7124a264ea6
Completely get rid of MMX define, use HAVE_MMX define instead.
gpoirier
parents:
28296
diff
changeset
|
33 #if HAVE_MMX |
3802 | 34 #include "mmx.h" |
35 #endif | |
36 | |
37 //#define SHOWBLOCK 1 | |
38 #define BETTERCOMPRESSION 1 | |
39 | |
/*
 * Scan-order table: RTjpeg_ZZ[k] is the index inside a 64-entry coefficient
 * block of the k-th coefficient in stream order.  The stream coders in this
 * file always access a block as data[RTjpeg_ZZ[k]].  The order walks the
 * block diagonally (0, 8, 1, 2, 9, 16, ...), one diagonal per source row.
 */
static const unsigned char RTjpeg_ZZ[64]={
0,
8, 1,
2, 9, 16,
24, 17, 10, 3,
4, 11, 18, 25, 32,
40, 33, 26, 19, 12, 5,
6, 13, 20, 27, 34, 41, 48,
56, 49, 42, 35, 28, 21, 14, 7,
15, 22, 29, 36, 43, 50, 57,
58, 51, 44, 37, 30, 23,
31, 38, 45, 52, 59,
60, 53, 46, 39,
47, 54, 61,
62, 55,
63 };
56 | |
/*
 * Per-coefficient scale factors in fixed point with 32 fractional bits
 * (the first entry, 4294967296, is exactly 1<<32).  RTjpeg_dct_init()
 * divides each quantiser entry, shifted left by 32, by the matching entry
 * of this table.
 * NOTE(review): the name suggests these are the AAN DCT post-scale factors;
 * confirm against the DCT derivation before changing any value.
 */
static const __u64 RTjpeg_aan_tab[64]={
4294967296ULL, 5957222912ULL, 5611718144ULL, 5050464768ULL, 4294967296ULL, 3374581504ULL, 2324432128ULL, 1184891264ULL,
5957222912ULL, 8263040512ULL, 7783580160ULL, 7005009920ULL, 5957222912ULL, 4680582144ULL, 3224107520ULL, 1643641088ULL,
5611718144ULL, 7783580160ULL, 7331904512ULL, 6598688768ULL, 5611718144ULL, 4408998912ULL, 3036936960ULL, 1548224000ULL,
5050464768ULL, 7005009920ULL, 6598688768ULL, 5938608128ULL, 5050464768ULL, 3968072960ULL, 2733115392ULL, 1393296000ULL,
4294967296ULL, 5957222912ULL, 5611718144ULL, 5050464768ULL, 4294967296ULL, 3374581504ULL, 2324432128ULL, 1184891264ULL,
3374581504ULL, 4680582144ULL, 4408998912ULL, 3968072960ULL, 3374581504ULL, 2651326208ULL, 1826357504ULL, 931136000ULL,
2324432128ULL, 3224107520ULL, 3036936960ULL, 2733115392ULL, 2324432128ULL, 1826357504ULL, 1258030336ULL, 641204288ULL,
1184891264ULL, 1643641088ULL, 1548224000ULL, 1393296000ULL, 1184891264ULL, 931136000ULL, 641204288ULL, 326894240ULL,
};
67 | |
28298
a7124a264ea6
Completely get rid of MMX define, use HAVE_MMX define instead.
gpoirier
parents:
28296
diff
changeset
|
#if !HAVE_MMX
/* Intermediate workspace of the C (non-MMX) DCT: RTjpeg_dctY() writes its
 * first pass here and reads it back in the second pass. */
static __s32 RTjpeg_ws[64+31];
#endif
/* Raw byte pool.
 * NOTE(review): presumably the block/table pointers declared below are
 * carved out of this buffer by init code outside this part of the file --
 * confirm before changing the size expression. */
static __u8 RTjpeg_alldata[2*64+4*64+4*64+4*64+4*64+32];
3802 | 72 |
/* File-scope encoder state.  The code that assigns these pointers and
 * dimensions is not in this part of the file; the notes below only state
 * what the visible code does with them. */
static __s16 *block; // rh
static __s16 *RTjpeg_block;
/* Luma / chroma quantiser tables: rescaled in place by RTjpeg_dct_init();
 * the MMX build then repacks them to 16-bit in RTjpeg_quant_init(). */
static __s32 *RTjpeg_lqt;
static __s32 *RTjpeg_cqt;
/* NOTE(review): presumably the matching inverse-quantiser tables -- confirm. */
static __u32 *RTjpeg_liqt;
static __u32 *RTjpeg_ciqt;

static unsigned char RTjpeg_lb8;
static unsigned char RTjpeg_cb8;
/* NOTE(review): presumably frame and plane geometry -- confirm at the init site. */
static int RTjpeg_width, RTjpeg_height;
static int RTjpeg_Ywidth, RTjpeg_Cwidth;
static int RTjpeg_Ysize, RTjpeg_Csize;

/* Starts out NULL; allocated and used outside this part of the file. */
static __s16 *RTjpeg_old=NULL;
3802 | 87 |
28298
a7124a264ea6
Completely get rid of MMX define, use HAVE_MMX define instead.
gpoirier
parents:
28296
diff
changeset
|
/* Luma / chroma masks.  Their storage type follows the build: a 64-bit
 * mmx_t when HAVE_MMX is set, a plain 16-bit value otherwise.
 * NOTE(review): not referenced in the visible part of the file; verify the
 * intended meaning at the use site. */
#if HAVE_MMX
static mmx_t RTjpeg_lmask;
static mmx_t RTjpeg_cmask;
#else
static __u16 RTjpeg_lmask;
static __u16 RTjpeg_cmask;
#endif
95 | |
/*
 * Base luma quantisation steps, 8 rows of 8 entries.
 * NOTE(review): the values appear to match the example luminance table of
 * the JPEG specification (Annex K) -- confirm before editing.
 */
static const unsigned char RTjpeg_lum_quant_tbl[64] = {
16, 11, 10, 16, 24, 40, 51, 61,
12, 12, 14, 19, 26, 58, 60, 55,
14, 13, 16, 24, 40, 57, 69, 56,
14, 17, 22, 29, 51, 87, 80, 62,
18, 22, 37, 56, 68, 109, 103, 77,
24, 35, 55, 64, 81, 104, 113, 92,
49, 64, 78, 87, 103, 121, 120, 101,
72, 92, 95, 98, 112, 100, 103, 99
};
106 | |
/*
 * Base chroma quantisation steps, 8 rows of 8 entries.
 * NOTE(review): the values appear to match the example chrominance table of
 * the JPEG specification (Annex K) -- confirm before editing.
 */
static const unsigned char RTjpeg_chrom_quant_tbl[64] = {
17, 18, 24, 47, 99, 99, 99, 99,
18, 21, 26, 66, 99, 99, 99, 99,
24, 26, 56, 99, 99, 99, 99, 99,
47, 66, 99, 99, 99, 99, 99, 99,
99, 99, 99, 99, 99, 99, 99, 99,
99, 99, 99, 99, 99, 99, 99, 99,
99, 99, 99, 99, 99, 99, 99, 99,
99, 99, 99, 99, 99, 99, 99, 99
};
117 | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
118 #ifdef BETTERCOMPRESSION |
3802 | 119 |
120 /*--------------------------------------------------*/ | |
121 /* better encoding, but needs a lot more cpu time */ | |
122 /* seems to be more effective than old method +lzo */ | |
123 /* with this encoding lzo isn't efficient anymore */ | |
124 /* there is still more potential for better */ | |
125 /* encoding but that would need even more cputime */ | |
126 /* anyway your mileage may vary */ | |
127 /* */ | |
128 /* written by Martin BIELY and Roman HOCHLEITNER */ | |
129 /*--------------------------------------------------*/ | |
130 | |
131 /* +++++++++++++++++++++++++++++++++++++++++++++++++++*/ | |
132 /* Block to Stream (encoding) */ | |
133 /* */ | |
134 | |
28849
87b59e8d3c26
Mark everything not used outside the file as "static"
reimar
parents:
28298
diff
changeset
|
135 static int RTjpeg_b2s(__s16 *data, __s8 *strm, __u8 bt8) |
3802 | 136 { |
137 register int ci, co=1; | |
138 register __s16 ZZvalue; | |
139 register unsigned char bitten; | |
140 register unsigned char bitoff; | |
141 | |
142 #ifdef SHOWBLOCK | |
143 | |
144 int ii; | |
145 for (ii=0; ii < 64; ii++) { | |
146 fprintf(stdout, "%d ", data[RTjpeg_ZZ[ii]]); | |
147 } | |
148 fprintf(stdout, "\n\n"); | |
149 | |
150 #endif | |
151 | |
152 // first byte allways written | |
12378 | 153 ((__u8*)strm)[0]= |
3802 | 154 (__u8)(data[RTjpeg_ZZ[0]]>254) ? 254:((data[RTjpeg_ZZ[0]]<0)?0:data[RTjpeg_ZZ[0]]); |
155 | |
156 | |
157 ci=63; | |
158 while (data[RTjpeg_ZZ[ci]]==0 && ci>0) ci--; | |
159 | |
160 bitten = ((unsigned char)ci) << 2; | |
161 | |
162 if (ci==0) { | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
163 ((__u8*)strm)[1]= bitten; |
3802 | 164 co = 2; |
165 return (int)co; | |
166 } | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
167 |
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
168 /* bitoff=0 because the high 6bit contain first non zero position */ |
3802 | 169 bitoff = 0; |
170 co = 1; | |
171 | |
172 for(; ci>0; ci--) { | |
173 | |
174 ZZvalue = data[RTjpeg_ZZ[ci]]; | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
175 |
3802 | 176 switch(ZZvalue) { |
177 case 0: | |
178 break; | |
179 case 1: | |
180 bitten |= (0x01<<bitoff); | |
181 break; | |
182 case -1: | |
183 bitten |= (0x03<<bitoff); | |
184 break; | |
185 default: | |
186 bitten |= (0x02<<bitoff); | |
187 goto HERZWEH; | |
188 break; | |
189 } | |
190 | |
191 if( bitoff == 0 ) { | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
192 ((__u8*)strm)[co]= bitten; |
3802 | 193 bitten = 0; |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
194 bitoff = 8; |
3802 | 195 co++; |
196 } /* "fall through" */ | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
197 bitoff-=2; |
3802 | 198 |
199 } | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
200 |
3802 | 201 /* ci must be 0 */ |
202 if(bitoff != 6) { | |
203 | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
204 ((__u8*)strm)[co]= bitten; |
3802 | 205 co++; |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
206 |
3802 | 207 } |
208 goto BAUCHWEH; | |
209 | |
210 HERZWEH: | |
211 /* ci cannot be 0 */ | |
212 /* correct bitoff to nibble boundaries */ | |
213 | |
214 switch(bitoff){ | |
215 case 4: | |
216 case 6: | |
217 bitoff = 0; | |
218 break; | |
219 case 2: | |
220 case 0: | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
221 ((__u8*)strm)[co]= bitten; |
3802 | 222 bitoff = 4; |
223 co++; | |
224 bitten = 0; // clear half nibble values in bitten | |
225 break; | |
226 default: | |
227 break; | |
228 } | |
229 | |
230 for(; ci>0; ci--) { | |
231 | |
232 ZZvalue = data[RTjpeg_ZZ[ci]]; | |
233 | |
234 if( (ZZvalue > 7) || (ZZvalue < -7) ) { | |
235 bitten |= (0x08<<bitoff); | |
236 goto HIRNWEH; | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
237 } |
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
238 |
3802 | 239 bitten |= (ZZvalue&0xf)<<bitoff; |
240 | |
241 if( bitoff == 0 ) { | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
242 ((__u8*)strm)[co]= bitten; |
3802 | 243 bitten = 0; |
244 bitoff = 8; | |
245 co++; | |
246 } /* "fall thru" */ | |
247 bitoff-=4; | |
248 } | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
249 |
3802 | 250 /* ci must be 0 */ |
251 if( bitoff == 0 ) { | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
252 ((__u8*)strm)[co]= bitten; |
3802 | 253 co++; |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
254 } |
3802 | 255 goto BAUCHWEH; |
256 | |
257 HIRNWEH: | |
258 | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
259 ((__u8*)strm)[co]= bitten; |
3802 | 260 co++; |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
261 |
3802 | 262 |
263 /* bitting is over now we bite */ | |
264 for(; ci>0; ci--) { | |
265 | |
266 ZZvalue = data[RTjpeg_ZZ[ci]]; | |
267 | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
268 if(ZZvalue>0) |
3802 | 269 { |
270 strm[co++]=(__s8)(ZZvalue>127)?127:ZZvalue; | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
271 } |
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
272 else |
3802 | 273 { |
274 strm[co++]=(__s8)(ZZvalue<-128)?-128:ZZvalue; | |
275 } | |
276 | |
277 } | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
278 |
3802 | 279 |
280 BAUCHWEH: | |
281 /* we gotoo much now we are ill */ | |
282 #ifdef SHOWBLOCK | |
283 { | |
284 int i; | |
285 fprintf(stdout, "\nco = '%d'\n", co); | |
286 for (i=0; i < co+2; i++) { | |
287 fprintf(stdout, "%d ", strm[i]); | |
288 } | |
289 fprintf(stdout, "\n\n"); | |
290 } | |
291 #endif | |
292 | |
293 return (int)co; | |
294 } | |
295 | |
296 #else | |
297 | |
28849
87b59e8d3c26
Mark everything not used outside the file as "static"
reimar
parents:
28298
diff
changeset
|
298 static int RTjpeg_b2s(__s16 *data, __s8 *strm, __u8 bt8) |
3802 | 299 { |
300 register int ci, co=1, tmp; | |
301 register __s16 ZZvalue; | |
302 | |
303 #ifdef SHOWBLOCK | |
304 | |
305 int ii; | |
306 for (ii=0; ii < 64; ii++) { | |
307 fprintf(stdout, "%d ", data[RTjpeg_ZZ[ii]]); | |
308 } | |
309 fprintf(stdout, "\n\n"); | |
310 | |
311 #endif | |
312 | |
313 (__u8)strm[0]=(__u8)(data[RTjpeg_ZZ[0]]>254) ? 254:((data[RTjpeg_ZZ[0]]<0)?0:data[RTjpeg_ZZ[0]]); | |
314 | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
315 for(ci=1; ci<=bt8; ci++) |
3802 | 316 { |
317 ZZvalue = data[RTjpeg_ZZ[ci]]; | |
318 | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
319 if(ZZvalue>0) |
3802 | 320 { |
321 strm[co++]=(__s8)(ZZvalue>127)?127:ZZvalue; | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
322 } |
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
323 else |
3802 | 324 { |
325 strm[co++]=(__s8)(ZZvalue<-128)?-128:ZZvalue; | |
326 } | |
327 } | |
328 | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
329 for(; ci<64; ci++) |
3802 | 330 { |
331 ZZvalue = data[RTjpeg_ZZ[ci]]; | |
332 | |
333 if(ZZvalue>0) | |
334 { | |
335 strm[co++]=(__s8)(ZZvalue>63)?63:ZZvalue; | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
336 } |
3802 | 337 else if(ZZvalue<0) |
338 { | |
339 strm[co++]=(__s8)(ZZvalue<-64)?-64:ZZvalue; | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
340 } |
3802 | 341 else /* compress zeros */ |
342 { | |
343 tmp=ci; | |
344 do | |
345 { | |
346 ci++; | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
347 } |
3802 | 348 while((ci<64)&&(data[RTjpeg_ZZ[ci]]==0)); |
349 | |
350 strm[co++]=(__s8)(63+(ci-tmp)); | |
351 ci--; | |
352 } | |
353 } | |
354 return (int)co; | |
355 } | |
356 | |
28849
87b59e8d3c26
Mark everything not used outside the file as "static"
reimar
parents:
28298
diff
changeset
|
357 static int RTjpeg_s2b(__s16 *data, __s8 *strm, __u8 bt8, __u32 *qtbl) |
3802 | 358 { |
359 int ci=1, co=1, tmp; | |
360 register int i; | |
361 | |
362 i=RTjpeg_ZZ[0]; | |
363 data[i]=((__u8)strm[0])*qtbl[i]; | |
364 | |
365 for(co=1; co<=bt8; co++) | |
366 { | |
367 i=RTjpeg_ZZ[co]; | |
368 data[i]=strm[ci++]*qtbl[i]; | |
369 } | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
370 |
3802 | 371 for(; co<64; co++) |
372 { | |
373 if(strm[ci]>63) | |
374 { | |
375 tmp=co+strm[ci]-63; | |
376 for(; co<tmp; co++)data[RTjpeg_ZZ[co]]=0; | |
377 co--; | |
378 } else | |
379 { | |
380 i=RTjpeg_ZZ[co]; | |
381 data[i]=strm[ci]*qtbl[i]; | |
382 } | |
383 ci++; | |
384 } | |
385 return (int)ci; | |
386 } | |
387 #endif | |
388 | |
28298
a7124a264ea6
Completely get rid of MMX define, use HAVE_MMX define instead.
gpoirier
parents:
28296
diff
changeset
|
389 #if HAVE_MMX |
28849
87b59e8d3c26
Mark everything not used outside the file as "static"
reimar
parents:
28298
diff
changeset
|
390 static void RTjpeg_quant_init(void) |
3802 | 391 { |
392 int i; | |
393 __s16 *qtbl; | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
394 |
3802 | 395 qtbl=(__s16 *)RTjpeg_lqt; |
396 for(i=0; i<64; i++)qtbl[i]=(__s16)RTjpeg_lqt[i]; | |
397 | |
398 qtbl=(__s16 *)RTjpeg_cqt; | |
399 for(i=0; i<64; i++)qtbl[i]=(__s16)RTjpeg_cqt[i]; | |
400 } | |
401 | |
/* Four packed 16-bit words of 1; RTjpeg_quant() keeps this in mm6 and
 * interleaves it with the quantiser words. */
static mmx_t RTjpeg_ones={0x0001000100010001LL};
/* Four packed 16-bit words of 0x7fff (32767); RTjpeg_quant() keeps this in
 * mm7 and interleaves it with the block words, so that pmaddwd yields
 * 32767 + block*quant per lane. */
static mmx_t RTjpeg_half={0x7fff7fff7fff7fffLL};
3802 | 404 |
28849
87b59e8d3c26
Mark everything not used outside the file as "static"
reimar
parents:
28298
diff
changeset
|
405 static void RTjpeg_quant(__s16 *block, __s32 *qtbl) |
3802 | 406 { |
407 int i; | |
408 mmx_t *bl, *ql; | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
409 |
3802 | 410 ql=(mmx_t *)qtbl; |
411 bl=(mmx_t *)block; | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
412 |
3802 | 413 movq_m2r(RTjpeg_ones, mm6); |
414 movq_m2r(RTjpeg_half, mm7); | |
415 | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
416 for(i=16; i; i--) |
3802 | 417 { |
418 movq_m2r(*(ql++), mm0); /* quant vals (4) */ | |
419 movq_m2r(*bl, mm2); /* block vals (4) */ | |
420 movq_r2r(mm0, mm1); | |
421 movq_r2r(mm2, mm3); | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
422 |
3802 | 423 punpcklwd_r2r(mm6, mm0); /* 1 qb 1 qa */ |
424 punpckhwd_r2r(mm6, mm1); /* 1 qd 1 qc */ | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
425 |
3802 | 426 punpcklwd_r2r(mm7, mm2); /* 32767 bb 32767 ba */ |
427 punpckhwd_r2r(mm7, mm3); /* 32767 bd 32767 bc */ | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
428 |
3802 | 429 pmaddwd_r2r(mm2, mm0); /* 32767+bb*qb 32767+ba*qa */ |
430 pmaddwd_r2r(mm3, mm1); /* 32767+bd*qd 32767+bc*qc */ | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
431 |
3802 | 432 psrad_i2r(16, mm0); |
433 psrad_i2r(16, mm1); | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
434 |
3802 | 435 packssdw_r2r(mm1, mm0); |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
436 |
3802 | 437 movq_r2m(mm0, *(bl++)); |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
438 |
3802 | 439 } |
440 } | |
441 #else | |
28849
87b59e8d3c26
Mark everything not used outside the file as "static"
reimar
parents:
28298
diff
changeset
|
/* Non-MMX build: the C quantiser reads the 32-bit tables directly, so no
 * repacking is needed and this is intentionally a no-op. */
static void RTjpeg_quant_init(void)
{
    /* nothing to do */
}
445 | |
28849
87b59e8d3c26
Mark everything not used outside the file as "static"
reimar
parents:
28298
diff
changeset
|
446 static void RTjpeg_quant(__s16 *block, __s32 *qtbl) |
3802 | 447 { |
448 int i; | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
449 |
3802 | 450 for(i=0; i<64; i++) |
451 block[i]=(__s16)((block[i]*qtbl[i]+32767)>>16); | |
452 } | |
453 #endif | |
454 | |
455 /* | |
456 * Perform the forward DCT on one block of samples. | |
457 */ | |
28298
a7124a264ea6
Completely get rid of MMX define, use HAVE_MMX define instead.
gpoirier
parents:
28296
diff
changeset
|
458 #if HAVE_MMX |
/*
 * Constants for the MMX forward DCT.  Each holds one 16-bit value repeated
 * in all four words.  Divided by 16384 the values are approximately
 * 0.7071 (C4), 0.3827 (C6), 0.5412 (C2mC6) and 1.3066 (C2pC6), i.e. the same
 * factors as the FIX_* constants of the C path.
 * NOTE(review): presumably 14-bit fixed point -- confirm against the MMX
 * DCT code that consumes them.
 */
static mmx_t RTjpeg_C4 ={0x2D412D412D412D41LL};
static mmx_t RTjpeg_C6 ={0x187E187E187E187ELL};
static mmx_t RTjpeg_C2mC6={0x22A322A322A322A3LL};
static mmx_t RTjpeg_C2pC6={0x539F539F539F539FLL};
/* All-zero register image; RTjpeg_dctY() unpacks bytes against it to
 * widen 8-bit samples to 16 bits. */
static mmx_t RTjpeg_zero ={0x0000000000000000LL};
3802 | 464 |
465 #else | |
466 | |
/* DCT multipliers in fixed point with 8 fractional bits (value * 256). */
#define FIX_0_382683433 ((__s32) 98) /* FIX(0.382683433) */
#define FIX_0_541196100 ((__s32) 139) /* FIX(0.541196100) */
#define FIX_0_707106781 ((__s32) 181) /* FIX(0.707106781) */
#define FIX_1_306562965 ((__s32) 334) /* FIX(1.306562965) */

/* Round and drop 8 (DESCALE10) or 16 (DESCALE20) fractional bits, then
 * narrow to 16 bits.  The whole expansion is parenthesised so the macros
 * can be used safely inside larger expressions. */
#define DESCALE10(x) ((__s16)(((x)+128) >> 8))
#define DESCALE20(x) ((__s16)(((x)+32768) >> 16))
/* 32-bit product of a variable and a FIX_* constant.  The second parameter
 * is called cnst because const is a C keyword. */
#define D_MULTIPLY(var,cnst) ((__s32) ((var) * (cnst)))
475 #endif | |
476 | |
28849
87b59e8d3c26
Mark everything not used outside the file as "static"
reimar
parents:
28298
diff
changeset
|
477 static void RTjpeg_dct_init(void) |
3802 | 478 { |
479 int i; | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
480 |
3802 | 481 for(i=0; i<64; i++) |
482 { | |
483 RTjpeg_lqt[i]=(((__u64)RTjpeg_lqt[i]<<32)/RTjpeg_aan_tab[i]); | |
484 RTjpeg_cqt[i]=(((__u64)RTjpeg_cqt[i]<<32)/RTjpeg_aan_tab[i]); | |
485 } | |
486 } | |
487 | |
28849
87b59e8d3c26
Mark everything not used outside the file as "static"
reimar
parents:
28298
diff
changeset
|
488 static void RTjpeg_dctY(__u8 *idata, __s16 *odata, int rskip) |
3802 | 489 { |
28298
a7124a264ea6
Completely get rid of MMX define, use HAVE_MMX define instead.
gpoirier
parents:
28296
diff
changeset
|
490 #if !HAVE_MMX |
3802 | 491 __s32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; |
492 __s32 tmp10, tmp11, tmp12, tmp13; | |
493 __s32 z1, z2, z3, z4, z5, z11, z13; | |
494 __u8 *idataptr; | |
495 __s16 *odataptr; | |
496 __s32 *wsptr; | |
497 int ctr; | |
498 | |
499 idataptr = idata; | |
500 wsptr = RTjpeg_ws; | |
501 for (ctr = 7; ctr >= 0; ctr--) { | |
502 tmp0 = idataptr[0] + idataptr[7]; | |
503 tmp7 = idataptr[0] - idataptr[7]; | |
504 tmp1 = idataptr[1] + idataptr[6]; | |
505 tmp6 = idataptr[1] - idataptr[6]; | |
506 tmp2 = idataptr[2] + idataptr[5]; | |
507 tmp5 = idataptr[2] - idataptr[5]; | |
508 tmp3 = idataptr[3] + idataptr[4]; | |
509 tmp4 = idataptr[3] - idataptr[4]; | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
510 |
3802 | 511 tmp10 = (tmp0 + tmp3); /* phase 2 */ |
512 tmp13 = tmp0 - tmp3; | |
513 tmp11 = (tmp1 + tmp2); | |
514 tmp12 = tmp1 - tmp2; | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
515 |
3802 | 516 wsptr[0] = (tmp10 + tmp11)<<8; /* phase 3 */ |
517 wsptr[4] = (tmp10 - tmp11)<<8; | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
518 |
3802 | 519 z1 = D_MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */ |
520 wsptr[2] = (tmp13<<8) + z1; /* phase 5 */ | |
521 wsptr[6] = (tmp13<<8) - z1; | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
522 |
3802 | 523 tmp10 = tmp4 + tmp5; /* phase 2 */ |
524 tmp11 = tmp5 + tmp6; | |
525 tmp12 = tmp6 + tmp7; | |
526 | |
527 z5 = D_MULTIPLY(tmp10 - tmp12, FIX_0_382683433); /* c6 */ | |
528 z2 = D_MULTIPLY(tmp10, FIX_0_541196100) + z5; /* c2-c6 */ | |
529 z4 = D_MULTIPLY(tmp12, FIX_1_306562965) + z5; /* c2+c6 */ | |
530 z3 = D_MULTIPLY(tmp11, FIX_0_707106781); /* c4 */ | |
531 | |
532 z11 = (tmp7<<8) + z3; /* phase 5 */ | |
533 z13 = (tmp7<<8) - z3; | |
534 | |
535 wsptr[5] = z13 + z2; /* phase 6 */ | |
536 wsptr[3] = z13 - z2; | |
537 wsptr[1] = z11 + z4; | |
538 wsptr[7] = z11 - z4; | |
539 | |
540 idataptr += rskip<<3; /* advance pointer to next row */ | |
541 wsptr += 8; | |
542 } | |
543 | |
544 wsptr = RTjpeg_ws; | |
545 odataptr=odata; | |
546 for (ctr = 7; ctr >= 0; ctr--) { | |
547 tmp0 = wsptr[0] + wsptr[56]; | |
548 tmp7 = wsptr[0] - wsptr[56]; | |
549 tmp1 = wsptr[8] + wsptr[48]; | |
550 tmp6 = wsptr[8] - wsptr[48]; | |
551 tmp2 = wsptr[16] + wsptr[40]; | |
552 tmp5 = wsptr[16] - wsptr[40]; | |
553 tmp3 = wsptr[24] + wsptr[32]; | |
554 tmp4 = wsptr[24] - wsptr[32]; | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
555 |
3802 | 556 tmp10 = tmp0 + tmp3; /* phase 2 */ |
557 tmp13 = tmp0 - tmp3; | |
558 tmp11 = tmp1 + tmp2; | |
559 tmp12 = tmp1 - tmp2; | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
560 |
3802 | 561 odataptr[0] = DESCALE10(tmp10 + tmp11); /* phase 3 */ |
562 odataptr[32] = DESCALE10(tmp10 - tmp11); | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
563 |
3802 | 564 z1 = D_MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */ |
565 odataptr[16] = DESCALE20((tmp13<<8) + z1); /* phase 5 */ | |
566 odataptr[48] = DESCALE20((tmp13<<8) - z1); | |
567 | |
568 tmp10 = tmp4 + tmp5; /* phase 2 */ | |
569 tmp11 = tmp5 + tmp6; | |
570 tmp12 = tmp6 + tmp7; | |
571 | |
572 z5 = D_MULTIPLY(tmp10 - tmp12, FIX_0_382683433); /* c6 */ | |
573 z2 = D_MULTIPLY(tmp10, FIX_0_541196100) + z5; /* c2-c6 */ | |
574 z4 = D_MULTIPLY(tmp12, FIX_1_306562965) + z5; /* c2+c6 */ | |
575 z3 = D_MULTIPLY(tmp11, FIX_0_707106781); /* c4 */ | |
576 | |
577 z11 = (tmp7<<8) + z3; /* phase 5 */ | |
578 z13 = (tmp7<<8) - z3; | |
579 | |
580 odataptr[40] = DESCALE20(z13 + z2); /* phase 6 */ | |
581 odataptr[24] = DESCALE20(z13 - z2); | |
582 odataptr[8] = DESCALE20(z11 + z4); | |
583 odataptr[56] = DESCALE20(z11 - z4); | |
584 | |
585 odataptr++; /* advance pointer to next column */ | |
586 wsptr++; | |
587 } | |
588 #else | |
589 volatile mmx_t tmp6, tmp7; | |
590 register mmx_t *dataptr = (mmx_t *)odata; | |
591 mmx_t *idata2 = (mmx_t *)idata; | |
592 | |
593 // first copy the input 8 bit to the destination 16 bits | |
594 | |
595 movq_m2r(RTjpeg_zero, mm2); | |
596 | |
597 | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
598 movq_m2r(*idata2, mm0); |
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
599 movq_r2r(mm0, mm1); |
3802 | 600 |
601 punpcklbw_r2r(mm2, mm0); | |
602 movq_r2m(mm0, *(dataptr)); | |
603 | |
604 punpckhbw_r2r(mm2, mm1); | |
605 movq_r2m(mm1, *(dataptr+1)); | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
606 |
3802 | 607 idata2 += rskip; |
608 | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
609 movq_m2r(*idata2, mm0); |
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
610 movq_r2r(mm0, mm1); |
3802 | 611 |
612 punpcklbw_r2r(mm2, mm0); | |
613 movq_r2m(mm0, *(dataptr+2)); | |
614 | |
615 punpckhbw_r2r(mm2, mm1); | |
616 movq_r2m(mm1, *(dataptr+3)); | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
617 |
3802 | 618 idata2 += rskip; |
619 | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
620 movq_m2r(*idata2, mm0); |
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
621 movq_r2r(mm0, mm1); |
3802 | 622 |
623 punpcklbw_r2r(mm2, mm0); | |
624 movq_r2m(mm0, *(dataptr+4)); | |
625 | |
626 punpckhbw_r2r(mm2, mm1); | |
627 movq_r2m(mm1, *(dataptr+5)); | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
628 |
3802 | 629 idata2 += rskip; |
630 | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
631 movq_m2r(*idata2, mm0); |
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
632 movq_r2r(mm0, mm1); |
3802 | 633 |
634 punpcklbw_r2r(mm2, mm0); | |
635 movq_r2m(mm0, *(dataptr+6)); | |
636 | |
637 punpckhbw_r2r(mm2, mm1); | |
638 movq_r2m(mm1, *(dataptr+7)); | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
639 |
3802 | 640 idata2 += rskip; |
641 | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
642 movq_m2r(*idata2, mm0); |
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
643 movq_r2r(mm0, mm1); |
3802 | 644 |
645 punpcklbw_r2r(mm2, mm0); | |
646 movq_r2m(mm0, *(dataptr+8)); | |
647 | |
648 punpckhbw_r2r(mm2, mm1); | |
649 movq_r2m(mm1, *(dataptr+9)); | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
650 |
3802 | 651 idata2 += rskip; |
652 | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
653 movq_m2r(*idata2, mm0); |
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
654 movq_r2r(mm0, mm1); |
3802 | 655 |
656 punpcklbw_r2r(mm2, mm0); | |
657 movq_r2m(mm0, *(dataptr+10)); | |
658 | |
659 punpckhbw_r2r(mm2, mm1); | |
660 movq_r2m(mm1, *(dataptr+11)); | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
661 |
3802 | 662 idata2 += rskip; |
663 | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
664 movq_m2r(*idata2, mm0); |
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
665 movq_r2r(mm0, mm1); |
3802 | 666 |
667 punpcklbw_r2r(mm2, mm0); | |
668 movq_r2m(mm0, *(dataptr+12)); | |
669 | |
670 punpckhbw_r2r(mm2, mm1); | |
671 movq_r2m(mm1, *(dataptr+13)); | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
672 |
3802 | 673 idata2 += rskip; |
674 | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
675 movq_m2r(*idata2, mm0); |
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
676 movq_r2r(mm0, mm1); |
3802 | 677 |
678 punpcklbw_r2r(mm2, mm0); | |
679 movq_r2m(mm0, *(dataptr+14)); | |
680 | |
681 punpckhbw_r2r(mm2, mm1); | |
682 movq_r2m(mm1, *(dataptr+15)); | |
683 | |
684 /* Start Transpose to do calculations on rows */ | |
685 | |
686 movq_m2r(*(dataptr+9), mm7); // m03:m02|m01:m00 - first line (line 4)and copy into m5 | |
687 | |
688 movq_m2r(*(dataptr+13), mm6); // m23:m22|m21:m20 - third line (line 6)and copy into m2 | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
689 movq_r2r(mm7, mm5); |
3802 | 690 |
691 punpcklwd_m2r(*(dataptr+11), mm7); // m11:m01|m10:m00 - interleave first and second lines | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
692 movq_r2r(mm6, mm2); |
3802 | 693 |
694 punpcklwd_m2r(*(dataptr+15), mm6); // m31:m21|m30:m20 - interleave third and fourth lines | |
695 movq_r2r(mm7, mm1); | |
696 | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
697 movq_m2r(*(dataptr+11), mm3); // m13:m13|m11:m10 - second line |
3802 | 698 punpckldq_r2r(mm6, mm7); // m30:m20|m10:m00 - interleave to produce result 1 |
699 | |
700 movq_m2r(*(dataptr+15), mm0); // m13:m13|m11:m10 - fourth line | |
701 punpckhdq_r2r(mm6, mm1); // m31:m21|m11:m01 - interleave to produce result 2 | |
702 | |
703 movq_r2m(mm7,*(dataptr+9)); // write result 1 | |
704 punpckhwd_r2r(mm3, mm5); // m13:m03|m12:m02 - interleave first and second lines | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
705 |
3802 | 706 movq_r2m(mm1,*(dataptr+11)); // write result 2 |
707 punpckhwd_r2r(mm0, mm2); // m33:m23|m32:m22 - interleave third and fourth lines | |
708 | |
709 movq_r2r(mm5, mm1); | |
710 punpckldq_r2r(mm2, mm5); // m32:m22|m12:m02 - interleave to produce result 3 | |
711 | |
712 movq_m2r(*(dataptr+1), mm0); // m03:m02|m01:m00 - first line, 4x4 | |
713 punpckhdq_r2r(mm2, mm1); // m33:m23|m13:m03 - interleave to produce result 4 | |
714 | |
715 movq_r2m(mm5,*(dataptr+13)); // write result 3 | |
716 | |
717 // last 4x4 done | |
718 | |
719 movq_r2m(mm1, *(dataptr+15)); // write result 4, last 4x4 | |
720 | |
721 movq_m2r(*(dataptr+5), mm2); // m23:m22|m21:m20 - third line | |
722 movq_r2r(mm0, mm6); | |
723 | |
724 punpcklwd_m2r(*(dataptr+3), mm0); // m11:m01|m10:m00 - interleave first and second lines | |
725 movq_r2r(mm2, mm7); | |
726 | |
727 punpcklwd_m2r(*(dataptr+7), mm2); // m31:m21|m30:m20 - interleave third and fourth lines | |
728 movq_r2r(mm0, mm4); | |
729 | |
730 // | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
731 movq_m2r(*(dataptr+8), mm1); // n03:n02|n01:n00 - first line |
3802 | 732 punpckldq_r2r(mm2, mm0); // m30:m20|m10:m00 - interleave to produce first result |
733 | |
734 movq_m2r(*(dataptr+12), mm3); // n23:n22|n21:n20 - third line | |
735 punpckhdq_r2r(mm2, mm4); // m31:m21|m11:m01 - interleave to produce second result | |
736 | |
737 punpckhwd_m2r(*(dataptr+3), mm6); // m13:m03|m12:m02 - interleave first and second lines | |
738 movq_r2r(mm1, mm2); // copy first line | |
739 | |
740 punpckhwd_m2r(*(dataptr+7), mm7); // m33:m23|m32:m22 - interleave third and fourth lines | |
741 movq_r2r(mm6, mm5); // copy first intermediate result | |
742 | |
743 movq_r2m(mm0, *(dataptr+8)); // write result 1 | |
744 punpckhdq_r2r(mm7, mm5); // m33:m23|m13:m03 - produce third result | |
745 | |
746 punpcklwd_m2r(*(dataptr+10), mm1); // n11:n01|n10:n00 - interleave first and second lines | |
747 movq_r2r(mm3, mm0); // copy third line | |
748 | |
749 punpckhwd_m2r(*(dataptr+10), mm2); // n13:n03|n12:n02 - interleave first and second lines | |
750 | |
751 movq_r2m(mm4, *(dataptr+10)); // write result 2 out | |
752 punpckldq_r2r(mm7, mm6); // m32:m22|m12:m02 - produce fourth result | |
753 | |
754 punpcklwd_m2r(*(dataptr+14), mm3); // n31:n21|n30:n20 - interleave third and fourth lines | |
755 movq_r2r(mm1, mm4); | |
756 | |
757 movq_r2m(mm6, *(dataptr+12)); // write result 3 out | |
758 punpckldq_r2r(mm3, mm1); // n30:n20|n10:n00 - produce first result | |
759 | |
760 punpckhwd_m2r(*(dataptr+14), mm0); // n33:n23|n32:n22 - interleave third and fourth lines | |
761 movq_r2r(mm2, mm6); | |
762 | |
763 movq_r2m(mm5, *(dataptr+14)); // write result 4 out | |
764 punpckhdq_r2r(mm3, mm4); // n31:n21|n11:n01- produce second result | |
765 | |
766 movq_r2m(mm1, *(dataptr+1)); // write result 5 out - (first result for other 4 x 4 block) | |
767 punpckldq_r2r(mm0, mm2); // n32:n22|n12:n02- produce third result | |
768 | |
769 movq_r2m(mm4, *(dataptr+3)); // write result 6 out | |
770 punpckhdq_r2r(mm0, mm6); // n33:n23|n13:n03 - produce fourth result | |
771 | |
772 movq_r2m(mm2, *(dataptr+5)); // write result 7 out | |
773 | |
774 movq_m2r(*dataptr, mm0); // m03:m02|m01:m00 - first line, first 4x4 | |
775 | |
776 movq_r2m(mm6, *(dataptr+7)); // write result 8 out | |
777 | |
778 | |
779 // Do first 4x4 quadrant, which is used in the beginning of the DCT: | |
780 | |
781 movq_m2r(*(dataptr+4), mm7); // m23:m22|m21:m20 - third line | |
782 movq_r2r(mm0, mm2); | |
783 | |
784 punpcklwd_m2r(*(dataptr+2), mm0); // m11:m01|m10:m00 - interleave first and second lines | |
785 movq_r2r(mm7, mm4); | |
786 | |
787 punpcklwd_m2r(*(dataptr+6), mm7); // m31:m21|m30:m20 - interleave third and fourth lines | |
788 movq_r2r(mm0, mm1); | |
789 | |
790 movq_m2r(*(dataptr+2), mm6); // m13:m12|m11:m10 - second line | |
791 punpckldq_r2r(mm7, mm0); // m30:m20|m10:m00 - interleave to produce result 1 | |
792 | |
793 movq_m2r(*(dataptr+6), mm5); // m33:m32|m31:m30 - fourth line | |
794 punpckhdq_r2r(mm7, mm1); // m31:m21|m11:m01 - interleave to produce result 2 | |
795 | |
796 movq_r2r(mm0, mm7); // write result 1 | |
797 punpckhwd_r2r(mm6, mm2); // m13:m03|m12:m02 - interleave first and second lines | |
798 | |
799 psubw_m2r(*(dataptr+14), mm7); // tmp07=x0-x7 /* Stage 1 */ | |
800 movq_r2r(mm1, mm6); // write result 2 | |
801 | |
802 paddw_m2r(*(dataptr+14), mm0); // tmp00=x0+x7 /* Stage 1 */ | |
803 punpckhwd_r2r(mm5, mm4); // m33:m23|m32:m22 - interleave third and fourth lines | |
804 | |
805 paddw_m2r(*(dataptr+12), mm1); // tmp01=x1+x6 /* Stage 1 */ | |
806 movq_r2r(mm2, mm3); // copy first intermediate result | |
807 | |
808 psubw_m2r(*(dataptr+12), mm6); // tmp06=x1-x6 /* Stage 1 */ | |
809 punpckldq_r2r(mm4, mm2); // m32:m22|m12:m02 - interleave to produce result 3 | |
810 | |
811 movq_r2m(mm7, tmp7); | |
812 movq_r2r(mm2, mm5); // write result 3 | |
813 | |
814 movq_r2m(mm6, tmp6); | |
815 punpckhdq_r2r(mm4, mm3); // m33:m23|m13:m03 - interleave to produce result 4 | |
816 | |
817 paddw_m2r(*(dataptr+10), mm2); // tmp02=x2+x5 /* Stage 1 */ | |
818 movq_r2r(mm3, mm4); // write result 4 | |
819 | |
820 /************************************************************************************************ | |
821 End of Transpose | |
822 ************************************************************************************************/ | |
823 | |
824 | |
825 paddw_m2r(*(dataptr+8), mm3); // tmp03=x3+x4 /* stage 1*/ | |
826 movq_r2r(mm0, mm7); | |
827 | |
828 psubw_m2r(*(dataptr+8), mm4); // tmp04=x3-x4 /* stage 1*/ | |
829 movq_r2r(mm1, mm6); | |
830 | |
831 paddw_r2r(mm3, mm0); // tmp10 = tmp00 + tmp03 /* even 2 */ | |
832 psubw_r2r(mm3, mm7); // tmp13 = tmp00 - tmp03 /* even 2 */ | |
833 | |
834 psubw_r2r(mm2, mm6); // tmp12 = tmp01 - tmp02 /* even 2 */ | |
835 paddw_r2r(mm2, mm1); // tmp11 = tmp01 + tmp02 /* even 2 */ | |
836 | |
837 psubw_m2r(*(dataptr+10), mm5); // tmp05=x2-x5 /* stage 1*/ | |
838 paddw_r2r(mm7, mm6); // tmp12 + tmp13 | |
839 | |
840 /* stage 3 */ | |
841 | |
842 movq_m2r(tmp6, mm2); | |
843 movq_r2r(mm0, mm3); | |
844 | |
845 psllw_i2r(2, mm6); // m8 * 2^2 | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
846 paddw_r2r(mm1, mm0); |
3802 | 847 |
848 pmulhw_m2r(RTjpeg_C4, mm6); // z1 | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
849 psubw_r2r(mm1, mm3); |
3802 | 850 |
851 movq_r2m(mm0, *dataptr); | |
852 movq_r2r(mm7, mm0); | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
853 |
3802 | 854 /* Odd part */ |
855 movq_r2m(mm3, *(dataptr+8)); | |
856 paddw_r2r(mm5, mm4); // tmp10 | |
857 | |
858 movq_m2r(tmp7, mm3); | |
859 paddw_r2r(mm6, mm0); // tmp32 | |
860 | |
861 paddw_r2r(mm2, mm5); // tmp11 | |
862 psubw_r2r(mm6, mm7); // tmp33 | |
863 | |
864 movq_r2m(mm0, *(dataptr+4)); | |
865 paddw_r2r(mm3, mm2); // tmp12 | |
866 | |
867 /* stage 4 */ | |
868 | |
869 movq_r2m(mm7, *(dataptr+12)); | |
870 movq_r2r(mm4, mm1); // copy of tmp10 | |
871 | |
872 psubw_r2r(mm2, mm1); // tmp10 - tmp12 | |
873 psllw_i2r(2, mm4); // m8 * 2^2 | |
874 | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
875 movq_m2r(RTjpeg_C2mC6, mm0); |
3802 | 876 psllw_i2r(2, mm1); |
877 | |
878 pmulhw_m2r(RTjpeg_C6, mm1); // z5 | |
879 psllw_i2r(2, mm2); | |
880 | |
881 pmulhw_r2r(mm0, mm4); // z5 | |
882 | |
883 /* stage 5 */ | |
884 | |
885 pmulhw_m2r(RTjpeg_C2pC6, mm2); | |
886 psllw_i2r(2, mm5); | |
887 | |
888 pmulhw_m2r(RTjpeg_C4, mm5); // z3 | |
889 movq_r2r(mm3, mm0); // copy tmp7 | |
890 | |
891 movq_m2r(*(dataptr+1), mm7); | |
892 paddw_r2r(mm1, mm4); // z2 | |
893 | |
894 paddw_r2r(mm1, mm2); // z4 | |
895 | |
896 paddw_r2r(mm5, mm0); // z11 | |
897 psubw_r2r(mm5, mm3); // z13 | |
898 | |
899 /* stage 6 */ | |
900 | |
901 movq_r2r(mm3, mm5); // copy z13 | |
902 psubw_r2r(mm4, mm3); // y3=z13 - z2 | |
903 | |
904 paddw_r2r(mm4, mm5); // y5=z13 + z2 | |
905 movq_r2r(mm0, mm6); // copy z11 | |
906 | |
907 movq_r2m(mm3, *(dataptr+6)); //save y3 | |
908 psubw_r2r(mm2, mm0); // y7=z11 - z4 | |
909 | |
910 movq_r2m(mm5, *(dataptr+10)); //save y5 | |
911 paddw_r2r(mm2, mm6); // y1=z11 + z4 | |
912 | |
913 movq_r2m(mm0, *(dataptr+14)); //save y7 | |
914 | |
915 /************************************************ | |
916 * End of 1st 4 rows | |
917 ************************************************/ | |
918 | |
919 movq_m2r(*(dataptr+3), mm1); // load x1 /* stage 1 */ | |
920 movq_r2r(mm7, mm0); // copy x0 | |
921 | |
922 movq_r2m(mm6, *(dataptr+2)); //save y1 | |
923 | |
924 movq_m2r(*(dataptr+5), mm2); // load x2 /* stage 1 */ | |
925 movq_r2r(mm1, mm6); // copy x1 | |
926 | |
927 paddw_m2r(*(dataptr+15), mm0); // tmp00 = x0 + x7 | |
928 | |
929 movq_m2r(*(dataptr+7), mm3); // load x3 /* stage 1 */ | |
930 movq_r2r(mm2, mm5); // copy x2 | |
931 | |
932 psubw_m2r(*(dataptr+15), mm7); // tmp07 = x0 - x7 | |
933 movq_r2r(mm3, mm4); // copy x3 | |
934 | |
935 paddw_m2r(*(dataptr+13), mm1); // tmp01 = x1 + x6 | |
936 | |
937 movq_r2m(mm7, tmp7); // save tmp07 | |
938 movq_r2r(mm0, mm7); // copy tmp00 | |
939 | |
940 psubw_m2r(*(dataptr+13), mm6); // tmp06 = x1 - x6 | |
941 | |
942 /* stage 2, Even Part */ | |
943 | |
944 paddw_m2r(*(dataptr+9), mm3); // tmp03 = x3 + x4 | |
945 | |
946 movq_r2m(mm6, tmp6); // save tmp06 | |
947 movq_r2r(mm1, mm6); // copy tmp01 | |
948 | |
949 paddw_m2r(*(dataptr+11), mm2); // tmp02 = x2 + x5 | |
950 paddw_r2r(mm3, mm0); // tmp10 = tmp00 + tmp03 | |
951 | |
952 psubw_r2r(mm3, mm7); // tmp13 = tmp00 - tmp03 | |
953 | |
954 psubw_m2r(*(dataptr+9), mm4); // tmp04 = x3 - x4 | |
955 psubw_r2r(mm2, mm6); // tmp12 = tmp01 - tmp02 | |
956 | |
957 paddw_r2r(mm2, mm1); // tmp11 = tmp01 + tmp02 | |
958 | |
959 psubw_m2r(*(dataptr+11), mm5); // tmp05 = x2 - x5 | |
960 paddw_r2r(mm7, mm6); // tmp12 + tmp13 | |
961 | |
962 /* stage 3, Even and stage 4 & 5 even */ | |
963 | |
964 movq_m2r(tmp6, mm2); // load tmp6 | |
965 movq_r2r(mm0, mm3); // copy tmp10 | |
966 | |
967 psllw_i2r(2, mm6); // shift z1 | |
968 paddw_r2r(mm1, mm0); // y0=tmp10 + tmp11 | |
969 | |
970 pmulhw_m2r(RTjpeg_C4, mm6); // z1 | |
971 psubw_r2r(mm1, mm3); // y4=tmp10 - tmp11 | |
972 | |
973 movq_r2m(mm0, *(dataptr+1)); //save y0 | |
974 movq_r2r(mm7, mm0); // copy tmp13 | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
975 |
3802 | 976 /* odd part */ |
977 | |
978 movq_r2m(mm3, *(dataptr+9)); //save y4 | |
979 paddw_r2r(mm5, mm4); // tmp10 = tmp4 + tmp5 | |
980 | |
981 movq_m2r(tmp7, mm3); // load tmp7 | |
982 paddw_r2r(mm6, mm0); // tmp32 = tmp13 + z1 | |
983 | |
984 paddw_r2r(mm2, mm5); // tmp11 = tmp5 + tmp6 | |
985 psubw_r2r(mm6, mm7); // tmp33 = tmp13 - z1 | |
986 | |
987 movq_r2m(mm0, *(dataptr+5)); //save y2 | |
988 paddw_r2r(mm3, mm2); // tmp12 = tmp6 + tmp7 | |
989 | |
990 /* stage 4 */ | |
991 | |
992 movq_r2m(mm7, *(dataptr+13)); //save y6 | |
993 movq_r2r(mm4, mm1); // copy tmp10 | |
994 | |
995 psubw_r2r(mm2, mm1); // tmp10 - tmp12 | |
996 psllw_i2r(2, mm4); // shift tmp10 | |
997 | |
998 movq_m2r(RTjpeg_C2mC6, mm0); // load C2mC6 | |
999 psllw_i2r(2, mm1); // shift (tmp10-tmp12) | |
1000 | |
1001 pmulhw_m2r(RTjpeg_C6, mm1); // z5 | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
1002 psllw_i2r(2, mm5); // prepare for multiply |
3802 | 1003 |
1004 pmulhw_r2r(mm0, mm4); // multiply by converted real | |
1005 | |
1006 /* stage 5 */ | |
1007 | |
1008 pmulhw_m2r(RTjpeg_C4, mm5); // z3 | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
1009 psllw_i2r(2, mm2); // prepare for multiply |
3802 | 1010 |
1011 pmulhw_m2r(RTjpeg_C2pC6, mm2); // multiply | |
1012 movq_r2r(mm3, mm0); // copy tmp7 | |
1013 | |
1014 movq_m2r(*(dataptr+9), mm7); // m03:m02|m01:m00 - first line (line 4)and copy into mm7 | |
1015 paddw_r2r(mm1, mm4); // z2 | |
1016 | |
1017 paddw_r2r(mm5, mm0); // z11 | |
1018 psubw_r2r(mm5, mm3); // z13 | |
1019 | |
1020 /* stage 6 */ | |
1021 | |
1022 movq_r2r(mm3, mm5); // copy z13 | |
1023 paddw_r2r(mm1, mm2); // z4 | |
1024 | |
1025 movq_r2r(mm0, mm6); // copy z11 | |
1026 psubw_r2r(mm4, mm5); // y3 | |
1027 | |
1028 paddw_r2r(mm2, mm6); // y1 | |
1029 paddw_r2r(mm4, mm3); // y5 | |
1030 | |
1031 movq_r2m(mm5, *(dataptr+7)); //save y3 | |
1032 | |
1033 movq_r2m(mm6, *(dataptr+3)); //save y1 | |
1034 psubw_r2r(mm2, mm0); // y7 | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
1035 |
3802 | 1036 /************************************************************************************************ |
1037 Start of Transpose | |
1038 ************************************************************************************************/ | |
1039 | |
1040 movq_m2r(*(dataptr+13), mm6); // m23:m22|m21:m20 - third line (line 6)and copy into m2 | |
1041 movq_r2r(mm7, mm5); // copy first line | |
1042 | |
1043 punpcklwd_r2r(mm3, mm7); // m11:m01|m10:m00 - interleave first and second lines | |
1044 movq_r2r(mm6, mm2); // copy third line | |
1045 | |
1046 punpcklwd_r2r(mm0, mm6); // m31:m21|m30:m20 - interleave third and fourth lines | |
1047 movq_r2r(mm7, mm1); // copy first intermediate result | |
1048 | |
1049 punpckldq_r2r(mm6, mm7); // m30:m20|m10:m00 - interleave to produce result 1 | |
1050 | |
1051 punpckhdq_r2r(mm6, mm1); // m31:m21|m11:m01 - interleave to produce result 2 | |
1052 | |
1053 movq_r2m(mm7, *(dataptr+9)); // write result 1 | |
1054 punpckhwd_r2r(mm3, mm5); // m13:m03|m12:m02 - interleave first and second lines | |
1055 | |
1056 movq_r2m(mm1, *(dataptr+11)); // write result 2 | |
1057 punpckhwd_r2r(mm0, mm2); // m33:m23|m32:m22 - interleave third and fourth lines | |
1058 | |
1059 movq_r2r(mm5, mm1); // copy first intermediate result | |
1060 punpckldq_r2r(mm2, mm5); // m32:m22|m12:m02 - interleave to produce result 3 | |
1061 | |
1062 movq_m2r(*(dataptr+1), mm0); // m03:m02|m01:m00 - first line, 4x4 | |
1063 punpckhdq_r2r(mm2, mm1); // m33:m23|m13:m03 - interleave to produce result 4 | |
1064 | |
1065 movq_r2m(mm5, *(dataptr+13)); // write result 3 | |
1066 | |
1067 /****** last 4x4 done */ | |
1068 | |
1069 movq_r2m(mm1, *(dataptr+15)); // write result 4, last 4x4 | |
1070 | |
1071 movq_m2r(*(dataptr+5), mm2); // m23:m22|m21:m20 - third line | |
1072 movq_r2r(mm0, mm6); // copy first line | |
1073 | |
1074 punpcklwd_m2r(*(dataptr+3), mm0); // m11:m01|m10:m00 - interleave first and second lines | |
1075 movq_r2r(mm2, mm7); // copy third line | |
1076 | |
1077 punpcklwd_m2r(*(dataptr+7), mm2); // m31:m21|m30:m20 - interleave third and fourth lines | |
1078 movq_r2r(mm0, mm4); // copy first intermediate result | |
1079 | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
1080 |
3802 | 1081 |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
1082 movq_m2r(*(dataptr+8), mm1); // n03:n02|n01:n00 - first line |
3802 | 1083 punpckldq_r2r(mm2, mm0); // m30:m20|m10:m00 - interleave to produce first result |
1084 | |
1085 movq_m2r(*(dataptr+12), mm3); // n23:n22|n21:n20 - third line | |
1086 punpckhdq_r2r(mm2, mm4); // m31:m21|m11:m01 - interleave to produce second result | |
1087 | |
1088 punpckhwd_m2r(*(dataptr+3), mm6); // m13:m03|m12:m02 - interleave first and second lines | |
1089 movq_r2r(mm1, mm2); // copy first line | |
1090 | |
1091 punpckhwd_m2r(*(dataptr+7), mm7); // m33:m23|m32:m22 - interleave third and fourth lines | |
1092 movq_r2r(mm6, mm5); // copy first intermediate result | |
1093 | |
1094 movq_r2m(mm0, *(dataptr+8)); // write result 1 | |
1095 punpckhdq_r2r(mm7, mm5); // m33:m23|m13:m03 - produce third result | |
1096 | |
1097 punpcklwd_m2r(*(dataptr+10), mm1); // n11:n01|n10:n00 - interleave first and second lines | |
1098 movq_r2r(mm3, mm0); // copy third line | |
1099 | |
1100 punpckhwd_m2r(*(dataptr+10), mm2); // n13:n03|n12:n02 - interleave first and second lines | |
1101 | |
1102 movq_r2m(mm4, *(dataptr+10)); // write result 2 out | |
1103 punpckldq_r2r(mm7, mm6); // m32:m22|m12:m02 - produce fourth result | |
1104 | |
1105 punpcklwd_m2r(*(dataptr+14), mm3); // n33:n23|n32:n22 - interleave third and fourth lines | |
1106 movq_r2r(mm1, mm4); // copy second intermediate result | |
1107 | |
1108 movq_r2m(mm6, *(dataptr+12)); // write result 3 out | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
1109 punpckldq_r2r(mm3, mm1); // |
3802 | 1110 |
1111 punpckhwd_m2r(*(dataptr+14), mm0); // n33:n23|n32:n22 - interleave third and fourth lines | |
1112 movq_r2r(mm2, mm6); // copy second intermediate result | |
1113 | |
1114 movq_r2m(mm5, *(dataptr+14)); // write result 4 out | |
1115 punpckhdq_r2r(mm3, mm4); // n31:n21|n11:n01- produce second result | |
1116 | |
1117 movq_r2m(mm1, *(dataptr+1)); // write result 5 out - (first result for other 4 x 4 block) | |
1118 punpckldq_r2r(mm0, mm2); // n32:n22|n12:n02- produce third result | |
1119 | |
1120 movq_r2m(mm4, *(dataptr+3)); // write result 6 out | |
1121 punpckhdq_r2r(mm0, mm6); // n33:n23|n13:n03 - produce fourth result | |
1122 | |
1123 movq_r2m(mm2, *(dataptr+5)); // write result 7 out | |
1124 | |
1125 movq_m2r(*dataptr, mm0); // m03:m02|m01:m00 - first line, first 4x4 | |
1126 | |
1127 movq_r2m(mm6, *(dataptr+7)); // write result 8 out | |
1128 | |
1129 // Do first 4x4 quadrant, which is used in the beginning of the DCT: | |
1130 | |
1131 movq_m2r(*(dataptr+4), mm7); // m23:m22|m21:m20 - third line | |
1132 movq_r2r(mm0, mm2); // copy first line | |
1133 | |
1134 punpcklwd_m2r(*(dataptr+2), mm0); // m11:m01|m10:m00 - interleave first and second lines | |
1135 movq_r2r(mm7, mm4); // copy third line | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
1136 |
3802 | 1137 punpcklwd_m2r(*(dataptr+6), mm7); // m31:m21|m30:m20 - interleave third and fourth lines |
1138 movq_r2r(mm0, mm1); // copy first intermediate result | |
1139 | |
1140 movq_m2r(*(dataptr+2), mm6); // m13:m12|m11:m10 - second line | |
1141 punpckldq_r2r(mm7, mm0); // m30:m20|m10:m00 - interleave to produce result 1 | |
1142 | |
1143 movq_m2r(*(dataptr+6), mm5); // m33:m32|m31:m30 - fourth line | |
1144 punpckhdq_r2r(mm7, mm1); // m31:m21|m11:m01 - interleave to produce result 2 | |
1145 | |
1146 movq_r2r(mm0, mm7); // write result 1 | |
1147 punpckhwd_r2r(mm6, mm2); // m13:m03|m12:m02 - interleave first and second lines | |
1148 | |
1149 psubw_m2r(*(dataptr+14), mm7); // tmp07=x0-x7 /* Stage 1 */ | |
1150 movq_r2r(mm1, mm6); // write result 2 | |
1151 | |
1152 paddw_m2r(*(dataptr+14), mm0); // tmp00=x0+x7 /* Stage 1 */ | |
1153 punpckhwd_r2r(mm5, mm4); // m33:m23|m32:m22 - interleave third and fourth lines | |
1154 | |
1155 paddw_m2r(*(dataptr+12), mm1); // tmp01=x1+x6 /* Stage 1 */ | |
1156 movq_r2r(mm2, mm3); // copy first intermediate result | |
1157 | |
1158 psubw_m2r(*(dataptr+12), mm6); // tmp06=x1-x6 /* Stage 1 */ | |
1159 punpckldq_r2r(mm4, mm2); // m32:m22|m12:m02 - interleave to produce result 3 | |
1160 | |
1161 movq_r2m(mm7, tmp7); // save tmp07 | |
1162 movq_r2r(mm2, mm5); // write result 3 | |
1163 | |
1164 movq_r2m(mm6, tmp6); // save tmp06 | |
1165 | |
1166 punpckhdq_r2r(mm4, mm3); // m33:m23|m13:m03 - interleave to produce result 4 | |
1167 | |
1168 paddw_m2r(*(dataptr+10), mm2); // tmp02=x2+x5 /* stage 1 */ | |
1169 movq_r2r(mm3, mm4); // write result 4 | |
1170 | |
1171 /************************************************************************************************ | |
1172 End of Transpose 2 | |
1173 ************************************************************************************************/ | |
1174 | |
1175 paddw_m2r(*(dataptr+8), mm3); // tmp03=x3+x4 /* stage 1*/ | |
1176 movq_r2r(mm0, mm7); | |
1177 | |
1178 psubw_m2r(*(dataptr+8), mm4); // tmp04=x3-x4 /* stage 1*/ | |
1179 movq_r2r(mm1, mm6); | |
1180 | |
1181 paddw_r2r(mm3, mm0); // tmp10 = tmp00 + tmp03 /* even 2 */ | |
1182 psubw_r2r(mm3, mm7); // tmp13 = tmp00 - tmp03 /* even 2 */ | |
1183 | |
1184 psubw_r2r(mm2, mm6); // tmp12 = tmp01 - tmp02 /* even 2 */ | |
1185 paddw_r2r(mm2, mm1); // tmp11 = tmp01 + tmp02 /* even 2 */ | |
1186 | |
1187 psubw_m2r(*(dataptr+10), mm5); // tmp05=x2-x5 /* stage 1*/ | |
1188 paddw_r2r(mm7, mm6); // tmp12 + tmp13 | |
1189 | |
1190 /* stage 3 */ | |
1191 | |
1192 movq_m2r(tmp6, mm2); | |
1193 movq_r2r(mm0, mm3); | |
1194 | |
1195 psllw_i2r(2, mm6); // m8 * 2^2 | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
1196 paddw_r2r(mm1, mm0); |
3802 | 1197 |
1198 pmulhw_m2r(RTjpeg_C4, mm6); // z1 | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
1199 psubw_r2r(mm1, mm3); |
3802 | 1200 |
1201 movq_r2m(mm0, *dataptr); | |
1202 movq_r2r(mm7, mm0); | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
1203 |
3802 | 1204 /* Odd part */ |
1205 movq_r2m(mm3, *(dataptr+8)); | |
1206 paddw_r2r(mm5, mm4); // tmp10 | |
1207 | |
1208 movq_m2r(tmp7, mm3); | |
1209 paddw_r2r(mm6, mm0); // tmp32 | |
1210 | |
1211 paddw_r2r(mm2, mm5); // tmp11 | |
1212 psubw_r2r(mm6, mm7); // tmp33 | |
1213 | |
1214 movq_r2m(mm0, *(dataptr+4)); | |
1215 paddw_r2r(mm3, mm2); // tmp12 | |
1216 | |
1217 /* stage 4 */ | |
1218 movq_r2m(mm7, *(dataptr+12)); | |
1219 movq_r2r(mm4, mm1); // copy of tmp10 | |
1220 | |
1221 psubw_r2r(mm2, mm1); // tmp10 - tmp12 | |
1222 psllw_i2r(2, mm4); // m8 * 2^2 | |
1223 | |
1224 movq_m2r(RTjpeg_C2mC6, mm0); | |
1225 psllw_i2r(2, mm1); | |
1226 | |
1227 pmulhw_m2r(RTjpeg_C6, mm1); // z5 | |
1228 psllw_i2r(2, mm2); | |
1229 | |
1230 pmulhw_r2r(mm0, mm4); // z5 | |
1231 | |
1232 /* stage 5 */ | |
1233 | |
1234 pmulhw_m2r(RTjpeg_C2pC6, mm2); | |
1235 psllw_i2r(2, mm5); | |
1236 | |
1237 pmulhw_m2r(RTjpeg_C4, mm5); // z3 | |
1238 movq_r2r(mm3, mm0); // copy tmp7 | |
1239 | |
1240 movq_m2r(*(dataptr+1), mm7); | |
1241 paddw_r2r(mm1, mm4); // z2 | |
1242 | |
1243 paddw_r2r(mm1, mm2); // z4 | |
1244 | |
1245 paddw_r2r(mm5, mm0); // z11 | |
1246 psubw_r2r(mm5, mm3); // z13 | |
1247 | |
1248 /* stage 6 */ | |
1249 | |
1250 movq_r2r(mm3, mm5); // copy z13 | |
1251 psubw_r2r(mm4, mm3); // y3=z13 - z2 | |
1252 | |
1253 paddw_r2r(mm4, mm5); // y5=z13 + z2 | |
1254 movq_r2r(mm0, mm6); // copy z11 | |
1255 | |
1256 movq_r2m(mm3, *(dataptr+6)); //save y3 | |
1257 psubw_r2r(mm2, mm0); // y7=z11 - z4 | |
1258 | |
1259 movq_r2m(mm5, *(dataptr+10)); //save y5 | |
1260 paddw_r2r(mm2, mm6); // y1=z11 + z4 | |
1261 | |
1262 movq_r2m(mm0, *(dataptr+14)); //save y7 | |
1263 | |
1264 /************************************************ | |
1265 * End of 1st 4 rows | |
1266 ************************************************/ | |
1267 | |
1268 movq_m2r(*(dataptr+3), mm1); // load x1 /* stage 1 */ | |
1269 movq_r2r(mm7, mm0); // copy x0 | |
1270 | |
1271 movq_r2m(mm6, *(dataptr+2)); //save y1 | |
1272 | |
1273 movq_m2r(*(dataptr+5), mm2); // load x2 /* stage 1 */ | |
1274 movq_r2r(mm1, mm6); // copy x1 | |
1275 | |
1276 paddw_m2r(*(dataptr+15), mm0); // tmp00 = x0 + x7 | |
1277 | |
1278 movq_m2r(*(dataptr+7), mm3); // load x3 /* stage 1 */ | |
1279 movq_r2r(mm2, mm5); // copy x2 | |
1280 | |
1281 psubw_m2r(*(dataptr+15), mm7); // tmp07 = x0 - x7 | |
1282 movq_r2r(mm3, mm4); // copy x3 | |
1283 | |
1284 paddw_m2r(*(dataptr+13), mm1); // tmp01 = x1 + x6 | |
1285 | |
1286 movq_r2m(mm7, tmp7); // save tmp07 | |
1287 movq_r2r(mm0, mm7); // copy tmp00 | |
1288 | |
1289 psubw_m2r(*(dataptr+13), mm6); // tmp06 = x1 - x6 | |
1290 | |
1291 /* stage 2, Even Part */ | |
1292 | |
1293 paddw_m2r(*(dataptr+9), mm3); // tmp03 = x3 + x4 | |
1294 | |
1295 movq_r2m(mm6, tmp6); // save tmp06 | |
1296 movq_r2r(mm1, mm6); // copy tmp01 | |
1297 | |
1298 paddw_m2r(*(dataptr+11), mm2); // tmp02 = x2 + x5 | |
1299 paddw_r2r(mm3, mm0); // tmp10 = tmp00 + tmp03 | |
1300 | |
1301 psubw_r2r(mm3, mm7); // tmp13 = tmp00 - tmp03 | |
1302 | |
1303 psubw_m2r(*(dataptr+9), mm4); // tmp04 = x3 - x4 | |
1304 psubw_r2r(mm2, mm6); // tmp12 = tmp01 - tmp02 | |
1305 | |
1306 paddw_r2r(mm2, mm1); // tmp11 = tmp01 + tmp02 | |
1307 | |
1308 psubw_m2r(*(dataptr+11), mm5); // tmp05 = x2 - x5 | |
1309 paddw_r2r(mm7, mm6); // tmp12 + tmp13 | |
1310 | |
1311 /* stage 3, Even and stage 4 & 5 even */ | |
1312 | |
1313 movq_m2r(tmp6, mm2); // load tmp6 | |
1314 movq_r2r(mm0, mm3); // copy tmp10 | |
1315 | |
1316 psllw_i2r(2, mm6); // shift z1 | |
1317 paddw_r2r(mm1, mm0); // y0=tmp10 + tmp11 | |
1318 | |
1319 pmulhw_m2r(RTjpeg_C4, mm6); // z1 | |
1320 psubw_r2r(mm1, mm3); // y4=tmp10 - tmp11 | |
1321 | |
1322 movq_r2m(mm0, *(dataptr+1)); //save y0 | |
1323 movq_r2r(mm7, mm0); // copy tmp13 | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
1324 |
3802 | 1325 /* odd part */ |
1326 | |
1327 movq_r2m(mm3, *(dataptr+9)); //save y4 | |
1328 paddw_r2r(mm5, mm4); // tmp10 = tmp4 + tmp5 | |
1329 | |
1330 movq_m2r(tmp7, mm3); // load tmp7 | |
1331 paddw_r2r(mm6, mm0); // tmp32 = tmp13 + z1 | |
1332 | |
1333 paddw_r2r(mm2, mm5); // tmp11 = tmp5 + tmp6 | |
1334 psubw_r2r(mm6, mm7); // tmp33 = tmp13 - z1 | |
1335 | |
1336 movq_r2m(mm0, *(dataptr+5)); //save y2 | |
1337 paddw_r2r(mm3, mm2); // tmp12 = tmp6 + tmp7 | |
1338 | |
1339 /* stage 4 */ | |
1340 | |
1341 movq_r2m(mm7, *(dataptr+13)); //save y6 | |
1342 movq_r2r(mm4, mm1); // copy tmp10 | |
1343 | |
1344 psubw_r2r(mm2, mm1); // tmp10 - tmp12 | |
1345 psllw_i2r(2, mm4); // shift tmp10 | |
1346 | |
1347 movq_m2r(RTjpeg_C2mC6, mm0); // load C2mC6 | |
1348 psllw_i2r(2, mm1); // shift (tmp10-tmp12) | |
1349 | |
1350 pmulhw_m2r(RTjpeg_C6, mm1); // z5 | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
1351 psllw_i2r(2, mm5); // prepare for multiply |
3802 | 1352 |
1353 pmulhw_r2r(mm0, mm4); // multiply by converted real | |
1354 | |
1355 /* stage 5 */ | |
1356 | |
1357 pmulhw_m2r(RTjpeg_C4, mm5); // z3 | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
1358 psllw_i2r(2, mm2); // prepare for multiply |
3802 | 1359 |
1360 pmulhw_m2r(RTjpeg_C2pC6, mm2); // multiply | |
1361 movq_r2r(mm3, mm0); // copy tmp7 | |
1362 | |
1363 movq_m2r(*(dataptr+9), mm7); // m03:m02|m01:m00 - first line (line 4)and copy into mm7 | |
1364 paddw_r2r(mm1, mm4); // z2 | |
1365 | |
1366 paddw_r2r(mm5, mm0); // z11 | |
1367 psubw_r2r(mm5, mm3); // z13 | |
1368 | |
1369 /* stage 6 */ | |
1370 | |
1371 movq_r2r(mm3, mm5); // copy z13 | |
1372 paddw_r2r(mm1, mm2); // z4 | |
1373 | |
1374 movq_r2r(mm0, mm6); // copy z11 | |
1375 psubw_r2r(mm4, mm5); // y3 | |
1376 | |
1377 paddw_r2r(mm2, mm6); // y1 | |
1378 paddw_r2r(mm4, mm3); // y5 | |
1379 | |
1380 movq_r2m(mm5, *(dataptr+7)); //save y3 | |
1381 psubw_r2r(mm2, mm0); // y7=z11 - z4 | |
1382 | |
1383 movq_r2m(mm3, *(dataptr+11)); //save y5 | |
1384 | |
1385 movq_r2m(mm6, *(dataptr+3)); //save y1 | |
1386 | |
1387 movq_r2m(mm0, *(dataptr+15)); //save y7 | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
1388 |
3802 | 1389 |
1390 #endif | |
1391 } | |
1392 | |
1393 /* | |
1394 | |
1395 Main Routines | |
1396 | |
1397 This file contains most of the initialisation and control functions | |
1398 | |
1399 (C) Justin Schoeman 1998 | |
1400 | |
1401 */ | |
1402 | |
1403 /* | |
1404 | |
1405 Private function | |
1406 | |
1407 Initialise all the cache-aligned data blocks | |
1408 | |
1409 */ | |
1410 | |
28849
87b59e8d3c26
Mark everything not used outside the file as "static"
reimar
parents:
28298
diff
changeset
|
1411 static void RTjpeg_init_data(void) |
3802 | 1412 { |
1413 unsigned long dptr; | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
1414 |
3802 | 1415 dptr=(unsigned long)&(RTjpeg_alldata[0]); |
1416 dptr+=32; | |
1417 dptr=dptr>>5; | |
1418 dptr=dptr<<5; /* cache align data */ | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
1419 |
3802 | 1420 RTjpeg_block=(__s16 *)dptr; |
1421 dptr+=sizeof(__s16)*64; | |
1422 RTjpeg_lqt=(__s32 *)dptr; | |
1423 dptr+=sizeof(__s32)*64; | |
1424 RTjpeg_cqt=(__s32 *)dptr; | |
1425 dptr+=sizeof(__s32)*64; | |
1426 RTjpeg_liqt=(__u32 *)dptr; | |
1427 dptr+=sizeof(__u32)*64; | |
1428 RTjpeg_ciqt=(__u32 *)dptr; | |
1429 } | |
1430 | |
1431 /* | |
1432 | |
1433 External Function | |
1434 | |
1435 Re-set quality factor | |
1436 | |
1437 Input: Q -> quality factor (192=best, 32=worst) | |
1438 (no buf argument here: the tables are written to the | |
1439 module's own RTjpeg_lqt/RTjpeg_cqt/RTjpeg_liqt/RTjpeg_ciqt) | |
1440 */ | |
1441 | |
28849
87b59e8d3c26
Mark everything not used outside the file as "static"
reimar
parents:
28298
diff
changeset
|
1442 static void RTjpeg_init_Q(__u8 Q) |
3802 | 1443 { |
1444 int i; | |
1445 __u64 qual; | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
1446 |
3802 | 1447 qual=(__u64)Q<<(32-7); /* 32 bit FP, 255=2, 0=0 */ |
1448 | |
1449 for(i=0; i<64; i++) | |
1450 { | |
1451 RTjpeg_lqt[i]=(__s32)((qual/((__u64)RTjpeg_lum_quant_tbl[i]<<16))>>3); | |
1452 if(RTjpeg_lqt[i]==0)RTjpeg_lqt[i]=1; | |
1453 RTjpeg_cqt[i]=(__s32)((qual/((__u64)RTjpeg_chrom_quant_tbl[i]<<16))>>3); | |
1454 if(RTjpeg_cqt[i]==0)RTjpeg_cqt[i]=1; | |
1455 RTjpeg_liqt[i]=(1<<16)/(RTjpeg_lqt[i]<<3); | |
1456 RTjpeg_ciqt[i]=(1<<16)/(RTjpeg_cqt[i]<<3); | |
1457 RTjpeg_lqt[i]=((1<<16)/RTjpeg_liqt[i])>>3; | |
1458 RTjpeg_cqt[i]=((1<<16)/RTjpeg_ciqt[i])>>3; | |
1459 } | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
1460 |
3802 | 1461 RTjpeg_lb8=0; |
1462 while(RTjpeg_liqt[RTjpeg_ZZ[++RTjpeg_lb8]]<=8); | |
1463 RTjpeg_lb8--; | |
1464 RTjpeg_cb8=0; | |
1465 while(RTjpeg_ciqt[RTjpeg_ZZ[++RTjpeg_cb8]]<=8); | |
1466 RTjpeg_cb8--; | |
1467 | |
1468 RTjpeg_dct_init(); | |
1469 RTjpeg_quant_init(); | |
1470 } | |
1471 | |
1472 /* | |
1473 | |
1474 External Function | |
1475 | |
1476 Initialise compression. | |
1477 | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
1478 Input: buf -> pointer to 128 ints for quant values store to pass back to |
3802 | 1479 init_decompress. |
1480 width -> width of image | |
1481 height -> height of image | |
1482 Q -> quality factor (192=best, 32=worst) | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
1483 |
3802 | 1484 */ |
1485 | |
1486 void RTjpeg_init_compress(__u32 *buf, int width, int height, __u8 Q) | |
1487 { | |
1488 int i; | |
1489 __u64 qual; | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
1490 |
3802 | 1491 RTjpeg_init_data(); |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
1492 |
3802 | 1493 RTjpeg_width=width; |
1494 RTjpeg_height=height; | |
1495 RTjpeg_Ywidth = RTjpeg_width>>3; | |
1496 RTjpeg_Ysize=width * height; | |
1497 RTjpeg_Cwidth = RTjpeg_width>>4; | |
1498 RTjpeg_Csize= (width>>1) * height; | |
1499 | |
1500 qual=(__u64)Q<<(32-7); /* 32 bit FP, 255=2, 0=0 */ | |
1501 | |
1502 for(i=0; i<64; i++) | |
1503 { | |
1504 RTjpeg_lqt[i]=(__s32)((qual/((__u64)RTjpeg_lum_quant_tbl[i]<<16))>>3); | |
1505 if(RTjpeg_lqt[i]==0)RTjpeg_lqt[i]=1; | |
1506 RTjpeg_cqt[i]=(__s32)((qual/((__u64)RTjpeg_chrom_quant_tbl[i]<<16))>>3); | |
1507 if(RTjpeg_cqt[i]==0)RTjpeg_cqt[i]=1; | |
1508 RTjpeg_liqt[i]=(1<<16)/(RTjpeg_lqt[i]<<3); | |
1509 RTjpeg_ciqt[i]=(1<<16)/(RTjpeg_cqt[i]<<3); | |
1510 RTjpeg_lqt[i]=((1<<16)/RTjpeg_liqt[i])>>3; | |
1511 RTjpeg_cqt[i]=((1<<16)/RTjpeg_ciqt[i])>>3; | |
1512 } | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
1513 |
3802 | 1514 RTjpeg_lb8=0; |
1515 while(RTjpeg_liqt[RTjpeg_ZZ[++RTjpeg_lb8]]<=8); | |
1516 RTjpeg_lb8--; | |
1517 RTjpeg_cb8=0; | |
1518 while(RTjpeg_ciqt[RTjpeg_ZZ[++RTjpeg_cb8]]<=8); | |
1519 RTjpeg_cb8--; | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
1520 |
3802 | 1521 RTjpeg_dct_init(); |
1522 RTjpeg_quant_init(); | |
1523 | |
1524 for(i=0; i<64; i++) | |
14896
9ddae5897422
Make nuv files work on bigendian (but old nuv files created with mencoder
reimar
parents:
14642
diff
changeset
|
1525 buf[i]=le2me_32(RTjpeg_liqt[i]); |
3802 | 1526 for(i=0; i<64; i++) |
14896
9ddae5897422
Make nuv files work on bigendian (but old nuv files created with mencoder
reimar
parents:
14642
diff
changeset
|
1527 buf[64+i]=le2me_32(RTjpeg_ciqt[i]); |
3802 | 1528 } |
1529 | |
1530 int RTjpeg_compressYUV420(__s8 *sp, unsigned char *bp) | |
1531 { | |
1532 __s8 * sb; | |
1533 register __s8 * bp1 = bp + (RTjpeg_width<<3); | |
1534 register __s8 * bp2 = bp + RTjpeg_Ysize; | |
1535 register __s8 * bp3 = bp2 + (RTjpeg_Csize>>1); | |
1536 register int i, j, k; | |
1537 | |
28298
a7124a264ea6
Completely get rid of MMX define, use HAVE_MMX define instead.
gpoirier
parents:
28296
diff
changeset
|
1538 #if HAVE_MMX |
3802 | 1539 emms(); |
1540 #endif | |
1541 sb=sp; | |
1542 /* Y */ | |
1543 for(i=RTjpeg_height>>1; i; i-=8) | |
1544 { | |
1545 for(j=0, k=0; j<RTjpeg_width; j+=16, k+=8) | |
1546 { | |
1547 RTjpeg_dctY(bp+j, RTjpeg_block, RTjpeg_Ywidth); | |
1548 RTjpeg_quant(RTjpeg_block, RTjpeg_lqt); | |
1549 sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_lb8); | |
1550 | |
1551 RTjpeg_dctY(bp+j+8, RTjpeg_block, RTjpeg_Ywidth); | |
1552 RTjpeg_quant(RTjpeg_block, RTjpeg_lqt); | |
1553 sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_lb8); | |
1554 | |
1555 RTjpeg_dctY(bp1+j, RTjpeg_block, RTjpeg_Ywidth); | |
1556 RTjpeg_quant(RTjpeg_block, RTjpeg_lqt); | |
1557 sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_lb8); | |
1558 | |
1559 RTjpeg_dctY(bp1+j+8, RTjpeg_block, RTjpeg_Ywidth); | |
1560 RTjpeg_quant(RTjpeg_block, RTjpeg_lqt); | |
1561 sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_lb8); | |
1562 | |
1563 RTjpeg_dctY(bp2+k, RTjpeg_block, RTjpeg_Cwidth); | |
1564 RTjpeg_quant(RTjpeg_block, RTjpeg_cqt); | |
1565 sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_cb8); | |
1566 | |
1567 RTjpeg_dctY(bp3+k, RTjpeg_block, RTjpeg_Cwidth); | |
1568 RTjpeg_quant(RTjpeg_block, RTjpeg_cqt); | |
1569 sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_cb8); | |
1570 | |
1571 } | |
1572 bp+=RTjpeg_width<<4; | |
1573 bp1+=RTjpeg_width<<4; | |
1574 bp2+=RTjpeg_width<<2; | |
1575 bp3+=RTjpeg_width<<2; | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
1576 |
3802 | 1577 } |
28298
a7124a264ea6
Completely get rid of MMX define, use HAVE_MMX define instead.
gpoirier
parents:
28296
diff
changeset
|
1578 #if HAVE_MMX |
3802 | 1579 emms(); |
1580 #endif | |
1581 return (sp-sb); | |
1582 } | |
1583 | |
1584 /* | |
1585 External Function | |
1586 | |
1587 Initialise additional data structures for motion compensation | |
1588 | |
1589 */ | |
1590 | |
1591 void RTjpeg_init_mcompress(void) | |
1592 { | |
1593 unsigned long tmp; | |
1594 | |
1595 if(!RTjpeg_old) | |
1596 { | |
1597 RTjpeg_old=malloc((4*RTjpeg_width*RTjpeg_height)+32); | |
1598 tmp=(unsigned long)RTjpeg_old; | |
1599 tmp+=32; | |
1600 tmp=tmp>>5; | |
1601 RTjpeg_old=(__s16 *)(tmp<<5); | |
1602 } | |
1603 if (!RTjpeg_old) | |
1604 { | |
1605 fprintf(stderr, "RTjpeg: Could not allocate memory\n"); | |
1606 exit(-1); | |
1607 } | |
14642
38572280e8e7
bzero is deprecated patch by Gianluigi Tiesi <mplayer at netfarm.it>
faust3
parents:
12928
diff
changeset
|
1608 memset(RTjpeg_old, 0, ((4*RTjpeg_width*RTjpeg_height))); |
3802 | 1609 } |
1610 | |
28298
a7124a264ea6
Completely get rid of MMX define, use HAVE_MMX define instead.
gpoirier
parents:
28296
diff
changeset
|
1611 #if HAVE_MMX |
3802 | 1612 |
28849
87b59e8d3c26
Mark everything not used outside the file as "static"
reimar
parents:
28298
diff
changeset
|
1613 static int RTjpeg_bcomp(__s16 *old, mmx_t *mask) |
3802 | 1614 { |
1615 int i; | |
1616 mmx_t *mold=(mmx_t *)old; | |
1617 mmx_t *mblock=(mmx_t *)RTjpeg_block; | |
1618 volatile mmx_t result; | |
12928 | 1619 static mmx_t neg={0xffffffffffffffffULL}; |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
1620 |
3802 | 1621 movq_m2r(*mask, mm7); |
1622 movq_m2r(neg, mm6); | |
1623 pxor_r2r(mm5, mm5); | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
1624 |
3802 | 1625 for(i=0; i<8; i++) |
1626 { | |
1627 movq_m2r(*(mblock++), mm0); | |
1628 movq_m2r(*(mblock++), mm2); | |
1629 movq_m2r(*(mold++), mm1); | |
1630 movq_m2r(*(mold++), mm3); | |
1631 psubsw_r2r(mm1, mm0); | |
1632 psubsw_r2r(mm3, mm2); | |
1633 movq_r2r(mm0, mm1); | |
1634 movq_r2r(mm2, mm3); | |
1635 pcmpgtw_r2r(mm7, mm0); | |
1636 pcmpgtw_r2r(mm7, mm2); | |
1637 pxor_r2r(mm6, mm1); | |
1638 pxor_r2r(mm6, mm3); | |
1639 pcmpgtw_r2r(mm7, mm1); | |
1640 pcmpgtw_r2r(mm7, mm3); | |
1641 por_r2r(mm0, mm5); | |
1642 por_r2r(mm2, mm5); | |
1643 por_r2r(mm1, mm5); | |
1644 por_r2r(mm3, mm5); | |
1645 } | |
1646 movq_r2m(mm5, result); | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
1647 |
3802 | 1648 if(result.q) |
1649 { | |
1650 return 0; | |
1651 } | |
1652 return 1; | |
1653 } | |
1654 | |
1655 #else | |
28849
87b59e8d3c26
Mark everything not used outside the file as "static"
reimar
parents:
28298
diff
changeset
|
1656 static int RTjpeg_bcomp(__s16 *old, __u16 *mask) |
3802 | 1657 { |
1658 int i; | |
1659 | |
1660 for(i=0; i<64; i++) | |
1661 if(abs(old[i]-RTjpeg_block[i])>*mask) | |
1662 { | |
1663 for(i=0; i<16; i++)((__u64 *)old)[i]=((__u64 *)RTjpeg_block)[i]; | |
1664 return 0; | |
1665 } | |
1666 return 1; | |
1667 } | |
1668 #endif | |
1669 | |
1670 int RTjpeg_mcompressYUV420(__s8 *sp, unsigned char *bp, __u16 lmask, __u16 cmask) | |
1671 { | |
1672 __s8 * sb; | |
1673 register __s8 * bp1 = bp + (RTjpeg_width<<3); | |
1674 register __s8 * bp2 = bp + RTjpeg_Ysize; | |
1675 register __s8 * bp3 = bp2 + (RTjpeg_Csize>>1); | |
1676 register int i, j, k; | |
1677 | |
28298
a7124a264ea6
Completely get rid of MMX define, use HAVE_MMX define instead.
gpoirier
parents:
28296
diff
changeset
|
1678 #if HAVE_MMX |
3802 | 1679 emms(); |
16653
27b0d49988b2
Fix 100l bugs that break playback on 64 bit systems (like typedefing __u32
reimar
parents:
14896
diff
changeset
|
1680 RTjpeg_lmask.uq=((__u64)lmask<<48)|((__u64)lmask<<32)|((__u64)lmask<<16)|lmask; |
27b0d49988b2
Fix 100l bugs that break playback on 64 bit systems (like typedefing __u32
reimar
parents:
14896
diff
changeset
|
1681 RTjpeg_cmask.uq=((__u64)cmask<<48)|((__u64)cmask<<32)|((__u64)cmask<<16)|cmask; |
3802 | 1682 #else |
16661
adb581352e63
Stupidity in last patch broke compile without MMX: RTjpeg_lmask is a union
reimar
parents:
16653
diff
changeset
|
1683 RTjpeg_lmask=lmask; |
adb581352e63
Stupidity in last patch broke compile without MMX: RTjpeg_lmask is a union
reimar
parents:
16653
diff
changeset
|
1684 RTjpeg_cmask=cmask; |
3802 | 1685 #endif |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
1686 |
3802 | 1687 sb=sp; |
1688 block=RTjpeg_old; | |
1689 /* Y */ | |
1690 for(i=RTjpeg_height>>1; i; i-=8) | |
1691 { | |
1692 for(j=0, k=0; j<RTjpeg_width; j+=16, k+=8) | |
1693 { | |
1694 RTjpeg_dctY(bp+j, RTjpeg_block, RTjpeg_Ywidth); | |
1695 RTjpeg_quant(RTjpeg_block, RTjpeg_lqt); | |
1696 if(RTjpeg_bcomp(block, &RTjpeg_lmask)) | |
1697 { | |
1698 *((__u8 *)sp++)=255; | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
1699 } |
3802 | 1700 else sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_lb8); |
1701 block+=64; | |
1702 | |
1703 RTjpeg_dctY(bp+j+8, RTjpeg_block, RTjpeg_Ywidth); | |
1704 RTjpeg_quant(RTjpeg_block, RTjpeg_lqt); | |
1705 if(RTjpeg_bcomp(block, &RTjpeg_lmask)) | |
1706 { | |
1707 *((__u8 *)sp++)=255; | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
1708 } |
3802 | 1709 else sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_lb8); |
1710 block+=64; | |
1711 | |
1712 RTjpeg_dctY(bp1+j, RTjpeg_block, RTjpeg_Ywidth); | |
1713 RTjpeg_quant(RTjpeg_block, RTjpeg_lqt); | |
1714 if(RTjpeg_bcomp(block, &RTjpeg_lmask)) | |
1715 { | |
1716 *((__u8 *)sp++)=255; | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
1717 } |
3802 | 1718 else sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_lb8); |
1719 block+=64; | |
1720 | |
1721 RTjpeg_dctY(bp1+j+8, RTjpeg_block, RTjpeg_Ywidth); | |
1722 RTjpeg_quant(RTjpeg_block, RTjpeg_lqt); | |
1723 if(RTjpeg_bcomp(block, &RTjpeg_lmask)) | |
1724 { | |
1725 *((__u8 *)sp++)=255; | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
1726 } |
3802 | 1727 else sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_lb8); |
1728 block+=64; | |
1729 | |
1730 RTjpeg_dctY(bp2+k, RTjpeg_block, RTjpeg_Cwidth); | |
1731 RTjpeg_quant(RTjpeg_block, RTjpeg_cqt); | |
1732 if(RTjpeg_bcomp(block, &RTjpeg_cmask)) | |
1733 { | |
1734 *((__u8 *)sp++)=255; | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
1735 } |
3802 | 1736 else sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_cb8); |
1737 block+=64; | |
1738 | |
1739 RTjpeg_dctY(bp3+k, RTjpeg_block, RTjpeg_Cwidth); | |
1740 RTjpeg_quant(RTjpeg_block, RTjpeg_cqt); | |
1741 if(RTjpeg_bcomp(block, &RTjpeg_cmask)) | |
1742 { | |
1743 *((__u8 *)sp++)=255; | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
1744 } |
3802 | 1745 else sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_cb8); |
1746 block+=64; | |
1747 } | |
1748 bp+=RTjpeg_width<<4; | |
1749 bp1+=RTjpeg_width<<4; | |
1750 bp2+=RTjpeg_width<<2; | |
1751 bp3+=RTjpeg_width<<2; | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
28853
diff
changeset
|
1752 |
3802 | 1753 } |
28298
a7124a264ea6
Completely get rid of MMX define, use HAVE_MMX define instead.
gpoirier
parents:
28296
diff
changeset
|
1754 #if HAVE_MMX |
3802 | 1755 emms(); |
1756 #endif | |
1757 return (sp-sb); | |
1758 } |