annotate libmpcodecs/vf_fspp.c @ 34809:ea97bcb28df1

Allow direct rendering for non-reference frames in H.264. This might work for other codecs that currently have DR disabled, but H.264 is the only one tested so far.
author reimar
date Mon, 14 May 2012 18:11:24 +0000
parents a28c4ba97171
children b4ce15212bfc
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
15631
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
1 /*
26727
82601a38e2a7 Use standard license headers.
diego
parents: 26052
diff changeset
2 * Copyright (C) 2003 Michael Niedermayer <michaelni@gmx.at>
82601a38e2a7 Use standard license headers.
diego
parents: 26052
diff changeset
3 * Copyright (C) 2005 Nikolaj Poroshin <porosh3@psu.ru>
82601a38e2a7 Use standard license headers.
diego
parents: 26052
diff changeset
4 *
82601a38e2a7 Use standard license headers.
diego
parents: 26052
diff changeset
5 * This file is part of MPlayer.
82601a38e2a7 Use standard license headers.
diego
parents: 26052
diff changeset
6 *
82601a38e2a7 Use standard license headers.
diego
parents: 26052
diff changeset
7 * MPlayer is free software; you can redistribute it and/or modify
82601a38e2a7 Use standard license headers.
diego
parents: 26052
diff changeset
8 * it under the terms of the GNU General Public License as published by
82601a38e2a7 Use standard license headers.
diego
parents: 26052
diff changeset
9 * the Free Software Foundation; either version 2 of the License, or
82601a38e2a7 Use standard license headers.
diego
parents: 26052
diff changeset
10 * (at your option) any later version.
82601a38e2a7 Use standard license headers.
diego
parents: 26052
diff changeset
11 *
82601a38e2a7 Use standard license headers.
diego
parents: 26052
diff changeset
12 * MPlayer is distributed in the hope that it will be useful,
82601a38e2a7 Use standard license headers.
diego
parents: 26052
diff changeset
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
82601a38e2a7 Use standard license headers.
diego
parents: 26052
diff changeset
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
82601a38e2a7 Use standard license headers.
diego
parents: 26052
diff changeset
15 * GNU General Public License for more details.
82601a38e2a7 Use standard license headers.
diego
parents: 26052
diff changeset
16 *
82601a38e2a7 Use standard license headers.
diego
parents: 26052
diff changeset
17 * You should have received a copy of the GNU General Public License along
82601a38e2a7 Use standard license headers.
diego
parents: 26052
diff changeset
18 * with MPlayer; if not, write to the Free Software Foundation, Inc.,
82601a38e2a7 Use standard license headers.
diego
parents: 26052
diff changeset
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
82601a38e2a7 Use standard license headers.
diego
parents: 26052
diff changeset
20 */
15631
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
21
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
22 /*
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
23 * This implementation is based on an algorithm described in
29263
0f1b5b68af32 whitespace cosmetics: Remove all trailing whitespace.
diego
parents: 29087
diff changeset
24 * "Aria Nosratinia Embedded Post-Processing for
15631
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
25 * Enhancement of Compressed Images (1999)"
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
26 * (http://citeseer.nj.nec.com/nosratinia99embedded.html)
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
27 * Further, with splitting (i)dct into hor/ver passes, one of them can be
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
28 * performed once per block, not pixel. This allows for much better speed.
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
29 */
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
30
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
31 /*
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
32 Heavily optimized version of SPP filter by Nikolaj
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
33 */
29263
0f1b5b68af32 whitespace cosmetics: Remove all trailing whitespace.
diego
parents: 29087
diff changeset
34
15631
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
35 #include <stdio.h>
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
36 #include <stdlib.h>
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
37 #include <string.h>
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
38 #include <inttypes.h>
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
39 #include <math.h>
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
40
17012
6ff3379a0862 Unify include path handling, -I.. is in CFLAGS.
diego
parents: 16018
diff changeset
41 #include "config.h"
15631
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
42
17012
6ff3379a0862 Unify include path handling, -I.. is in CFLAGS.
diego
parents: 16018
diff changeset
43 #include "mp_msg.h"
6ff3379a0862 Unify include path handling, -I.. is in CFLAGS.
diego
parents: 16018
diff changeset
44 #include "cpudetect.h"
15631
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
45 #include "img_format.h"
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
46 #include "mp_image.h"
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
47 #include "vf.h"
33871
30f5e5cd3676 Move code for setting up libav* logging callbacks from vd_ffmpeg to a
reimar
parents: 32702
diff changeset
48 #include "av_helpers.h"
17012
6ff3379a0862 Unify include path handling, -I.. is in CFLAGS.
diego
parents: 16018
diff changeset
49 #include "libvo/fastmemcpy.h"
15631
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
50
28588
7f03a6d3c941 Move FFmpeg #includes below all others so that they do not override
diego
parents: 28327
diff changeset
51 #include "libavutil/internal.h"
7f03a6d3c941 Move FFmpeg #includes below all others so that they do not override
diego
parents: 28327
diff changeset
52 #include "libavutil/intreadwrite.h"
7f03a6d3c941 Move FFmpeg #includes below all others so that they do not override
diego
parents: 28327
diff changeset
53 #include "libavutil/mem.h"
34198
064be070d8c8 cpudetect: clean up libavutil #includes
diego
parents: 33871
diff changeset
54 #include "libavutil/x86_cpu.h"
28588
7f03a6d3c941 Move FFmpeg #includes below all others so that they do not override
diego
parents: 28327
diff changeset
55 #include "libavcodec/avcodec.h"
7f03a6d3c941 Move FFmpeg #includes below all others so that they do not override
diego
parents: 28327
diff changeset
56 #include "libavcodec/dsputil.h"
7f03a6d3c941 Move FFmpeg #includes below all others so that they do not override
diego
parents: 28327
diff changeset
57
28327
c39a1fd7d45c Fix compilation after DECLARE_ASM_CONST/DECLARE_ALIGNED moving within FFmpeg.
diego
parents: 28290
diff changeset
58 #undef free
c39a1fd7d45c Fix compilation after DECLARE_ASM_CONST/DECLARE_ALIGNED moving within FFmpeg.
diego
parents: 28290
diff changeset
59 #undef malloc
c39a1fd7d45c Fix compilation after DECLARE_ASM_CONST/DECLARE_ALIGNED moving within FFmpeg.
diego
parents: 28290
diff changeset
60
15631
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
61 //===========================================================================//
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
62 #define BLOCKSZ 12
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
63
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
64 static const short custom_threshold[64]=
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
65 // values (296) can't be too high
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
66 // -it causes too big quant dependence
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
67 // or maybe overflow(check), which results in some flashing
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
68 { 71, 296, 295, 237, 71, 40, 38, 19,
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
69 245, 193, 185, 121, 102, 73, 53, 27,
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
70 158, 129, 141, 107, 97, 73, 50, 26,
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
71 102, 116, 109, 98, 82, 66, 45, 23,
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
72 71, 94, 95, 81, 70, 56, 38, 20,
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
73 56, 77, 74, 66, 56, 44, 30, 15,
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
74 38, 53, 50, 45, 38, 30, 21, 11,
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
75 20, 27, 26, 23, 20, 15, 11, 5
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
76 };
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
77
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
/*
 * 8x8 dither table, 32-byte aligned, containing each value 0..63 exactly once.
 * store_slice_c and store_slice2_c select row dither[y] and add
 * (d[pos] >> log2_scale) to every sample before the final right shift.
 */
78 static const uint8_t __attribute__((aligned(32))) dither[8][8]={
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
79 { 0, 48, 12, 60, 3, 51, 15, 63, },
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
80 { 32, 16, 44, 28, 35, 19, 47, 31, },
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
81 { 8, 56, 4, 52, 11, 59, 7, 55, },
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
82 { 40, 24, 36, 20, 43, 27, 39, 23, },
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
83 { 2, 50, 14, 62, 1, 49, 13, 61, },
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
84 { 34, 18, 46, 30, 33, 17, 45, 29, },
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
85 { 10, 58, 6, 54, 9, 57, 5, 53, },
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
86 { 42, 26, 38, 22, 41, 25, 37, 21, },
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
87 };
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
88
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
/*
 * State structure used by this filter.
 * threshold_mtx_noq and threshold_mtx are 16 uint64_t each; mul_thrmat_c
 * accesses both through short pointers and fills 64 entries:
 *   threshold_mtx[a] = q * threshold_mtx_noq[a].
 * NOTE(review): the roles of the remaining fields are not visible in this
 * chunk -- confirm against the rest of the file before documenting them.
 */
89 struct vf_priv_s { //align 16 !
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
90 uint64_t threshold_mtx_noq[8*2];
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
91 uint64_t threshold_mtx[8*2];//used in both C & MMX (& later SSE2) versions
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
92
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
93 int log2_count;
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
94 int temp_stride;
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
95 int qp;
17225
ec9888363742 reverse the H264 hack
henry
parents: 17223
diff changeset
96 int mpeg2;
15631
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
97 int prev_q;
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
98 uint8_t *src;
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
99 int16_t *temp;
17225
ec9888363742 reverse the H264 hack
henry
parents: 17223
diff changeset
100 int bframes;
17133
a2b24e0d7772 prevent flicker on b-frames, trivial port from vf_spp
henry
parents: 17012
diff changeset
101 char *non_b_qp;
15631
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
102 };
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
103
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
104
28290
25337a2147e7 Lots and lots of #ifdef ARCH_... -> #if ARCH_...
reimar
parents: 27754
diff changeset
105 #if !HAVE_MMX
15631
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
106
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
107 //This function reads from one slice (slice 1) and clears slices 0 and 1
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
/*
 * Plain-C slice store; this definition sits in the #if !HAVE_MMX branch.
 * For each of `height` rows, and in groups of 8 columns while x < width:
 *   temp = (src[x] + (dither[y][pos] >> log2_scale)) >> (6 - log2_scale)
 * Then src[x] and the entry 8*src_stride elements before it are zeroed,
 * temp is clamped and stored to dst[x].
 * Clamp: when bit 8 of temp is set, temp becomes ~(temp>>31), i.e. 0 for a
 * negative temp (assuming an arithmetic right shift of negative ints) and
 * -1 otherwise, which is stored as 255 in the uint8_t destination. Values
 * whose bit 8 is clear are stored without clamping.
 * Constraints visible from the code:
 *  - y indexes dither[8][8] directly, so height must not exceed 8;
 *  - columns are written 8 at a time, so a width that is not a multiple
 *    of 8 still touches the whole last group of 8;
 *  - dst_stride and src_stride are counted in elements of dst and src.
 */
108 static void store_slice_c(uint8_t *dst, int16_t *src, int dst_stride, int src_stride, int width, int height, int log2_scale)
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
109 {int y, x;
32702
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
110 #define STORE(pos) \
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
111 temp= (src[x + pos] + (d[pos]>>log2_scale))>>(6-log2_scale); \
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
112 src[x + pos]=src[x + pos - 8*src_stride]=0; \
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
113 if(temp & 0x100) temp= ~(temp>>31); \
15631
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
114 dst[x + pos]= temp;
29263
0f1b5b68af32 whitespace cosmetics: Remove all trailing whitespace.
diego
parents: 29087
diff changeset
115
15631
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
/* One dither row per output row; STORE is expanded 8 times per column group. */
116 for(y=0; y<height; y++){
32702
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
117 const uint8_t *d= dither[y];
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
118 for(x=0; x<width; x+=8){
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
119 int temp;
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
120 STORE(0);
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
121 STORE(1);
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
122 STORE(2);
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
123 STORE(3);
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
124 STORE(4);
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
125 STORE(5);
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
126 STORE(6);
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
127 STORE(7);
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
128 }
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
129 src+=src_stride;
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
130 dst+=dst_stride;
15631
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
131 }
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
132 }
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
133
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
134 //This function reads from two slices (0 and 2) and clears slice 2
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
/*
 * Plain-C two-slice store; this definition sits in the #if !HAVE_MMX branch.
 * For each of `height` rows, and in groups of 8 columns while x < width:
 *   temp = (src[x] + src[x + 16*src_stride] + (dither[y][pos] >> log2_scale))
 *              >> (6 - log2_scale)
 * Only the entry at x + 16*src_stride is zeroed afterwards; src[x] itself is
 * left untouched. temp is then clamped and stored to dst[x].
 * Clamp: when bit 8 of temp is set, temp becomes ~(temp>>31), i.e. 0 for a
 * negative temp (assuming an arithmetic right shift of negative ints) and
 * -1 otherwise, which is stored as 255 in the uint8_t destination. Values
 * whose bit 8 is clear are stored without clamping.
 * Constraints visible from the code:
 *  - y indexes dither[8][8] directly, so height must not exceed 8;
 *  - columns are written 8 at a time, so a width that is not a multiple
 *    of 8 still touches the whole last group of 8;
 *  - dst_stride and src_stride are counted in elements of dst and src.
 */
135 static void store_slice2_c(uint8_t *dst, int16_t *src, int dst_stride, int src_stride, int width, int height, int log2_scale)
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
136 {int y, x;
32702
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
137 #define STORE2(pos) \
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
138 temp= (src[x + pos] + src[x + pos + 16*src_stride] + (d[pos]>>log2_scale))>>(6-log2_scale); \
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
139 src[x + pos + 16*src_stride]=0; \
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
140 if(temp & 0x100) temp= ~(temp>>31); \
15631
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
141 dst[x + pos]= temp;
29263
0f1b5b68af32 whitespace cosmetics: Remove all trailing whitespace.
diego
parents: 29087
diff changeset
142
15631
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
/* One dither row per output row; STORE2 is expanded 8 times per column group. */
143 for(y=0; y<height; y++){
32702
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
144 const uint8_t *d= dither[y];
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
145 for(x=0; x<width; x+=8){
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
146 int temp;
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
147 STORE2(0);
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
148 STORE2(1);
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
149 STORE2(2);
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
150 STORE2(3);
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
151 STORE2(4);
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
152 STORE2(5);
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
153 STORE2(6);
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
154 STORE2(7);
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
155 }
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
156 src+=src_stride;
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
157 dst+=dst_stride;
15631
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
158 }
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
159 }
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
160
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
/*
 * Scales the threshold matrix by q (plain-C version, #if !HAVE_MMX branch).
 * Both p->threshold_mtx_noq and p->threshold_mtx are declared as uint64_t
 * arrays but are accessed here through short pointers; for each of the
 * 64 entries: threshold_mtx[a] = q * threshold_mtx_noq[a].
 * The product is computed as int and converted to short on assignment.
 */
161 static void mul_thrmat_c(struct vf_priv_s *p,int q)
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
162 {
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
163 int a;
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
164 for(a=0;a<64;a++)
32702
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
165 ((short*)p->threshold_mtx)[a]=q * ((short*)p->threshold_mtx_noq)[a];//ints faster in C
15631
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
166 }
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
167
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
/*
 * Forward declarations of the C DCT helpers, followed by aliases that bind
 * the generic *_s names to the *_c implementations. This section sits in
 * the #if !HAVE_MMX branch.
 * NOTE(review): the definitions of the three declared functions are outside
 * this chunk.
 */
168 static void column_fidct_c(int16_t* thr_adr, DCTELEM *data, DCTELEM *output, int cnt);
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
169 static void row_idct_c(DCTELEM* workspace,
32702
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
170 int16_t* output_adr, int output_stride, int cnt);
15631
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
171 static void row_fdct_c(DCTELEM *data, const uint8_t *pixels, int line_size, int cnt);
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
172
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
173 //this is rather ugly, but it avoids the need for function pointers
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
174 #define store_slice_s store_slice_c
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
175 #define store_slice2_s store_slice2_c
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
176 #define mul_thrmat_s mul_thrmat_c
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
177 #define column_fidct_s column_fidct_c
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
178 #define row_idct_s row_idct_c
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
179 #define row_fdct_s row_fdct_c
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
180
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
181 #else /* HAVE_MMX */
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
182
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
183 //This function reads from one slice (slice 1) and clears slices 0 and 1
15634
7eddcf69a5fd x86-64 fixes by Reimar
henry
parents: 15633
diff changeset
184 static void store_slice_mmx(uint8_t *dst, int16_t *src, long dst_stride, long src_stride, long width, long height, long log2_scale)
15631
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
185 {
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
186 const uint8_t *od=&dither[0][0];
15634
7eddcf69a5fd x86-64 fixes by Reimar
henry
parents: 15633
diff changeset
187 const uint8_t *end=&dither[height][0];
15631
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
188 width = (width+7)&~7;
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
189 dst_stride-=width;
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
190 //src_stride=(src_stride-width)*2;
27754
08d18fe9da52 Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents: 26727
diff changeset
191 __asm__ volatile(
32702
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
192 "mov %5, %%"REG_d" \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
193 "mov %6, %%"REG_S" \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
194 "mov %7, %%"REG_D" \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
195 "mov %1, %%"REG_a" \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
196 "movd %%"REG_d", %%mm5 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
197 "xor $-1, %%"REG_d" \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
198 "mov %%"REG_a", %%"REG_c" \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
199 "add $7, %%"REG_d" \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
200 "neg %%"REG_a" \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
201 "sub %0, %%"REG_c" \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
202 "add %%"REG_c", %%"REG_c" \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
203 "movd %%"REG_d", %%mm2 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
204 "mov %%"REG_c", %1 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
205 "mov %2, %%"REG_d" \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
206 "shl $4, %%"REG_a" \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
207
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
208 "2: \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
209 "movq (%%"REG_d"), %%mm3 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
210 "movq %%mm3, %%mm4 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
211 "pxor %%mm7, %%mm7 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
212 "punpcklbw %%mm7, %%mm3 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
213 "punpckhbw %%mm7, %%mm4 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
214 "mov %0, %%"REG_c" \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
215 "psraw %%mm5, %%mm3 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
216 "psraw %%mm5, %%mm4 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
217 "1: \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
218 "movq %%mm7, (%%"REG_S",%%"REG_a",) \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
219 "movq (%%"REG_S"), %%mm0 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
220 "movq 8(%%"REG_S"), %%mm1 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
221
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
222 "movq %%mm7, 8(%%"REG_S",%%"REG_a",) \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
223 "paddw %%mm3, %%mm0 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
224 "paddw %%mm4, %%mm1 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
225
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
226 "movq %%mm7, (%%"REG_S") \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
227 "psraw %%mm2, %%mm0 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
228 "psraw %%mm2, %%mm1 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
229
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
230 "movq %%mm7, 8(%%"REG_S") \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
231 "packuswb %%mm1, %%mm0 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
232 "add $16, %%"REG_S" \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
233
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
234 "movq %%mm0, (%%"REG_D") \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
235 "add $8, %%"REG_D" \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
236 "sub $8, %%"REG_c" \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
237 "jg 1b \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
238 "add %1, %%"REG_S" \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
239 "add $8, %%"REG_d" \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
240 "add %3, %%"REG_D" \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
241 "cmp %4, %%"REG_d" \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
242 "jl 2b \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
243
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
244 :
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
245 : "m" (width), "m" (src_stride), "erm" (od), "m" (dst_stride), "erm" (end),
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
246 "m" (log2_scale), "m" (src), "m" (dst) //input
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
247 : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S, "%"REG_D
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
248 );
15631
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
249 }
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
250
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
251 //This func reads from 2 slices, 0 & 2 and clears 2-nd
15634
7eddcf69a5fd x86-64 fixes by Reimar
henry
parents: 15633
diff changeset
252 static void store_slice2_mmx(uint8_t *dst, int16_t *src, long dst_stride, long src_stride, long width, long height, long log2_scale)
15631
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
253 {
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
254 const uint8_t *od=&dither[0][0];
15634
7eddcf69a5fd x86-64 fixes by Reimar
henry
parents: 15633
diff changeset
255 const uint8_t *end=&dither[height][0];
15631
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
256 width = (width+7)&~7;
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
257 dst_stride-=width;
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
258 //src_stride=(src_stride-width)*2;
27754
08d18fe9da52 Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents: 26727
diff changeset
259 __asm__ volatile(
32702
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
260 "mov %5, %%"REG_d" \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
261 "mov %6, %%"REG_S" \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
262 "mov %7, %%"REG_D" \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
263 "mov %1, %%"REG_a" \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
264 "movd %%"REG_d", %%mm5 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
265 "xor $-1, %%"REG_d" \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
266 "mov %%"REG_a", %%"REG_c" \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
267 "add $7, %%"REG_d" \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
268 "sub %0, %%"REG_c" \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
269 "add %%"REG_c", %%"REG_c" \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
270 "movd %%"REG_d", %%mm2 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
271 "mov %%"REG_c", %1 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
272 "mov %2, %%"REG_d" \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
273 "shl $5, %%"REG_a" \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
274
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
275 "2: \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
276 "movq (%%"REG_d"), %%mm3 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
277 "movq %%mm3, %%mm4 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
278 "pxor %%mm7, %%mm7 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
279 "punpcklbw %%mm7, %%mm3 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
280 "punpckhbw %%mm7, %%mm4 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
281 "mov %0, %%"REG_c" \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
282 "psraw %%mm5, %%mm3 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
283 "psraw %%mm5, %%mm4 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
284 "1: \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
285 "movq (%%"REG_S"), %%mm0 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
286 "movq 8(%%"REG_S"), %%mm1 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
287 "paddw %%mm3, %%mm0 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
288
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
289 "paddw (%%"REG_S",%%"REG_a",), %%mm0 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
290 "paddw %%mm4, %%mm1 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
291 "movq 8(%%"REG_S",%%"REG_a",), %%mm6 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
292
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
293 "movq %%mm7, (%%"REG_S",%%"REG_a",) \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
294 "psraw %%mm2, %%mm0 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
295 "paddw %%mm6, %%mm1 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
296
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
297 "movq %%mm7, 8(%%"REG_S",%%"REG_a",) \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
298 "psraw %%mm2, %%mm1 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
299 "packuswb %%mm1, %%mm0 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
300
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
301 "movq %%mm0, (%%"REG_D") \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
302 "add $16, %%"REG_S" \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
303 "add $8, %%"REG_D" \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
304 "sub $8, %%"REG_c" \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
305 "jg 1b \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
306 "add %1, %%"REG_S" \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
307 "add $8, %%"REG_d" \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
308 "add %3, %%"REG_D" \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
309 "cmp %4, %%"REG_d" \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
310 "jl 2b \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
311
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
312 :
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
313 : "m" (width), "m" (src_stride), "erm" (od), "m" (dst_stride), "erm" (end),
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
314 "m" (log2_scale), "m" (src), "m" (dst) //input
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
315 : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_D, "%"REG_S
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
316 );
15631
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
317 }
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
318
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
319 static void mul_thrmat_mmx(struct vf_priv_s *p, int q)
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
320 {
15634
7eddcf69a5fd x86-64 fixes by Reimar
henry
parents: 15633
diff changeset
321 uint64_t *adr=&p->threshold_mtx_noq[0];
27754
08d18fe9da52 Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents: 26727
diff changeset
322 __asm__ volatile(
32702
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
323 "movd %0, %%mm7 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
324 "add $8*8*2, %%"REG_D" \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
325 "movq 0*8(%%"REG_S"), %%mm0 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
326 "punpcklwd %%mm7, %%mm7 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
327 "movq 1*8(%%"REG_S"), %%mm1 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
328 "punpckldq %%mm7, %%mm7 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
329 "pmullw %%mm7, %%mm0 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
330
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
331 "movq 2*8(%%"REG_S"), %%mm2 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
332 "pmullw %%mm7, %%mm1 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
333
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
334 "movq 3*8(%%"REG_S"), %%mm3 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
335 "pmullw %%mm7, %%mm2 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
336
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
337 "movq %%mm0, 0*8(%%"REG_D") \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
338 "movq 4*8(%%"REG_S"), %%mm4 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
339 "pmullw %%mm7, %%mm3 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
340
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
341 "movq %%mm1, 1*8(%%"REG_D") \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
342 "movq 5*8(%%"REG_S"), %%mm5 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
343 "pmullw %%mm7, %%mm4 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
344
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
345 "movq %%mm2, 2*8(%%"REG_D") \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
346 "movq 6*8(%%"REG_S"), %%mm6 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
347 "pmullw %%mm7, %%mm5 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
348
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
349 "movq %%mm3, 3*8(%%"REG_D") \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
350 "movq 7*8+0*8(%%"REG_S"), %%mm0 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
351 "pmullw %%mm7, %%mm6 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
352
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
353 "movq %%mm4, 4*8(%%"REG_D") \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
354 "movq 7*8+1*8(%%"REG_S"), %%mm1 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
355 "pmullw %%mm7, %%mm0 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
356
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
357 "movq %%mm5, 5*8(%%"REG_D") \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
358 "movq 7*8+2*8(%%"REG_S"), %%mm2 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
359 "pmullw %%mm7, %%mm1 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
360
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
361 "movq %%mm6, 6*8(%%"REG_D") \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
362 "movq 7*8+3*8(%%"REG_S"), %%mm3 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
363 "pmullw %%mm7, %%mm2 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
364
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
365 "movq %%mm0, 7*8+0*8(%%"REG_D") \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
366 "movq 7*8+4*8(%%"REG_S"), %%mm4 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
367 "pmullw %%mm7, %%mm3 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
368
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
369 "movq %%mm1, 7*8+1*8(%%"REG_D") \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
370 "movq 7*8+5*8(%%"REG_S"), %%mm5 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
371 "pmullw %%mm7, %%mm4 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
372
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
373 "movq %%mm2, 7*8+2*8(%%"REG_D") \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
374 "movq 7*8+6*8(%%"REG_S"), %%mm6 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
375 "pmullw %%mm7, %%mm5 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
376
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
377 "movq %%mm3, 7*8+3*8(%%"REG_D") \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
378 "movq 14*8+0*8(%%"REG_S"), %%mm0 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
379 "pmullw %%mm7, %%mm6 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
380
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
381 "movq %%mm4, 7*8+4*8(%%"REG_D") \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
382 "movq 14*8+1*8(%%"REG_S"), %%mm1 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
383 "pmullw %%mm7, %%mm0 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
384
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
385 "movq %%mm5, 7*8+5*8(%%"REG_D") \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
386 "pmullw %%mm7, %%mm1 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
387
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
388 "movq %%mm6, 7*8+6*8(%%"REG_D") \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
389 "movq %%mm0, 14*8+0*8(%%"REG_D") \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
390 "movq %%mm1, 14*8+1*8(%%"REG_D") \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
391
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
392 : "+g" (q), "+S" (adr), "+D" (adr)
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
393 :
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
394 );
15631
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
395 }
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
396
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
397 static void column_fidct_mmx(int16_t* thr_adr, DCTELEM *data, DCTELEM *output, int cnt);
29263
0f1b5b68af32 whitespace cosmetics: Remove all trailing whitespace.
diego
parents: 29087
diff changeset
398 static void row_idct_mmx(DCTELEM* workspace,
32702
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
399 int16_t* output_adr, int output_stride, int cnt);
15631
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
400 static void row_fdct_mmx(DCTELEM *data, const uint8_t *pixels, int line_size, int cnt);
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
401
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
402 #define store_slice_s store_slice_mmx
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
403 #define store_slice2_s store_slice2_mmx
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
404 #define mul_thrmat_s mul_thrmat_mmx
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
405 #define column_fidct_s column_fidct_mmx
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
406 #define row_idct_s row_idct_mmx
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
407 #define row_fdct_s row_fdct_mmx
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
408 #endif // HAVE_MMX
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
409
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
410 static void filter(struct vf_priv_s *p, uint8_t *dst, uint8_t *src,
32702
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
411 int dst_stride, int src_stride,
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
412 int width, int height,
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
413 uint8_t *qp_store, int qp_stride, int is_luma)
15631
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
414 {
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
415 int x, x0, y, es, qy, t;
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
416 const int stride= is_luma ? p->temp_stride : (width+16);//((width+16+15)&(~15))
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
417 const int step=6-p->log2_count;
29263
0f1b5b68af32 whitespace cosmetics: Remove all trailing whitespace.
diego
parents: 29087
diff changeset
418 const int qps= 3 + is_luma;
15634
7eddcf69a5fd x86-64 fixes by Reimar
henry
parents: 15633
diff changeset
419 int32_t __attribute__((aligned(32))) block_align[4*8*BLOCKSZ+ 4*8*BLOCKSZ];
15631
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
420 DCTELEM *block= (DCTELEM *)block_align;
29263
0f1b5b68af32 whitespace cosmetics: Remove all trailing whitespace.
diego
parents: 29087
diff changeset
421 DCTELEM *block3=(DCTELEM *)(block_align+4*8*BLOCKSZ);
15631
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
422
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
423 memset(block3, 0, 4*8*BLOCKSZ);
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
424
29263
0f1b5b68af32 whitespace cosmetics: Remove all trailing whitespace.
diego
parents: 29087
diff changeset
425 //p->src=src-src_stride*8-8;//!
15631
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
426 if (!src || !dst) return; // HACK avoid crash for Y8 colourspace
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
427 for(y=0; y<height; y++){
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
428 int index= 8 + 8*stride + y*stride;
23457
a124f3abc1ec Replace implicit use of fast_memcpy via macro by explicit use to allow
reimar
parents: 21578
diff changeset
429 fast_memcpy(p->src + index, src + y*src_stride, width);//this line can be avoided by using DR & user fr.buffers
29263
0f1b5b68af32 whitespace cosmetics: Remove all trailing whitespace.
diego
parents: 29087
diff changeset
430 for(x=0; x<8; x++){
15631
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
431 p->src[index - x - 1]= p->src[index + x ];
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
432 p->src[index + width + x ]= p->src[index + width - x - 1];
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
433 }
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
434 }
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
435 for(y=0; y<8; y++){
23457
a124f3abc1ec Replace implicit use of fast_memcpy via macro by explicit use to allow
reimar
parents: 21578
diff changeset
436 fast_memcpy(p->src + ( 7-y)*stride, p->src + ( y+8)*stride, stride);
a124f3abc1ec Replace implicit use of fast_memcpy via macro by explicit use to allow
reimar
parents: 21578
diff changeset
437 fast_memcpy(p->src + (height+8+y)*stride, p->src + (height-y+7)*stride, stride);
15631
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
438 }
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
439 //FIXME (try edge emu)
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
440
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
441 for(y=8; y<24; y++)
32702
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
442 memset(p->temp+ 8 +y*stride, 0,width*sizeof(int16_t));
15631
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
443
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
444 for(y=step; y<height+8; y+=step){ //step= 1,2
32702
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
445 qy=y-4;
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
446 if (qy>height-1) qy=height-1;
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
447 if (qy<0) qy=0;
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
448 qy=(qy>>qps)*qp_stride;
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
449 row_fdct_s(block, p->src + y*stride +2-(y&1), stride, 2);
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
450 for(x0=0; x0<width+8-8*(BLOCKSZ-1); x0+=8*(BLOCKSZ-1)){
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
451 row_fdct_s(block+8*8, p->src + y*stride+8+x0 +2-(y&1), stride, 2*(BLOCKSZ-1));
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
452 if(p->qp)
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
453 column_fidct_s((int16_t*)(&p->threshold_mtx[0]), block+0*8, block3+0*8, 8*(BLOCKSZ-1)); //yes, this is a HOTSPOT
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
454 else
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
455 for (x=0; x<8*(BLOCKSZ-1); x+=8) {
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
456 t=x+x0-2; //correct t=x+x0-2-(y&1), but its the same
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
457 if (t<0) t=0;//t always < width-2
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
458 t=qp_store[qy+(t>>qps)];
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
459 t=norm_qscale(t, p->mpeg2);
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
460 if (t!=p->prev_q) p->prev_q=t, mul_thrmat_s(p, t);
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
461 column_fidct_s((int16_t*)(&p->threshold_mtx[0]), block+x*8, block3+x*8, 8); //yes, this is a HOTSPOT
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
462 }
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
463 row_idct_s(block3+0*8, p->temp + (y&15)*stride+x0+2-(y&1), stride, 2*(BLOCKSZ-1));
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
464 memmove(block, block+(BLOCKSZ-1)*64, 8*8*sizeof(DCTELEM)); //cycling
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
465 memmove(block3, block3+(BLOCKSZ-1)*64, 6*8*sizeof(DCTELEM));
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
466 }
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
467 //
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
468 es=width+8-x0; // 8, ...
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
469 if (es>8)
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
470 row_fdct_s(block+8*8, p->src + y*stride+8+x0 +2-(y&1), stride, (es-4)>>2);
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
471 column_fidct_s((int16_t*)(&p->threshold_mtx[0]), block, block3, es&(~1));
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
472 row_idct_s(block3+0*8, p->temp + (y&15)*stride+x0+2-(y&1), stride, es>>2);
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
473 {const int y1=y-8+step;//l5-7 l4-6
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
474 if (!(y1&7) && y1) {
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
475 if (y1&8) store_slice_s(dst + (y1-8)*dst_stride, p->temp+ 8 +8*stride,
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
476 dst_stride, stride, width, 8, 5-p->log2_count);
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
477 else store_slice2_s(dst + (y1-8)*dst_stride, p->temp+ 8 +0*stride,
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
478 dst_stride, stride, width, 8, 5-p->log2_count);
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
479 } }
15631
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
480 }
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
481
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
482 if (y&7) { // == height & 7
32702
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
483 if (y&8) store_slice_s(dst + ((y-8)&~7)*dst_stride, p->temp+ 8 +8*stride,
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
484 dst_stride, stride, width, y&7, 5-p->log2_count);
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
485 else store_slice2_s(dst + ((y-8)&~7)*dst_stride, p->temp+ 8 +0*stride,
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
486 dst_stride, stride, width, y&7, 5-p->log2_count);
15631
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
487 }
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
488 }
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
489
30642
a972c1a4a012 cosmetics: Rename struct vf_instance_s --> vf_instance.
diego
parents: 30638
diff changeset
490 static int config(struct vf_instance *vf,
32702
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
491 int width, int height, int d_width, int d_height,
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
492 unsigned int flags, unsigned int outfmt)
15631
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
493 {
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
494 int h= (height+16+15)&(~15);
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
495
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
496 vf->priv->temp_stride= (width+16+15)&(~15);
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
497 vf->priv->temp= (int16_t*)av_mallocz(vf->priv->temp_stride*3*8*sizeof(int16_t));
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
498 //this can also be avoided, see above
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
499 vf->priv->src = (uint8_t*)av_malloc(vf->priv->temp_stride*h*sizeof(uint8_t));
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
500
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
501 return vf_next_config(vf,width,height,d_width,d_height,flags,outfmt);
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
502 }
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
503
30642
a972c1a4a012 cosmetics: Rename struct vf_instance_s --> vf_instance.
diego
parents: 30638
diff changeset
504 static void get_image(struct vf_instance *vf, mp_image_t *mpi)
15631
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
505 {
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
506 if(mpi->flags&MP_IMGFLAG_PRESERVE) return; // don't change
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
507 // ok, we can do pp in-place (or pp disabled):
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
508 vf->dmpi=vf_get_image(vf->next,mpi->imgfmt,
32702
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
509 mpi->type, mpi->flags, mpi->width, mpi->height);
15631
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
510 mpi->planes[0]=vf->dmpi->planes[0];
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
511 mpi->stride[0]=vf->dmpi->stride[0];
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
512 mpi->width=vf->dmpi->width;
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
513 if(mpi->flags&MP_IMGFLAG_PLANAR){
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
514 mpi->planes[1]=vf->dmpi->planes[1];
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
515 mpi->planes[2]=vf->dmpi->planes[2];
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
516 mpi->stride[1]=vf->dmpi->stride[1];
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
517 mpi->stride[2]=vf->dmpi->stride[2];
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
518 }
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
519 mpi->flags|=MP_IMGFLAG_DIRECT;
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
520 }
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
521
30642
a972c1a4a012 cosmetics: Rename struct vf_instance_s --> vf_instance.
diego
parents: 30638
diff changeset
522 static int put_image(struct vf_instance *vf, mp_image_t *mpi, double pts)
15631
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
523 {
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
524 mp_image_t *dmpi;
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
525 if(!(mpi->flags&MP_IMGFLAG_DIRECT)){
32702
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
526 // no DR, so get a new image! hope we'll get DR buffer:
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
527 dmpi=vf_get_image(vf->next,mpi->imgfmt,
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
528 MP_IMGTYPE_TEMP,
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
529 MP_IMGFLAG_ACCEPT_STRIDE|MP_IMGFLAG_PREFER_ALIGNED_STRIDE,
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
530 mpi->width,mpi->height);
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
531 vf_clone_mpi_attributes(dmpi, mpi);
15631
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
532 }else{
32702
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
533 dmpi=vf->dmpi;
15631
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
534 }
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
535
17225
ec9888363742 reverse the H264 hack
henry
parents: 17223
diff changeset
536 vf->priv->mpeg2= mpi->qscale_type;
17133
a2b24e0d7772 prevent flicker on b-frames, trivial port from vf_spp
henry
parents: 17012
diff changeset
537 if(mpi->pict_type != 3 && mpi->qscale && !vf->priv->qp){
30363
915be5c7a30c Make sure that a qstride of 0 (intentional or not) does not completely break
reimar
parents: 29310
diff changeset
538 int w = mpi->qstride;
915be5c7a30c Make sure that a qstride of 0 (intentional or not) does not completely break
reimar
parents: 29310
diff changeset
539 int h = (mpi->h + 15) >> 4;
915be5c7a30c Make sure that a qstride of 0 (intentional or not) does not completely break
reimar
parents: 29310
diff changeset
540 if (!w) {
915be5c7a30c Make sure that a qstride of 0 (intentional or not) does not completely break
reimar
parents: 29310
diff changeset
541 w = (mpi->w + 15) >> 4;
915be5c7a30c Make sure that a qstride of 0 (intentional or not) does not completely break
reimar
parents: 29310
diff changeset
542 h = 1;
915be5c7a30c Make sure that a qstride of 0 (intentional or not) does not completely break
reimar
parents: 29310
diff changeset
543 }
915be5c7a30c Make sure that a qstride of 0 (intentional or not) does not completely break
reimar
parents: 29310
diff changeset
544 if(!vf->priv->non_b_qp)
915be5c7a30c Make sure that a qstride of 0 (intentional or not) does not completely break
reimar
parents: 29310
diff changeset
545 vf->priv->non_b_qp= malloc(w*h);
915be5c7a30c Make sure that a qstride of 0 (intentional or not) does not completely break
reimar
parents: 29310
diff changeset
546 fast_memcpy(vf->priv->non_b_qp, mpi->qscale, w*h);
17133
a2b24e0d7772 prevent flicker on b-frames, trivial port from vf_spp
henry
parents: 17012
diff changeset
547 }
15631
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
548 if(vf->priv->log2_count || !(mpi->flags&MP_IMGFLAG_DIRECT)){
32702
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
549 char *qp_tab= vf->priv->non_b_qp;
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
550 if(vf->priv->bframes || !qp_tab)
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
551 qp_tab= mpi->qscale;
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
552
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
553 if(qp_tab || vf->priv->qp){
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
554 filter(vf->priv, dmpi->planes[0], mpi->planes[0], dmpi->stride[0], mpi->stride[0],
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
555 mpi->w, mpi->h, qp_tab, mpi->qstride, 1);
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
556 filter(vf->priv, dmpi->planes[1], mpi->planes[1], dmpi->stride[1], mpi->stride[1],
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
557 mpi->w>>mpi->chroma_x_shift, mpi->h>>mpi->chroma_y_shift, qp_tab, mpi->qstride, 0);
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
558 filter(vf->priv, dmpi->planes[2], mpi->planes[2], dmpi->stride[2], mpi->stride[2],
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
559 mpi->w>>mpi->chroma_x_shift, mpi->h>>mpi->chroma_y_shift, qp_tab, mpi->qstride, 0);
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
560 }else{
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
561 memcpy_pic(dmpi->planes[0], mpi->planes[0], mpi->w, mpi->h, dmpi->stride[0], mpi->stride[0]);
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
562 memcpy_pic(dmpi->planes[1], mpi->planes[1], mpi->w>>mpi->chroma_x_shift, mpi->h>>mpi->chroma_y_shift, dmpi->stride[1], mpi->stride[1]);
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
563 memcpy_pic(dmpi->planes[2], mpi->planes[2], mpi->w>>mpi->chroma_x_shift, mpi->h>>mpi->chroma_y_shift, dmpi->stride[2], mpi->stride[2]);
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
564 }
15631
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
565 }
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
566
28290
25337a2147e7 Lots and lots of #ifdef ARCH_... -> #if ARCH_...
reimar
parents: 27754
diff changeset
567 #if HAVE_MMX
27754
08d18fe9da52 Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents: 26727
diff changeset
568 if(gCpuCaps.hasMMX) __asm__ volatile ("emms\n\t");
15631
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
569 #endif
28290
25337a2147e7 Lots and lots of #ifdef ARCH_... -> #if ARCH_...
reimar
parents: 27754
diff changeset
570 #if HAVE_MMX2
27754
08d18fe9da52 Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents: 26727
diff changeset
571 if(gCpuCaps.hasMMX2) __asm__ volatile ("sfence\n\t");
15631
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
572 #endif
17906
20aca9baf5d8 passing pts through the filter layer (lets see if pts or cola comes out at the end)
michael
parents: 17523
diff changeset
573 return vf_next_put_image(vf,dmpi, pts);
15631
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
574 }
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
575
30642
a972c1a4a012 cosmetics: Rename struct vf_instance_s --> vf_instance.
diego
parents: 30638
diff changeset
576 static void uninit(struct vf_instance *vf)
15631
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
577 {
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
578 if(!vf->priv) return;
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
579
32537
8fa2f43cb760 Remove most of the NULL pointer check before free all over the code
cboesch
parents: 31959
diff changeset
580 av_free(vf->priv->temp);
15631
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
581 vf->priv->temp= NULL;
32537
8fa2f43cb760 Remove most of the NULL pointer check before free all over the code
cboesch
parents: 31959
diff changeset
582 av_free(vf->priv->src);
15631
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
583 vf->priv->src= NULL;
32537
8fa2f43cb760 Remove most of the NULL pointer check before free all over the code
cboesch
parents: 31959
diff changeset
584 //free(vf->priv->avctx);
15631
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
585 //vf->priv->avctx= NULL;
32537
8fa2f43cb760 Remove most of the NULL pointer check before free all over the code
cboesch
parents: 31959
diff changeset
586 free(vf->priv->non_b_qp);
17133
a2b24e0d7772 prevent flicker on b-frames, trivial port from vf_spp
henry
parents: 17012
diff changeset
587 vf->priv->non_b_qp= NULL;
29263
0f1b5b68af32 whitespace cosmetics: Remove all trailing whitespace.
diego
parents: 29087
diff changeset
588
15631
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
589 av_free(vf->priv);
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
590 vf->priv=NULL;
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
591 }
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
592
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
593 //===========================================================================//
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
594
30642
a972c1a4a012 cosmetics: Rename struct vf_instance_s --> vf_instance.
diego
parents: 30638
diff changeset
595 static int query_format(struct vf_instance *vf, unsigned int fmt)
15631
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
596 {
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
597 switch(fmt){
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
598 case IMGFMT_YVU9:
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
599 case IMGFMT_IF09:
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
600 case IMGFMT_YV12:
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
601 case IMGFMT_I420:
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
602 case IMGFMT_IYUV:
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
603 case IMGFMT_CLPL:
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
604 case IMGFMT_Y800:
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
605 case IMGFMT_Y8:
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
606 case IMGFMT_444P:
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
607 case IMGFMT_422P:
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
608 case IMGFMT_411P:
32702
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
609 return vf_next_query_format(vf,fmt);
15631
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
610 }
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
611 return 0;
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
612 }
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
613
30642
a972c1a4a012 cosmetics: Rename struct vf_instance_s --> vf_instance.
diego
parents: 30638
diff changeset
614 static int control(struct vf_instance *vf, int request, void* data)
15631
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
615 {
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
616 switch(request){
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
617 case VFCTRL_QUERY_MAX_PP_LEVEL:
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
618 return 5;
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
619 case VFCTRL_SET_PP_LEVEL:
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
620 vf->priv->log2_count= *((unsigned int*)data);
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
621 if (vf->priv->log2_count < 4) vf->priv->log2_count=4;
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
622 return CONTROL_TRUE;
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
623 }
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
624 return vf_next_control(vf,request,data);
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
625 }
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
626
30638
a7b908875c14 Rename open() vf initialization function to vf_open().
diego
parents: 30412
diff changeset
627 static int vf_open(vf_instance_t *vf, char *args)
15631
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
628 {
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
629 int i=0, bias;
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
630 int custom_threshold_m[64];
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
631 int log2c=-1;
29263
0f1b5b68af32 whitespace cosmetics: Remove all trailing whitespace.
diego
parents: 29087
diff changeset
632
15631
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
633 vf->config=config;
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
634 vf->put_image=put_image;
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
635 vf->get_image=get_image;
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
636 vf->query_format=query_format;
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
637 vf->uninit=uninit;
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
638 vf->control= control;
29263
0f1b5b68af32 whitespace cosmetics: Remove all trailing whitespace.
diego
parents: 29087
diff changeset
639 vf->priv=av_mallocz(sizeof(struct vf_priv_s));//assumes align 16 !
0f1b5b68af32 whitespace cosmetics: Remove all trailing whitespace.
diego
parents: 29087
diff changeset
640
31959
f957f330aa6d Introduce init_avcodec function to avoid duplicated FFmpeg initializations.
diego
parents: 30642
diff changeset
641 init_avcodec();
15631
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
642
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
643 //vf->priv->avctx= avcodec_alloc_context();
34728
a28c4ba97171 Replace use of deprecated dsputil_init() by ff_dsputil_init().
diego
parents: 34198
diff changeset
644 //ff_dsputil_init(&vf->priv->dsp, vf->priv->avctx);
29263
0f1b5b68af32 whitespace cosmetics: Remove all trailing whitespace.
diego
parents: 29087
diff changeset
645
15631
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
646 vf->priv->log2_count= 4;
17225
ec9888363742 reverse the H264 hack
henry
parents: 17223
diff changeset
647 vf->priv->bframes = 0;
29263
0f1b5b68af32 whitespace cosmetics: Remove all trailing whitespace.
diego
parents: 29087
diff changeset
648
17225
ec9888363742 reverse the H264 hack
henry
parents: 17223
diff changeset
649 if (args) sscanf(args, "%d:%d:%d:%d", &log2c, &vf->priv->qp, &i, &vf->priv->bframes);
ec9888363742 reverse the H264 hack
henry
parents: 17223
diff changeset
650
ec9888363742 reverse the H264 hack
henry
parents: 17223
diff changeset
651 if( log2c >=4 && log2c <=5 )
15631
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
652 vf->priv->log2_count = log2c;
15651
6a0494e09435 sanity checks for options; treat quality > 5 as 5, not 4
henry
parents: 15634
diff changeset
653 else if( log2c >= 6 )
32702
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
654 vf->priv->log2_count = 5;
15631
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
655
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
656 if(vf->priv->qp < 0)
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
657 vf->priv->qp = 0;
15651
6a0494e09435 sanity checks for options; treat quality > 5 as 5, not 4
henry
parents: 15634
diff changeset
658
6a0494e09435 sanity checks for options; treat quality > 5 as 5, not 4
henry
parents: 15634
diff changeset
659 if (i < -15) i = -15;
6a0494e09435 sanity checks for options; treat quality > 5 as 5, not 4
henry
parents: 15634
diff changeset
660 if (i > 32) i = 32;
29263
0f1b5b68af32 whitespace cosmetics: Remove all trailing whitespace.
diego
parents: 29087
diff changeset
661
15631
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
662 bias= (1<<4)+i; //regulable
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
663 vf->priv->prev_q=0;
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
664 //
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
665 for(i=0;i<64;i++) //FIXME: tune custom_threshold[] and remove this !
32702
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
666 custom_threshold_m[i]=(int)(custom_threshold[i]*(bias/71.)+ 0.5);
15631
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
667 for(i=0;i<8;i++){
32702
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
668 vf->priv->threshold_mtx_noq[2*i]=(uint64_t)custom_threshold_m[i*8+2]
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
669 |(((uint64_t)custom_threshold_m[i*8+6])<<16)
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
670 |(((uint64_t)custom_threshold_m[i*8+0])<<32)
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
671 |(((uint64_t)custom_threshold_m[i*8+4])<<48);
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
672 vf->priv->threshold_mtx_noq[2*i+1]=(uint64_t)custom_threshold_m[i*8+5]
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
673 |(((uint64_t)custom_threshold_m[i*8+3])<<16)
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
674 |(((uint64_t)custom_threshold_m[i*8+1])<<32)
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
675 |(((uint64_t)custom_threshold_m[i*8+7])<<48);
15631
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
676 }
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
677
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
678 if (vf->priv->qp) vf->priv->prev_q=vf->priv->qp, mul_thrmat_s(vf->priv, vf->priv->qp);
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
679
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
680 return 1;
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
681 }
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
682
25221
00fff9a3b735 Make all vf_info_t structs const
reimar
parents: 24976
diff changeset
683 const vf_info_t vf_info_fspp = {
15631
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
684 "fast simple postprocess",
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
685 "fspp",
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
686 "Michael Niedermayer, Nikolaj Poroshin",
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
687 "",
30638
a7b908875c14 Rename open() vf initialization function to vf_open().
diego
parents: 30412
diff changeset
688 vf_open,
15631
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
689 NULL
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
690 };
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
691
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
692 //====================================================================
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
693 //Specific spp's dct, idct and threshold functions
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
694 //I'd prefer to have them in the separate file.
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
695
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
696 //#define MANGLE(a) #a
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
697
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
698 //typedef int16_t DCTELEM; //! only int16_t
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
699
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
/* Edge length of a DCT block, as a number and as a string literal. */
#define DCTSIZE 8
#define DCTSIZE_S "8"

/*
 * FIX(x,s): x scaled by 2^s, rounded by adding 0.5 and truncating to int,
 * then reduced to its low 16 bits.
 */
#define FIX(x,s) ((int) ((x) * (1<<(s)) + 0.5)&0xffff)

/*
 * C64(x): replicate a 16-bit value into all four 16-bit lanes of a uint64_t.
 * x is expected to be in 0..0xffff. Every operand is widened to uint64_t
 * before shifting so that no signed int is shifted past INT_MAX, and the
 * expansion is fully parenthesized so it can be used inside larger
 * expressions.
 */
#define C64(x) ((uint64_t)(x)<<48 | (uint64_t)(x)<<32 | (uint64_t)(x)<<16 | (uint64_t)(x))

/* FIX64(x,s): FIX(x,s) replicated into four 16-bit lanes. */
#define FIX64(x,s) C64(FIX(x,s))

/* MULTIPLY16H(x,k): bits 16 and up of the product x*k. */
#define MULTIPLY16H(x,k) (((x)*(k))>>16)

/*
 * THRESHOLD(r,x,t): r = x when (unsigned)(x+t) > 2*t, otherwise r = 0.
 * Wrapped in do/while(0) so it behaves as a single statement, also inside
 * an unbraced if/else; invoke it with a trailing semicolon.
 */
#define THRESHOLD(r,x,t) \
    do { \
        if (((unsigned)((x)+(t))) > (t)*2) (r) = (x); \
        else (r) = 0; \
    } while (0)

/* DESCALE(x,n): x shifted right by n bits after adding half of 2^n. */
#define DESCALE(x,n) (((x) + (1 << ((n)-1))) >> (n))
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
710
28290
25337a2147e7 Lots and lots of #ifdef ARCH_... -> #if ARCH_...
reimar
parents: 27754
diff changeset
711 #if HAVE_MMX
15631
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
712
29263
0f1b5b68af32 whitespace cosmetics: Remove all trailing whitespace.
diego
parents: 29087
diff changeset
713 DECLARE_ASM_CONST(8, uint64_t, MM_FIX_0_382683433)=FIX64(0.382683433, 14);
0f1b5b68af32 whitespace cosmetics: Remove all trailing whitespace.
diego
parents: 29087
diff changeset
714 DECLARE_ASM_CONST(8, uint64_t, MM_FIX_0_541196100)=FIX64(0.541196100, 14);
0f1b5b68af32 whitespace cosmetics: Remove all trailing whitespace.
diego
parents: 29087
diff changeset
715 DECLARE_ASM_CONST(8, uint64_t, MM_FIX_0_707106781)=FIX64(0.707106781, 14);
0f1b5b68af32 whitespace cosmetics: Remove all trailing whitespace.
diego
parents: 29087
diff changeset
716 DECLARE_ASM_CONST(8, uint64_t, MM_FIX_1_306562965)=FIX64(1.306562965, 14);
0f1b5b68af32 whitespace cosmetics: Remove all trailing whitespace.
diego
parents: 29087
diff changeset
717
0f1b5b68af32 whitespace cosmetics: Remove all trailing whitespace.
diego
parents: 29087
diff changeset
718 DECLARE_ASM_CONST(8, uint64_t, MM_FIX_1_414213562_A)=FIX64(1.414213562, 14);
0f1b5b68af32 whitespace cosmetics: Remove all trailing whitespace.
diego
parents: 29087
diff changeset
719
0f1b5b68af32 whitespace cosmetics: Remove all trailing whitespace.
diego
parents: 29087
diff changeset
720 DECLARE_ASM_CONST(8, uint64_t, MM_FIX_1_847759065)=FIX64(1.847759065, 13);
25901
c2210e68a2a9 Simplify: use DECLARE_ASM_CONST
reimar
parents: 25568
diff changeset
721 DECLARE_ASM_CONST(8, uint64_t, MM_FIX_2_613125930)=FIX64(-2.613125930, 13); //-
29263
0f1b5b68af32 whitespace cosmetics: Remove all trailing whitespace.
diego
parents: 29087
diff changeset
722 DECLARE_ASM_CONST(8, uint64_t, MM_FIX_1_414213562)=FIX64(1.414213562, 13);
25901
c2210e68a2a9 Simplify: use DECLARE_ASM_CONST
reimar
parents: 25568
diff changeset
723 DECLARE_ASM_CONST(8, uint64_t, MM_FIX_1_082392200)=FIX64(1.082392200, 13);
15631
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
724 //for t3,t5,t7 == 0 shortcut
29263
0f1b5b68af32 whitespace cosmetics: Remove all trailing whitespace.
diego
parents: 29087
diff changeset
725 DECLARE_ASM_CONST(8, uint64_t, MM_FIX_0_847759065)=FIX64(0.847759065, 14);
0f1b5b68af32 whitespace cosmetics: Remove all trailing whitespace.
diego
parents: 29087
diff changeset
726 DECLARE_ASM_CONST(8, uint64_t, MM_FIX_0_566454497)=FIX64(0.566454497, 14);
0f1b5b68af32 whitespace cosmetics: Remove all trailing whitespace.
diego
parents: 29087
diff changeset
727 DECLARE_ASM_CONST(8, uint64_t, MM_FIX_0_198912367)=FIX64(0.198912367, 14);
15631
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
728
25901
c2210e68a2a9 Simplify: use DECLARE_ASM_CONST
reimar
parents: 25568
diff changeset
729 DECLARE_ASM_CONST(8, uint64_t, MM_DESCALE_RND)=C64(4);
c2210e68a2a9 Simplify: use DECLARE_ASM_CONST
reimar
parents: 25568
diff changeset
730 DECLARE_ASM_CONST(8, uint64_t, MM_2)=C64(2);
15631
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
731
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
732 #else /* !HAVE_MMX */
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
733
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
734 typedef int32_t int_simd16_t;
29263
0f1b5b68af32 whitespace cosmetics: Remove all trailing whitespace.
diego
parents: 29087
diff changeset
735 static const int16_t FIX_0_382683433=FIX(0.382683433, 14);
0f1b5b68af32 whitespace cosmetics: Remove all trailing whitespace.
diego
parents: 29087
diff changeset
736 static const int16_t FIX_0_541196100=FIX(0.541196100, 14);
0f1b5b68af32 whitespace cosmetics: Remove all trailing whitespace.
diego
parents: 29087
diff changeset
737 static const int16_t FIX_0_707106781=FIX(0.707106781, 14);
0f1b5b68af32 whitespace cosmetics: Remove all trailing whitespace.
diego
parents: 29087
diff changeset
738 static const int16_t FIX_1_306562965=FIX(1.306562965, 14);
0f1b5b68af32 whitespace cosmetics: Remove all trailing whitespace.
diego
parents: 29087
diff changeset
739 static const int16_t FIX_1_414213562_A=FIX(1.414213562, 14);
0f1b5b68af32 whitespace cosmetics: Remove all trailing whitespace.
diego
parents: 29087
diff changeset
740 static const int16_t FIX_1_847759065=FIX(1.847759065, 13);
25902
15ab840747e2 mark constants as such
reimar
parents: 25901
diff changeset
741 static const int16_t FIX_2_613125930=FIX(-2.613125930, 13); //-
29263
0f1b5b68af32 whitespace cosmetics: Remove all trailing whitespace.
diego
parents: 29087
diff changeset
742 static const int16_t FIX_1_414213562=FIX(1.414213562, 13);
25902
15ab840747e2 mark constants as such
reimar
parents: 25901
diff changeset
743 static const int16_t FIX_1_082392200=FIX(1.082392200, 13);
15631
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
744
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
745 #endif
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
746
28290
25337a2147e7 Lots and lots of #ifdef ARCH_... -> #if ARCH_...
reimar
parents: 27754
diff changeset
747 #if !HAVE_MMX
15631
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
748
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
749 static void column_fidct_c(int16_t* thr_adr, DCTELEM *data, DCTELEM *output, int cnt)
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
750 {
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
751 int_simd16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
752 int_simd16_t tmp10, tmp11, tmp12, tmp13;
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
753 int_simd16_t z1,z2,z3,z4,z5, z10, z11, z12, z13;
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
754 int_simd16_t d0, d1, d2, d3, d4, d5, d6, d7;
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
755
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
756 DCTELEM* dataptr;
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
757 DCTELEM* wsptr;
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
758 int16_t *threshold;
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
759 int ctr;
29263
0f1b5b68af32 whitespace cosmetics: Remove all trailing whitespace.
diego
parents: 29087
diff changeset
760
15631
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
761 dataptr = data;
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
762 wsptr = output;
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
763
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
764 for (; cnt > 0; cnt-=2) { //start positions
32702
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
765 threshold=(int16_t*)thr_adr;//threshold_mtx
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
766 for (ctr = DCTSIZE; ctr > 0; ctr--) {
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
767 // Process columns from input, add to output.
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
768 tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
769 tmp7 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
770
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
771 tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
772 tmp6 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
773
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
774 tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
775 tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
776
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
777 tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
778 tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
779
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
780 // Even part of FDCT
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
781
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
782 tmp10 = tmp0 + tmp3;
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
783 tmp13 = tmp0 - tmp3;
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
784 tmp11 = tmp1 + tmp2;
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
785 tmp12 = tmp1 - tmp2;
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
786
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
787 d0 = tmp10 + tmp11;
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
788 d4 = tmp10 - tmp11;
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
789
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
790 z1 = MULTIPLY16H((tmp12 + tmp13) <<2, FIX_0_707106781);
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
791 d2 = tmp13 + z1;
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
792 d6 = tmp13 - z1;
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
793
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
794 // Even part of IDCT
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
795
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
796 THRESHOLD(tmp0, d0, threshold[0*8]);
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
797 THRESHOLD(tmp1, d2, threshold[2*8]);
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
798 THRESHOLD(tmp2, d4, threshold[4*8]);
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
799 THRESHOLD(tmp3, d6, threshold[6*8]);
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
800 tmp0+=2;
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
801 tmp10 = (tmp0 + tmp2)>>2;
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
802 tmp11 = (tmp0 - tmp2)>>2;
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
803
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
804 tmp13 = (tmp1 + tmp3)>>2; //+2 ! (psnr decides)
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
805 tmp12 = MULTIPLY16H((tmp1 - tmp3), FIX_1_414213562_A) - tmp13; //<<2
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
806
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
807 tmp0 = tmp10 + tmp13; //->temps
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
808 tmp3 = tmp10 - tmp13; //->temps
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
809 tmp1 = tmp11 + tmp12; //->temps
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
810 tmp2 = tmp11 - tmp12; //->temps
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
811
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
812 // Odd part of FDCT
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
813
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
814 tmp10 = tmp4 + tmp5;
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
815 tmp11 = tmp5 + tmp6;
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
816 tmp12 = tmp6 + tmp7;
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
817
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
818 z5 = MULTIPLY16H((tmp10 - tmp12)<<2, FIX_0_382683433);
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
819 z2 = MULTIPLY16H(tmp10 <<2, FIX_0_541196100) + z5;
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
820 z4 = MULTIPLY16H(tmp12 <<2, FIX_1_306562965) + z5;
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
821 z3 = MULTIPLY16H(tmp11 <<2, FIX_0_707106781);
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
822
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
823 z11 = tmp7 + z3;
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
824 z13 = tmp7 - z3;
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
825
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
826 d5 = z13 + z2;
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
827 d3 = z13 - z2;
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
828 d1 = z11 + z4;
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
829 d7 = z11 - z4;
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
830
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
831 // Odd part of IDCT
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
832
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
833 THRESHOLD(tmp4, d1, threshold[1*8]);
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
834 THRESHOLD(tmp5, d3, threshold[3*8]);
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
835 THRESHOLD(tmp6, d5, threshold[5*8]);
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
836 THRESHOLD(tmp7, d7, threshold[7*8]);
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
837
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
838 //Simd version uses here a shortcut for the tmp5,tmp6,tmp7 == 0
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
839 z13 = tmp6 + tmp5;
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
840 z10 = (tmp6 - tmp5)<<1;
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
841 z11 = tmp4 + tmp7;
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
842 z12 = (tmp4 - tmp7)<<1;
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
843
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
844 tmp7 = (z11 + z13)>>2; //+2 !
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
845 tmp11 = MULTIPLY16H((z11 - z13)<<1, FIX_1_414213562);
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
846 z5 = MULTIPLY16H(z10 + z12, FIX_1_847759065);
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
847 tmp10 = MULTIPLY16H(z12, FIX_1_082392200) - z5;
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
848 tmp12 = MULTIPLY16H(z10, FIX_2_613125930) + z5; // - !!
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
849
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
850 tmp6 = tmp12 - tmp7;
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
851 tmp5 = tmp11 - tmp6;
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
852 tmp4 = tmp10 + tmp5;
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
853
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
854 wsptr[DCTSIZE*0]+= (tmp0 + tmp7);
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
855 wsptr[DCTSIZE*1]+= (tmp1 + tmp6);
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
856 wsptr[DCTSIZE*2]+= (tmp2 + tmp5);
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
857 wsptr[DCTSIZE*3]+= (tmp3 - tmp4);
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
858 wsptr[DCTSIZE*4]+= (tmp3 + tmp4);
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
859 wsptr[DCTSIZE*5]+= (tmp2 - tmp5);
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
860 wsptr[DCTSIZE*6]= (tmp1 - tmp6);
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
861 wsptr[DCTSIZE*7]= (tmp0 - tmp7);
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
862 //
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
863 dataptr++; //next column
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
864 wsptr++;
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
865 threshold++;
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
866 }
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
867 dataptr+=8; //skip each second start pos
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
868 wsptr +=8;
15631
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
869 }
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
870 }
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
871
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
872 #else /* HAVE_MMX */
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
873
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
874 static void column_fidct_mmx(int16_t* thr_adr, DCTELEM *data, DCTELEM *output, int cnt)
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
875 {
26052
ce480034f391 Do not use a global temps variable, this is ugly and does not compile with ICC.
reimar
parents: 26050
diff changeset
876 uint64_t __attribute__((aligned(8))) temps[4];
27754
08d18fe9da52 Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents: 26727
diff changeset
877 __asm__ volatile(
32702
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
878 ASMALIGN(4)
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
879 "1: \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
880 "movq "DCTSIZE_S"*0*2(%%"REG_S"), %%mm1 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
881 //
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
882 "movq "DCTSIZE_S"*3*2(%%"REG_S"), %%mm7 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
883 "movq %%mm1, %%mm0 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
884
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
885 "paddw "DCTSIZE_S"*7*2(%%"REG_S"), %%mm1 \n\t" //t0
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
886 "movq %%mm7, %%mm3 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
887
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
888 "paddw "DCTSIZE_S"*4*2(%%"REG_S"), %%mm7 \n\t" //t3
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
889 "movq %%mm1, %%mm5 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
890
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
891 "movq "DCTSIZE_S"*1*2(%%"REG_S"), %%mm6 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
892 "psubw %%mm7, %%mm1 \n\t" //t13
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
893
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
894 "movq "DCTSIZE_S"*2*2(%%"REG_S"), %%mm2 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
895 "movq %%mm6, %%mm4 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
896
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
897 "paddw "DCTSIZE_S"*6*2(%%"REG_S"), %%mm6 \n\t" //t1
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
898 "paddw %%mm7, %%mm5 \n\t" //t10
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
899
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
900 "paddw "DCTSIZE_S"*5*2(%%"REG_S"), %%mm2 \n\t" //t2
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
901 "movq %%mm6, %%mm7 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
902
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
903 "paddw %%mm2, %%mm6 \n\t" //t11
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
904 "psubw %%mm2, %%mm7 \n\t" //t12
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
905
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
906 "movq %%mm5, %%mm2 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
907 "paddw %%mm6, %%mm5 \n\t" //d0
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
908 // i0 t13 t12 i3 i1 d0 - d4
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
909 "psubw %%mm6, %%mm2 \n\t" //d4
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
910 "paddw %%mm1, %%mm7 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
911
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
912 "movq 4*16(%%"REG_d"), %%mm6 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
913 "psllw $2, %%mm7 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
914
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
915 "psubw 0*16(%%"REG_d"), %%mm5 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
916 "psubw %%mm6, %%mm2 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
917
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
918 "paddusw 0*16(%%"REG_d"), %%mm5 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
919 "paddusw %%mm6, %%mm2 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
920
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
921 "pmulhw "MANGLE(MM_FIX_0_707106781)", %%mm7 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
922 //
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
923 "paddw 0*16(%%"REG_d"), %%mm5 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
924 "paddw %%mm6, %%mm2 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
925
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
926 "psubusw 0*16(%%"REG_d"), %%mm5 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
927 "psubusw %%mm6, %%mm2 \n\t"
15631
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
928
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
929 //This func is totally compute-bound, operates at huge speed. So, DC shortcut
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
930 // at this place isn't worthwhile due to BTB miss penalty (checked on Pent. 3).
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
931 //However, typical numbers: nondc - 29%%, dc - 46%%, zero - 25%%. All <> 0 case is very rare.
32702
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
932 "paddw "MANGLE(MM_2)", %%mm5 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
933 "movq %%mm2, %%mm6 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
934
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
935 "paddw %%mm5, %%mm2 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
936 "psubw %%mm6, %%mm5 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
937
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
938 "movq %%mm1, %%mm6 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
939 "paddw %%mm7, %%mm1 \n\t" //d2
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
940
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
941 "psubw 2*16(%%"REG_d"), %%mm1 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
942 "psubw %%mm7, %%mm6 \n\t" //d6
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
943
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
944 "movq 6*16(%%"REG_d"), %%mm7 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
945 "psraw $2, %%mm5 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
946
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
947 "paddusw 2*16(%%"REG_d"), %%mm1 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
948 "psubw %%mm7, %%mm6 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
949 // t7 d2 /t11 t4 t6 - d6 /t10
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
950
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
951 "paddw 2*16(%%"REG_d"), %%mm1 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
952 "paddusw %%mm7, %%mm6 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
953
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
954 "psubusw 2*16(%%"REG_d"), %%mm1 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
955 "paddw %%mm7, %%mm6 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
956
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
957 "psubw "DCTSIZE_S"*4*2(%%"REG_S"), %%mm3 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
958 "psubusw %%mm7, %%mm6 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
959
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
960 //movq [edi+"DCTSIZE_S"*2*2], mm1
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
961 //movq [edi+"DCTSIZE_S"*6*2], mm6
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
962 "movq %%mm1, %%mm7 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
963 "psraw $2, %%mm2 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
964
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
965 "psubw "DCTSIZE_S"*6*2(%%"REG_S"), %%mm4 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
966 "psubw %%mm6, %%mm1 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
967
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
968 "psubw "DCTSIZE_S"*7*2(%%"REG_S"), %%mm0 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
969 "paddw %%mm7, %%mm6 \n\t" //'t13
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
970
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
971 "psraw $2, %%mm6 \n\t" //paddw mm6, MM_2 !! ---
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
972 "movq %%mm2, %%mm7 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
973
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
974 "pmulhw "MANGLE(MM_FIX_1_414213562_A)", %%mm1 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
975 "paddw %%mm6, %%mm2 \n\t" //'t0
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
976
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
977 "movq %%mm2, 0*8+%3 \n\t" //!
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
978 "psubw %%mm6, %%mm7 \n\t" //'t3
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
979
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
980 "movq "DCTSIZE_S"*2*2(%%"REG_S"), %%mm2 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
981 "psubw %%mm6, %%mm1 \n\t" //'t12
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
982
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
983 "psubw "DCTSIZE_S"*5*2(%%"REG_S"), %%mm2 \n\t" //t5
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
984 "movq %%mm5, %%mm6 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
985
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
986 "movq %%mm7, 3*8+%3 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
987 "paddw %%mm2, %%mm3 \n\t" //t10
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
988
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
989 "paddw %%mm4, %%mm2 \n\t" //t11
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
990 "paddw %%mm0, %%mm4 \n\t" //t12
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
991
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
992 "movq %%mm3, %%mm7 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
993 "psubw %%mm4, %%mm3 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
994
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
995 "psllw $2, %%mm3 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
996 "psllw $2, %%mm7 \n\t" //opt for P6
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
997
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
998 "pmulhw "MANGLE(MM_FIX_0_382683433)", %%mm3 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
999 "psllw $2, %%mm4 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1000
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1001 "pmulhw "MANGLE(MM_FIX_0_541196100)", %%mm7 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1002 "psllw $2, %%mm2 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1003
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1004 "pmulhw "MANGLE(MM_FIX_1_306562965)", %%mm4 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1005 "paddw %%mm1, %%mm5 \n\t" //'t1
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1006
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1007 "pmulhw "MANGLE(MM_FIX_0_707106781)", %%mm2 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1008 "psubw %%mm1, %%mm6 \n\t" //'t2
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1009 // t7 't12 't11 t4 t6 - 't13 't10 ---
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1010
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1011 "paddw %%mm3, %%mm7 \n\t" //z2
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1012
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1013 "movq %%mm5, 1*8+%3 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1014 "paddw %%mm3, %%mm4 \n\t" //z4
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1015
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1016 "movq 3*16(%%"REG_d"), %%mm3 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1017 "movq %%mm0, %%mm1 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1018
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1019 "movq %%mm6, 2*8+%3 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1020 "psubw %%mm2, %%mm1 \n\t" //z13
15631
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
1021
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
1022 //===
32702
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1023 "paddw %%mm2, %%mm0 \n\t" //z11
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1024 "movq %%mm1, %%mm5 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1025
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1026 "movq 5*16(%%"REG_d"), %%mm2 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1027 "psubw %%mm7, %%mm1 \n\t" //d3
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1028
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1029 "paddw %%mm7, %%mm5 \n\t" //d5
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1030 "psubw %%mm3, %%mm1 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1031
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1032 "movq 1*16(%%"REG_d"), %%mm7 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1033 "psubw %%mm2, %%mm5 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1034
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1035 "movq %%mm0, %%mm6 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1036 "paddw %%mm4, %%mm0 \n\t" //d1
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1037
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1038 "paddusw %%mm3, %%mm1 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1039 "psubw %%mm4, %%mm6 \n\t" //d7
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1040
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1041 // d1 d3 - - - d5 d7 -
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1042 "movq 7*16(%%"REG_d"), %%mm4 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1043 "psubw %%mm7, %%mm0 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1044
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1045 "psubw %%mm4, %%mm6 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1046 "paddusw %%mm2, %%mm5 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1047
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1048 "paddusw %%mm4, %%mm6 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1049 "paddw %%mm3, %%mm1 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1050
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1051 "paddw %%mm2, %%mm5 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1052 "paddw %%mm4, %%mm6 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1053
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1054 "psubusw %%mm3, %%mm1 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1055 "psubusw %%mm2, %%mm5 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1056
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1057 "psubusw %%mm4, %%mm6 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1058 "movq %%mm1, %%mm4 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1059
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1060 "por %%mm5, %%mm4 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1061 "paddusw %%mm7, %%mm0 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1062
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1063 "por %%mm6, %%mm4 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1064 "paddw %%mm7, %%mm0 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1065
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1066 "packssdw %%mm4, %%mm4 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1067 "psubusw %%mm7, %%mm0 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1068
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1069 "movd %%mm4, %%"REG_a" \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1070 "or %%"REG_a", %%"REG_a" \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1071 "jnz 2f \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1072 //movq [edi+"DCTSIZE_S"*3*2], mm1
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1073 //movq [edi+"DCTSIZE_S"*5*2], mm5
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1074 //movq [edi+"DCTSIZE_S"*1*2], mm0
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1075 //movq [edi+"DCTSIZE_S"*7*2], mm6
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1076 // t4 t5 - - - t6 t7 -
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1077 //--- t4 (mm0) may be <>0; mm1, mm5, mm6 == 0
15631
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
1078 //Typical numbers: nondc - 19%, dc - 26%, zero - 55%. Handling the zero case alone isn't worthwhile
32702
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1079 "movq 0*8+%3, %%mm4 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1080 "movq %%mm0, %%mm1 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1081
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1082 "pmulhw "MANGLE(MM_FIX_0_847759065)", %%mm0 \n\t" //tmp6
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1083 "movq %%mm1, %%mm2 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1084
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1085 "movq "DCTSIZE_S"*0*2(%%"REG_D"), %%mm5 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1086 "movq %%mm2, %%mm3 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1087
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1088 "pmulhw "MANGLE(MM_FIX_0_566454497)", %%mm1 \n\t" //tmp5
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1089 "paddw %%mm4, %%mm5 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1090
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1091 "movq 1*8+%3, %%mm6 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1092 //paddw mm3, MM_2
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1093 "psraw $2, %%mm3 \n\t" //tmp7
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1094
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1095 "pmulhw "MANGLE(MM_FIX_0_198912367)", %%mm2 \n\t" //-tmp4
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1096 "psubw %%mm3, %%mm4 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1097
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1098 "movq "DCTSIZE_S"*1*2(%%"REG_D"), %%mm7 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1099 "paddw %%mm3, %%mm5 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1100
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1101 "movq %%mm4, "DCTSIZE_S"*7*2(%%"REG_D") \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1102 "paddw %%mm6, %%mm7 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1103
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1104 "movq 2*8+%3, %%mm3 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1105 "psubw %%mm0, %%mm6 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1106
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1107 "movq "DCTSIZE_S"*2*2(%%"REG_D"), %%mm4 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1108 "paddw %%mm0, %%mm7 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1109
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1110 "movq %%mm5, "DCTSIZE_S"*0*2(%%"REG_D") \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1111 "paddw %%mm3, %%mm4 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1112
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1113 "movq %%mm6, "DCTSIZE_S"*6*2(%%"REG_D") \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1114 "psubw %%mm1, %%mm3 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1115
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1116 "movq "DCTSIZE_S"*5*2(%%"REG_D"), %%mm5 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1117 "paddw %%mm1, %%mm4 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1118
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1119 "movq "DCTSIZE_S"*3*2(%%"REG_D"), %%mm6 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1120 "paddw %%mm3, %%mm5 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1121
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1122 "movq 3*8+%3, %%mm0 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1123 "add $8, %%"REG_S" \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1124
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1125 "movq %%mm7, "DCTSIZE_S"*1*2(%%"REG_D") \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1126 "paddw %%mm0, %%mm6 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1127
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1128 "movq %%mm4, "DCTSIZE_S"*2*2(%%"REG_D") \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1129 "psubw %%mm2, %%mm0 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1130
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1131 "movq "DCTSIZE_S"*4*2(%%"REG_D"), %%mm7 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1132 "paddw %%mm2, %%mm6 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1133
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1134 "movq %%mm5, "DCTSIZE_S"*5*2(%%"REG_D") \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1135 "paddw %%mm0, %%mm7 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1136
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1137 "movq %%mm6, "DCTSIZE_S"*3*2(%%"REG_D") \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1138
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1139 "movq %%mm7, "DCTSIZE_S"*4*2(%%"REG_D") \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1140 "add $8, %%"REG_D" \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1141 "jmp 4f \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1142
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1143 "2: \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1144 //--- non DC2
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1145 //psraw mm1, 2 w/o it -> offset. thr1, thr1, thr1 (actually thr1, thr1, thr1-1)
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1146 //psraw mm5, 2
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1147 //psraw mm0, 2
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1148 //psraw mm6, 2
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1149 "movq %%mm5, %%mm3 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1150 "psubw %%mm1, %%mm5 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1151
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1152 "psllw $1, %%mm5 \n\t" //'z10
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1153 "paddw %%mm1, %%mm3 \n\t" //'z13
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1154
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1155 "movq %%mm0, %%mm2 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1156 "psubw %%mm6, %%mm0 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1157
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1158 "movq %%mm5, %%mm1 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1159 "psllw $1, %%mm0 \n\t" //'z12
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1160
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1161 "pmulhw "MANGLE(MM_FIX_2_613125930)", %%mm1 \n\t" //-
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1162 "paddw %%mm0, %%mm5 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1163
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1164 "pmulhw "MANGLE(MM_FIX_1_847759065)", %%mm5 \n\t" //'z5
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1165 "paddw %%mm6, %%mm2 \n\t" //'z11
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1166
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1167 "pmulhw "MANGLE(MM_FIX_1_082392200)", %%mm0 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1168 "movq %%mm2, %%mm7 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1169
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1170 //---
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1171 "movq 0*8+%3, %%mm4 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1172 "psubw %%mm3, %%mm2 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1173
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1174 "psllw $1, %%mm2 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1175 "paddw %%mm3, %%mm7 \n\t" //'t7
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1176
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1177 "pmulhw "MANGLE(MM_FIX_1_414213562)", %%mm2 \n\t" //'t11
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1178 "movq %%mm4, %%mm6 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1179 //paddw mm7, MM_2
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1180 "psraw $2, %%mm7 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1181
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1182 "paddw "DCTSIZE_S"*0*2(%%"REG_D"), %%mm4 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1183 "psubw %%mm7, %%mm6 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1184
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1185 "movq 1*8+%3, %%mm3 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1186 "paddw %%mm7, %%mm4 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1187
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1188 "movq %%mm6, "DCTSIZE_S"*7*2(%%"REG_D") \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1189 "paddw %%mm5, %%mm1 \n\t" //'t12
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1190
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1191 "movq %%mm4, "DCTSIZE_S"*0*2(%%"REG_D") \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1192 "psubw %%mm7, %%mm1 \n\t" //'t6
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1193
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1194 "movq 2*8+%3, %%mm7 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1195 "psubw %%mm5, %%mm0 \n\t" //'t10
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1196
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1197 "movq 3*8+%3, %%mm6 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1198 "movq %%mm3, %%mm5 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1199
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1200 "paddw "DCTSIZE_S"*1*2(%%"REG_D"), %%mm3 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1201 "psubw %%mm1, %%mm5 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1202
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1203 "psubw %%mm1, %%mm2 \n\t" //'t5
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1204 "paddw %%mm1, %%mm3 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1205
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1206 "movq %%mm5, "DCTSIZE_S"*6*2(%%"REG_D") \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1207 "movq %%mm7, %%mm4 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1208
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1209 "paddw "DCTSIZE_S"*2*2(%%"REG_D"), %%mm7 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1210 "psubw %%mm2, %%mm4 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1211
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1212 "paddw "DCTSIZE_S"*5*2(%%"REG_D"), %%mm4 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1213 "paddw %%mm2, %%mm7 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1214
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1215 "movq %%mm3, "DCTSIZE_S"*1*2(%%"REG_D") \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1216 "paddw %%mm2, %%mm0 \n\t" //'t4
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1217
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1218 // 't4 't6 't5 - - - - 't7
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1219 "movq %%mm7, "DCTSIZE_S"*2*2(%%"REG_D") \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1220 "movq %%mm6, %%mm1 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1221
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1222 "paddw "DCTSIZE_S"*4*2(%%"REG_D"), %%mm6 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1223 "psubw %%mm0, %%mm1 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1224
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1225 "paddw "DCTSIZE_S"*3*2(%%"REG_D"), %%mm1 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1226 "paddw %%mm0, %%mm6 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1227
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1228 "movq %%mm4, "DCTSIZE_S"*5*2(%%"REG_D") \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1229 "add $8, %%"REG_S" \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1230
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1231 "movq %%mm6, "DCTSIZE_S"*4*2(%%"REG_D") \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1232
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1233 "movq %%mm1, "DCTSIZE_S"*3*2(%%"REG_D") \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1234 "add $8, %%"REG_D" \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1235
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1236 "4: \n\t"
29263
0f1b5b68af32 whitespace cosmetics: Remove all trailing whitespace.
diego
parents: 29087
diff changeset
1237 //=part 2 (the same)===========================================================
32702
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1238 "movq "DCTSIZE_S"*0*2(%%"REG_S"), %%mm1 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1239 //
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1240 "movq "DCTSIZE_S"*3*2(%%"REG_S"), %%mm7 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1241 "movq %%mm1, %%mm0 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1242
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1243 "paddw "DCTSIZE_S"*7*2(%%"REG_S"), %%mm1 \n\t" //t0
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1244 "movq %%mm7, %%mm3 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1245
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1246 "paddw "DCTSIZE_S"*4*2(%%"REG_S"), %%mm7 \n\t" //t3
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1247 "movq %%mm1, %%mm5 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1248
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1249 "movq "DCTSIZE_S"*1*2(%%"REG_S"), %%mm6 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1250 "psubw %%mm7, %%mm1 \n\t" //t13
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1251
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1252 "movq "DCTSIZE_S"*2*2(%%"REG_S"), %%mm2 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1253 "movq %%mm6, %%mm4 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1254
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1255 "paddw "DCTSIZE_S"*6*2(%%"REG_S"), %%mm6 \n\t" //t1
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1256 "paddw %%mm7, %%mm5 \n\t" //t10
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1257
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1258 "paddw "DCTSIZE_S"*5*2(%%"REG_S"), %%mm2 \n\t" //t2
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1259 "movq %%mm6, %%mm7 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1260
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1261 "paddw %%mm2, %%mm6 \n\t" //t11
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1262 "psubw %%mm2, %%mm7 \n\t" //t12
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1263
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1264 "movq %%mm5, %%mm2 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1265 "paddw %%mm6, %%mm5 \n\t" //d0
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1266 // i0 t13 t12 i3 i1 d0 - d4
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1267 "psubw %%mm6, %%mm2 \n\t" //d4
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1268 "paddw %%mm1, %%mm7 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1269
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1270 "movq 1*8+4*16(%%"REG_d"), %%mm6 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1271 "psllw $2, %%mm7 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1272
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1273 "psubw 1*8+0*16(%%"REG_d"), %%mm5 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1274 "psubw %%mm6, %%mm2 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1275
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1276 "paddusw 1*8+0*16(%%"REG_d"), %%mm5 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1277 "paddusw %%mm6, %%mm2 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1278
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1279 "pmulhw "MANGLE(MM_FIX_0_707106781)", %%mm7 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1280 //
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1281 "paddw 1*8+0*16(%%"REG_d"), %%mm5 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1282 "paddw %%mm6, %%mm2 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1283
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1284 "psubusw 1*8+0*16(%%"REG_d"), %%mm5 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1285 "psubusw %%mm6, %%mm2 \n\t"
15631
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
1286
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
1287 //This func is totally compute-bound, operates at huge speed. So, DC shortcut
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
1288 // at this place isn't worthwhile due to BTB miss penalty (checked on Pent. 3).
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
1289 //However, typical numbers: nondc - 29%%, dc - 46%%, zero - 25%%. All <> 0 case is very rare.
32702
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1290 "paddw "MANGLE(MM_2)", %%mm5 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1291 "movq %%mm2, %%mm6 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1292
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1293 "paddw %%mm5, %%mm2 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1294 "psubw %%mm6, %%mm5 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1295
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1296 "movq %%mm1, %%mm6 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1297 "paddw %%mm7, %%mm1 \n\t" //d2
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1298
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1299 "psubw 1*8+2*16(%%"REG_d"), %%mm1 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1300 "psubw %%mm7, %%mm6 \n\t" //d6
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1301
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1302 "movq 1*8+6*16(%%"REG_d"), %%mm7 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1303 "psraw $2, %%mm5 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1304
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1305 "paddusw 1*8+2*16(%%"REG_d"), %%mm1 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1306 "psubw %%mm7, %%mm6 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1307 // t7 d2 /t11 t4 t6 - d6 /t10
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1308
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1309 "paddw 1*8+2*16(%%"REG_d"), %%mm1 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1310 "paddusw %%mm7, %%mm6 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1311
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1312 "psubusw 1*8+2*16(%%"REG_d"), %%mm1 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1313 "paddw %%mm7, %%mm6 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1314
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1315 "psubw "DCTSIZE_S"*4*2(%%"REG_S"), %%mm3 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1316 "psubusw %%mm7, %%mm6 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1317
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1318 //movq [edi+"DCTSIZE_S"*2*2], mm1
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1319 //movq [edi+"DCTSIZE_S"*6*2], mm6
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1320 "movq %%mm1, %%mm7 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1321 "psraw $2, %%mm2 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1322
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1323 "psubw "DCTSIZE_S"*6*2(%%"REG_S"), %%mm4 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1324 "psubw %%mm6, %%mm1 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1325
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1326 "psubw "DCTSIZE_S"*7*2(%%"REG_S"), %%mm0 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1327 "paddw %%mm7, %%mm6 \n\t" //'t13
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1328
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1329 "psraw $2, %%mm6 \n\t" //paddw mm6, MM_2 !! ---
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1330 "movq %%mm2, %%mm7 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1331
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1332 "pmulhw "MANGLE(MM_FIX_1_414213562_A)", %%mm1 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1333 "paddw %%mm6, %%mm2 \n\t" //'t0
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1334
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1335 "movq %%mm2, 0*8+%3 \n\t" //!
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1336 "psubw %%mm6, %%mm7 \n\t" //'t3
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1337
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1338 "movq "DCTSIZE_S"*2*2(%%"REG_S"), %%mm2 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1339 "psubw %%mm6, %%mm1 \n\t" //'t12
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1340
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1341 "psubw "DCTSIZE_S"*5*2(%%"REG_S"), %%mm2 \n\t" //t5
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1342 "movq %%mm5, %%mm6 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1343
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1344 "movq %%mm7, 3*8+%3 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1345 "paddw %%mm2, %%mm3 \n\t" //t10
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1346
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1347 "paddw %%mm4, %%mm2 \n\t" //t11
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1348 "paddw %%mm0, %%mm4 \n\t" //t12
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1349
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1350 "movq %%mm3, %%mm7 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1351 "psubw %%mm4, %%mm3 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1352
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1353 "psllw $2, %%mm3 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1354 "psllw $2, %%mm7 \n\t" //opt for P6
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1355
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1356 "pmulhw "MANGLE(MM_FIX_0_382683433)", %%mm3 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1357 "psllw $2, %%mm4 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1358
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1359 "pmulhw "MANGLE(MM_FIX_0_541196100)", %%mm7 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1360 "psllw $2, %%mm2 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1361
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1362 "pmulhw "MANGLE(MM_FIX_1_306562965)", %%mm4 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1363 "paddw %%mm1, %%mm5 \n\t" //'t1
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1364
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1365 "pmulhw "MANGLE(MM_FIX_0_707106781)", %%mm2 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1366 "psubw %%mm1, %%mm6 \n\t" //'t2
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1367 // t7 't12 't11 t4 t6 - 't13 't10 ---
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1368
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1369 "paddw %%mm3, %%mm7 \n\t" //z2
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1370
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1371 "movq %%mm5, 1*8+%3 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1372 "paddw %%mm3, %%mm4 \n\t" //z4
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1373
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1374 "movq 1*8+3*16(%%"REG_d"), %%mm3 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1375 "movq %%mm0, %%mm1 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1376
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1377 "movq %%mm6, 2*8+%3 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1378 "psubw %%mm2, %%mm1 \n\t" //z13
15631
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
1379
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
1380 //===
32702
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1381 "paddw %%mm2, %%mm0 \n\t" //z11
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1382 "movq %%mm1, %%mm5 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1383
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1384 "movq 1*8+5*16(%%"REG_d"), %%mm2 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1385 "psubw %%mm7, %%mm1 \n\t" //d3
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1386
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1387 "paddw %%mm7, %%mm5 \n\t" //d5
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1388 "psubw %%mm3, %%mm1 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1389
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1390 "movq 1*8+1*16(%%"REG_d"), %%mm7 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1391 "psubw %%mm2, %%mm5 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1392
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1393 "movq %%mm0, %%mm6 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1394 "paddw %%mm4, %%mm0 \n\t" //d1
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1395
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1396 "paddusw %%mm3, %%mm1 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1397 "psubw %%mm4, %%mm6 \n\t" //d7
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1398
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1399 // d1 d3 - - - d5 d7 -
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1400 "movq 1*8+7*16(%%"REG_d"), %%mm4 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1401 "psubw %%mm7, %%mm0 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1402
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1403 "psubw %%mm4, %%mm6 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1404 "paddusw %%mm2, %%mm5 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1405
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1406 "paddusw %%mm4, %%mm6 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1407 "paddw %%mm3, %%mm1 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1408
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1409 "paddw %%mm2, %%mm5 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1410 "paddw %%mm4, %%mm6 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1411
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1412 "psubusw %%mm3, %%mm1 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1413 "psubusw %%mm2, %%mm5 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1414
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1415 "psubusw %%mm4, %%mm6 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1416 "movq %%mm1, %%mm4 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1417
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1418 "por %%mm5, %%mm4 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1419 "paddusw %%mm7, %%mm0 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1420
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1421 "por %%mm6, %%mm4 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1422 "paddw %%mm7, %%mm0 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1423
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1424 "packssdw %%mm4, %%mm4 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1425 "psubusw %%mm7, %%mm0 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1426
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1427 "movd %%mm4, %%"REG_a" \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1428 "or %%"REG_a", %%"REG_a" \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1429 "jnz 3f \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1430 //movq [edi+"DCTSIZE_S"*3*2], mm1
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1431 //movq [edi+"DCTSIZE_S"*5*2], mm5
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1432 //movq [edi+"DCTSIZE_S"*1*2], mm0
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1433 //movq [edi+"DCTSIZE_S"*7*2], mm6
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1434 // t4 t5 - - - t6 t7 -
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1435 //--- t4 (mm0) may be <>0; mm1, mm5, mm6 == 0
15631
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
1436 //Typical numbers: nondc - 19%%, dc - 26%%, zero - 55%%. zero case alone isn't worthwhile
32702
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1437 "movq 0*8+%3, %%mm4 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1438 "movq %%mm0, %%mm1 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1439
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1440 "pmulhw "MANGLE(MM_FIX_0_847759065)", %%mm0 \n\t" //tmp6
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1441 "movq %%mm1, %%mm2 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1442
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1443 "movq "DCTSIZE_S"*0*2(%%"REG_D"), %%mm5 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1444 "movq %%mm2, %%mm3 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1445
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1446 "pmulhw "MANGLE(MM_FIX_0_566454497)", %%mm1 \n\t" //tmp5
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1447 "paddw %%mm4, %%mm5 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1448
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1449 "movq 1*8+%3, %%mm6 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1450 //paddw mm3, MM_2
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1451 "psraw $2, %%mm3 \n\t" //tmp7
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1452
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1453 "pmulhw "MANGLE(MM_FIX_0_198912367)", %%mm2 \n\t" //-tmp4
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1454 "psubw %%mm3, %%mm4 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1455
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1456 "movq "DCTSIZE_S"*1*2(%%"REG_D"), %%mm7 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1457 "paddw %%mm3, %%mm5 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1458
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1459 "movq %%mm4, "DCTSIZE_S"*7*2(%%"REG_D") \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1460 "paddw %%mm6, %%mm7 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1461
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1462 "movq 2*8+%3, %%mm3 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1463 "psubw %%mm0, %%mm6 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1464
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1465 "movq "DCTSIZE_S"*2*2(%%"REG_D"), %%mm4 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1466 "paddw %%mm0, %%mm7 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1467
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1468 "movq %%mm5, "DCTSIZE_S"*0*2(%%"REG_D") \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1469 "paddw %%mm3, %%mm4 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1470
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1471 "movq %%mm6, "DCTSIZE_S"*6*2(%%"REG_D") \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1472 "psubw %%mm1, %%mm3 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1473
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1474 "movq "DCTSIZE_S"*5*2(%%"REG_D"), %%mm5 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1475 "paddw %%mm1, %%mm4 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1476
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1477 "movq "DCTSIZE_S"*3*2(%%"REG_D"), %%mm6 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1478 "paddw %%mm3, %%mm5 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1479
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1480 "movq 3*8+%3, %%mm0 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1481 "add $24, %%"REG_S" \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1482
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1483 "movq %%mm7, "DCTSIZE_S"*1*2(%%"REG_D") \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1484 "paddw %%mm0, %%mm6 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1485
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1486 "movq %%mm4, "DCTSIZE_S"*2*2(%%"REG_D") \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1487 "psubw %%mm2, %%mm0 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1488
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1489 "movq "DCTSIZE_S"*4*2(%%"REG_D"), %%mm7 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1490 "paddw %%mm2, %%mm6 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1491
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1492 "movq %%mm5, "DCTSIZE_S"*5*2(%%"REG_D") \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1493 "paddw %%mm0, %%mm7 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1494
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1495 "movq %%mm6, "DCTSIZE_S"*3*2(%%"REG_D") \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1496
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1497 "movq %%mm7, "DCTSIZE_S"*4*2(%%"REG_D") \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1498 "add $24, %%"REG_D" \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1499 "sub $2, %%"REG_c" \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1500 "jnz 1b \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1501 "jmp 5f \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1502
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1503 "3: \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1504 //--- non DC2
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1505 //psraw mm1, 2 w/o it -> offset. thr1, thr1, thr1 (actually thr1, thr1, thr1-1)
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1506 //psraw mm5, 2
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1507 //psraw mm0, 2
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1508 //psraw mm6, 2
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1509 "movq %%mm5, %%mm3 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1510 "psubw %%mm1, %%mm5 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1511
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1512 "psllw $1, %%mm5 \n\t" //'z10
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1513 "paddw %%mm1, %%mm3 \n\t" //'z13
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1514
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1515 "movq %%mm0, %%mm2 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1516 "psubw %%mm6, %%mm0 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1517
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1518 "movq %%mm5, %%mm1 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1519 "psllw $1, %%mm0 \n\t" //'z12
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1520
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1521 "pmulhw "MANGLE(MM_FIX_2_613125930)", %%mm1 \n\t" //-
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1522 "paddw %%mm0, %%mm5 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1523
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1524 "pmulhw "MANGLE(MM_FIX_1_847759065)", %%mm5 \n\t" //'z5
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1525 "paddw %%mm6, %%mm2 \n\t" //'z11
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1526
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1527 "pmulhw "MANGLE(MM_FIX_1_082392200)", %%mm0 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1528 "movq %%mm2, %%mm7 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1529
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1530 //---
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1531 "movq 0*8+%3, %%mm4 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1532 "psubw %%mm3, %%mm2 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1533
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1534 "psllw $1, %%mm2 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1535 "paddw %%mm3, %%mm7 \n\t" //'t7
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1536
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1537 "pmulhw "MANGLE(MM_FIX_1_414213562)", %%mm2 \n\t" //'t11
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1538 "movq %%mm4, %%mm6 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1539 //paddw mm7, MM_2
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1540 "psraw $2, %%mm7 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1541
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1542 "paddw "DCTSIZE_S"*0*2(%%"REG_D"), %%mm4 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1543 "psubw %%mm7, %%mm6 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1544
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1545 "movq 1*8+%3, %%mm3 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1546 "paddw %%mm7, %%mm4 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1547
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1548 "movq %%mm6, "DCTSIZE_S"*7*2(%%"REG_D") \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1549 "paddw %%mm5, %%mm1 \n\t" //'t12
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1550
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1551 "movq %%mm4, "DCTSIZE_S"*0*2(%%"REG_D") \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1552 "psubw %%mm7, %%mm1 \n\t" //'t6
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1553
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1554 "movq 2*8+%3, %%mm7 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1555 "psubw %%mm5, %%mm0 \n\t" //'t10
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1556
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1557 "movq 3*8+%3, %%mm6 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1558 "movq %%mm3, %%mm5 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1559
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1560 "paddw "DCTSIZE_S"*1*2(%%"REG_D"), %%mm3 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1561 "psubw %%mm1, %%mm5 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1562
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1563 "psubw %%mm1, %%mm2 \n\t" //'t5
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1564 "paddw %%mm1, %%mm3 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1565
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1566 "movq %%mm5, "DCTSIZE_S"*6*2(%%"REG_D") \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1567 "movq %%mm7, %%mm4 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1568
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1569 "paddw "DCTSIZE_S"*2*2(%%"REG_D"), %%mm7 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1570 "psubw %%mm2, %%mm4 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1571
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1572 "paddw "DCTSIZE_S"*5*2(%%"REG_D"), %%mm4 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1573 "paddw %%mm2, %%mm7 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1574
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1575 "movq %%mm3, "DCTSIZE_S"*1*2(%%"REG_D") \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1576 "paddw %%mm2, %%mm0 \n\t" //'t4
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1577
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1578 // 't4 't6 't5 - - - - 't7
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1579 "movq %%mm7, "DCTSIZE_S"*2*2(%%"REG_D") \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1580 "movq %%mm6, %%mm1 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1581
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1582 "paddw "DCTSIZE_S"*4*2(%%"REG_D"), %%mm6 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1583 "psubw %%mm0, %%mm1 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1584
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1585 "paddw "DCTSIZE_S"*3*2(%%"REG_D"), %%mm1 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1586 "paddw %%mm0, %%mm6 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1587
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1588 "movq %%mm4, "DCTSIZE_S"*5*2(%%"REG_D") \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1589 "add $24, %%"REG_S" \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1590
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1591 "movq %%mm6, "DCTSIZE_S"*4*2(%%"REG_D") \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1592
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1593 "movq %%mm1, "DCTSIZE_S"*3*2(%%"REG_D") \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1594 "add $24, %%"REG_D" \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1595 "sub $2, %%"REG_c" \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1596 "jnz 1b \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1597 "5: \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1598
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1599 : "+S"(data), "+D"(output), "+c"(cnt), "=o"(temps)
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1600 : "d"(thr_adr)
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1601 : "%"REG_a
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1602 );
15631
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
1603 }
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
1604
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
1605 #endif // HAVE_MMX
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
1606
28290
25337a2147e7 Lots and lots of #ifdef ARCH_... -> #if ARCH_...
reimar
parents: 27754
diff changeset
1607 #if !HAVE_MMX
15631
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
1608
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
1609 static void row_idct_c(DCTELEM* workspace,
32702
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1610 int16_t* output_adr, int output_stride, int cnt)
15631
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
1611 {
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
1612 int_simd16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
1613 int_simd16_t tmp10, tmp11, tmp12, tmp13;
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
1614 int_simd16_t z5, z10, z11, z12, z13;
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
1615 int16_t* outptr;
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
1616 DCTELEM* wsptr;
29263
0f1b5b68af32 whitespace cosmetics: Remove all trailing whitespace.
diego
parents: 29087
diff changeset
1617
15631
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
1618 cnt*=4;
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
1619 wsptr = workspace;
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
1620 outptr = output_adr;
29263
0f1b5b68af32 whitespace cosmetics: Remove all trailing whitespace.
diego
parents: 29087
diff changeset
1621 for (; cnt > 0; cnt--) {
32702
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1622 // Even part
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1623 //Simd version reads 4x4 block and transposes it
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1624 tmp10 = ( wsptr[2] + wsptr[3]);
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1625 tmp11 = ( wsptr[2] - wsptr[3]);
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1626
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1627 tmp13 = ( wsptr[0] + wsptr[1]);
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1628 tmp12 = (MULTIPLY16H( wsptr[0] - wsptr[1], FIX_1_414213562_A)<<2) - tmp13;//this shift order to avoid overflow
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1629
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1630 tmp0 = tmp10 + tmp13; //->temps
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1631 tmp3 = tmp10 - tmp13; //->temps
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1632 tmp1 = tmp11 + tmp12;
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1633 tmp2 = tmp11 - tmp12;
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1634
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1635 // Odd part
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1636 //Also transpose, with previous:
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1637 // ---- ---- ||||
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1638 // ---- ---- idct ||||
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1639 // ---- ---- ---> ||||
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1640 // ---- ---- ||||
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1641 z13 = wsptr[4] + wsptr[5];
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1642 z10 = wsptr[4] - wsptr[5];
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1643 z11 = wsptr[6] + wsptr[7];
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1644 z12 = wsptr[6] - wsptr[7];
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1645
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1646 tmp7 = z11 + z13;
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1647 tmp11 = MULTIPLY16H(z11 - z13, FIX_1_414213562);
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1648
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1649 z5 = MULTIPLY16H(z10 + z12, FIX_1_847759065);
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1650 tmp10 = MULTIPLY16H(z12, FIX_1_082392200) - z5;
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1651 tmp12 = MULTIPLY16H(z10, FIX_2_613125930) + z5; // - FIX_
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1652
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1653 tmp6 = (tmp12<<3) - tmp7;
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1654 tmp5 = (tmp11<<3) - tmp6;
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1655 tmp4 = (tmp10<<3) + tmp5;
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1656
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1657 // Final output stage: descale and write column
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1658 outptr[0*output_stride]+= DESCALE(tmp0 + tmp7, 3);
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1659 outptr[1*output_stride]+= DESCALE(tmp1 + tmp6, 3);
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1660 outptr[2*output_stride]+= DESCALE(tmp2 + tmp5, 3);
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1661 outptr[3*output_stride]+= DESCALE(tmp3 - tmp4, 3);
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1662 outptr[4*output_stride]+= DESCALE(tmp3 + tmp4, 3);
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1663 outptr[5*output_stride]+= DESCALE(tmp2 - tmp5, 3);
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1664 outptr[6*output_stride]+= DESCALE(tmp1 - tmp6, 3); //no += ?
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1665 outptr[7*output_stride]+= DESCALE(tmp0 - tmp7, 3); //no += ?
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1666 outptr++;
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1667
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1668 wsptr += DCTSIZE; // advance pointer to next row
15631
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
1669 }
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
1670 }
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
1671
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
1672 #else /* HAVE_MMX */
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
1673
29263
0f1b5b68af32 whitespace cosmetics: Remove all trailing whitespace.
diego
parents: 29087
diff changeset
1674 static void row_idct_mmx (DCTELEM* workspace,
32702
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1675 int16_t* output_adr, int output_stride, int cnt)
15631
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
1676 {
26052
ce480034f391 Do not use a global temps variable, this is ugly and does not compile with ICC.
reimar
parents: 26050
diff changeset
1677 uint64_t __attribute__((aligned(8))) temps[4];
27754
08d18fe9da52 Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents: 26727
diff changeset
1678 __asm__ volatile(
32702
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1679 "lea (%%"REG_a",%%"REG_a",2), %%"REG_d" \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1680 "1: \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1681 "movq "DCTSIZE_S"*0*2(%%"REG_S"), %%mm0 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1682 //
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1683
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1684 "movq "DCTSIZE_S"*1*2(%%"REG_S"), %%mm1 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1685 "movq %%mm0, %%mm4 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1686
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1687 "movq "DCTSIZE_S"*2*2(%%"REG_S"), %%mm2 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1688 "punpcklwd %%mm1, %%mm0 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1689
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1690 "movq "DCTSIZE_S"*3*2(%%"REG_S"), %%mm3 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1691 "punpckhwd %%mm1, %%mm4 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1692
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1693 //transpose 4x4
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1694 "movq %%mm2, %%mm7 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1695 "punpcklwd %%mm3, %%mm2 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1696
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1697 "movq %%mm0, %%mm6 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1698 "punpckldq %%mm2, %%mm0 \n\t" //0
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1699
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1700 "punpckhdq %%mm2, %%mm6 \n\t" //1
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1701 "movq %%mm0, %%mm5 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1702
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1703 "punpckhwd %%mm3, %%mm7 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1704 "psubw %%mm6, %%mm0 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1705
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1706 "pmulhw "MANGLE(MM_FIX_1_414213562_A)", %%mm0 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1707 "movq %%mm4, %%mm2 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1708
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1709 "punpckldq %%mm7, %%mm4 \n\t" //2
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1710 "paddw %%mm6, %%mm5 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1711
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1712 "punpckhdq %%mm7, %%mm2 \n\t" //3
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1713 "movq %%mm4, %%mm1 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1714
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1715 "psllw $2, %%mm0 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1716 "paddw %%mm2, %%mm4 \n\t" //t10
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1717
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1718 "movq "DCTSIZE_S"*0*2+"DCTSIZE_S"(%%"REG_S"), %%mm3 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1719 "psubw %%mm2, %%mm1 \n\t" //t11
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1720
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1721 "movq "DCTSIZE_S"*1*2+"DCTSIZE_S"(%%"REG_S"), %%mm2 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1722 "psubw %%mm5, %%mm0 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1723
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1724 "movq %%mm4, %%mm6 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1725 "paddw %%mm5, %%mm4 \n\t" //t0
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1726
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1727 "psubw %%mm5, %%mm6 \n\t" //t3
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1728 "movq %%mm1, %%mm7 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1729
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1730 "movq "DCTSIZE_S"*2*2+"DCTSIZE_S"(%%"REG_S"), %%mm5 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1731 "paddw %%mm0, %%mm1 \n\t" //t1
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1732
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1733 "movq %%mm4, 0*8+%3 \n\t" //t0
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1734 "movq %%mm3, %%mm4 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1735
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1736 "movq %%mm6, 1*8+%3 \n\t" //t3
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1737 "punpcklwd %%mm2, %%mm3 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1738
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1739 //transpose 4x4
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1740 "movq "DCTSIZE_S"*3*2+"DCTSIZE_S"(%%"REG_S"), %%mm6 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1741 "punpckhwd %%mm2, %%mm4 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1742
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1743 "movq %%mm5, %%mm2 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1744 "punpcklwd %%mm6, %%mm5 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1745
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1746 "psubw %%mm0, %%mm7 \n\t" //t2
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1747 "punpckhwd %%mm6, %%mm2 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1748
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1749 "movq %%mm3, %%mm0 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1750 "punpckldq %%mm5, %%mm3 \n\t" //4
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1751
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1752 "punpckhdq %%mm5, %%mm0 \n\t" //5
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1753 "movq %%mm4, %%mm5 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1754
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1755 //
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1756 "movq %%mm3, %%mm6 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1757 "punpckldq %%mm2, %%mm4 \n\t" //6
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1758
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1759 "psubw %%mm0, %%mm3 \n\t" //z10
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1760 "punpckhdq %%mm2, %%mm5 \n\t" //7
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1761
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1762 "paddw %%mm0, %%mm6 \n\t" //z13
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1763 "movq %%mm4, %%mm2 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1764
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1765 "movq %%mm3, %%mm0 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1766 "psubw %%mm5, %%mm4 \n\t" //z12
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1767
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1768 "pmulhw "MANGLE(MM_FIX_2_613125930)", %%mm0 \n\t" //-
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1769 "paddw %%mm4, %%mm3 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1770
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1771 "pmulhw "MANGLE(MM_FIX_1_847759065)", %%mm3 \n\t" //z5
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1772 "paddw %%mm5, %%mm2 \n\t" //z11 >
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1773
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1774 "pmulhw "MANGLE(MM_FIX_1_082392200)", %%mm4 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1775 "movq %%mm2, %%mm5 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1776
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1777 "psubw %%mm6, %%mm2 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1778 "paddw %%mm6, %%mm5 \n\t" //t7
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1779
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1780 "pmulhw "MANGLE(MM_FIX_1_414213562)", %%mm2 \n\t" //t11
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1781 "paddw %%mm3, %%mm0 \n\t" //t12
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1782
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1783 "psllw $3, %%mm0 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1784 "psubw %%mm3, %%mm4 \n\t" //t10
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1785
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1786 "movq 0*8+%3, %%mm6 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1787 "movq %%mm1, %%mm3 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1788
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1789 "psllw $3, %%mm4 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1790 "psubw %%mm5, %%mm0 \n\t" //t6
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1791
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1792 "psllw $3, %%mm2 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1793 "paddw %%mm0, %%mm1 \n\t" //d1
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1794
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1795 "psubw %%mm0, %%mm2 \n\t" //t5
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1796 "psubw %%mm0, %%mm3 \n\t" //d6
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1797
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1798 "paddw %%mm2, %%mm4 \n\t" //t4
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1799 "movq %%mm7, %%mm0 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1800
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1801 "paddw %%mm2, %%mm7 \n\t" //d2
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1802 "psubw %%mm2, %%mm0 \n\t" //d5
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1803
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1804 "movq "MANGLE(MM_DESCALE_RND)", %%mm2 \n\t" //4
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1805 "psubw %%mm5, %%mm6 \n\t" //d7
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1806
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1807 "paddw 0*8+%3, %%mm5 \n\t" //d0
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1808 "paddw %%mm2, %%mm1 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1809
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1810 "paddw %%mm2, %%mm5 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1811 "psraw $3, %%mm1 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1812
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1813 "paddw %%mm2, %%mm7 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1814 "psraw $3, %%mm5 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1815
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1816 "paddw (%%"REG_D"), %%mm5 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1817 "psraw $3, %%mm7 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1818
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1819 "paddw (%%"REG_D",%%"REG_a",), %%mm1 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1820 "paddw %%mm2, %%mm0 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1821
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1822 "paddw (%%"REG_D",%%"REG_a",2), %%mm7 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1823 "paddw %%mm2, %%mm3 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1824
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1825 "movq %%mm5, (%%"REG_D") \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1826 "paddw %%mm2, %%mm6 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1827
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1828 "movq %%mm1, (%%"REG_D",%%"REG_a",) \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1829 "psraw $3, %%mm0 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1830
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1831 "movq %%mm7, (%%"REG_D",%%"REG_a",2) \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1832 "add %%"REG_d", %%"REG_D" \n\t" //3*ls
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1833
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1834 "movq 1*8+%3, %%mm5 \n\t" //t3
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1835 "psraw $3, %%mm3 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1836
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1837 "paddw (%%"REG_D",%%"REG_a",2), %%mm0 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1838 "psubw %%mm4, %%mm5 \n\t" //d3
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1839
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1840 "paddw (%%"REG_D",%%"REG_d",), %%mm3 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1841 "psraw $3, %%mm6 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1842
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1843 "paddw 1*8+%3, %%mm4 \n\t" //d4
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1844 "paddw %%mm2, %%mm5 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1845
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1846 "paddw (%%"REG_D",%%"REG_a",4), %%mm6 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1847 "paddw %%mm2, %%mm4 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1848
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1849 "movq %%mm0, (%%"REG_D",%%"REG_a",2) \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1850 "psraw $3, %%mm5 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1851
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1852 "paddw (%%"REG_D"), %%mm5 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1853 "psraw $3, %%mm4 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1854
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1855 "paddw (%%"REG_D",%%"REG_a",), %%mm4 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1856 "add $"DCTSIZE_S"*2*4, %%"REG_S" \n\t" //4 rows
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1857
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1858 "movq %%mm3, (%%"REG_D",%%"REG_d",) \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1859 "movq %%mm6, (%%"REG_D",%%"REG_a",4) \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1860 "movq %%mm5, (%%"REG_D") \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1861 "movq %%mm4, (%%"REG_D",%%"REG_a",) \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1862
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1863 "sub %%"REG_d", %%"REG_D" \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1864 "add $8, %%"REG_D" \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1865 "dec %%"REG_c" \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1866 "jnz 1b \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1867
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1868 : "+S"(workspace), "+D"(output_adr), "+c"(cnt), "=o"(temps)
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1869 : "a"(output_stride*sizeof(short))
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1870 : "%"REG_d
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1871 );
15631
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
1872 }
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
1873
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
1874 #endif // HAVE_MMX
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
1875
28290
25337a2147e7 Lots and lots of #ifdef ARCH_... -> #if ARCH_...
reimar
parents: 27754
diff changeset
1876 #if !HAVE_MMX
15631
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
1877
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
1878 static void row_fdct_c(DCTELEM *data, const uint8_t *pixels, int line_size, int cnt)
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
1879 {
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
1880 int_simd16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
1881 int_simd16_t tmp10, tmp11, tmp12, tmp13;
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
1882 int_simd16_t z1, z2, z3, z4, z5, z11, z13;
29263
0f1b5b68af32 whitespace cosmetics: Remove all trailing whitespace.
diego
parents: 29087
diff changeset
1883 DCTELEM *dataptr;
0f1b5b68af32 whitespace cosmetics: Remove all trailing whitespace.
diego
parents: 29087
diff changeset
1884
15631
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
1885 cnt*=4;
29263
0f1b5b68af32 whitespace cosmetics: Remove all trailing whitespace.
diego
parents: 29087
diff changeset
1886 // Pass 1: process rows.
0f1b5b68af32 whitespace cosmetics: Remove all trailing whitespace.
diego
parents: 29087
diff changeset
1887
15631
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
1888 dataptr = data;
29263
0f1b5b68af32 whitespace cosmetics: Remove all trailing whitespace.
diego
parents: 29087
diff changeset
1889 for (; cnt > 0; cnt--) {
32702
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1890 tmp0 = pixels[line_size*0] + pixels[line_size*7];
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1891 tmp7 = pixels[line_size*0] - pixels[line_size*7];
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1892 tmp1 = pixels[line_size*1] + pixels[line_size*6];
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1893 tmp6 = pixels[line_size*1] - pixels[line_size*6];
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1894 tmp2 = pixels[line_size*2] + pixels[line_size*5];
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1895 tmp5 = pixels[line_size*2] - pixels[line_size*5];
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1896 tmp3 = pixels[line_size*3] + pixels[line_size*4];
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1897 tmp4 = pixels[line_size*3] - pixels[line_size*4];
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1898
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1899 // Even part
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1900
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1901 tmp10 = tmp0 + tmp3;
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1902 tmp13 = tmp0 - tmp3;
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1903 tmp11 = tmp1 + tmp2;
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1904 tmp12 = tmp1 - tmp2;
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1905 //Even columns are written first, this leads to different order of columns
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1906 //in column_fidct(), but they are processed independently, so all ok.
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1907 //Later in the row_idct() columns readed at the same order.
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1908 dataptr[2] = tmp10 + tmp11;
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1909 dataptr[3] = tmp10 - tmp11;
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1910
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1911 z1 = MULTIPLY16H((tmp12 + tmp13)<<2, FIX_0_707106781);
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1912 dataptr[0] = tmp13 + z1;
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1913 dataptr[1] = tmp13 - z1;
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1914
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1915 // Odd part
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1916
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1917 tmp10 = (tmp4 + tmp5) <<2;
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1918 tmp11 = (tmp5 + tmp6) <<2;
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1919 tmp12 = (tmp6 + tmp7) <<2;
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1920
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1921 z5 = MULTIPLY16H(tmp10 - tmp12, FIX_0_382683433);
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1922 z2 = MULTIPLY16H(tmp10, FIX_0_541196100) + z5;
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1923 z4 = MULTIPLY16H(tmp12, FIX_1_306562965) + z5;
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1924 z3 = MULTIPLY16H(tmp11, FIX_0_707106781);
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1925
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1926 z11 = tmp7 + z3;
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1927 z13 = tmp7 - z3;
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1928
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1929 dataptr[4] = z13 + z2;
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1930 dataptr[5] = z13 - z2;
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1931 dataptr[6] = z11 + z4;
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1932 dataptr[7] = z11 - z4;
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1933
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1934 pixels++; // advance pointer to next column
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1935 dataptr += DCTSIZE;
15631
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
1936 }
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
1937 }
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
1938
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
1939 #else /* HAVE_MMX */
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
1940
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
1941 static void row_fdct_mmx(DCTELEM *data, const uint8_t *pixels, int line_size, int cnt)
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
1942 {
26052
ce480034f391 Do not use a global temps variable, this is ugly and does not compile with ICC.
reimar
parents: 26050
diff changeset
1943 uint64_t __attribute__((aligned(8))) temps[4];
27754
08d18fe9da52 Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents: 26727
diff changeset
1944 __asm__ volatile(
32702
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1945 "lea (%%"REG_a",%%"REG_a",2), %%"REG_d" \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1946 "6: \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1947 "movd (%%"REG_S"), %%mm0 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1948 "pxor %%mm7, %%mm7 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1949
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1950 "movd (%%"REG_S",%%"REG_a",), %%mm1 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1951 "punpcklbw %%mm7, %%mm0 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1952
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1953 "movd (%%"REG_S",%%"REG_a",2), %%mm2 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1954 "punpcklbw %%mm7, %%mm1 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1955
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1956 "punpcklbw %%mm7, %%mm2 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1957 "add %%"REG_d", %%"REG_S" \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1958
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1959 "movq %%mm0, %%mm5 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1960 //
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1961
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1962 "movd (%%"REG_S",%%"REG_a",4), %%mm3 \n\t" //7 ;prefetch!
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1963 "movq %%mm1, %%mm6 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1964
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1965 "movd (%%"REG_S",%%"REG_d",), %%mm4 \n\t" //6
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1966 "punpcklbw %%mm7, %%mm3 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1967
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1968 "psubw %%mm3, %%mm5 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1969 "punpcklbw %%mm7, %%mm4 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1970
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1971 "paddw %%mm3, %%mm0 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1972 "psubw %%mm4, %%mm6 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1973
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1974 "movd (%%"REG_S",%%"REG_a",2), %%mm3 \n\t" //5
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1975 "paddw %%mm4, %%mm1 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1976
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1977 "movq %%mm5, 0*8+%3 \n\t" //t7
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1978 "punpcklbw %%mm7, %%mm3 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1979
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1980 "movq %%mm6, 1*8+%3 \n\t" //t6
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1981 "movq %%mm2, %%mm4 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1982
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1983 "movd (%%"REG_S"), %%mm5 \n\t" //3
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1984 "paddw %%mm3, %%mm2 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1985
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1986 "movd (%%"REG_S",%%"REG_a",), %%mm6 \n\t" //4
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1987 "punpcklbw %%mm7, %%mm5 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1988
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1989 "psubw %%mm3, %%mm4 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1990 "punpcklbw %%mm7, %%mm6 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1991
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1992 "movq %%mm5, %%mm3 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1993 "paddw %%mm6, %%mm5 \n\t" //t3
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1994
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1995 "psubw %%mm6, %%mm3 \n\t" //t4 ; t0 t1 t2 t4 t5 t3 - -
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1996 "movq %%mm0, %%mm6 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1997
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1998 "movq %%mm1, %%mm7 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
1999 "psubw %%mm5, %%mm0 \n\t" //t13
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2000
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2001 "psubw %%mm2, %%mm1 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2002 "paddw %%mm2, %%mm7 \n\t" //t11
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2003
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2004 "paddw %%mm0, %%mm1 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2005 "movq %%mm7, %%mm2 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2006
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2007 "psllw $2, %%mm1 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2008 "paddw %%mm5, %%mm6 \n\t" //t10
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2009
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2010 "pmulhw "MANGLE(MM_FIX_0_707106781)", %%mm1 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2011 "paddw %%mm6, %%mm7 \n\t" //d2
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2012
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2013 "psubw %%mm2, %%mm6 \n\t" //d3
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2014 "movq %%mm0, %%mm5 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2015
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2016 //transpose 4x4
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2017 "movq %%mm7, %%mm2 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2018 "punpcklwd %%mm6, %%mm7 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2019
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2020 "paddw %%mm1, %%mm0 \n\t" //d0
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2021 "punpckhwd %%mm6, %%mm2 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2022
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2023 "psubw %%mm1, %%mm5 \n\t" //d1
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2024 "movq %%mm0, %%mm6 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2025
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2026 "movq 1*8+%3, %%mm1 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2027 "punpcklwd %%mm5, %%mm0 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2028
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2029 "punpckhwd %%mm5, %%mm6 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2030 "movq %%mm0, %%mm5 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2031
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2032 "punpckldq %%mm7, %%mm0 \n\t" //0
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2033 "paddw %%mm4, %%mm3 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2034
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2035 "punpckhdq %%mm7, %%mm5 \n\t" //1
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2036 "movq %%mm6, %%mm7 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2037
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2038 "movq %%mm0, "DCTSIZE_S"*0*2(%%"REG_D") \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2039 "punpckldq %%mm2, %%mm6 \n\t" //2
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2040
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2041 "movq %%mm5, "DCTSIZE_S"*1*2(%%"REG_D") \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2042 "punpckhdq %%mm2, %%mm7 \n\t" //3
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2043
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2044 "movq %%mm6, "DCTSIZE_S"*2*2(%%"REG_D") \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2045 "paddw %%mm1, %%mm4 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2046
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2047 "movq %%mm7, "DCTSIZE_S"*3*2(%%"REG_D") \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2048 "psllw $2, %%mm3 \n\t" //t10
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2049
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2050 "movq 0*8+%3, %%mm2 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2051 "psllw $2, %%mm4 \n\t" //t11
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2052
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2053 "pmulhw "MANGLE(MM_FIX_0_707106781)", %%mm4 \n\t" //z3
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2054 "paddw %%mm2, %%mm1 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2055
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2056 "psllw $2, %%mm1 \n\t" //t12
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2057 "movq %%mm3, %%mm0 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2058
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2059 "pmulhw "MANGLE(MM_FIX_0_541196100)", %%mm0 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2060 "psubw %%mm1, %%mm3 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2061
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2062 "pmulhw "MANGLE(MM_FIX_0_382683433)", %%mm3 \n\t" //z5
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2063 "movq %%mm2, %%mm5 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2064
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2065 "pmulhw "MANGLE(MM_FIX_1_306562965)", %%mm1 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2066 "psubw %%mm4, %%mm2 \n\t" //z13
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2067
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2068 "paddw %%mm4, %%mm5 \n\t" //z11
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2069 "movq %%mm2, %%mm6 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2070
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2071 "paddw %%mm3, %%mm0 \n\t" //z2
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2072 "movq %%mm5, %%mm7 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2073
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2074 "paddw %%mm0, %%mm2 \n\t" //d4
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2075 "psubw %%mm0, %%mm6 \n\t" //d5
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2076
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2077 "movq %%mm2, %%mm4 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2078 "paddw %%mm3, %%mm1 \n\t" //z4
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2079
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2080 //transpose 4x4
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2081 "punpcklwd %%mm6, %%mm2 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2082 "paddw %%mm1, %%mm5 \n\t" //d6
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2083
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2084 "punpckhwd %%mm6, %%mm4 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2085 "psubw %%mm1, %%mm7 \n\t" //d7
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2086
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2087 "movq %%mm5, %%mm6 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2088 "punpcklwd %%mm7, %%mm5 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2089
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2090 "punpckhwd %%mm7, %%mm6 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2091 "movq %%mm2, %%mm7 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2092
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2093 "punpckldq %%mm5, %%mm2 \n\t" //4
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2094 "sub %%"REG_d", %%"REG_S" \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2095
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2096 "punpckhdq %%mm5, %%mm7 \n\t" //5
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2097 "movq %%mm4, %%mm5 \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2098
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2099 "movq %%mm2, "DCTSIZE_S"*0*2+"DCTSIZE_S"(%%"REG_D") \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2100 "punpckldq %%mm6, %%mm4 \n\t" //6
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2101
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2102 "movq %%mm7, "DCTSIZE_S"*1*2+"DCTSIZE_S"(%%"REG_D") \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2103 "punpckhdq %%mm6, %%mm5 \n\t" //7
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2104
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2105 "movq %%mm4, "DCTSIZE_S"*2*2+"DCTSIZE_S"(%%"REG_D") \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2106 "add $4, %%"REG_S" \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2107
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2108 "movq %%mm5, "DCTSIZE_S"*3*2+"DCTSIZE_S"(%%"REG_D") \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2109 "add $"DCTSIZE_S"*2*4, %%"REG_D" \n\t" //4 rows
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2110 "dec %%"REG_c" \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2111 "jnz 6b \n\t"
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2112
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2113 : "+S"(pixels), "+D"(data), "+c"(cnt), "=o"(temps)
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2114 : "a"(line_size)
7af3e6f901fd Convert some tabs to whitespace to allow using MPlayer filter sourcecode in FFmpeg.
cehoyos
parents: 32537
diff changeset
2115 : "%"REG_d);
15631
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
2116 }
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
2117
d5a95e6f5f07 faster spp filter by Nikolaj Poroshin <porosh3 at psu ru>
henry
parents:
diff changeset
2118 #endif // HAVE_MMX