Mercurial > mplayer.hg
annotate libmpcodecs/vf_tfields.c @ 17149:9a0a376a54b1
Move audio packets reordering from codec interface to demuxers for real
files (old and new format), pass only real extradata to the codec
Enable cook codec from lavc, prefer lavc codecs for 14_4 and 28_8
formats. Disable internal 28_8, it's broken now and will be removed soon
author | rtognimp |
---|---|
date | Fri, 09 Dec 2005 16:25:37 +0000 |
parents | 6ff3379a0862 |
children | 20aca9baf5d8 |
rev | line source |
---|---|
9514 | 1 #include <stdio.h> |
2 #include <stdlib.h> | |
3 #include <string.h> | |
4 | |
17012 | 5 #include "config.h" |
6 #include "mp_msg.h" | |
7 #include "cpudetect.h" | |
9514 | 8 |
9 #include "img_format.h" | |
10 #include "mp_image.h" | |
11 #include "vf.h" | |
12 | |
17012 | 13 #include "libvo/fastmemcpy.h" |
9514 | 14 |
/* Per-instance settings of the tfields filter. */
struct vf_priv_s {
	/* Selected filter mode; the code that interprets it is not part of
	 * this excerpt. */
	int mode;
	/* Field parity. A negative value means "take the field order from the
	 * incoming frame" (top field first when the frame carries no order
	 * flag); otherwise bit 0 clear selects top field first and bit 0 set
	 * selects bottom field first. */
	int parity;
};
19 | |
/**
 * Copy a block of lines between two buffers that have independent strides.
 *
 * @param dst          first destination line
 * @param src          first source line
 * @param bytesPerLine number of bytes copied from each line
 * @param height       number of lines to copy
 * @param dstStride    byte distance between consecutive destination lines
 * @param srcStride    byte distance between consecutive source lines
 * @return dst, unchanged
 */
static inline void *my_memcpy_pic(void * dst, void * src, int bytesPerLine, int height, int dstStride, int srcStride)
{
	/* Walk byte pointers: arithmetic on void * is a GNU extension and is
	 * not valid ISO C. */
	unsigned char *out = dst;
	const unsigned char *in = src;
	int i;

	for (i = 0; i < height; i++) {
		memcpy(out, in, bytesPerLine);
		in += srcStride;
		out += dstStride;
	}

	return dst;
}
34 | |
/**
 * Write every second line of src to dest, smoothing pixels that are a strict
 * local extreme relative to the lines directly above and below.
 *
 * Processing starts at line 1 (field == 0) or line 2 (field != 0) and then
 * advances two lines at a time; the other lines of dest are not touched.
 * A pixel that is strictly greater, or strictly smaller, than both vertical
 * neighbours is replaced by a weighted average of the six pixels above and
 * below it (weight 2 for the vertical neighbours, 1 for the diagonal ones);
 * every other pixel is copied unchanged.
 *
 * @param dest  destination plane (line 0)
 * @param ds    destination stride in bytes
 * @param src   source plane (line 0)
 * @param ss    source stride in bytes
 * @param w     number of bytes processed per line
 * @param h     number of lines in the plane
 * @param field 0 to process lines 1,3,5,...; non-zero for lines 2,4,6,...
 */
static void deint(unsigned char *dest, int ds, unsigned char *src, int ss, int w, int h, int field)
{
	int x, y;
	int row = field ? 2 : 1;                  /* index of the line being written */
	int count = (field ? h - 2 : h) / 2;      /* same line count as before */

	src += ss;
	dest += ds;
	if (field) {
		src += ss;
		dest += ds;
	}
	/* "y > 0" instead of "y": a negative count must not start the loop. */
	for (y = count; y > 0; y--, row += 2) {
		if (row + 1 >= h) {
			/* There is no line below the last one: reading it would run
			 * past the plane, so the line is copied as is. */
			for (x = 0; x < w; x++)
				dest[x] = src[x];
		} else {
			const unsigned char *above = src - ss;
			const unsigned char *below = src + ss;
			for (x = 0; x < w; x++) {
				/* Diagonal neighbours are clamped to the line so the
				 * first and last column never read outside it. */
				int xl = x > 0 ? x - 1 : x;
				int xr = x < w - 1 ? x + 1 : x;
				if (((above[x] < src[x]) && (below[x] < src[x])) ||
				    ((above[x] > src[x]) && (below[x] > src[x]))) {
					dest[x] = ((below[x]<<1) + (above[x]<<1)
						+ below[xr] + above[xr]
						+ below[xl] + above[xl])>>3;
				}
				else dest[x] = src[x];
			}
		}
		dest += 2 * ds;
		src += 2 * ss;
	}
}
60 | |
#ifdef HAVE_3DNOW
/*
 * 3DNow! version of the linear quarter-line blend: every output byte is
 * roughly (3*s[x] + neighbour[x]) / 4, obtained by averaging twice with
 * pavgusb. The neighbour is the next line (up == 0) or the previous line
 * (up != 0). The one line that has no such neighbour (the last line for
 * up == 0, the first for up != 0) is copied verbatim.
 *
 * d/ds: destination and its stride, s/ss: source and its stride,
 * w: bytes per line, h: number of lines.
 */
static void qpel_li_3DNOW(unsigned char *d, unsigned char *s, int w, int h, int ds, int ss, int up)
{
	int i, j, ssd=ss;
	if (up) {
		ssd = -ss;
		memcpy(d, s, w);
		d += ds;
		s += ss;
	}
	/* "i>0": a non-positive line count must not start a runaway loop. */
	for (i=h-1; i>0; i--) {
		/* The asm advances its pointers and counter, so it works on
		 * copies declared as read-write operands; d and s stay valid for
		 * the scalar tail below. */
		unsigned char *sp = s, *dp = d;
		int n = w>>3;
		/* With fewer than 8 bytes per line the counter would start at 0
		 * and "decl" would wrap around, so skip the asm entirely. */
		if (n > 0) {
			asm volatile(
				"1: \n\t"
				"movq (%%"REG_S"), %%mm0 \n\t"
				"movq (%%"REG_S",%%"REG_a"), %%mm1 \n\t"
				"pavgusb %%mm0, %%mm1 \n\t"
				"add $8, %%"REG_S" \n\t"
				"pavgusb %%mm0, %%mm1 \n\t"
				"movq %%mm1, (%%"REG_D") \n\t"
				"add $8, %%"REG_D" \n\t"
				"decl %%ecx \n\t"
				"jnz 1b \n\t"
				: "+S"(sp), "+D"(dp), "+c"(n)
				: "a"((long)ssd)
				: "memory"
			);
		}
		/* Remaining w&7 bytes of the line. */
		for (j=w-(w&7); j<w; j++)
			d[j] = (s[j+ssd] + 3*s[j])>>2;
		d += ds;
		s += ss;
	}
	if (!up) memcpy(d, s, w);
	asm volatile("emms \n\t" : : : "memory");
}
#endif
10009
69f10d08c3be
new mode for tfields filter -- shifts fields by a quarter-pixel so the
rfelker
parents:
9593
diff
changeset
|
96 |
#ifdef HAVE_MMX2
/*
 * MMX2 version of the linear quarter-line blend: every output byte is
 * roughly (3*s[x] + neighbour[x]) / 4, obtained by averaging twice with
 * pavgb. The neighbour is the next line (up == 0) or the previous line
 * (up != 0). The one line that has no such neighbour (the last line for
 * up == 0, the first for up != 0) is copied verbatim.
 *
 * d/ds: destination and its stride, s/ss: source and its stride,
 * w: bytes per line, h: number of lines.
 */
static void qpel_li_MMX2(unsigned char *d, unsigned char *s, int w, int h, int ds, int ss, int up)
{
	int i, j, ssd=ss;
	if (up) {
		ssd = -ss;
		memcpy(d, s, w);
		d += ds;
		s += ss;
	}
	/* "i>0": a non-positive line count must not start a runaway loop. */
	for (i=h-1; i>0; i--) {
		/* The asm advances its pointers and counter, so it works on
		 * copies declared as read-write operands; d and s stay valid for
		 * the scalar tail below. */
		unsigned char *sp = s, *dp = d;
		int n = w>>3;
		/* With fewer than 8 bytes per line the counter would start at 0
		 * and "decl" would wrap around, so skip the asm entirely. */
		if (n > 0) {
			asm volatile(
				"2: \n\t"
				"movq (%%"REG_S"), %%mm0 \n\t"
				"movq (%%"REG_S",%%"REG_a"), %%mm1 \n\t"
				"pavgb %%mm0, %%mm1 \n\t"
				"add $8, %%"REG_S" \n\t"
				"pavgb %%mm0, %%mm1 \n\t"
				"movq %%mm1, (%%"REG_D") \n\t"
				"add $8, %%"REG_D" \n\t"
				"decl %%ecx \n\t"
				"jnz 2b \n\t"
				: "+S"(sp), "+D"(dp), "+c"(n)
				: "a"((long)ssd)
				: "memory"
			);
		}
		/* Remaining w&7 bytes of the line. */
		for (j=w-(w&7); j<w; j++)
			d[j] = (s[j+ssd] + 3*s[j])>>2;
		d += ds;
		s += ss;
	}
	if (!up) memcpy(d, s, w);
	asm volatile("emms \n\t" : : : "memory");
}
#endif
133 | |
134 #ifdef HAVE_MMX | |
10049
765c2276aa0c
more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents:
10020
diff
changeset
|
135 static void qpel_li_MMX(unsigned char *d, unsigned char *s, int w, int h, int ds, int ss, int up) |
10020 | 136 { |
137 int i, j, ssd=ss; | |
138 int crap1, crap2; | |
139 if (up) { | |
140 ssd = -ss; | |
141 memcpy(d, s, w); | |
142 d += ds; | |
143 s += ss; | |
144 } | |
145 for (i=h-1; i; i--) { | |
10049
765c2276aa0c
more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents:
10020
diff
changeset
|
146 asm volatile( |
10020 | 147 "pxor %%mm7, %%mm7 \n\t" |
10049
765c2276aa0c
more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents:
10020
diff
changeset
|
148 "3: \n\t" |
13720
821f464b4d90
adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents:
10078
diff
changeset
|
149 "movq (%%"REG_S"), %%mm0 \n\t" |
821f464b4d90
adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents:
10078
diff
changeset
|
150 "movq (%%"REG_S"), %%mm1 \n\t" |
821f464b4d90
adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents:
10078
diff
changeset
|
151 "movq (%%"REG_S",%%"REG_a"), %%mm2 \n\t" |
821f464b4d90
adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents:
10078
diff
changeset
|
152 "movq (%%"REG_S",%%"REG_a"), %%mm3 \n\t" |
821f464b4d90
adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents:
10078
diff
changeset
|
153 "add $8, %%"REG_S" \n\t" |
10020 | 154 "punpcklbw %%mm7, %%mm0 \n\t" |
155 "punpckhbw %%mm7, %%mm1 \n\t" | |
156 "punpcklbw %%mm7, %%mm2 \n\t" | |
157 "punpckhbw %%mm7, %%mm3 \n\t" | |
158 "paddw %%mm0, %%mm2 \n\t" | |
159 "paddw %%mm1, %%mm3 \n\t" | |
160 "paddw %%mm0, %%mm2 \n\t" | |
161 "paddw %%mm1, %%mm3 \n\t" | |
162 "paddw %%mm0, %%mm2 \n\t" | |
163 "paddw %%mm1, %%mm3 \n\t" | |
164 "psrlw $2, %%mm2 \n\t" | |
165 "psrlw $2, %%mm3 \n\t" | |
166 "packsswb %%mm3, %%mm2 \n\t" | |
13720
821f464b4d90
adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents:
10078
diff
changeset
|
167 "movq %%mm2, (%%"REG_D") \n\t" |
821f464b4d90
adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents:
10078
diff
changeset
|
168 "add $8, %%"REG_D" \n\t" |
10020 | 169 "decl %%ecx \n\t" |
10049
765c2276aa0c
more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents:
10020
diff
changeset
|
170 "jnz 3b \n\t" |
10020 | 171 : "=S"(crap1), "=D"(crap2) |
13720
821f464b4d90
adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents:
10078
diff
changeset
|
172 : "c"(w>>3), "S"(s), "D"(d), "a"((long)ssd) |
10020 | 173 ); |
10049
765c2276aa0c
more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents:
10020
diff
changeset
|
174 for (j=w-(w&7); j<w; j++) |
10020 | 175 d[j] = (s[j+ssd] + 3*s[j])>>2; |
176 d += ds; | |
177 s += ss; | |
178 } | |
179 if (!up) memcpy(d, s, w); | |
180 asm volatile("emms \n\t" : : : "memory"); | |
181 } | |
10049
765c2276aa0c
more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents:
10020
diff
changeset
|
182 |
765c2276aa0c
more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents:
10020
diff
changeset
|
183 static void qpel_4tap_MMX(unsigned char *d, unsigned char *s, int w, int h, int ds, int ss, int up) |
765c2276aa0c
more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents:
10020
diff
changeset
|
184 { |
765c2276aa0c
more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents:
10020
diff
changeset
|
185 int i, j, ssd=ss; |
765c2276aa0c
more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents:
10020
diff
changeset
|
186 static const short filter[] = { |
765c2276aa0c
more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents:
10020
diff
changeset
|
187 29, 29, 29, 29, 110, 110, 110, 110, |
765c2276aa0c
more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents:
10020
diff
changeset
|
188 9, 9, 9, 9, 3, 3, 3, 3, |
765c2276aa0c
more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents:
10020
diff
changeset
|
189 64, 64, 64, 64 }; |
765c2276aa0c
more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents:
10020
diff
changeset
|
190 int crap1, crap2; |
765c2276aa0c
more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents:
10020
diff
changeset
|
191 if (up) { |
765c2276aa0c
more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents:
10020
diff
changeset
|
192 ssd = -ss; |
765c2276aa0c
more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents:
10020
diff
changeset
|
193 memcpy(d, s, w); |
765c2276aa0c
more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents:
10020
diff
changeset
|
194 d += ds; s += ss; |
765c2276aa0c
more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents:
10020
diff
changeset
|
195 } |
765c2276aa0c
more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents:
10020
diff
changeset
|
196 for (j=0; j<w; j++) |
765c2276aa0c
more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents:
10020
diff
changeset
|
197 d[j] = (s[j+ssd] + 3*s[j])>>2; |
765c2276aa0c
more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents:
10020
diff
changeset
|
198 d += ds; s += ss; |
765c2276aa0c
more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents:
10020
diff
changeset
|
199 for (i=h-3; i; i--) { |
765c2276aa0c
more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents:
10020
diff
changeset
|
200 asm volatile( |
765c2276aa0c
more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents:
10020
diff
changeset
|
201 "pxor %%mm0, %%mm0 \n\t" |
13720
821f464b4d90
adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents:
10078
diff
changeset
|
202 "movq (%%"REG_d"), %%mm4 \n\t" |
821f464b4d90
adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents:
10078
diff
changeset
|
203 "movq 8(%%"REG_d"), %%mm5 \n\t" |
821f464b4d90
adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents:
10078
diff
changeset
|
204 "movq 16(%%"REG_d"), %%mm6 \n\t" |
821f464b4d90
adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents:
10078
diff
changeset
|
205 "movq 24(%%"REG_d"), %%mm7 \n\t" |
10049
765c2276aa0c
more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents:
10020
diff
changeset
|
206 "4: \n\t" |
765c2276aa0c
more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents:
10020
diff
changeset
|
207 |
13720
821f464b4d90
adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents:
10078
diff
changeset
|
208 "movq (%%"REG_S",%%"REG_a"), %%mm1 \n\t" |
821f464b4d90
adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents:
10078
diff
changeset
|
209 "movq (%%"REG_S"), %%mm2 \n\t" |
821f464b4d90
adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents:
10078
diff
changeset
|
210 "movq (%%"REG_S",%%"REG_b"), %%mm3 \n\t" |
10049
765c2276aa0c
more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents:
10020
diff
changeset
|
211 "punpcklbw %%mm0, %%mm1 \n\t" |
765c2276aa0c
more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents:
10020
diff
changeset
|
212 "punpcklbw %%mm0, %%mm2 \n\t" |
765c2276aa0c
more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents:
10020
diff
changeset
|
213 "pmullw %%mm4, %%mm1 \n\t" |
765c2276aa0c
more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents:
10020
diff
changeset
|
214 "punpcklbw %%mm0, %%mm3 \n\t" |
765c2276aa0c
more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents:
10020
diff
changeset
|
215 "pmullw %%mm5, %%mm2 \n\t" |
765c2276aa0c
more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents:
10020
diff
changeset
|
216 "paddusw %%mm2, %%mm1 \n\t" |
765c2276aa0c
more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents:
10020
diff
changeset
|
217 "pmullw %%mm6, %%mm3 \n\t" |
13720
821f464b4d90
adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents:
10078
diff
changeset
|
218 "movq (%%"REG_S",%%"REG_a",2), %%mm2 \n\t" |
10049
765c2276aa0c
more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents:
10020
diff
changeset
|
219 "psubusw %%mm3, %%mm1 \n\t" |
765c2276aa0c
more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents:
10020
diff
changeset
|
220 "punpcklbw %%mm0, %%mm2 \n\t" |
765c2276aa0c
more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents:
10020
diff
changeset
|
221 "pmullw %%mm7, %%mm2 \n\t" |
765c2276aa0c
more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents:
10020
diff
changeset
|
222 "psubusw %%mm2, %%mm1 \n\t" |
765c2276aa0c
more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents:
10020
diff
changeset
|
223 "psrlw $7, %%mm1 \n\t" |
765c2276aa0c
more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents:
10020
diff
changeset
|
224 |
13720
821f464b4d90
adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents:
10078
diff
changeset
|
225 "movq (%%"REG_S",%%"REG_a"), %%mm2 \n\t" |
821f464b4d90
adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents:
10078
diff
changeset
|
226 "movq (%%"REG_S"), %%mm3 \n\t" |
10049
765c2276aa0c
more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents:
10020
diff
changeset
|
227 "punpckhbw %%mm0, %%mm2 \n\t" |
765c2276aa0c
more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents:
10020
diff
changeset
|
228 "punpckhbw %%mm0, %%mm3 \n\t" |
765c2276aa0c
more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents:
10020
diff
changeset
|
229 "pmullw %%mm4, %%mm2 \n\t" |
765c2276aa0c
more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents:
10020
diff
changeset
|
230 "pmullw %%mm5, %%mm3 \n\t" |
765c2276aa0c
more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents:
10020
diff
changeset
|
231 "paddusw %%mm3, %%mm2 \n\t" |
13720
821f464b4d90
adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents:
10078
diff
changeset
|
232 "movq (%%"REG_S",%%"REG_b"), %%mm3 \n\t" |
10049
765c2276aa0c
more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents:
10020
diff
changeset
|
233 "punpckhbw %%mm0, %%mm3 \n\t" |
765c2276aa0c
more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents:
10020
diff
changeset
|
234 "pmullw %%mm6, %%mm3 \n\t" |
765c2276aa0c
more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents:
10020
diff
changeset
|
235 "psubusw %%mm3, %%mm2 \n\t" |
13720
821f464b4d90
adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents:
10078
diff
changeset
|
236 "movq (%%"REG_S",%%"REG_a",2), %%mm3 \n\t" |
10049
765c2276aa0c
more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents:
10020
diff
changeset
|
237 "punpckhbw %%mm0, %%mm3 \n\t" |
13720
821f464b4d90
adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents:
10078
diff
changeset
|
238 "add $8, %%"REG_S" \n\t" |
10049
765c2276aa0c
more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents:
10020
diff
changeset
|
239 "pmullw %%mm7, %%mm3 \n\t" |
765c2276aa0c
more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents:
10020
diff
changeset
|
240 "psubusw %%mm3, %%mm2 \n\t" |
765c2276aa0c
more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents:
10020
diff
changeset
|
241 "psrlw $7, %%mm2 \n\t" |
765c2276aa0c
more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents:
10020
diff
changeset
|
242 |
765c2276aa0c
more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents:
10020
diff
changeset
|
243 "packuswb %%mm2, %%mm1 \n\t" |
13720
821f464b4d90
adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents:
10078
diff
changeset
|
244 "movq %%mm1, (%%"REG_D") \n\t" |
821f464b4d90
adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents:
10078
diff
changeset
|
245 "add $8, %%"REG_D" \n\t" |
10049
765c2276aa0c
more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents:
10020
diff
changeset
|
246 "decl %%ecx \n\t" |
765c2276aa0c
more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents:
10020
diff
changeset
|
247 "jnz 4b \n\t" |
765c2276aa0c
more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents:
10020
diff
changeset
|
248 : "=S"(crap1), "=D"(crap2) |
13720
821f464b4d90
adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents:
10078
diff
changeset
|
249 : "c"(w>>3), "S"(s), "D"(d), "a"((long)ssd), "b"((long)-ssd), "d"(filter) |
10049
765c2276aa0c
more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents:
10020
diff
changeset
|
250 ); |
765c2276aa0c
more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents:
10020
diff
changeset
|
251 for (j=w-(w&7); j<w; j++) |
765c2276aa0c
more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents:
10020
diff
changeset
|
252 d[j] = (-9*s[j-ssd] + 111*s[j] + 29*s[j+ssd] - 3*s[j+ssd+ssd])>>7; |
765c2276aa0c
more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents:
10020
diff
changeset
|
253 d += ds; |
765c2276aa0c
more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents:
10020
diff
changeset
|
254 s += ss; |
765c2276aa0c
more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents:
10020
diff
changeset
|
255 } |
765c2276aa0c
more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents:
10020
diff
changeset
|
256 for (j=0; j<w; j++) |
765c2276aa0c
more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents:
10020
diff
changeset
|
257 d[j] = (s[j+ssd] + 3*s[j])>>2; |
765c2276aa0c
more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents:
10020
diff
changeset
|
258 d += ds; s += ss; |
765c2276aa0c
more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents:
10020
diff
changeset
|
259 if (!up) memcpy(d, s, w); |
765c2276aa0c
more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents:
10020
diff
changeset
|
260 asm volatile("emms \n\t" : : : "memory"); |
765c2276aa0c
more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents:
10020
diff
changeset
|
261 } |
10020 | 262 #endif |
263 | |
10049
765c2276aa0c
more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents:
10020
diff
changeset
|
/**
 * Saturate a value to the range of an 8-bit pixel.
 *
 * @param a value to limit
 * @return 0 if a is negative, 255 if a is above 255, otherwise a
 *
 * Written with plain comparisons: the former shift-based form left-shifted
 * a signed int into the sign bit (undefined behaviour), depended on int
 * being 32 bits wide, and only worked for a < 512.
 */
static inline int clamp(int a)
{
	if (a < 0)
		return 0;
	if (a > 255)
		return 255;
	return a;
}
765c2276aa0c
more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents:
10020
diff
changeset
|
270 |
765c2276aa0c
more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents:
10020
diff
changeset
|
/*
 * Portable linear quarter-line blend.
 *
 * Every output byte is (3*s[x] + other[x]) >> 2, where "other" is the same
 * column of the next source line (up == 0) or of the previous source line
 * (up != 0). The one line that has no such neighbour is copied verbatim:
 * the last line for up == 0, the first line for up != 0.
 *
 * d/ds: destination and its stride, s/ss: source and its stride,
 * w: bytes per line, h: number of lines.
 */
static void qpel_li_C(unsigned char *d, unsigned char *s, int w, int h, int ds, int ss, int up)
{
	int col;
	int remaining = h - 1;          /* lines that get blended */
	int other = up ? -ss : ss;      /* byte offset of the neighbouring line */

	if (up) {
		/* No previous line exists for the first one. */
		memcpy(d, s, w);
		d += ds;
		s += ss;
	}

	while (remaining != 0) {
		for (col = 0; col < w; col++) {
			int here = s[col];
			int there = s[col + other];
			d[col] = (there + 3*here) >> 2;
		}
		d += ds;
		s += ss;
		remaining--;
	}

	if (!up) {
		/* No next line exists for the last one. */
		memcpy(d, s, w);
	}
}
9514 | 288 |
10049
765c2276aa0c
more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents:
10020
diff
changeset
|
/*
 * Portable 4-tap quarter-line filter.
 *
 * "Ahead" is the next source line when up == 0 and the previous one when
 * up != 0. The first and the last filtered line use the rounded 2-tap blend
 * (3*s[x] + ahead[x] + 2) >> 2. The h-3 lines in between use
 * (-9*behind + 111*current + 29*ahead - 3*ahead2 + 64) >> 7, passed through
 * clamp(). The one line without a neighbour in the "ahead" direction (last
 * line for up == 0, first line for up != 0) is copied verbatim.
 *
 * d/ds: destination and its stride, s/ss: source and its stride,
 * w: bytes per line, h: number of lines.
 */
static void qpel_4tap_C(unsigned char *d, unsigned char *s, int w, int h, int ds, int ss, int up)
{
	int col;
	int middle;
	int ahead = up ? -ss : ss;   /* byte offset of the line "ahead" */

	if (up) {
		memcpy(d, s, w);
		d += ds;
		s += ss;
	}

	/* First filtered line: only one neighbour is used. */
	for (col = 0; col < w; col++)
		d[col] = (s[col + ahead] + 3*s[col] + 2) >> 2;
	d += ds;
	s += ss;

	/* Middle lines: full 4-tap kernel. */
	middle = h - 3;
	while (middle != 0) {
		for (col = 0; col < w; col++) {
			int acc = -9*s[col - ahead] + 111*s[col]
			        + 29*s[col + ahead] - 3*s[col + ahead + ahead] + 64;
			d[col] = clamp(acc >> 7);
		}
		d += ds;
		s += ss;
		middle--;
	}

	/* Last filtered line: 2-tap blend again. */
	for (col = 0; col < w; col++)
		d[col] = (s[col + ahead] + 3*s[col] + 2) >> 2;
	d += ds;
	s += ss;

	if (!up) {
		memcpy(d, s, w);
	}
}
765c2276aa0c
more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents:
10020
diff
changeset
|
310 |
765c2276aa0c
more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents:
10020
diff
changeset
|
/* Function pointers with the same signature as the qpel_li_* and
 * qpel_4tap_* routines defined in this file. Nothing in this part of the
 * file assigns them; presumably they are pointed at one of those variants
 * during filter setup -- TODO confirm. */
static void (*qpel_li)(unsigned char *d, unsigned char *s, int w, int h, int ds, int ss, int up);
static void (*qpel_4tap)(unsigned char *d, unsigned char *s, int w, int h, int ds, int ss, int up);
9514 | 313 |
314 static int put_image(struct vf_instance_s* vf, mp_image_t *mpi) | |
315 { | |
14888
32dcf8672086
configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents:
13720
diff
changeset
|
316 int i; |
32dcf8672086
configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents:
13720
diff
changeset
|
317 int ret=0; |
9514 | 318 mp_image_t *dmpi; |
10049
765c2276aa0c
more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents:
10020
diff
changeset
|
319 void (*qpel)(unsigned char *, unsigned char *, int, int, int, int, int); |
10078
379f48cace77
support more image formats. hopefully this bpp handling is correct...
rfelker
parents:
10052
diff
changeset
|
320 int bpp=1; |
14888
32dcf8672086
configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents:
13720
diff
changeset
|
321 int tff; |
10078
379f48cace77
support more image formats. hopefully this bpp handling is correct...
rfelker
parents:
10052
diff
changeset
|
322 |
379f48cace77
support more image formats. hopefully this bpp handling is correct...
rfelker
parents:
10052
diff
changeset
|
323 if (!(mpi->flags & MP_IMGFLAG_PLANAR)) bpp = mpi->bpp/8; |
14888
32dcf8672086
configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents:
13720
diff
changeset
|
324 if (vf->priv->parity < 0) { |
32dcf8672086
configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents:
13720
diff
changeset
|
325 if (mpi->fields & MP_IMGFIELD_ORDERED) |
32dcf8672086
configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents:
13720
diff
changeset
|
326 tff = mpi->fields & MP_IMGFIELD_TOP_FIRST; |
32dcf8672086
configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents:
13720
diff
changeset
|
327 else |
32dcf8672086
configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents:
13720
diff
changeset
|
328 tff = 1; |
32dcf8672086
configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents:
13720
diff
changeset
|
329 } |
32dcf8672086
configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents:
13720
diff
changeset
|
330 else tff = (vf->priv->parity&1)^1; |
10049
765c2276aa0c
more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents:
10020
diff
changeset
|
331 |
765c2276aa0c
more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents:
10020
diff
changeset
|
332 switch (vf->priv->mode) { |
765c2276aa0c
more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents:
10020
diff
changeset
|
333 case 2: |
765c2276aa0c
more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents:
10020
diff
changeset
|
334 qpel = qpel_li; |
765c2276aa0c
more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents:
10020
diff
changeset
|
335 break; |
765c2276aa0c
more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents:
10020
diff
changeset
|
336 case 3: |
765c2276aa0c
more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents:
10020
diff
changeset
|
337 // TODO: add 3tap filter |
765c2276aa0c
more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents:
10020
diff
changeset
|
338 qpel = qpel_4tap; |
765c2276aa0c
more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents:
10020
diff
changeset
|
339 break; |
765c2276aa0c
more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents:
10020
diff
changeset
|
340 case 4: |
765c2276aa0c
more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents:
10020
diff
changeset
|
341 qpel = qpel_4tap; |
765c2276aa0c
more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents:
10020
diff
changeset
|
342 break; |
765c2276aa0c
more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents:
10020
diff
changeset
|
343 } |
9514 | 344 |
345 switch (vf->priv->mode) { | |
346 case 0: | |
14888
32dcf8672086
configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents:
13720
diff
changeset
|
347 for (i=0; i<2; i++) { |
32dcf8672086
configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents:
13720
diff
changeset
|
348 dmpi = vf_get_image(vf->next, mpi->imgfmt, |
32dcf8672086
configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents:
13720
diff
changeset
|
349 MP_IMGTYPE_EXPORT, MP_IMGFLAG_ACCEPT_STRIDE, |
32dcf8672086
configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents:
13720
diff
changeset
|
350 mpi->width, mpi->height/2); |
32dcf8672086
configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents:
13720
diff
changeset
|
351 dmpi->planes[0] = mpi->planes[0] + (i^!tff)*mpi->stride[0]; |
32dcf8672086
configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents:
13720
diff
changeset
|
352 dmpi->stride[0] = 2*mpi->stride[0]; |
32dcf8672086
configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents:
13720
diff
changeset
|
353 if (mpi->flags & MP_IMGFLAG_PLANAR) { |
32dcf8672086
configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents:
13720
diff
changeset
|
354 dmpi->planes[1] = mpi->planes[1] + (i^!tff)*mpi->stride[1]; |
32dcf8672086
configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents:
13720
diff
changeset
|
355 dmpi->planes[2] = mpi->planes[2] + (i^!tff)*mpi->stride[2]; |
32dcf8672086
configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents:
13720
diff
changeset
|
356 dmpi->stride[1] = 2*mpi->stride[1]; |
32dcf8672086
configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents:
13720
diff
changeset
|
357 dmpi->stride[2] = 2*mpi->stride[2]; |
32dcf8672086
configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents:
13720
diff
changeset
|
358 } |
32dcf8672086
configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents:
13720
diff
changeset
|
359 ret |= vf_next_put_image(vf, dmpi); |
32dcf8672086
configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents:
13720
diff
changeset
|
360 if (!i) vf_next_control(vf, VFCTRL_FLIP_PAGE, NULL); |
9514 | 361 } |
14888
32dcf8672086
configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents:
13720
diff
changeset
|
362 break; |
9514 | 363 case 1: |
14888
32dcf8672086
configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents:
13720
diff
changeset
|
364 for (i=0; i<2; i++) { |
32dcf8672086
configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents:
13720
diff
changeset
|
365 dmpi = vf_get_image(vf->next, mpi->imgfmt, |
32dcf8672086
configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents:
13720
diff
changeset
|
366 MP_IMGTYPE_TEMP, MP_IMGFLAG_ACCEPT_STRIDE, |
32dcf8672086
configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents:
13720
diff
changeset
|
367 mpi->width, mpi->height); |
32dcf8672086
configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents:
13720
diff
changeset
|
368 my_memcpy_pic(dmpi->planes[0] + (i^!tff)*dmpi->stride[0], |
32dcf8672086
configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents:
13720
diff
changeset
|
369 mpi->planes[0] + (i^!tff)*mpi->stride[0], |
32dcf8672086
configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents:
13720
diff
changeset
|
370 mpi->w*bpp, mpi->h/2, dmpi->stride[0]*2, mpi->stride[0]*2); |
32dcf8672086
configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents:
13720
diff
changeset
|
371 deint(dmpi->planes[0], dmpi->stride[0], mpi->planes[0], mpi->stride[0], mpi->w, mpi->h, (i^!tff)); |
32dcf8672086
configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents:
13720
diff
changeset
|
372 if (mpi->flags & MP_IMGFLAG_PLANAR) { |
32dcf8672086
configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents:
13720
diff
changeset
|
373 my_memcpy_pic(dmpi->planes[1] + (i^!tff)*dmpi->stride[1], |
32dcf8672086
configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents:
13720
diff
changeset
|
374 mpi->planes[1] + (i^!tff)*mpi->stride[1], |
32dcf8672086
configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents:
13720
diff
changeset
|
375 mpi->chroma_width, mpi->chroma_height/2, |
32dcf8672086
configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents:
13720
diff
changeset
|
376 dmpi->stride[1]*2, mpi->stride[1]*2); |
32dcf8672086
configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents:
13720
diff
changeset
|
377 my_memcpy_pic(dmpi->planes[2] + (i^!tff)*dmpi->stride[2], |
32dcf8672086
configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents:
13720
diff
changeset
|
378 mpi->planes[2] + (i^!tff)*mpi->stride[2], |
32dcf8672086
configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents:
13720
diff
changeset
|
379 mpi->chroma_width, mpi->chroma_height/2, |
32dcf8672086
configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents:
13720
diff
changeset
|
380 dmpi->stride[2]*2, mpi->stride[2]*2); |
32dcf8672086
configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents:
13720
diff
changeset
|
381 deint(dmpi->planes[1], dmpi->stride[1], mpi->planes[1], mpi->stride[1], |
32dcf8672086
configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents:
13720
diff
changeset
|
382 mpi->chroma_width, mpi->chroma_height, (i^!tff)); |
32dcf8672086
configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents:
13720
diff
changeset
|
383 deint(dmpi->planes[2], dmpi->stride[2], mpi->planes[2], mpi->stride[2], |
32dcf8672086
configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents:
13720
diff
changeset
|
384 mpi->chroma_width, mpi->chroma_height, (i^!tff)); |
32dcf8672086
configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents:
13720
diff
changeset
|
385 } |
32dcf8672086
configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents:
13720
diff
changeset
|
386 ret |= vf_next_put_image(vf, dmpi); |
32dcf8672086
configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents:
13720
diff
changeset
|
387 if (!i) vf_next_control(vf, VFCTRL_FLIP_PAGE, NULL); |
9514 | 388 } |
14888
32dcf8672086
configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents:
13720
diff
changeset
|
389 break; |
10009
69f10d08c3be
new mode for tfields filter -- shifts fields by a quarter-pixel so the
rfelker
parents:
9593
diff
changeset
|
390 case 2: |
10049
765c2276aa0c
more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents:
10020
diff
changeset
|
391 case 3: |
765c2276aa0c
more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents:
10020
diff
changeset
|
392 case 4: |
14888
32dcf8672086
configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents:
13720
diff
changeset
|
393 for (i=0; i<2; i++) { |
32dcf8672086
configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents:
13720
diff
changeset
|
394 dmpi = vf_get_image(vf->next, mpi->imgfmt, |
32dcf8672086
configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents:
13720
diff
changeset
|
395 MP_IMGTYPE_TEMP, MP_IMGFLAG_ACCEPT_STRIDE, |
32dcf8672086
configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents:
13720
diff
changeset
|
396 mpi->width, mpi->height/2); |
32dcf8672086
configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents:
13720
diff
changeset
|
397 qpel(dmpi->planes[0], mpi->planes[0] + (i^!tff)*mpi->stride[0], |
15012 | 398 mpi->w*bpp, mpi->h/2, dmpi->stride[0], mpi->stride[0]*2, (i^!tff)); |
14888
32dcf8672086
configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents:
13720
diff
changeset
|
399 if (mpi->flags & MP_IMGFLAG_PLANAR) { |
32dcf8672086
configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents:
13720
diff
changeset
|
400 qpel(dmpi->planes[1], |
32dcf8672086
configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents:
13720
diff
changeset
|
401 mpi->planes[1] + (i^!tff)*mpi->stride[1], |
32dcf8672086
configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents:
13720
diff
changeset
|
402 mpi->chroma_width, mpi->chroma_height/2, |
15012 | 403 dmpi->stride[1], mpi->stride[1]*2, (i^!tff)); |
14888
32dcf8672086
configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents:
13720
diff
changeset
|
404 qpel(dmpi->planes[2], |
32dcf8672086
configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents:
13720
diff
changeset
|
405 mpi->planes[2] + (i^!tff)*mpi->stride[2], |
32dcf8672086
configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents:
13720
diff
changeset
|
406 mpi->chroma_width, mpi->chroma_height/2, |
15012 | 407 dmpi->stride[2], mpi->stride[2]*2, (i^!tff)); |
14888
32dcf8672086
configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents:
13720
diff
changeset
|
408 } |
32dcf8672086
configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents:
13720
diff
changeset
|
409 ret |= vf_next_put_image(vf, dmpi); |
32dcf8672086
configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents:
13720
diff
changeset
|
410 if (!i) vf_next_control(vf, VFCTRL_FLIP_PAGE, NULL); |
10009
69f10d08c3be
new mode for tfields filter -- shifts fields by a quarter-pixel so the
rfelker
parents:
9593
diff
changeset
|
411 } |
14888
32dcf8672086
configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents:
13720
diff
changeset
|
412 break; |
9514 | 413 } |
14888
32dcf8672086
configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents:
13720
diff
changeset
|
414 return ret; |
9514 | 415 } |
416 | |
417 static int query_format(struct vf_instance_s* vf, unsigned int fmt) | |
418 { | |
419 /* FIXME - figure out which other formats work */ | |
420 switch (fmt) { | |
421 case IMGFMT_YV12: | |
422 case IMGFMT_IYUV: | |
423 case IMGFMT_I420: | |
424 return vf_next_query_format(vf, fmt); | |
425 } | |
426 return 0; | |
427 } | |
428 | |
429 static int config(struct vf_instance_s* vf, | |
430 int width, int height, int d_width, int d_height, | |
431 unsigned int flags, unsigned int outfmt) | |
432 { | |
433 switch (vf->priv->mode) { | |
434 case 0: | |
10009
69f10d08c3be
new mode for tfields filter -- shifts fields by a quarter-pixel so the
rfelker
parents:
9593
diff
changeset
|
435 case 2: |
10049
765c2276aa0c
more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents:
10020
diff
changeset
|
436 case 3: |
765c2276aa0c
more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents:
10020
diff
changeset
|
437 case 4: |
9514 | 438 return vf_next_config(vf,width,height/2,d_width,d_height,flags,outfmt); |
439 case 1: | |
440 return vf_next_config(vf,width,height,d_width,d_height,flags,outfmt); | |
441 } | |
442 return 0; | |
443 } | |
444 | |
445 static void uninit(struct vf_instance_s* vf) | |
446 { | |
447 free(vf->priv); | |
448 } | |
449 | |
450 static int open(vf_instance_t *vf, char* args) | |
451 { | |
452 struct vf_priv_s *p; | |
453 vf->config = config; | |
454 vf->put_image = put_image; | |
10078
379f48cace77
support more image formats. hopefully this bpp handling is correct...
rfelker
parents:
10052
diff
changeset
|
455 //vf->query_format = query_format; |
9514 | 456 vf->uninit = uninit; |
457 vf->default_reqs = VFCAP_ACCEPT_STRIDE; | |
458 vf->priv = p = calloc(1, sizeof(struct vf_priv_s)); | |
15013 | 459 vf->priv->mode = 4; |
14888
32dcf8672086
configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents:
13720
diff
changeset
|
460 vf->priv->parity = -1; |
32dcf8672086
configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents:
13720
diff
changeset
|
461 if (args) sscanf(args, "%d:%d", &vf->priv->mode, &vf->priv->parity); |
10049
765c2276aa0c
more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents:
10020
diff
changeset
|
462 qpel_li = qpel_li_C; |
765c2276aa0c
more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents:
10020
diff
changeset
|
463 qpel_4tap = qpel_4tap_C; |
10020 | 464 #ifdef HAVE_MMX |
10049
765c2276aa0c
more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents:
10020
diff
changeset
|
465 if(gCpuCaps.hasMMX) qpel_li = qpel_li_MMX; |
765c2276aa0c
more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents:
10020
diff
changeset
|
466 if(gCpuCaps.hasMMX) qpel_4tap = qpel_4tap_MMX; |
10020 | 467 #endif |
468 #ifdef HAVE_MMX2 | |
10049
765c2276aa0c
more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents:
10020
diff
changeset
|
469 if(gCpuCaps.hasMMX2) qpel_li = qpel_li_MMX2; |
10020 | 470 #endif |
471 #ifdef HAVE_3DNOW | |
10049
765c2276aa0c
more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents:
10020
diff
changeset
|
472 if(gCpuCaps.has3DNow) qpel_li = qpel_li_3DNOW; |
10020 | 473 #endif |
9514 | 474 return 1; |
475 } | |
476 | |
477 vf_info_t vf_info_tfields = { | |
478 "temporal field separation", | |
479 "tfields", | |
480 "Rich Felker", | |
481 "", | |
9593
e9a2af584986
Add the new -vf option wich is the same as vop in reverse order.
albeu
parents:
9514
diff
changeset
|
482 open, |
e9a2af584986
Add the new -vf option wich is the same as vop in reverse order.
albeu
parents:
9514
diff
changeset
|
483 NULL |
9514 | 484 }; |
485 | |
486 |