annotate libmpcodecs/vf_tfields.c @ 30811:50e0f6942e43

Implement Win32 mutexes. Implement Win32 mutexes; they used to just be mapped on top of events, which is not the same thing at all. The implementation is pretty much the obvious one, similar to the current critical section implementation and the semaphore implementation; a single lock count protected by a pthread mutex, and an event lockers can sleep on to know when the mutex is available. Also make CreateMutexA and ReleaseMutex available even if QuickTime codecs support is not configured.
author sesse
date Sat, 06 Mar 2010 10:13:37 +0000
parents a972c1a4a012
children 3525dc14e3dc
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
30421
bbb6ebec87a0 Add missing license headers to all files in the libmpcodecs directory.
diego
parents: 29639
diff changeset
1 /*
bbb6ebec87a0 Add missing license headers to all files in the libmpcodecs directory.
diego
parents: 29639
diff changeset
2 * This file is part of MPlayer.
bbb6ebec87a0 Add missing license headers to all files in the libmpcodecs directory.
diego
parents: 29639
diff changeset
3 *
bbb6ebec87a0 Add missing license headers to all files in the libmpcodecs directory.
diego
parents: 29639
diff changeset
4 * MPlayer is free software; you can redistribute it and/or modify
bbb6ebec87a0 Add missing license headers to all files in the libmpcodecs directory.
diego
parents: 29639
diff changeset
5 * it under the terms of the GNU General Public License as published by
bbb6ebec87a0 Add missing license headers to all files in the libmpcodecs directory.
diego
parents: 29639
diff changeset
6 * the Free Software Foundation; either version 2 of the License, or
bbb6ebec87a0 Add missing license headers to all files in the libmpcodecs directory.
diego
parents: 29639
diff changeset
7 * (at your option) any later version.
bbb6ebec87a0 Add missing license headers to all files in the libmpcodecs directory.
diego
parents: 29639
diff changeset
8 *
bbb6ebec87a0 Add missing license headers to all files in the libmpcodecs directory.
diego
parents: 29639
diff changeset
9 * MPlayer is distributed in the hope that it will be useful,
bbb6ebec87a0 Add missing license headers to all files in the libmpcodecs directory.
diego
parents: 29639
diff changeset
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
bbb6ebec87a0 Add missing license headers to all files in the libmpcodecs directory.
diego
parents: 29639
diff changeset
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
bbb6ebec87a0 Add missing license headers to all files in the libmpcodecs directory.
diego
parents: 29639
diff changeset
12 * GNU General Public License for more details.
bbb6ebec87a0 Add missing license headers to all files in the libmpcodecs directory.
diego
parents: 29639
diff changeset
13 *
bbb6ebec87a0 Add missing license headers to all files in the libmpcodecs directory.
diego
parents: 29639
diff changeset
14 * You should have received a copy of the GNU General Public License along
bbb6ebec87a0 Add missing license headers to all files in the libmpcodecs directory.
diego
parents: 29639
diff changeset
15 * with MPlayer; if not, write to the Free Software Foundation, Inc.,
bbb6ebec87a0 Add missing license headers to all files in the libmpcodecs directory.
diego
parents: 29639
diff changeset
16 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
bbb6ebec87a0 Add missing license headers to all files in the libmpcodecs directory.
diego
parents: 29639
diff changeset
17 */
bbb6ebec87a0 Add missing license headers to all files in the libmpcodecs directory.
diego
parents: 29639
diff changeset
18
9514
08264c647f46 new filter
rfelker
parents:
diff changeset
19 #include <stdio.h>
08264c647f46 new filter
rfelker
parents:
diff changeset
20 #include <stdlib.h>
08264c647f46 new filter
rfelker
parents:
diff changeset
21 #include <string.h>
08264c647f46 new filter
rfelker
parents:
diff changeset
22
17012
6ff3379a0862 Unify include path handling, -I.. is in CFLAGS.
diego
parents: 15013
diff changeset
23 #include "config.h"
6ff3379a0862 Unify include path handling, -I.. is in CFLAGS.
diego
parents: 15013
diff changeset
24 #include "mp_msg.h"
6ff3379a0862 Unify include path handling, -I.. is in CFLAGS.
diego
parents: 15013
diff changeset
25 #include "cpudetect.h"
9514
08264c647f46 new filter
rfelker
parents:
diff changeset
26
08264c647f46 new filter
rfelker
parents:
diff changeset
27 #include "img_format.h"
08264c647f46 new filter
rfelker
parents:
diff changeset
28 #include "mp_image.h"
08264c647f46 new filter
rfelker
parents:
diff changeset
29 #include "vf.h"
08264c647f46 new filter
rfelker
parents:
diff changeset
30
17012
6ff3379a0862 Unify include path handling, -I.. is in CFLAGS.
diego
parents: 15013
diff changeset
31 #include "libvo/fastmemcpy.h"
9514
08264c647f46 new filter
rfelker
parents:
diff changeset
32
08264c647f46 new filter
rfelker
parents:
diff changeset
33 struct vf_priv_s {
08264c647f46 new filter
rfelker
parents:
diff changeset
34 int mode;
14888
32dcf8672086 configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents: 13720
diff changeset
35 int parity;
18917
d9a75b26da6c Add a new video pts tracking mode, enabled by option -correct-pts.
uau
parents: 17906
diff changeset
36 int buffered_i;
d9a75b26da6c Add a new video pts tracking mode, enabled by option -correct-pts.
uau
parents: 17906
diff changeset
37 mp_image_t *buffered_mpi;
d9a75b26da6c Add a new video pts tracking mode, enabled by option -correct-pts.
uau
parents: 17906
diff changeset
38 double buffered_pts;
9514
08264c647f46 new filter
rfelker
parents:
diff changeset
39 };
08264c647f46 new filter
rfelker
parents:
diff changeset
40
08264c647f46 new filter
rfelker
parents:
diff changeset
41 static void deint(unsigned char *dest, int ds, unsigned char *src, int ss, int w, int h, int field)
08264c647f46 new filter
rfelker
parents:
diff changeset
42 {
08264c647f46 new filter
rfelker
parents:
diff changeset
43 int x, y;
08264c647f46 new filter
rfelker
parents:
diff changeset
44 src += ss;
08264c647f46 new filter
rfelker
parents:
diff changeset
45 dest += ds;
30441
9c15719d8769 Fix -vf tfields=1 so it does not read out of bounds or leave parts of the
reimar
parents: 30421
diff changeset
46 h--;
9514
08264c647f46 new filter
rfelker
parents:
diff changeset
47 if (field) {
30441
9c15719d8769 Fix -vf tfields=1 so it does not read out of bounds or leave parts of the
reimar
parents: 30421
diff changeset
48 fast_memcpy(dest - ds, src - ss, w);
9514
08264c647f46 new filter
rfelker
parents:
diff changeset
49 src += ss;
08264c647f46 new filter
rfelker
parents:
diff changeset
50 dest += ds;
30441
9c15719d8769 Fix -vf tfields=1 so it does not read out of bounds or leave parts of the
reimar
parents: 30421
diff changeset
51 h--;
9514
08264c647f46 new filter
rfelker
parents:
diff changeset
52 }
30441
9c15719d8769 Fix -vf tfields=1 so it does not read out of bounds or leave parts of the
reimar
parents: 30421
diff changeset
53 for (y=h/2; y > 0; y--) {
9c15719d8769 Fix -vf tfields=1 so it does not read out of bounds or leave parts of the
reimar
parents: 30421
diff changeset
54 dest[0] = src[0];
9c15719d8769 Fix -vf tfields=1 so it does not read out of bounds or leave parts of the
reimar
parents: 30421
diff changeset
55 for (x=1; x<w-1; x++) {
9514
08264c647f46 new filter
rfelker
parents:
diff changeset
56 if (((src[x-ss] < src[x]) && (src[x+ss] < src[x])) ||
08264c647f46 new filter
rfelker
parents:
diff changeset
57 ((src[x-ss] > src[x]) && (src[x+ss] > src[x]))) {
08264c647f46 new filter
rfelker
parents:
diff changeset
58 //dest[x] = (src[x+ss] + src[x-ss])>>1;
08264c647f46 new filter
rfelker
parents:
diff changeset
59 dest[x] = ((src[x+ss]<<1) + (src[x-ss]<<1)
08264c647f46 new filter
rfelker
parents:
diff changeset
60 + src[x+ss+1] + src[x-ss+1]
08264c647f46 new filter
rfelker
parents:
diff changeset
61 + src[x+ss-1] + src[x-ss-1])>>3;
08264c647f46 new filter
rfelker
parents:
diff changeset
62 }
08264c647f46 new filter
rfelker
parents:
diff changeset
63 else dest[x] = src[x];
08264c647f46 new filter
rfelker
parents:
diff changeset
64 }
30441
9c15719d8769 Fix -vf tfields=1 so it does not read out of bounds or leave parts of the
reimar
parents: 30421
diff changeset
65 dest[w-1] = src[w-1];
9514
08264c647f46 new filter
rfelker
parents:
diff changeset
66 dest += ds<<1;
08264c647f46 new filter
rfelker
parents:
diff changeset
67 src += ss<<1;
08264c647f46 new filter
rfelker
parents:
diff changeset
68 }
30441
9c15719d8769 Fix -vf tfields=1 so it does not read out of bounds or leave parts of the
reimar
parents: 30421
diff changeset
69 if (h & 1)
9c15719d8769 Fix -vf tfields=1 so it does not read out of bounds or leave parts of the
reimar
parents: 30421
diff changeset
70 fast_memcpy(dest, src, w);
9514
08264c647f46 new filter
rfelker
parents:
diff changeset
71 }
08264c647f46 new filter
rfelker
parents:
diff changeset
72
28335
31287e75b5d8 HAVE_3DNOW --> HAVE_AMD3DNOW
diego
parents: 28290
diff changeset
73 #if HAVE_AMD3DNOW
10049
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
74 static void qpel_li_3DNOW(unsigned char *d, unsigned char *s, int w, int h, int ds, int ss, int up)
10009
69f10d08c3be new mode for tfields filter -- shifts fields by a quarter-pixel so the
rfelker
parents: 9593
diff changeset
75 {
10020
9829b7e61b55 new mmx/mmx2/3dnow code for improved performance
rfelker
parents: 10009
diff changeset
76 int i, j, ssd=ss;
13720
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 10078
diff changeset
77 long crap1, crap2;
10020
9829b7e61b55 new mmx/mmx2/3dnow code for improved performance
rfelker
parents: 10009
diff changeset
78 if (up) {
9829b7e61b55 new mmx/mmx2/3dnow code for improved performance
rfelker
parents: 10009
diff changeset
79 ssd = -ss;
23457
a124f3abc1ec Replace implicit use of fast_memcpy via macro by explicit use to allow
reimar
parents: 18986
diff changeset
80 fast_memcpy(d, s, w);
10020
9829b7e61b55 new mmx/mmx2/3dnow code for improved performance
rfelker
parents: 10009
diff changeset
81 d += ds;
9829b7e61b55 new mmx/mmx2/3dnow code for improved performance
rfelker
parents: 10009
diff changeset
82 s += ss;
9829b7e61b55 new mmx/mmx2/3dnow code for improved performance
rfelker
parents: 10009
diff changeset
83 }
10009
69f10d08c3be new mode for tfields filter -- shifts fields by a quarter-pixel so the
rfelker
parents: 9593
diff changeset
84 for (i=h-1; i; i--) {
27754
08d18fe9da52 Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents: 25221
diff changeset
85 __asm__ volatile(
10020
9829b7e61b55 new mmx/mmx2/3dnow code for improved performance
rfelker
parents: 10009
diff changeset
86 "1: \n\t"
13720
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 10078
diff changeset
87 "movq (%%"REG_S"), %%mm0 \n\t"
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 10078
diff changeset
88 "movq (%%"REG_S",%%"REG_a"), %%mm1 \n\t"
10020
9829b7e61b55 new mmx/mmx2/3dnow code for improved performance
rfelker
parents: 10009
diff changeset
89 "pavgusb %%mm0, %%mm1 \n\t"
13720
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 10078
diff changeset
90 "add $8, %%"REG_S" \n\t"
10020
9829b7e61b55 new mmx/mmx2/3dnow code for improved performance
rfelker
parents: 10009
diff changeset
91 "pavgusb %%mm0, %%mm1 \n\t"
13720
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 10078
diff changeset
92 "movq %%mm1, (%%"REG_D") \n\t"
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 10078
diff changeset
93 "add $8, %%"REG_D" \n\t"
10020
9829b7e61b55 new mmx/mmx2/3dnow code for improved performance
rfelker
parents: 10009
diff changeset
94 "decl %%ecx \n\t"
9829b7e61b55 new mmx/mmx2/3dnow code for improved performance
rfelker
parents: 10009
diff changeset
95 "jnz 1b \n\t"
9829b7e61b55 new mmx/mmx2/3dnow code for improved performance
rfelker
parents: 10009
diff changeset
96 : "=S"(crap1), "=D"(crap2)
13720
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 10078
diff changeset
97 : "c"(w>>3), "S"(s), "D"(d), "a"((long)ssd)
10020
9829b7e61b55 new mmx/mmx2/3dnow code for improved performance
rfelker
parents: 10009
diff changeset
98 );
10049
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
99 for (j=w-(w&7); j<w; j++)
10020
9829b7e61b55 new mmx/mmx2/3dnow code for improved performance
rfelker
parents: 10009
diff changeset
100 d[j] = (s[j+ssd] + 3*s[j])>>2;
10009
69f10d08c3be new mode for tfields filter -- shifts fields by a quarter-pixel so the
rfelker
parents: 9593
diff changeset
101 d += ds;
69f10d08c3be new mode for tfields filter -- shifts fields by a quarter-pixel so the
rfelker
parents: 9593
diff changeset
102 s += ss;
69f10d08c3be new mode for tfields filter -- shifts fields by a quarter-pixel so the
rfelker
parents: 9593
diff changeset
103 }
23457
a124f3abc1ec Replace implicit use of fast_memcpy via macro by explicit use to allow
reimar
parents: 18986
diff changeset
104 if (!up) fast_memcpy(d, s, w);
27754
08d18fe9da52 Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents: 25221
diff changeset
105 __asm__ volatile("emms \n\t" : : : "memory");
10009
69f10d08c3be new mode for tfields filter -- shifts fields by a quarter-pixel so the
rfelker
parents: 9593
diff changeset
106 }
10020
9829b7e61b55 new mmx/mmx2/3dnow code for improved performance
rfelker
parents: 10009
diff changeset
107 #endif
10009
69f10d08c3be new mode for tfields filter -- shifts fields by a quarter-pixel so the
rfelker
parents: 9593
diff changeset
108
28290
25337a2147e7 Lots and lots of #ifdef ARCH_... -> #if ARCH_...
reimar
parents: 28175
diff changeset
109 #if HAVE_MMX2
10049
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
110 static void qpel_li_MMX2(unsigned char *d, unsigned char *s, int w, int h, int ds, int ss, int up)
10009
69f10d08c3be new mode for tfields filter -- shifts fields by a quarter-pixel so the
rfelker
parents: 9593
diff changeset
111 {
10020
9829b7e61b55 new mmx/mmx2/3dnow code for improved performance
rfelker
parents: 10009
diff changeset
112 int i, j, ssd=ss;
13720
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 10078
diff changeset
113 long crap1, crap2;
10020
9829b7e61b55 new mmx/mmx2/3dnow code for improved performance
rfelker
parents: 10009
diff changeset
114 if (up) {
9829b7e61b55 new mmx/mmx2/3dnow code for improved performance
rfelker
parents: 10009
diff changeset
115 ssd = -ss;
23457
a124f3abc1ec Replace implicit use of fast_memcpy via macro by explicit use to allow
reimar
parents: 18986
diff changeset
116 fast_memcpy(d, s, w);
10020
9829b7e61b55 new mmx/mmx2/3dnow code for improved performance
rfelker
parents: 10009
diff changeset
117 d += ds;
9829b7e61b55 new mmx/mmx2/3dnow code for improved performance
rfelker
parents: 10009
diff changeset
118 s += ss;
9829b7e61b55 new mmx/mmx2/3dnow code for improved performance
rfelker
parents: 10009
diff changeset
119 }
10009
69f10d08c3be new mode for tfields filter -- shifts fields by a quarter-pixel so the
rfelker
parents: 9593
diff changeset
120 for (i=h-1; i; i--) {
27754
08d18fe9da52 Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents: 25221
diff changeset
121 __asm__ volatile(
10020
9829b7e61b55 new mmx/mmx2/3dnow code for improved performance
rfelker
parents: 10009
diff changeset
122 "pxor %%mm7, %%mm7 \n\t"
10049
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
123 "2: \n\t"
13720
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 10078
diff changeset
124 "movq (%%"REG_S"), %%mm0 \n\t"
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 10078
diff changeset
125 "movq (%%"REG_S",%%"REG_a"), %%mm1 \n\t"
10020
9829b7e61b55 new mmx/mmx2/3dnow code for improved performance
rfelker
parents: 10009
diff changeset
126 "pavgb %%mm0, %%mm1 \n\t"
13720
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 10078
diff changeset
127 "add $8, %%"REG_S" \n\t"
10020
9829b7e61b55 new mmx/mmx2/3dnow code for improved performance
rfelker
parents: 10009
diff changeset
128 "pavgb %%mm0, %%mm1 \n\t"
13720
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 10078
diff changeset
129 "movq %%mm1, (%%"REG_D") \n\t"
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 10078
diff changeset
130 "add $8, %%"REG_D" \n\t"
10020
9829b7e61b55 new mmx/mmx2/3dnow code for improved performance
rfelker
parents: 10009
diff changeset
131 "decl %%ecx \n\t"
10049
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
132 "jnz 2b \n\t"
10020
9829b7e61b55 new mmx/mmx2/3dnow code for improved performance
rfelker
parents: 10009
diff changeset
133 : "=S"(crap1), "=D"(crap2)
13720
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 10078
diff changeset
134 : "c"(w>>3), "S"(s), "D"(d), "a"((long)ssd)
10020
9829b7e61b55 new mmx/mmx2/3dnow code for improved performance
rfelker
parents: 10009
diff changeset
135 );
10049
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
136 for (j=w-(w&7); j<w; j++)
10020
9829b7e61b55 new mmx/mmx2/3dnow code for improved performance
rfelker
parents: 10009
diff changeset
137 d[j] = (s[j+ssd] + 3*s[j])>>2;
10009
69f10d08c3be new mode for tfields filter -- shifts fields by a quarter-pixel so the
rfelker
parents: 9593
diff changeset
138 d += ds;
69f10d08c3be new mode for tfields filter -- shifts fields by a quarter-pixel so the
rfelker
parents: 9593
diff changeset
139 s += ss;
69f10d08c3be new mode for tfields filter -- shifts fields by a quarter-pixel so the
rfelker
parents: 9593
diff changeset
140 }
23457
a124f3abc1ec Replace implicit use of fast_memcpy via macro by explicit use to allow
reimar
parents: 18986
diff changeset
141 if (!up) fast_memcpy(d, s, w);
27754
08d18fe9da52 Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents: 25221
diff changeset
142 __asm__ volatile("emms \n\t" : : : "memory");
10020
9829b7e61b55 new mmx/mmx2/3dnow code for improved performance
rfelker
parents: 10009
diff changeset
143 }
9829b7e61b55 new mmx/mmx2/3dnow code for improved performance
rfelker
parents: 10009
diff changeset
144 #endif
9829b7e61b55 new mmx/mmx2/3dnow code for improved performance
rfelker
parents: 10009
diff changeset
145
28290
25337a2147e7 Lots and lots of #ifdef ARCH_... -> #if ARCH_...
reimar
parents: 28175
diff changeset
146 #if HAVE_MMX
10049
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
147 static void qpel_li_MMX(unsigned char *d, unsigned char *s, int w, int h, int ds, int ss, int up)
10020
9829b7e61b55 new mmx/mmx2/3dnow code for improved performance
rfelker
parents: 10009
diff changeset
148 {
9829b7e61b55 new mmx/mmx2/3dnow code for improved performance
rfelker
parents: 10009
diff changeset
149 int i, j, ssd=ss;
9829b7e61b55 new mmx/mmx2/3dnow code for improved performance
rfelker
parents: 10009
diff changeset
150 int crap1, crap2;
9829b7e61b55 new mmx/mmx2/3dnow code for improved performance
rfelker
parents: 10009
diff changeset
151 if (up) {
9829b7e61b55 new mmx/mmx2/3dnow code for improved performance
rfelker
parents: 10009
diff changeset
152 ssd = -ss;
23457
a124f3abc1ec Replace implicit use of fast_memcpy via macro by explicit use to allow
reimar
parents: 18986
diff changeset
153 fast_memcpy(d, s, w);
10020
9829b7e61b55 new mmx/mmx2/3dnow code for improved performance
rfelker
parents: 10009
diff changeset
154 d += ds;
9829b7e61b55 new mmx/mmx2/3dnow code for improved performance
rfelker
parents: 10009
diff changeset
155 s += ss;
9829b7e61b55 new mmx/mmx2/3dnow code for improved performance
rfelker
parents: 10009
diff changeset
156 }
9829b7e61b55 new mmx/mmx2/3dnow code for improved performance
rfelker
parents: 10009
diff changeset
157 for (i=h-1; i; i--) {
27754
08d18fe9da52 Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents: 25221
diff changeset
158 __asm__ volatile(
10020
9829b7e61b55 new mmx/mmx2/3dnow code for improved performance
rfelker
parents: 10009
diff changeset
159 "pxor %%mm7, %%mm7 \n\t"
10049
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
160 "3: \n\t"
13720
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 10078
diff changeset
161 "movq (%%"REG_S"), %%mm0 \n\t"
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 10078
diff changeset
162 "movq (%%"REG_S"), %%mm1 \n\t"
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 10078
diff changeset
163 "movq (%%"REG_S",%%"REG_a"), %%mm2 \n\t"
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 10078
diff changeset
164 "movq (%%"REG_S",%%"REG_a"), %%mm3 \n\t"
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 10078
diff changeset
165 "add $8, %%"REG_S" \n\t"
10020
9829b7e61b55 new mmx/mmx2/3dnow code for improved performance
rfelker
parents: 10009
diff changeset
166 "punpcklbw %%mm7, %%mm0 \n\t"
9829b7e61b55 new mmx/mmx2/3dnow code for improved performance
rfelker
parents: 10009
diff changeset
167 "punpckhbw %%mm7, %%mm1 \n\t"
9829b7e61b55 new mmx/mmx2/3dnow code for improved performance
rfelker
parents: 10009
diff changeset
168 "punpcklbw %%mm7, %%mm2 \n\t"
9829b7e61b55 new mmx/mmx2/3dnow code for improved performance
rfelker
parents: 10009
diff changeset
169 "punpckhbw %%mm7, %%mm3 \n\t"
9829b7e61b55 new mmx/mmx2/3dnow code for improved performance
rfelker
parents: 10009
diff changeset
170 "paddw %%mm0, %%mm2 \n\t"
9829b7e61b55 new mmx/mmx2/3dnow code for improved performance
rfelker
parents: 10009
diff changeset
171 "paddw %%mm1, %%mm3 \n\t"
9829b7e61b55 new mmx/mmx2/3dnow code for improved performance
rfelker
parents: 10009
diff changeset
172 "paddw %%mm0, %%mm2 \n\t"
9829b7e61b55 new mmx/mmx2/3dnow code for improved performance
rfelker
parents: 10009
diff changeset
173 "paddw %%mm1, %%mm3 \n\t"
9829b7e61b55 new mmx/mmx2/3dnow code for improved performance
rfelker
parents: 10009
diff changeset
174 "paddw %%mm0, %%mm2 \n\t"
9829b7e61b55 new mmx/mmx2/3dnow code for improved performance
rfelker
parents: 10009
diff changeset
175 "paddw %%mm1, %%mm3 \n\t"
9829b7e61b55 new mmx/mmx2/3dnow code for improved performance
rfelker
parents: 10009
diff changeset
176 "psrlw $2, %%mm2 \n\t"
9829b7e61b55 new mmx/mmx2/3dnow code for improved performance
rfelker
parents: 10009
diff changeset
177 "psrlw $2, %%mm3 \n\t"
9829b7e61b55 new mmx/mmx2/3dnow code for improved performance
rfelker
parents: 10009
diff changeset
178 "packsswb %%mm3, %%mm2 \n\t"
13720
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 10078
diff changeset
179 "movq %%mm2, (%%"REG_D") \n\t"
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 10078
diff changeset
180 "add $8, %%"REG_D" \n\t"
10020
9829b7e61b55 new mmx/mmx2/3dnow code for improved performance
rfelker
parents: 10009
diff changeset
181 "decl %%ecx \n\t"
10049
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
182 "jnz 3b \n\t"
10020
9829b7e61b55 new mmx/mmx2/3dnow code for improved performance
rfelker
parents: 10009
diff changeset
183 : "=S"(crap1), "=D"(crap2)
13720
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 10078
diff changeset
184 : "c"(w>>3), "S"(s), "D"(d), "a"((long)ssd)
10020
9829b7e61b55 new mmx/mmx2/3dnow code for improved performance
rfelker
parents: 10009
diff changeset
185 );
10049
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
186 for (j=w-(w&7); j<w; j++)
10020
9829b7e61b55 new mmx/mmx2/3dnow code for improved performance
rfelker
parents: 10009
diff changeset
187 d[j] = (s[j+ssd] + 3*s[j])>>2;
9829b7e61b55 new mmx/mmx2/3dnow code for improved performance
rfelker
parents: 10009
diff changeset
188 d += ds;
9829b7e61b55 new mmx/mmx2/3dnow code for improved performance
rfelker
parents: 10009
diff changeset
189 s += ss;
9829b7e61b55 new mmx/mmx2/3dnow code for improved performance
rfelker
parents: 10009
diff changeset
190 }
23457
a124f3abc1ec Replace implicit use of fast_memcpy via macro by explicit use to allow
reimar
parents: 18986
diff changeset
191 if (!up) fast_memcpy(d, s, w);
27754
08d18fe9da52 Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents: 25221
diff changeset
192 __asm__ volatile("emms \n\t" : : : "memory");
10020
9829b7e61b55 new mmx/mmx2/3dnow code for improved performance
rfelker
parents: 10009
diff changeset
193 }
10049
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
194
29639
26a355ffe458 Add several HAVE_EBX_AVAILABLE conditions where necessary
reimar
parents: 29263
diff changeset
195 #if HAVE_EBX_AVAILABLE
10049
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
196 static void qpel_4tap_MMX(unsigned char *d, unsigned char *s, int w, int h, int ds, int ss, int up)
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
197 {
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
198 int i, j, ssd=ss;
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
199 static const short filter[] = {
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
200 29, 29, 29, 29, 110, 110, 110, 110,
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
201 9, 9, 9, 9, 3, 3, 3, 3,
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
202 64, 64, 64, 64 };
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
203 int crap1, crap2;
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
204 if (up) {
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
205 ssd = -ss;
23457
a124f3abc1ec Replace implicit use of fast_memcpy via macro by explicit use to allow
reimar
parents: 18986
diff changeset
206 fast_memcpy(d, s, w);
10049
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
207 d += ds; s += ss;
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
208 }
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
209 for (j=0; j<w; j++)
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
210 d[j] = (s[j+ssd] + 3*s[j])>>2;
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
211 d += ds; s += ss;
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
212 for (i=h-3; i; i--) {
27754
08d18fe9da52 Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents: 25221
diff changeset
213 __asm__ volatile(
10049
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
214 "pxor %%mm0, %%mm0 \n\t"
13720
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 10078
diff changeset
215 "movq (%%"REG_d"), %%mm4 \n\t"
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 10078
diff changeset
216 "movq 8(%%"REG_d"), %%mm5 \n\t"
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 10078
diff changeset
217 "movq 16(%%"REG_d"), %%mm6 \n\t"
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 10078
diff changeset
218 "movq 24(%%"REG_d"), %%mm7 \n\t"
10049
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
219 "4: \n\t"
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
220
13720
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 10078
diff changeset
221 "movq (%%"REG_S",%%"REG_a"), %%mm1 \n\t"
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 10078
diff changeset
222 "movq (%%"REG_S"), %%mm2 \n\t"
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 10078
diff changeset
223 "movq (%%"REG_S",%%"REG_b"), %%mm3 \n\t"
10049
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
224 "punpcklbw %%mm0, %%mm1 \n\t"
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
225 "punpcklbw %%mm0, %%mm2 \n\t"
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
226 "pmullw %%mm4, %%mm1 \n\t"
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
227 "punpcklbw %%mm0, %%mm3 \n\t"
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
228 "pmullw %%mm5, %%mm2 \n\t"
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
229 "paddusw %%mm2, %%mm1 \n\t"
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
230 "pmullw %%mm6, %%mm3 \n\t"
13720
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 10078
diff changeset
231 "movq (%%"REG_S",%%"REG_a",2), %%mm2 \n\t"
10049
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
232 "psubusw %%mm3, %%mm1 \n\t"
29263
0f1b5b68af32 whitespace cosmetics: Remove all trailing whitespace.
diego
parents: 28335
diff changeset
233 "punpcklbw %%mm0, %%mm2 \n\t"
10049
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
234 "pmullw %%mm7, %%mm2 \n\t"
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
235 "psubusw %%mm2, %%mm1 \n\t"
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
236 "psrlw $7, %%mm1 \n\t"
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
237
13720
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 10078
diff changeset
238 "movq (%%"REG_S",%%"REG_a"), %%mm2 \n\t"
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 10078
diff changeset
239 "movq (%%"REG_S"), %%mm3 \n\t"
10049
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
240 "punpckhbw %%mm0, %%mm2 \n\t"
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
241 "punpckhbw %%mm0, %%mm3 \n\t"
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
242 "pmullw %%mm4, %%mm2 \n\t"
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
243 "pmullw %%mm5, %%mm3 \n\t"
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
244 "paddusw %%mm3, %%mm2 \n\t"
13720
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 10078
diff changeset
245 "movq (%%"REG_S",%%"REG_b"), %%mm3 \n\t"
10049
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
246 "punpckhbw %%mm0, %%mm3 \n\t"
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
247 "pmullw %%mm6, %%mm3 \n\t"
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
248 "psubusw %%mm3, %%mm2 \n\t"
13720
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 10078
diff changeset
249 "movq (%%"REG_S",%%"REG_a",2), %%mm3 \n\t"
29263
0f1b5b68af32 whitespace cosmetics: Remove all trailing whitespace.
diego
parents: 28335
diff changeset
250 "punpckhbw %%mm0, %%mm3 \n\t"
13720
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 10078
diff changeset
251 "add $8, %%"REG_S" \n\t"
10049
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
252 "pmullw %%mm7, %%mm3 \n\t"
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
253 "psubusw %%mm3, %%mm2 \n\t"
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
254 "psrlw $7, %%mm2 \n\t"
29263
0f1b5b68af32 whitespace cosmetics: Remove all trailing whitespace.
diego
parents: 28335
diff changeset
255
10049
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
256 "packuswb %%mm2, %%mm1 \n\t"
13720
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 10078
diff changeset
257 "movq %%mm1, (%%"REG_D") \n\t"
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 10078
diff changeset
258 "add $8, %%"REG_D" \n\t"
10049
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
259 "decl %%ecx \n\t"
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
260 "jnz 4b \n\t"
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
261 : "=S"(crap1), "=D"(crap2)
13720
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 10078
diff changeset
262 : "c"(w>>3), "S"(s), "D"(d), "a"((long)ssd), "b"((long)-ssd), "d"(filter)
10049
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
263 );
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
264 for (j=w-(w&7); j<w; j++)
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
265 d[j] = (-9*s[j-ssd] + 111*s[j] + 29*s[j+ssd] - 3*s[j+ssd+ssd])>>7;
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
266 d += ds;
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
267 s += ss;
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
268 }
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
269 for (j=0; j<w; j++)
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
270 d[j] = (s[j+ssd] + 3*s[j])>>2;
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
271 d += ds; s += ss;
23457
a124f3abc1ec Replace implicit use of fast_memcpy via macro by explicit use to allow
reimar
parents: 18986
diff changeset
272 if (!up) fast_memcpy(d, s, w);
27754
08d18fe9da52 Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents: 25221
diff changeset
273 __asm__ volatile("emms \n\t" : : : "memory");
10049
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
274 }
29639
26a355ffe458 Add several HAVE_EBX_AVAILABLE conditions where necessary
reimar
parents: 29263
diff changeset
275 #endif /* HAVE_EBX_AVAILABLE */
10020
9829b7e61b55 new mmx/mmx2/3dnow code for improved performance
rfelker
parents: 10009
diff changeset
276 #endif
9829b7e61b55 new mmx/mmx2/3dnow code for improved performance
rfelker
parents: 10009
diff changeset
277
10049
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
278 static inline int clamp(int a)
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
279 {
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
280 // If a<512, this is equivalent to:
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
281 // return (a<0) ? 0 : ( (a>255) ? 255 : a);
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
282 return (~(a>>31)) & (a | ((a<<23)>>31));
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
283 }
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
284
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
285 static void qpel_li_C(unsigned char *d, unsigned char *s, int w, int h, int ds, int ss, int up)
10020
9829b7e61b55 new mmx/mmx2/3dnow code for improved performance
rfelker
parents: 10009
diff changeset
286 {
9829b7e61b55 new mmx/mmx2/3dnow code for improved performance
rfelker
parents: 10009
diff changeset
287 int i, j, ssd=ss;
9829b7e61b55 new mmx/mmx2/3dnow code for improved performance
rfelker
parents: 10009
diff changeset
288 if (up) {
9829b7e61b55 new mmx/mmx2/3dnow code for improved performance
rfelker
parents: 10009
diff changeset
289 ssd = -ss;
23457
a124f3abc1ec Replace implicit use of fast_memcpy via macro by explicit use to allow
reimar
parents: 18986
diff changeset
290 fast_memcpy(d, s, w);
10020
9829b7e61b55 new mmx/mmx2/3dnow code for improved performance
rfelker
parents: 10009
diff changeset
291 d += ds;
9829b7e61b55 new mmx/mmx2/3dnow code for improved performance
rfelker
parents: 10009
diff changeset
292 s += ss;
9829b7e61b55 new mmx/mmx2/3dnow code for improved performance
rfelker
parents: 10009
diff changeset
293 }
9829b7e61b55 new mmx/mmx2/3dnow code for improved performance
rfelker
parents: 10009
diff changeset
294 for (i=h-1; i; i--) {
9829b7e61b55 new mmx/mmx2/3dnow code for improved performance
rfelker
parents: 10009
diff changeset
295 for (j=0; j<w; j++)
9829b7e61b55 new mmx/mmx2/3dnow code for improved performance
rfelker
parents: 10009
diff changeset
296 d[j] = (s[j+ssd] + 3*s[j])>>2;
9829b7e61b55 new mmx/mmx2/3dnow code for improved performance
rfelker
parents: 10009
diff changeset
297 d += ds;
9829b7e61b55 new mmx/mmx2/3dnow code for improved performance
rfelker
parents: 10009
diff changeset
298 s += ss;
9829b7e61b55 new mmx/mmx2/3dnow code for improved performance
rfelker
parents: 10009
diff changeset
299 }
23457
a124f3abc1ec Replace implicit use of fast_memcpy via macro by explicit use to allow
reimar
parents: 18986
diff changeset
300 if (!up) fast_memcpy(d, s, w);
10009
69f10d08c3be new mode for tfields filter -- shifts fields by a quarter-pixel so the
rfelker
parents: 9593
diff changeset
301 }
9514
08264c647f46 new filter
rfelker
parents:
diff changeset
302
10049
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
303 static void qpel_4tap_C(unsigned char *d, unsigned char *s, int w, int h, int ds, int ss, int up)
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
304 {
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
305 int i, j, ssd=ss;
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
306 if (up) {
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
307 ssd = -ss;
23457
a124f3abc1ec Replace implicit use of fast_memcpy via macro by explicit use to allow
reimar
parents: 18986
diff changeset
308 fast_memcpy(d, s, w);
10049
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
309 d += ds; s += ss;
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
310 }
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
311 for (j=0; j<w; j++)
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
312 d[j] = (s[j+ssd] + 3*s[j] + 2)>>2;
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
313 d += ds; s += ss;
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
314 for (i=h-3; i; i--) {
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
315 for (j=0; j<w; j++)
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
316 d[j] = clamp((-9*s[j-ssd] + 111*s[j] + 29*s[j+ssd] - 3*s[j+ssd+ssd] + 64)>>7);
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
317 d += ds; s += ss;
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
318 }
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
319 for (j=0; j<w; j++)
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
320 d[j] = (s[j+ssd] + 3*s[j] + 2)>>2;
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
321 d += ds; s += ss;
23457
a124f3abc1ec Replace implicit use of fast_memcpy via macro by explicit use to allow
reimar
parents: 18986
diff changeset
322 if (!up) fast_memcpy(d, s, w);
10049
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
323 }
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
324
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
325 static void (*qpel_li)(unsigned char *d, unsigned char *s, int w, int h, int ds, int ss, int up);
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
326 static void (*qpel_4tap)(unsigned char *d, unsigned char *s, int w, int h, int ds, int ss, int up);
9514
08264c647f46 new filter
rfelker
parents:
diff changeset
327
30642
a972c1a4a012 cosmetics: Rename struct vf_instance_s --> vf_instance.
diego
parents: 30638
diff changeset
328 static int continue_buffered_image(struct vf_instance *vf);
18917
d9a75b26da6c Add a new video pts tracking mode, enabled by option -correct-pts.
uau
parents: 17906
diff changeset
329 extern int correct_pts;
d9a75b26da6c Add a new video pts tracking mode, enabled by option -correct-pts.
uau
parents: 17906
diff changeset
330
30642
a972c1a4a012 cosmetics: Rename struct vf_instance_s --> vf_instance.
diego
parents: 30638
diff changeset
331 static int put_image(struct vf_instance *vf, mp_image_t *mpi, double pts)
9514
08264c647f46 new filter
rfelker
parents:
diff changeset
332 {
18917
d9a75b26da6c Add a new video pts tracking mode, enabled by option -correct-pts.
uau
parents: 17906
diff changeset
333 vf->priv->buffered_mpi = mpi;
d9a75b26da6c Add a new video pts tracking mode, enabled by option -correct-pts.
uau
parents: 17906
diff changeset
334 vf->priv->buffered_pts = pts;
d9a75b26da6c Add a new video pts tracking mode, enabled by option -correct-pts.
uau
parents: 17906
diff changeset
335 vf->priv->buffered_i = 0;
d9a75b26da6c Add a new video pts tracking mode, enabled by option -correct-pts.
uau
parents: 17906
diff changeset
336 return continue_buffered_image(vf);
d9a75b26da6c Add a new video pts tracking mode, enabled by option -correct-pts.
uau
parents: 17906
diff changeset
337 }
d9a75b26da6c Add a new video pts tracking mode, enabled by option -correct-pts.
uau
parents: 17906
diff changeset
338
30443
f60a1db3aee4 Estimate pts of additional frame generated by tfields also if correct-pts is
reimar
parents: 30442
diff changeset
339 static double calc_pts(double base_pts, int field)
f60a1db3aee4 Estimate pts of additional frame generated by tfields also if correct-pts is
reimar
parents: 30442
diff changeset
340 {
f60a1db3aee4 Estimate pts of additional frame generated by tfields also if correct-pts is
reimar
parents: 30442
diff changeset
341 // FIXME this assumes 25 fps / 50 fields per second
f60a1db3aee4 Estimate pts of additional frame generated by tfields also if correct-pts is
reimar
parents: 30442
diff changeset
342 return base_pts + 0.02 * field;
f60a1db3aee4 Estimate pts of additional frame generated by tfields also if correct-pts is
reimar
parents: 30442
diff changeset
343 }
f60a1db3aee4 Estimate pts of additional frame generated by tfields also if correct-pts is
reimar
parents: 30442
diff changeset
344
30642
a972c1a4a012 cosmetics: Rename struct vf_instance_s --> vf_instance.
diego
parents: 30638
diff changeset
345 static int continue_buffered_image(struct vf_instance *vf)
18917
d9a75b26da6c Add a new video pts tracking mode, enabled by option -correct-pts.
uau
parents: 17906
diff changeset
346 {
d9a75b26da6c Add a new video pts tracking mode, enabled by option -correct-pts.
uau
parents: 17906
diff changeset
347 int i=vf->priv->buffered_i;
d9a75b26da6c Add a new video pts tracking mode, enabled by option -correct-pts.
uau
parents: 17906
diff changeset
348 double pts = vf->priv->buffered_pts;
d9a75b26da6c Add a new video pts tracking mode, enabled by option -correct-pts.
uau
parents: 17906
diff changeset
349 mp_image_t *mpi = vf->priv->buffered_mpi;
14888
32dcf8672086 configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents: 13720
diff changeset
350 int ret=0;
9514
08264c647f46 new filter
rfelker
parents:
diff changeset
351 mp_image_t *dmpi;
10049
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
352 void (*qpel)(unsigned char *, unsigned char *, int, int, int, int, int);
10078
379f48cace77 support more image formats. hopefully this bpp handling is correct...
rfelker
parents: 10052
diff changeset
353 int bpp=1;
14888
32dcf8672086 configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents: 13720
diff changeset
354 int tff;
10078
379f48cace77 support more image formats. hopefully this bpp handling is correct...
rfelker
parents: 10052
diff changeset
355
18986
d743c48823cc c++ decls, 100000000000l to whoever broke my code like this..
rfelker
parents: 18917
diff changeset
356 if (i == 0)
d743c48823cc c++ decls, 100000000000l to whoever broke my code like this..
rfelker
parents: 18917
diff changeset
357 vf_queue_frame(vf, continue_buffered_image);
d743c48823cc c++ decls, 100000000000l to whoever broke my code like this..
rfelker
parents: 18917
diff changeset
358
10078
379f48cace77 support more image formats. hopefully this bpp handling is correct...
rfelker
parents: 10052
diff changeset
359 if (!(mpi->flags & MP_IMGFLAG_PLANAR)) bpp = mpi->bpp/8;
14888
32dcf8672086 configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents: 13720
diff changeset
360 if (vf->priv->parity < 0) {
32dcf8672086 configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents: 13720
diff changeset
361 if (mpi->fields & MP_IMGFIELD_ORDERED)
32dcf8672086 configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents: 13720
diff changeset
362 tff = mpi->fields & MP_IMGFIELD_TOP_FIRST;
32dcf8672086 configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents: 13720
diff changeset
363 else
32dcf8672086 configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents: 13720
diff changeset
364 tff = 1;
32dcf8672086 configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents: 13720
diff changeset
365 }
32dcf8672086 configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents: 13720
diff changeset
366 else tff = (vf->priv->parity&1)^1;
10049
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
367
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
368 switch (vf->priv->mode) {
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
369 case 2:
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
370 qpel = qpel_li;
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
371 break;
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
372 case 3:
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
373 // TODO: add 3tap filter
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
374 qpel = qpel_4tap;
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
375 break;
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
376 case 4:
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
377 qpel = qpel_4tap;
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
378 break;
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
379 }
9514
08264c647f46 new filter
rfelker
parents:
diff changeset
380
08264c647f46 new filter
rfelker
parents:
diff changeset
381 switch (vf->priv->mode) {
08264c647f46 new filter
rfelker
parents:
diff changeset
382 case 0:
18917
d9a75b26da6c Add a new video pts tracking mode, enabled by option -correct-pts.
uau
parents: 17906
diff changeset
383 for (; i<2; i++) {
14888
32dcf8672086 configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents: 13720
diff changeset
384 dmpi = vf_get_image(vf->next, mpi->imgfmt,
32dcf8672086 configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents: 13720
diff changeset
385 MP_IMGTYPE_EXPORT, MP_IMGFLAG_ACCEPT_STRIDE,
32dcf8672086 configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents: 13720
diff changeset
386 mpi->width, mpi->height/2);
32dcf8672086 configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents: 13720
diff changeset
387 dmpi->planes[0] = mpi->planes[0] + (i^!tff)*mpi->stride[0];
32dcf8672086 configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents: 13720
diff changeset
388 dmpi->stride[0] = 2*mpi->stride[0];
32dcf8672086 configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents: 13720
diff changeset
389 if (mpi->flags & MP_IMGFLAG_PLANAR) {
32dcf8672086 configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents: 13720
diff changeset
390 dmpi->planes[1] = mpi->planes[1] + (i^!tff)*mpi->stride[1];
32dcf8672086 configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents: 13720
diff changeset
391 dmpi->planes[2] = mpi->planes[2] + (i^!tff)*mpi->stride[2];
32dcf8672086 configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents: 13720
diff changeset
392 dmpi->stride[1] = 2*mpi->stride[1];
32dcf8672086 configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents: 13720
diff changeset
393 dmpi->stride[2] = 2*mpi->stride[2];
32dcf8672086 configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents: 13720
diff changeset
394 }
30443
f60a1db3aee4 Estimate pts of additional frame generated by tfields also if correct-pts is
reimar
parents: 30442
diff changeset
395 ret |= vf_next_put_image(vf, dmpi, calc_pts(pts, i));
18917
d9a75b26da6c Add a new video pts tracking mode, enabled by option -correct-pts.
uau
parents: 17906
diff changeset
396 if (correct_pts)
d9a75b26da6c Add a new video pts tracking mode, enabled by option -correct-pts.
uau
parents: 17906
diff changeset
397 break;
d9a75b26da6c Add a new video pts tracking mode, enabled by option -correct-pts.
uau
parents: 17906
diff changeset
398 else
28175
60402016152c Fix OSD flicker with tfields as well.
reimar
parents: 27754
diff changeset
399 if (!i) vf_extra_flip(vf);
9514
08264c647f46 new filter
rfelker
parents:
diff changeset
400 }
14888
32dcf8672086 configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents: 13720
diff changeset
401 break;
9514
08264c647f46 new filter
rfelker
parents:
diff changeset
402 case 1:
18917
d9a75b26da6c Add a new video pts tracking mode, enabled by option -correct-pts.
uau
parents: 17906
diff changeset
403 for (; i<2; i++) {
14888
32dcf8672086 configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents: 13720
diff changeset
404 dmpi = vf_get_image(vf->next, mpi->imgfmt,
32dcf8672086 configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents: 13720
diff changeset
405 MP_IMGTYPE_TEMP, MP_IMGFLAG_ACCEPT_STRIDE,
32dcf8672086 configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents: 13720
diff changeset
406 mpi->width, mpi->height);
32dcf8672086 configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents: 13720
diff changeset
407 my_memcpy_pic(dmpi->planes[0] + (i^!tff)*dmpi->stride[0],
32dcf8672086 configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents: 13720
diff changeset
408 mpi->planes[0] + (i^!tff)*mpi->stride[0],
32dcf8672086 configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents: 13720
diff changeset
409 mpi->w*bpp, mpi->h/2, dmpi->stride[0]*2, mpi->stride[0]*2);
32dcf8672086 configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents: 13720
diff changeset
410 deint(dmpi->planes[0], dmpi->stride[0], mpi->planes[0], mpi->stride[0], mpi->w, mpi->h, (i^!tff));
32dcf8672086 configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents: 13720
diff changeset
411 if (mpi->flags & MP_IMGFLAG_PLANAR) {
32dcf8672086 configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents: 13720
diff changeset
412 my_memcpy_pic(dmpi->planes[1] + (i^!tff)*dmpi->stride[1],
32dcf8672086 configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents: 13720
diff changeset
413 mpi->planes[1] + (i^!tff)*mpi->stride[1],
32dcf8672086 configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents: 13720
diff changeset
414 mpi->chroma_width, mpi->chroma_height/2,
32dcf8672086 configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents: 13720
diff changeset
415 dmpi->stride[1]*2, mpi->stride[1]*2);
32dcf8672086 configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents: 13720
diff changeset
416 my_memcpy_pic(dmpi->planes[2] + (i^!tff)*dmpi->stride[2],
32dcf8672086 configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents: 13720
diff changeset
417 mpi->planes[2] + (i^!tff)*mpi->stride[2],
32dcf8672086 configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents: 13720
diff changeset
418 mpi->chroma_width, mpi->chroma_height/2,
32dcf8672086 configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents: 13720
diff changeset
419 dmpi->stride[2]*2, mpi->stride[2]*2);
32dcf8672086 configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents: 13720
diff changeset
420 deint(dmpi->planes[1], dmpi->stride[1], mpi->planes[1], mpi->stride[1],
32dcf8672086 configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents: 13720
diff changeset
421 mpi->chroma_width, mpi->chroma_height, (i^!tff));
32dcf8672086 configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents: 13720
diff changeset
422 deint(dmpi->planes[2], dmpi->stride[2], mpi->planes[2], mpi->stride[2],
32dcf8672086 configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents: 13720
diff changeset
423 mpi->chroma_width, mpi->chroma_height, (i^!tff));
32dcf8672086 configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents: 13720
diff changeset
424 }
30443
f60a1db3aee4 Estimate pts of additional frame generated by tfields also if correct-pts is
reimar
parents: 30442
diff changeset
425 ret |= vf_next_put_image(vf, dmpi, calc_pts(pts, i));
18917
d9a75b26da6c Add a new video pts tracking mode, enabled by option -correct-pts.
uau
parents: 17906
diff changeset
426 if (correct_pts)
d9a75b26da6c Add a new video pts tracking mode, enabled by option -correct-pts.
uau
parents: 17906
diff changeset
427 break;
d9a75b26da6c Add a new video pts tracking mode, enabled by option -correct-pts.
uau
parents: 17906
diff changeset
428 else
28175
60402016152c Fix OSD flicker with tfields as well.
reimar
parents: 27754
diff changeset
429 if (!i) vf_extra_flip(vf);
9514
08264c647f46 new filter
rfelker
parents:
diff changeset
430 }
14888
32dcf8672086 configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents: 13720
diff changeset
431 break;
10009
69f10d08c3be new mode for tfields filter -- shifts fields by a quarter-pixel so the
rfelker
parents: 9593
diff changeset
432 case 2:
10049
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
433 case 3:
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
434 case 4:
18917
d9a75b26da6c Add a new video pts tracking mode, enabled by option -correct-pts.
uau
parents: 17906
diff changeset
435 for (; i<2; i++) {
14888
32dcf8672086 configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents: 13720
diff changeset
436 dmpi = vf_get_image(vf->next, mpi->imgfmt,
32dcf8672086 configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents: 13720
diff changeset
437 MP_IMGTYPE_TEMP, MP_IMGFLAG_ACCEPT_STRIDE,
32dcf8672086 configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents: 13720
diff changeset
438 mpi->width, mpi->height/2);
32dcf8672086 configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents: 13720
diff changeset
439 qpel(dmpi->planes[0], mpi->planes[0] + (i^!tff)*mpi->stride[0],
15012
ad8815f740d3 1000l, last commit broke qpel interp entirely
rfelker
parents: 14888
diff changeset
440 mpi->w*bpp, mpi->h/2, dmpi->stride[0], mpi->stride[0]*2, (i^!tff));
14888
32dcf8672086 configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents: 13720
diff changeset
441 if (mpi->flags & MP_IMGFLAG_PLANAR) {
32dcf8672086 configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents: 13720
diff changeset
442 qpel(dmpi->planes[1],
32dcf8672086 configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents: 13720
diff changeset
443 mpi->planes[1] + (i^!tff)*mpi->stride[1],
32dcf8672086 configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents: 13720
diff changeset
444 mpi->chroma_width, mpi->chroma_height/2,
15012
ad8815f740d3 1000l, last commit broke qpel interp entirely
rfelker
parents: 14888
diff changeset
445 dmpi->stride[1], mpi->stride[1]*2, (i^!tff));
14888
32dcf8672086 configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents: 13720
diff changeset
446 qpel(dmpi->planes[2],
32dcf8672086 configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents: 13720
diff changeset
447 mpi->planes[2] + (i^!tff)*mpi->stride[2],
32dcf8672086 configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents: 13720
diff changeset
448 mpi->chroma_width, mpi->chroma_height/2,
15012
ad8815f740d3 1000l, last commit broke qpel interp entirely
rfelker
parents: 14888
diff changeset
449 dmpi->stride[2], mpi->stride[2]*2, (i^!tff));
14888
32dcf8672086 configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents: 13720
diff changeset
450 }
30443
f60a1db3aee4 Estimate pts of additional frame generated by tfields also if correct-pts is
reimar
parents: 30442
diff changeset
451 ret |= vf_next_put_image(vf, dmpi, calc_pts(pts, i));
18917
d9a75b26da6c Add a new video pts tracking mode, enabled by option -correct-pts.
uau
parents: 17906
diff changeset
452 if (correct_pts)
d9a75b26da6c Add a new video pts tracking mode, enabled by option -correct-pts.
uau
parents: 17906
diff changeset
453 break;
d9a75b26da6c Add a new video pts tracking mode, enabled by option -correct-pts.
uau
parents: 17906
diff changeset
454 else
28175
60402016152c Fix OSD flicker with tfields as well.
reimar
parents: 27754
diff changeset
455 if (!i) vf_extra_flip(vf);
10009
69f10d08c3be new mode for tfields filter -- shifts fields by a quarter-pixel so the
rfelker
parents: 9593
diff changeset
456 }
14888
32dcf8672086 configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents: 13720
diff changeset
457 break;
9514
08264c647f46 new filter
rfelker
parents:
diff changeset
458 }
18917
d9a75b26da6c Add a new video pts tracking mode, enabled by option -correct-pts.
uau
parents: 17906
diff changeset
459 vf->priv->buffered_i = 1;
14888
32dcf8672086 configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents: 13720
diff changeset
460 return ret;
9514
08264c647f46 new filter
rfelker
parents:
diff changeset
461 }
08264c647f46 new filter
rfelker
parents:
diff changeset
462
30642
a972c1a4a012 cosmetics: Rename struct vf_instance_s --> vf_instance.
diego
parents: 30638
diff changeset
463 static int query_format(struct vf_instance *vf, unsigned int fmt)
9514
08264c647f46 new filter
rfelker
parents:
diff changeset
464 {
30442
8d7a061d427b Enable query_format for vf_tfields to make sure it isn't used for formats
reimar
parents: 30441
diff changeset
465 /* FIXME - figure out which formats exactly work */
9514
08264c647f46 new filter
rfelker
parents:
diff changeset
466 switch (fmt) {
30442
8d7a061d427b Enable query_format for vf_tfields to make sure it isn't used for formats
reimar
parents: 30441
diff changeset
467 default:
8d7a061d427b Enable query_format for vf_tfields to make sure it isn't used for formats
reimar
parents: 30441
diff changeset
468 if (vf->priv->mode == 1)
8d7a061d427b Enable query_format for vf_tfields to make sure it isn't used for formats
reimar
parents: 30441
diff changeset
469 return 0;
9514
08264c647f46 new filter
rfelker
parents:
diff changeset
470 case IMGFMT_YV12:
08264c647f46 new filter
rfelker
parents:
diff changeset
471 case IMGFMT_IYUV:
08264c647f46 new filter
rfelker
parents:
diff changeset
472 case IMGFMT_I420:
08264c647f46 new filter
rfelker
parents:
diff changeset
473 return vf_next_query_format(vf, fmt);
08264c647f46 new filter
rfelker
parents:
diff changeset
474 }
08264c647f46 new filter
rfelker
parents:
diff changeset
475 return 0;
08264c647f46 new filter
rfelker
parents:
diff changeset
476 }
08264c647f46 new filter
rfelker
parents:
diff changeset
477
30642
a972c1a4a012 cosmetics: Rename struct vf_instance_s --> vf_instance.
diego
parents: 30638
diff changeset
478 static int config(struct vf_instance *vf,
9514
08264c647f46 new filter
rfelker
parents:
diff changeset
479 int width, int height, int d_width, int d_height,
08264c647f46 new filter
rfelker
parents:
diff changeset
480 unsigned int flags, unsigned int outfmt)
08264c647f46 new filter
rfelker
parents:
diff changeset
481 {
08264c647f46 new filter
rfelker
parents:
diff changeset
482 switch (vf->priv->mode) {
08264c647f46 new filter
rfelker
parents:
diff changeset
483 case 0:
10009
69f10d08c3be new mode for tfields filter -- shifts fields by a quarter-pixel so the
rfelker
parents: 9593
diff changeset
484 case 2:
10049
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
485 case 3:
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
486 case 4:
9514
08264c647f46 new filter
rfelker
parents:
diff changeset
487 return vf_next_config(vf,width,height/2,d_width,d_height,flags,outfmt);
08264c647f46 new filter
rfelker
parents:
diff changeset
488 case 1:
08264c647f46 new filter
rfelker
parents:
diff changeset
489 return vf_next_config(vf,width,height,d_width,d_height,flags,outfmt);
08264c647f46 new filter
rfelker
parents:
diff changeset
490 }
08264c647f46 new filter
rfelker
parents:
diff changeset
491 return 0;
08264c647f46 new filter
rfelker
parents:
diff changeset
492 }
08264c647f46 new filter
rfelker
parents:
diff changeset
493
30642
a972c1a4a012 cosmetics: Rename struct vf_instance_s --> vf_instance.
diego
parents: 30638
diff changeset
494 static void uninit(struct vf_instance *vf)
9514
08264c647f46 new filter
rfelker
parents:
diff changeset
495 {
08264c647f46 new filter
rfelker
parents:
diff changeset
496 free(vf->priv);
08264c647f46 new filter
rfelker
parents:
diff changeset
497 }
08264c647f46 new filter
rfelker
parents:
diff changeset
498
30638
a7b908875c14 Rename open() vf initialization function to vf_open().
diego
parents: 30633
diff changeset
499 static int vf_open(vf_instance_t *vf, char *args)
9514
08264c647f46 new filter
rfelker
parents:
diff changeset
500 {
08264c647f46 new filter
rfelker
parents:
diff changeset
501 struct vf_priv_s *p;
08264c647f46 new filter
rfelker
parents:
diff changeset
502 vf->config = config;
08264c647f46 new filter
rfelker
parents:
diff changeset
503 vf->put_image = put_image;
30442
8d7a061d427b Enable query_format for vf_tfields to make sure it isn't used for formats
reimar
parents: 30441
diff changeset
504 vf->query_format = query_format;
9514
08264c647f46 new filter
rfelker
parents:
diff changeset
505 vf->uninit = uninit;
08264c647f46 new filter
rfelker
parents:
diff changeset
506 vf->default_reqs = VFCAP_ACCEPT_STRIDE;
08264c647f46 new filter
rfelker
parents:
diff changeset
507 vf->priv = p = calloc(1, sizeof(struct vf_priv_s));
15013
0bb95cd581b6 sane default mode
rfelker
parents: 15012
diff changeset
508 vf->priv->mode = 4;
14888
32dcf8672086 configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents: 13720
diff changeset
509 vf->priv->parity = -1;
32dcf8672086 configurable field parity (default from source); bugfixes; speed up mode 0
rfelker
parents: 13720
diff changeset
510 if (args) sscanf(args, "%d:%d", &vf->priv->mode, &vf->priv->parity);
10049
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
511 qpel_li = qpel_li_C;
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
512 qpel_4tap = qpel_4tap_C;
28290
25337a2147e7 Lots and lots of #ifdef ARCH_... -> #if ARCH_...
reimar
parents: 28175
diff changeset
513 #if HAVE_MMX
10049
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
514 if(gCpuCaps.hasMMX) qpel_li = qpel_li_MMX;
29639
26a355ffe458 Add several HAVE_EBX_AVAILABLE conditions where necessary
reimar
parents: 29263
diff changeset
515 #if HAVE_EBX_AVAILABLE
10049
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
516 if(gCpuCaps.hasMMX) qpel_4tap = qpel_4tap_MMX;
10020
9829b7e61b55 new mmx/mmx2/3dnow code for improved performance
rfelker
parents: 10009
diff changeset
517 #endif
29639
26a355ffe458 Add several HAVE_EBX_AVAILABLE conditions where necessary
reimar
parents: 29263
diff changeset
518 #endif
28290
25337a2147e7 Lots and lots of #ifdef ARCH_... -> #if ARCH_...
reimar
parents: 28175
diff changeset
519 #if HAVE_MMX2
10049
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
520 if(gCpuCaps.hasMMX2) qpel_li = qpel_li_MMX2;
10020
9829b7e61b55 new mmx/mmx2/3dnow code for improved performance
rfelker
parents: 10009
diff changeset
521 #endif
28335
31287e75b5d8 HAVE_3DNOW --> HAVE_AMD3DNOW
diego
parents: 28290
diff changeset
522 #if HAVE_AMD3DNOW
10049
765c2276aa0c more 10l's -- fortunately part of the bug was that the buggy code didn't get called...
rfelker
parents: 10020
diff changeset
523 if(gCpuCaps.has3DNow) qpel_li = qpel_li_3DNOW;
10020
9829b7e61b55 new mmx/mmx2/3dnow code for improved performance
rfelker
parents: 10009
diff changeset
524 #endif
9514
08264c647f46 new filter
rfelker
parents:
diff changeset
525 return 1;
08264c647f46 new filter
rfelker
parents:
diff changeset
526 }
08264c647f46 new filter
rfelker
parents:
diff changeset
527
25221
00fff9a3b735 Make all vf_info_t structs const
reimar
parents: 24605
diff changeset
528 const vf_info_t vf_info_tfields = {
9514
08264c647f46 new filter
rfelker
parents:
diff changeset
529 "temporal field separation",
08264c647f46 new filter
rfelker
parents:
diff changeset
530 "tfields",
08264c647f46 new filter
rfelker
parents:
diff changeset
531 "Rich Felker",
08264c647f46 new filter
rfelker
parents:
diff changeset
532 "",
30638
a7b908875c14 Rename open() vf initialization function to vf_open().
diego
parents: 30633
diff changeset
533 vf_open,
9593
e9a2af584986 Add the new -vf option wich is the same as vop in reverse order.
albeu
parents: 9514
diff changeset
534 NULL
9514
08264c647f46 new filter
rfelker
parents:
diff changeset
535 };