annotate libmpcodecs/vf_ilpack.c @ 32282:606e4157cd4c

Split alloc and init of context so that parameters can be set in the context instead of requiring them to be passed through function parameters. This also makes sws work with AVOptions.
author michael
date Sun, 26 Sep 2010 19:33:57 +0000
parents a972c1a4a012
children 7af3e6f901fd
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
30421
bbb6ebec87a0 Add missing license headers to all files in the libmpcodecs directory.
diego
parents: 29639
diff changeset
1 /*
bbb6ebec87a0 Add missing license headers to all files in the libmpcodecs directory.
diego
parents: 29639
diff changeset
2 * This file is part of MPlayer.
bbb6ebec87a0 Add missing license headers to all files in the libmpcodecs directory.
diego
parents: 29639
diff changeset
3 *
bbb6ebec87a0 Add missing license headers to all files in the libmpcodecs directory.
diego
parents: 29639
diff changeset
4 * MPlayer is free software; you can redistribute it and/or modify
bbb6ebec87a0 Add missing license headers to all files in the libmpcodecs directory.
diego
parents: 29639
diff changeset
5 * it under the terms of the GNU General Public License as published by
bbb6ebec87a0 Add missing license headers to all files in the libmpcodecs directory.
diego
parents: 29639
diff changeset
6 * the Free Software Foundation; either version 2 of the License, or
bbb6ebec87a0 Add missing license headers to all files in the libmpcodecs directory.
diego
parents: 29639
diff changeset
7 * (at your option) any later version.
bbb6ebec87a0 Add missing license headers to all files in the libmpcodecs directory.
diego
parents: 29639
diff changeset
8 *
bbb6ebec87a0 Add missing license headers to all files in the libmpcodecs directory.
diego
parents: 29639
diff changeset
9 * MPlayer is distributed in the hope that it will be useful,
bbb6ebec87a0 Add missing license headers to all files in the libmpcodecs directory.
diego
parents: 29639
diff changeset
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
bbb6ebec87a0 Add missing license headers to all files in the libmpcodecs directory.
diego
parents: 29639
diff changeset
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
bbb6ebec87a0 Add missing license headers to all files in the libmpcodecs directory.
diego
parents: 29639
diff changeset
12 * GNU General Public License for more details.
bbb6ebec87a0 Add missing license headers to all files in the libmpcodecs directory.
diego
parents: 29639
diff changeset
13 *
bbb6ebec87a0 Add missing license headers to all files in the libmpcodecs directory.
diego
parents: 29639
diff changeset
14 * You should have received a copy of the GNU General Public License along
bbb6ebec87a0 Add missing license headers to all files in the libmpcodecs directory.
diego
parents: 29639
diff changeset
15 * with MPlayer; if not, write to the Free Software Foundation, Inc.,
bbb6ebec87a0 Add missing license headers to all files in the libmpcodecs directory.
diego
parents: 29639
diff changeset
16 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
bbb6ebec87a0 Add missing license headers to all files in the libmpcodecs directory.
diego
parents: 29639
diff changeset
17 */
bbb6ebec87a0 Add missing license headers to all files in the libmpcodecs directory.
diego
parents: 29639
diff changeset
18
9933
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
19 #include <stdio.h>
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
20 #include <stdlib.h>
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
21 #include <string.h>
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
22 #include <inttypes.h>
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
23
17012
6ff3379a0862 Unify include path handling, -I.. is in CFLAGS.
diego
parents: 13720
diff changeset
24 #include "config.h"
6ff3379a0862 Unify include path handling, -I.. is in CFLAGS.
diego
parents: 13720
diff changeset
25 #include "mp_msg.h"
6ff3379a0862 Unify include path handling, -I.. is in CFLAGS.
diego
parents: 13720
diff changeset
26 #include "cpudetect.h"
9933
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
27
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
28 #include "img_format.h"
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
29 #include "mp_image.h"
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
30 #include "vf.h"
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
31
11643
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
32 typedef void (pack_func_t)(unsigned char *dst, unsigned char *y,
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
33 unsigned char *u, unsigned char *v, int w, int us, int vs);
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
34
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
35 struct vf_priv_s {
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
36 int mode;
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
37 pack_func_t *pack[2];
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
38 };
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
39
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
40 static void pack_nn_C(unsigned char *dst, unsigned char *y,
9933
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
41 unsigned char *u, unsigned char *v, int w)
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
42 {
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
43 int j;
11643
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
44 for (j = w/2; j; j--) {
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
45 *dst++ = *y++;
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
46 *dst++ = *u++;
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
47 *dst++ = *y++;
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
48 *dst++ = *v++;
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
49 }
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
50 }
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
51
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
52 static void pack_li_0_C(unsigned char *dst, unsigned char *y,
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
53 unsigned char *u, unsigned char *v, int w, int us, int vs)
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
54 {
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
55 int j;
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
56 for (j = w/2; j; j--) {
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
57 *dst++ = *y++;
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
58 *dst++ = (u[us+us] + 7*u[0])>>3;
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
59 *dst++ = *y++;
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
60 *dst++ = (v[vs+vs] + 7*v[0])>>3;
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
61 u++; v++;
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
62 }
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
63 }
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
64
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
65 static void pack_li_1_C(unsigned char *dst, unsigned char *y,
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
66 unsigned char *u, unsigned char *v, int w, int us, int vs)
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
67 {
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
68 int j;
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
69 for (j = w/2; j; j--) {
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
70 *dst++ = *y++;
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
71 *dst++ = (3*u[us+us] + 5*u[0])>>3;
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
72 *dst++ = *y++;
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
73 *dst++ = (3*v[vs+vs] + 5*v[0])>>3;
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
74 u++; v++;
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
75 }
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
76 }
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
77
28290
25337a2147e7 Lots and lots of #ifdef ARCH_... -> #if ARCH_...
reimar
parents: 27754
diff changeset
78 #if HAVE_MMX
11643
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
79 static void pack_nn_MMX(unsigned char *dst, unsigned char *y,
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
80 unsigned char *u, unsigned char *v, int w)
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
81 {
27754
08d18fe9da52 Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents: 25221
diff changeset
82 __asm__ volatile (""
19372
6334c14b38eb Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents: 18746
diff changeset
83 ASMALIGN(4)
9933
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
84 "1: \n\t"
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
85 "movq (%0), %%mm1 \n\t"
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
86 "movq (%0), %%mm2 \n\t"
11648
57372aa1d655 mmx simplifications
michael
parents: 11645
diff changeset
87 "movq (%1), %%mm4 \n\t"
57372aa1d655 mmx simplifications
michael
parents: 11645
diff changeset
88 "movq (%2), %%mm6 \n\t"
57372aa1d655 mmx simplifications
michael
parents: 11645
diff changeset
89 "punpcklbw %%mm6, %%mm4 \n\t"
57372aa1d655 mmx simplifications
michael
parents: 11645
diff changeset
90 "punpcklbw %%mm4, %%mm1 \n\t"
57372aa1d655 mmx simplifications
michael
parents: 11645
diff changeset
91 "punpckhbw %%mm4, %%mm2 \n\t"
29263
0f1b5b68af32 whitespace cosmetics: Remove all trailing whitespace.
diego
parents: 29040
diff changeset
92
13720
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
93 "add $8, %0 \n\t"
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
94 "add $4, %1 \n\t"
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
95 "add $4, %2 \n\t"
9933
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
96 "movq %%mm1, (%3) \n\t"
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
97 "movq %%mm2, 8(%3) \n\t"
13720
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
98 "add $16, %3 \n\t"
9933
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
99 "decl %4 \n\t"
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
100 "jnz 1b \n\t"
11643
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
101 "emms \n\t"
29263
0f1b5b68af32 whitespace cosmetics: Remove all trailing whitespace.
diego
parents: 29040
diff changeset
102 :
9933
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
103 : "r" (y), "r" (u), "r" (v), "r" (dst), "r" (w/8)
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
104 : "memory"
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
105 );
11643
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
106 pack_nn_C(dst, y, u, v, (w&7));
9933
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
107 }
11645
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
108
29639
26a355ffe458 Add several HAVE_EBX_AVAILABLE conditions where necessary
reimar
parents: 29263
diff changeset
109 #if HAVE_EBX_AVAILABLE
11645
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
110 static void pack_li_0_MMX(unsigned char *dst, unsigned char *y,
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
111 unsigned char *u, unsigned char *v, int w, int us, int vs)
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
112 {
27754
08d18fe9da52 Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents: 25221
diff changeset
113 __asm__ volatile (""
13720
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
114 "push %%"REG_BP" \n\t"
28290
25337a2147e7 Lots and lots of #ifdef ARCH_... -> #if ARCH_...
reimar
parents: 27754
diff changeset
115 #if ARCH_X86_64
13720
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
116 "mov %6, %%"REG_BP" \n\t"
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
117 #else
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
118 "movl 4(%%"REG_d"), %%"REG_BP" \n\t"
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
119 "movl (%%"REG_d"), %%"REG_d" \n\t"
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
120 #endif
11645
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
121 "pxor %%mm0, %%mm0 \n\t"
29263
0f1b5b68af32 whitespace cosmetics: Remove all trailing whitespace.
diego
parents: 29040
diff changeset
122
19372
6334c14b38eb Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents: 18746
diff changeset
123 ASMALIGN(4)
11645
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
124 ".Lli0: \n\t"
13720
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
125 "movq (%%"REG_S"), %%mm1 \n\t"
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
126 "movq (%%"REG_S"), %%mm2 \n\t"
29263
0f1b5b68af32 whitespace cosmetics: Remove all trailing whitespace.
diego
parents: 29040
diff changeset
127
13720
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
128 "movq (%%"REG_a",%%"REG_d",2), %%mm4 \n\t"
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
129 "movq (%%"REG_b",%%"REG_BP",2), %%mm6 \n\t"
11645
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
130 "punpcklbw %%mm0, %%mm4 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
131 "punpcklbw %%mm0, %%mm6 \n\t"
13720
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
132 "movq (%%"REG_a"), %%mm3 \n\t"
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
133 "movq (%%"REG_b"), %%mm5 \n\t"
11645
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
134 "punpcklbw %%mm0, %%mm3 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
135 "punpcklbw %%mm0, %%mm5 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
136 "paddw %%mm3, %%mm4 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
137 "paddw %%mm5, %%mm6 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
138 "paddw %%mm3, %%mm4 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
139 "paddw %%mm5, %%mm6 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
140 "paddw %%mm3, %%mm4 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
141 "paddw %%mm5, %%mm6 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
142 "paddw %%mm3, %%mm4 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
143 "paddw %%mm5, %%mm6 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
144 "paddw %%mm3, %%mm4 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
145 "paddw %%mm5, %%mm6 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
146 "paddw %%mm3, %%mm4 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
147 "paddw %%mm5, %%mm6 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
148 "paddw %%mm3, %%mm4 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
149 "paddw %%mm5, %%mm6 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
150 "psrlw $3, %%mm4 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
151 "psrlw $3, %%mm6 \n\t"
11648
57372aa1d655 mmx simplifications
michael
parents: 11645
diff changeset
152 "packuswb %%mm4, %%mm4 \n\t"
57372aa1d655 mmx simplifications
michael
parents: 11645
diff changeset
153 "packuswb %%mm6, %%mm6 \n\t"
57372aa1d655 mmx simplifications
michael
parents: 11645
diff changeset
154 "punpcklbw %%mm6, %%mm4 \n\t"
57372aa1d655 mmx simplifications
michael
parents: 11645
diff changeset
155 "punpcklbw %%mm4, %%mm1 \n\t"
57372aa1d655 mmx simplifications
michael
parents: 11645
diff changeset
156 "punpckhbw %%mm4, %%mm2 \n\t"
29263
0f1b5b68af32 whitespace cosmetics: Remove all trailing whitespace.
diego
parents: 29040
diff changeset
157
13720
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
158 "movq %%mm1, (%%"REG_D") \n\t"
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
159 "movq %%mm2, 8(%%"REG_D") \n\t"
29263
0f1b5b68af32 whitespace cosmetics: Remove all trailing whitespace.
diego
parents: 29040
diff changeset
160
13720
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
161 "movq 8(%%"REG_S"), %%mm1 \n\t"
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
162 "movq 8(%%"REG_S"), %%mm2 \n\t"
29263
0f1b5b68af32 whitespace cosmetics: Remove all trailing whitespace.
diego
parents: 29040
diff changeset
163
13720
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
164 "movq (%%"REG_a",%%"REG_d",2), %%mm4 \n\t"
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
165 "movq (%%"REG_b",%%"REG_BP",2), %%mm6 \n\t"
11645
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
166 "punpckhbw %%mm0, %%mm4 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
167 "punpckhbw %%mm0, %%mm6 \n\t"
13720
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
168 "movq (%%"REG_a"), %%mm3 \n\t"
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
169 "movq (%%"REG_b"), %%mm5 \n\t"
11645
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
170 "punpckhbw %%mm0, %%mm3 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
171 "punpckhbw %%mm0, %%mm5 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
172 "paddw %%mm3, %%mm4 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
173 "paddw %%mm5, %%mm6 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
174 "paddw %%mm3, %%mm4 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
175 "paddw %%mm5, %%mm6 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
176 "paddw %%mm3, %%mm4 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
177 "paddw %%mm5, %%mm6 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
178 "paddw %%mm3, %%mm4 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
179 "paddw %%mm5, %%mm6 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
180 "paddw %%mm3, %%mm4 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
181 "paddw %%mm5, %%mm6 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
182 "paddw %%mm3, %%mm4 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
183 "paddw %%mm5, %%mm6 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
184 "paddw %%mm3, %%mm4 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
185 "paddw %%mm5, %%mm6 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
186 "psrlw $3, %%mm4 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
187 "psrlw $3, %%mm6 \n\t"
11648
57372aa1d655 mmx simplifications
michael
parents: 11645
diff changeset
188 "packuswb %%mm4, %%mm4 \n\t"
57372aa1d655 mmx simplifications
michael
parents: 11645
diff changeset
189 "packuswb %%mm6, %%mm6 \n\t"
57372aa1d655 mmx simplifications
michael
parents: 11645
diff changeset
190 "punpcklbw %%mm6, %%mm4 \n\t"
57372aa1d655 mmx simplifications
michael
parents: 11645
diff changeset
191 "punpcklbw %%mm4, %%mm1 \n\t"
57372aa1d655 mmx simplifications
michael
parents: 11645
diff changeset
192 "punpckhbw %%mm4, %%mm2 \n\t"
29263
0f1b5b68af32 whitespace cosmetics: Remove all trailing whitespace.
diego
parents: 29040
diff changeset
193
13720
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
194 "add $16, %%"REG_S" \n\t"
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
195 "add $8, %%"REG_a" \n\t"
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
196 "add $8, %%"REG_b" \n\t"
29263
0f1b5b68af32 whitespace cosmetics: Remove all trailing whitespace.
diego
parents: 29040
diff changeset
197
13720
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
198 "movq %%mm1, 16(%%"REG_D") \n\t"
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
199 "movq %%mm2, 24(%%"REG_D") \n\t"
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
200 "add $32, %%"REG_D" \n\t"
29263
0f1b5b68af32 whitespace cosmetics: Remove all trailing whitespace.
diego
parents: 29040
diff changeset
201
11645
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
202 "decl %%ecx \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
203 "jnz .Lli0 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
204 "emms \n\t"
13720
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
205 "pop %%"REG_BP" \n\t"
29263
0f1b5b68af32 whitespace cosmetics: Remove all trailing whitespace.
diego
parents: 29040
diff changeset
206 :
13720
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
207 : "S" (y), "D" (dst), "a" (u), "b" (v), "c" (w/16),
28290
25337a2147e7 Lots and lots of #ifdef ARCH_... -> #if ARCH_...
reimar
parents: 27754
diff changeset
208 #if ARCH_X86_64
29040
963f578121c6 Use x86_reg instead of long in several video filters to fix compilation on MinGW64.
reimar
parents: 28290
diff changeset
209 "d" ((x86_reg)us), "r" ((x86_reg)vs)
13720
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
210 #else
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
211 "d" (&us)
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
212 #endif
11645
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
213 : "memory"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
214 );
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
215 pack_li_0_C(dst, y, u, v, (w&15), us, vs);
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
216 }
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
217
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
218 static void pack_li_1_MMX(unsigned char *dst, unsigned char *y,
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
219 unsigned char *u, unsigned char *v, int w, int us, int vs)
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
220 {
27754
08d18fe9da52 Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents: 25221
diff changeset
221 __asm__ volatile (""
13720
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
222 "push %%"REG_BP" \n\t"
28290
25337a2147e7 Lots and lots of #ifdef ARCH_... -> #if ARCH_...
reimar
parents: 27754
diff changeset
223 #if ARCH_X86_64
13720
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
224 "mov %6, %%"REG_BP" \n\t"
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
225 #else
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
226 "movl 4(%%"REG_d"), %%"REG_BP" \n\t"
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
227 "movl (%%"REG_d"), %%"REG_d" \n\t"
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
228 #endif
11645
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
229 "pxor %%mm0, %%mm0 \n\t"
29263
0f1b5b68af32 whitespace cosmetics: Remove all trailing whitespace.
diego
parents: 29040
diff changeset
230
19372
6334c14b38eb Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents: 18746
diff changeset
231 ASMALIGN(4)
11645
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
232 ".Lli1: \n\t"
13720
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
233 "movq (%%"REG_S"), %%mm1 \n\t"
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
234 "movq (%%"REG_S"), %%mm2 \n\t"
29263
0f1b5b68af32 whitespace cosmetics: Remove all trailing whitespace.
diego
parents: 29040
diff changeset
235
13720
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
236 "movq (%%"REG_a",%%"REG_d",2), %%mm4 \n\t"
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
237 "movq (%%"REG_b",%%"REG_BP",2), %%mm6 \n\t"
11645
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
238 "punpcklbw %%mm0, %%mm4 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
239 "punpcklbw %%mm0, %%mm6 \n\t"
13720
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
240 "movq (%%"REG_a"), %%mm3 \n\t"
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
241 "movq (%%"REG_b"), %%mm5 \n\t"
11645
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
242 "punpcklbw %%mm0, %%mm3 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
243 "punpcklbw %%mm0, %%mm5 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
244 "movq %%mm4, %%mm7 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
245 "paddw %%mm4, %%mm4 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
246 "paddw %%mm7, %%mm4 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
247 "movq %%mm6, %%mm7 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
248 "paddw %%mm6, %%mm6 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
249 "paddw %%mm7, %%mm6 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
250 "paddw %%mm3, %%mm4 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
251 "paddw %%mm5, %%mm6 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
252 "paddw %%mm3, %%mm4 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
253 "paddw %%mm5, %%mm6 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
254 "paddw %%mm3, %%mm4 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
255 "paddw %%mm5, %%mm6 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
256 "paddw %%mm3, %%mm4 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
257 "paddw %%mm5, %%mm6 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
258 "paddw %%mm3, %%mm4 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
259 "paddw %%mm5, %%mm6 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
260 "psrlw $3, %%mm4 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
261 "psrlw $3, %%mm6 \n\t"
11648
57372aa1d655 mmx simplifications
michael
parents: 11645
diff changeset
262 "packuswb %%mm4, %%mm4 \n\t"
57372aa1d655 mmx simplifications
michael
parents: 11645
diff changeset
263 "packuswb %%mm6, %%mm6 \n\t"
57372aa1d655 mmx simplifications
michael
parents: 11645
diff changeset
264 "punpcklbw %%mm6, %%mm4 \n\t"
57372aa1d655 mmx simplifications
michael
parents: 11645
diff changeset
265 "punpcklbw %%mm4, %%mm1 \n\t"
57372aa1d655 mmx simplifications
michael
parents: 11645
diff changeset
266 "punpckhbw %%mm4, %%mm2 \n\t"
29263
0f1b5b68af32 whitespace cosmetics: Remove all trailing whitespace.
diego
parents: 29040
diff changeset
267
13720
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
268 "movq %%mm1, (%%"REG_D") \n\t"
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
269 "movq %%mm2, 8(%%"REG_D") \n\t"
29263
0f1b5b68af32 whitespace cosmetics: Remove all trailing whitespace.
diego
parents: 29040
diff changeset
270
13720
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
271 "movq 8(%%"REG_S"), %%mm1 \n\t"
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
272 "movq 8(%%"REG_S"), %%mm2 \n\t"
29263
0f1b5b68af32 whitespace cosmetics: Remove all trailing whitespace.
diego
parents: 29040
diff changeset
273
13720
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
274 "movq (%%"REG_a",%%"REG_d",2), %%mm4 \n\t"
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
275 "movq (%%"REG_b",%%"REG_BP",2), %%mm6 \n\t"
11645
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
276 "punpckhbw %%mm0, %%mm4 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
277 "punpckhbw %%mm0, %%mm6 \n\t"
13720
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
278 "movq (%%"REG_a"), %%mm3 \n\t"
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
279 "movq (%%"REG_b"), %%mm5 \n\t"
11645
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
280 "punpckhbw %%mm0, %%mm3 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
281 "punpckhbw %%mm0, %%mm5 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
282 "movq %%mm4, %%mm7 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
283 "paddw %%mm4, %%mm4 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
284 "paddw %%mm7, %%mm4 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
285 "movq %%mm6, %%mm7 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
286 "paddw %%mm6, %%mm6 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
287 "paddw %%mm7, %%mm6 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
288 "paddw %%mm3, %%mm4 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
289 "paddw %%mm5, %%mm6 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
290 "paddw %%mm3, %%mm4 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
291 "paddw %%mm5, %%mm6 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
292 "paddw %%mm3, %%mm4 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
293 "paddw %%mm5, %%mm6 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
294 "paddw %%mm3, %%mm4 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
295 "paddw %%mm5, %%mm6 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
296 "paddw %%mm3, %%mm4 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
297 "paddw %%mm5, %%mm6 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
298 "psrlw $3, %%mm4 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
299 "psrlw $3, %%mm6 \n\t"
11648
57372aa1d655 mmx simplifications
michael
parents: 11645
diff changeset
300 "packuswb %%mm4, %%mm4 \n\t"
57372aa1d655 mmx simplifications
michael
parents: 11645
diff changeset
301 "packuswb %%mm6, %%mm6 \n\t"
57372aa1d655 mmx simplifications
michael
parents: 11645
diff changeset
302 "punpcklbw %%mm6, %%mm4 \n\t"
57372aa1d655 mmx simplifications
michael
parents: 11645
diff changeset
303 "punpcklbw %%mm4, %%mm1 \n\t"
57372aa1d655 mmx simplifications
michael
parents: 11645
diff changeset
304 "punpckhbw %%mm4, %%mm2 \n\t"
29263
0f1b5b68af32 whitespace cosmetics: Remove all trailing whitespace.
diego
parents: 29040
diff changeset
305
13720
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
306 "add $16, %%"REG_S" \n\t"
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
307 "add $8, %%"REG_a" \n\t"
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
308 "add $8, %%"REG_b" \n\t"
29263
0f1b5b68af32 whitespace cosmetics: Remove all trailing whitespace.
diego
parents: 29040
diff changeset
309
13720
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
310 "movq %%mm1, 16(%%"REG_D") \n\t"
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
311 "movq %%mm2, 24(%%"REG_D") \n\t"
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
312 "add $32, %%"REG_D" \n\t"
29263
0f1b5b68af32 whitespace cosmetics: Remove all trailing whitespace.
diego
parents: 29040
diff changeset
313
11645
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
314 "decl %%ecx \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
315 "jnz .Lli1 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
316 "emms \n\t"
13720
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
317 "pop %%"REG_BP" \n\t"
29263
0f1b5b68af32 whitespace cosmetics: Remove all trailing whitespace.
diego
parents: 29040
diff changeset
318 :
13720
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
319 : "S" (y), "D" (dst), "a" (u), "b" (v), "c" (w/16),
28290
25337a2147e7 Lots and lots of #ifdef ARCH_... -> #if ARCH_...
reimar
parents: 27754
diff changeset
320 #if ARCH_X86_64
29040
963f578121c6 Use x86_reg instead of long in several video filters to fix compilation on MinGW64.
reimar
parents: 28290
diff changeset
321 "d" ((x86_reg)us), "r" ((x86_reg)vs)
13720
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
322 #else
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
323 "d" (&us)
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
324 #endif
11645
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
325 : "memory"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
326 );
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
327 pack_li_1_C(dst, y, u, v, (w&15), us, vs);
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
328 }
29639
26a355ffe458 Add several HAVE_EBX_AVAILABLE conditions where necessary
reimar
parents: 29263
diff changeset
329 #endif /* HAVE_EBX_AVAILABLE */
9933
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
330 #endif
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
331
11643
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
332 static pack_func_t *pack_nn;
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
333 static pack_func_t *pack_li_0;
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
334 static pack_func_t *pack_li_1;
9933
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
335
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
336 static void ilpack(unsigned char *dst, unsigned char *src[3],
11643
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
337 int dststride, int srcstride[3], int w, int h, pack_func_t *pack[2])
9933
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
338 {
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
339 int i;
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
340 unsigned char *y, *u, *v;
11643
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
341 int ys = srcstride[0], us = srcstride[1], vs = srcstride[2];
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
342 int a, b;
9933
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
343
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
344 y = src[0];
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
345 u = src[1];
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
346 v = src[2];
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
347
11643
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
348 pack_nn(dst, y, u, v, w, 0, 0);
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
349 y += ys; dst += dststride;
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
350 pack_nn(dst, y, u+us, v+vs, w, 0, 0);
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
351 y += ys; dst += dststride;
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
352 for (i=2; i<h-2; i++) {
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
353 a = (i&2) ? 1 : -1;
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
354 b = (i&1) ^ ((i&2)>>1);
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
355 pack[b](dst, y, u, v, w, us*a, vs*a);
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
356 y += ys;
9933
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
357 if ((i&3) == 1) {
11643
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
358 u -= us;
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
359 v -= vs;
9933
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
360 } else {
11643
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
361 u += us;
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
362 v += vs;
9933
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
363 }
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
364 dst += dststride;
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
365 }
11643
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
366 pack_nn(dst, y, u, v, w, 0, 0);
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
367 y += ys; dst += dststride; u += us; v += vs;
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
368 pack_nn(dst, y, u, v, w, 0, 0);
9933
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
369 }
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
370
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
371
30642
a972c1a4a012 cosmetics: Rename struct vf_instance_s --> vf_instance.
diego
parents: 30638
diff changeset
372 static int put_image(struct vf_instance *vf, mp_image_t *mpi, double pts)
9933
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
373 {
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
374 mp_image_t *dmpi;
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
375
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
376 // hope we'll get DR buffer:
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
377 dmpi=vf_get_image(vf->next, IMGFMT_YUY2,
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
378 MP_IMGTYPE_TEMP, MP_IMGFLAG_ACCEPT_STRIDE,
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
379 mpi->w, mpi->h);
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
380
11643
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
381 ilpack(dmpi->planes[0], mpi->planes, dmpi->stride[0], mpi->stride, mpi->w, mpi->h, vf->priv->pack);
9933
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
382
17906
20aca9baf5d8 passing pts through the filter layer (lets see if pts or cola comes out at the end)
michael
parents: 17012
diff changeset
383 return vf_next_put_image(vf,dmpi, pts);
9933
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
384 }
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
385
30642
a972c1a4a012 cosmetics: Rename struct vf_instance_s --> vf_instance.
diego
parents: 30638
diff changeset
386 static int config(struct vf_instance *vf,
9933
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
387 int width, int height, int d_width, int d_height,
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
388 unsigned int flags, unsigned int outfmt)
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
389 {
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
390 /* FIXME - also support UYVY output? */
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
391 return vf_next_config(vf, width, height, d_width, d_height, flags, IMGFMT_YUY2);
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
392 }
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
393
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
394
30642
a972c1a4a012 cosmetics: Rename struct vf_instance_s --> vf_instance.
diego
parents: 30638
diff changeset
395 static int query_format(struct vf_instance *vf, unsigned int fmt)
9933
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
396 {
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
397 /* FIXME - really any YUV 4:2:0 input format should work */
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
398 switch (fmt) {
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
399 case IMGFMT_YV12:
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
400 case IMGFMT_IYUV:
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
401 case IMGFMT_I420:
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
402 return vf_next_query_format(vf,IMGFMT_YUY2);
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
403 }
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
404 return 0;
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
405 }
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
406
30638
a7b908875c14 Rename open() vf initialization function to vf_open().
diego
parents: 30633
diff changeset
407 static int vf_open(vf_instance_t *vf, char *args)
9933
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
408 {
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
409 vf->config=config;
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
410 vf->query_format=query_format;
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
411 vf->put_image=put_image;
11643
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
412 vf->priv = calloc(1, sizeof(struct vf_priv_s));
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
413 vf->priv->mode = 1;
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
414 if (args) sscanf(args, "%d", &vf->priv->mode);
29263
0f1b5b68af32 whitespace cosmetics: Remove all trailing whitespace.
diego
parents: 29040
diff changeset
415
11643
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
416 pack_nn = (pack_func_t *)pack_nn_C;
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
417 pack_li_0 = pack_li_0_C;
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
418 pack_li_1 = pack_li_1_C;
28290
25337a2147e7 Lots and lots of #ifdef ARCH_... -> #if ARCH_...
reimar
parents: 27754
diff changeset
419 #if HAVE_MMX
11645
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
420 if(gCpuCaps.hasMMX) {
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
421 pack_nn = (pack_func_t *)pack_nn_MMX;
29639
26a355ffe458 Add several HAVE_EBX_AVAILABLE conditions where necessary
reimar
parents: 29263
diff changeset
422 #if HAVE_EBX_AVAILABLE
11645
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
423 pack_li_0 = pack_li_0_MMX;
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
424 pack_li_1 = pack_li_1_MMX;
29639
26a355ffe458 Add several HAVE_EBX_AVAILABLE conditions where necessary
reimar
parents: 29263
diff changeset
425 #endif
11645
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
426 }
9933
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
427 #endif
11643
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
428
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
429 switch(vf->priv->mode) {
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
430 case 0:
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
431 vf->priv->pack[0] = vf->priv->pack[1] = pack_nn;
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
432 break;
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
433 default:
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
434 mp_msg(MSGT_VFILTER, MSGL_WARN,
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
435 "ilpack: unknown mode %d (fallback to linear)\n",
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
436 vf->priv->mode);
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
437 case 1:
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
438 vf->priv->pack[0] = pack_li_0;
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
439 vf->priv->pack[1] = pack_li_1;
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
440 break;
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
441 }
29263
0f1b5b68af32 whitespace cosmetics: Remove all trailing whitespace.
diego
parents: 29040
diff changeset
442
9933
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
443 return 1;
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
444 }
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
445
25221
00fff9a3b735 Make all vf_info_t structs const
reimar
parents: 23373
diff changeset
446 const vf_info_t vf_info_ilpack = {
9933
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
447 "4:2:0 planar -> 4:2:2 packed reinterlacer",
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
448 "ilpack",
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
449 "Richard Felker",
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
450 "",
30638
a7b908875c14 Rename open() vf initialization function to vf_open().
diego
parents: 30633
diff changeset
451 vf_open,
9933
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
452 NULL
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
453 };