annotate libmpcodecs/vf_ilpack.c @ 17197:0ab565f7ed60

Avoid gcc warnings: '...' might be used uninitialized in this function. In this case 'H', 'N', 'D', and 'F' can indeed be used uninitialized, thus possibly causing all sorts of problems. Patch by Peter Breitenlohner.
author rathann
date Thu, 15 Dec 2005 20:39:59 +0000
parents 6ff3379a0862
children 20aca9baf5d8
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
9933
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
1 #include <stdio.h>
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
2 #include <stdlib.h>
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
3 #include <string.h>
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
4 #include <inttypes.h>
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
5
17012
6ff3379a0862 Unify include path handling, -I.. is in CFLAGS.
diego
parents: 13720
diff changeset
6 #include "config.h"
6ff3379a0862 Unify include path handling, -I.. is in CFLAGS.
diego
parents: 13720
diff changeset
7 #include "mp_msg.h"
6ff3379a0862 Unify include path handling, -I.. is in CFLAGS.
diego
parents: 13720
diff changeset
8 #include "cpudetect.h"
9933
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
9
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
10 #include "img_format.h"
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
11 #include "mp_image.h"
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
12 #include "vf.h"
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
13
17012
6ff3379a0862 Unify include path handling, -I.. is in CFLAGS.
diego
parents: 13720
diff changeset
14 #include "libvo/fastmemcpy.h"
6ff3379a0862 Unify include path handling, -I.. is in CFLAGS.
diego
parents: 13720
diff changeset
15 #include "postproc/rgb2rgb.h"
9933
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
16
11643
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
/*
 * Signature of a line-packing routine: takes a destination buffer, three
 * source pointers (y, u, v), a width w and two int arguments us/vs.
 * In pack_li_0_C and pack_li_1_C us and vs are used as element offsets
 * into u and v respectively (u[us+us], v[vs+vs]).
 * struct vf_priv_s stores two pointers of this type.
 */
17 typedef void (pack_func_t)(unsigned char *dst, unsigned char *y,
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
18 unsigned char *u, unsigned char *v, int w, int us, int vs);
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
19
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
/*
 * Holds a mode value and two packing-function pointers.
 * NOTE(review): presumably the private state of this video filter (the
 * name follows the vf_priv_s convention) -- confirm against the code that
 * allocates it, which is not visible here.
 */
20 struct vf_priv_s {
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
/* NOTE(review): meaning of the mode values is not visible in this part of
 * the file -- presumably selects which pack_* routines are installed. */
21 int mode;
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
/* Two packing routines sharing the pack_func_t signature.
 * NOTE(review): likely one per line parity, matching the _0/_1 variants
 * defined below -- verify against the caller. */
22 pack_func_t *pack[2];
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
23 };
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
24
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
/*
 * Pack one line of separate Y, U and V samples into interleaved
 * Y U Y V bytes, copying each chroma sample unchanged.
 *
 * dst  - output; 4 bytes are written per pair of luma samples
 * y    - luma input; 2 bytes are read per pair
 * u, v - chroma inputs; 1 byte of each is read per pair
 * w    - width in luma samples; w/2 pairs are produced, so an odd
 *        trailing sample is ignored and w <= 1 writes nothing
 */
static void pack_nn_C(unsigned char *dst, unsigned char *y,
	unsigned char *u, unsigned char *v, int w)
{
	int pairs;

	/* Test "> 0" rather than "!= 0": with a negative width the counter
	 * would otherwise move away from zero and the loop would run far
	 * past the end of every buffer. */
	for (pairs = w / 2; pairs > 0; pairs--) {
		*dst++ = *y++;
		*dst++ = *u++;
		*dst++ = *y++;
		*dst++ = *v++;
	}
}
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
36
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
/*
 * Pack one line into interleaved Y U Y V bytes with linearly
 * interpolated chroma: each output chroma byte is
 * (7 * current + 1 * sample two strides away) >> 3, truncated.
 *
 * dst  - output; 4 bytes are written per pair of luma samples
 * y    - luma input; 2 bytes are read per pair
 * u, v - chroma inputs; for pair i, u[i] and u[i + 2*us] are read
 *        (likewise v[i] and v[i + 2*vs])
 * w    - width in luma samples; w/2 pairs are produced, w <= 1 writes
 *        nothing
 * us   - offset, in elements, applied twice to reach the second U sample
 * vs   - offset, in elements, applied twice to reach the second V sample
 */
static void pack_li_0_C(unsigned char *dst, unsigned char *y,
	unsigned char *u, unsigned char *v, int w, int us, int vs)
{
	int pairs;

	/* "> 0" keeps a negative width from turning into a runaway loop. */
	for (pairs = w / 2; pairs > 0; pairs--) {
		*dst++ = *y++;
		*dst++ = (u[us+us] + 7*u[0])>>3;
		*dst++ = *y++;
		*dst++ = (v[vs+vs] + 7*v[0])>>3;
		u++;
		v++;
	}
}
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
49
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
/*
 * Pack one line into interleaved Y U Y V bytes with linearly
 * interpolated chroma: each output chroma byte is
 * (5 * current + 3 * sample two strides away) >> 3, truncated.
 *
 * dst  - output; 4 bytes are written per pair of luma samples
 * y    - luma input; 2 bytes are read per pair
 * u, v - chroma inputs; for pair i, u[i] and u[i + 2*us] are read
 *        (likewise v[i] and v[i + 2*vs])
 * w    - width in luma samples; w/2 pairs are produced, w <= 1 writes
 *        nothing
 * us   - offset, in elements, applied twice to reach the second U sample
 * vs   - offset, in elements, applied twice to reach the second V sample
 */
static void pack_li_1_C(unsigned char *dst, unsigned char *y,
	unsigned char *u, unsigned char *v, int w, int us, int vs)
{
	int pairs;

	/* "> 0" keeps a negative width from turning into a runaway loop. */
	for (pairs = w / 2; pairs > 0; pairs--) {
		*dst++ = *y++;
		*dst++ = (3*u[us+us] + 5*u[0])>>3;
		*dst++ = *y++;
		*dst++ = (3*v[vs+vs] + 5*v[0])>>3;
		u++;
		v++;
	}
}
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
62
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
63 #ifdef HAVE_MMX
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
64 static void pack_nn_MMX(unsigned char *dst, unsigned char *y,
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
65 unsigned char *u, unsigned char *v, int w)
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
66 {
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
67 int j;
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
68 asm volatile (""
9933
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
69 ".balign 16 \n\t"
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
70 "1: \n\t"
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
71 "movq (%0), %%mm1 \n\t"
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
72 "movq (%0), %%mm2 \n\t"
11648
57372aa1d655 mmx simplifications
michael
parents: 11645
diff changeset
73 "movq (%1), %%mm4 \n\t"
57372aa1d655 mmx simplifications
michael
parents: 11645
diff changeset
74 "movq (%2), %%mm6 \n\t"
57372aa1d655 mmx simplifications
michael
parents: 11645
diff changeset
75 "punpcklbw %%mm6, %%mm4 \n\t"
57372aa1d655 mmx simplifications
michael
parents: 11645
diff changeset
76 "punpcklbw %%mm4, %%mm1 \n\t"
57372aa1d655 mmx simplifications
michael
parents: 11645
diff changeset
77 "punpckhbw %%mm4, %%mm2 \n\t"
9933
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
78
13720
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
79 "add $8, %0 \n\t"
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
80 "add $4, %1 \n\t"
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
81 "add $4, %2 \n\t"
9933
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
82 "movq %%mm1, (%3) \n\t"
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
83 "movq %%mm2, 8(%3) \n\t"
13720
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
84 "add $16, %3 \n\t"
9933
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
85 "decl %4 \n\t"
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
86 "jnz 1b \n\t"
11643
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
87 "emms \n\t"
9933
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
88 :
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
89 : "r" (y), "r" (u), "r" (v), "r" (dst), "r" (w/8)
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
90 : "memory"
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
91 );
11643
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
92 pack_nn_C(dst, y, u, v, (w&7));
9933
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
93 }
11645
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
94
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
95 static void pack_li_0_MMX(unsigned char *dst, unsigned char *y,
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
96 unsigned char *u, unsigned char *v, int w, int us, int vs)
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
97 {
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
98 asm volatile (""
13720
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
99 "push %%"REG_BP" \n\t"
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
100 #ifdef ARCH_X86_64
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
101 "mov %6, %%"REG_BP" \n\t"
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
102 #else
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
103 "movl 4(%%"REG_d"), %%"REG_BP" \n\t"
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
104 "movl (%%"REG_d"), %%"REG_d" \n\t"
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
105 #endif
11645
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
106 "pxor %%mm0, %%mm0 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
107
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
108 ".balign 16 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
109 ".Lli0: \n\t"
13720
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
110 "movq (%%"REG_S"), %%mm1 \n\t"
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
111 "movq (%%"REG_S"), %%mm2 \n\t"
11645
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
112
13720
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
113 "movq (%%"REG_a",%%"REG_d",2), %%mm4 \n\t"
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
114 "movq (%%"REG_b",%%"REG_BP",2), %%mm6 \n\t"
11645
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
115 "punpcklbw %%mm0, %%mm4 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
116 "punpcklbw %%mm0, %%mm6 \n\t"
13720
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
117 "movq (%%"REG_a"), %%mm3 \n\t"
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
118 "movq (%%"REG_b"), %%mm5 \n\t"
11645
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
119 "punpcklbw %%mm0, %%mm3 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
120 "punpcklbw %%mm0, %%mm5 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
121 "paddw %%mm3, %%mm4 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
122 "paddw %%mm5, %%mm6 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
123 "paddw %%mm3, %%mm4 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
124 "paddw %%mm5, %%mm6 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
125 "paddw %%mm3, %%mm4 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
126 "paddw %%mm5, %%mm6 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
127 "paddw %%mm3, %%mm4 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
128 "paddw %%mm5, %%mm6 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
129 "paddw %%mm3, %%mm4 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
130 "paddw %%mm5, %%mm6 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
131 "paddw %%mm3, %%mm4 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
132 "paddw %%mm5, %%mm6 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
133 "paddw %%mm3, %%mm4 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
134 "paddw %%mm5, %%mm6 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
135 "psrlw $3, %%mm4 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
136 "psrlw $3, %%mm6 \n\t"
11648
57372aa1d655 mmx simplifications
michael
parents: 11645
diff changeset
137 "packuswb %%mm4, %%mm4 \n\t"
57372aa1d655 mmx simplifications
michael
parents: 11645
diff changeset
138 "packuswb %%mm6, %%mm6 \n\t"
57372aa1d655 mmx simplifications
michael
parents: 11645
diff changeset
139 "punpcklbw %%mm6, %%mm4 \n\t"
57372aa1d655 mmx simplifications
michael
parents: 11645
diff changeset
140 "punpcklbw %%mm4, %%mm1 \n\t"
57372aa1d655 mmx simplifications
michael
parents: 11645
diff changeset
141 "punpckhbw %%mm4, %%mm2 \n\t"
11645
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
142
13720
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
143 "movq %%mm1, (%%"REG_D") \n\t"
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
144 "movq %%mm2, 8(%%"REG_D") \n\t"
11645
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
145
13720
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
146 "movq 8(%%"REG_S"), %%mm1 \n\t"
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
147 "movq 8(%%"REG_S"), %%mm2 \n\t"
11645
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
148
13720
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
149 "movq (%%"REG_a",%%"REG_d",2), %%mm4 \n\t"
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
150 "movq (%%"REG_b",%%"REG_BP",2), %%mm6 \n\t"
11645
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
151 "punpckhbw %%mm0, %%mm4 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
152 "punpckhbw %%mm0, %%mm6 \n\t"
13720
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
153 "movq (%%"REG_a"), %%mm3 \n\t"
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
154 "movq (%%"REG_b"), %%mm5 \n\t"
11645
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
155 "punpckhbw %%mm0, %%mm3 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
156 "punpckhbw %%mm0, %%mm5 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
157 "paddw %%mm3, %%mm4 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
158 "paddw %%mm5, %%mm6 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
159 "paddw %%mm3, %%mm4 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
160 "paddw %%mm5, %%mm6 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
161 "paddw %%mm3, %%mm4 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
162 "paddw %%mm5, %%mm6 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
163 "paddw %%mm3, %%mm4 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
164 "paddw %%mm5, %%mm6 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
165 "paddw %%mm3, %%mm4 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
166 "paddw %%mm5, %%mm6 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
167 "paddw %%mm3, %%mm4 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
168 "paddw %%mm5, %%mm6 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
169 "paddw %%mm3, %%mm4 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
170 "paddw %%mm5, %%mm6 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
171 "psrlw $3, %%mm4 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
172 "psrlw $3, %%mm6 \n\t"
11648
57372aa1d655 mmx simplifications
michael
parents: 11645
diff changeset
173 "packuswb %%mm4, %%mm4 \n\t"
57372aa1d655 mmx simplifications
michael
parents: 11645
diff changeset
174 "packuswb %%mm6, %%mm6 \n\t"
57372aa1d655 mmx simplifications
michael
parents: 11645
diff changeset
175 "punpcklbw %%mm6, %%mm4 \n\t"
57372aa1d655 mmx simplifications
michael
parents: 11645
diff changeset
176 "punpcklbw %%mm4, %%mm1 \n\t"
57372aa1d655 mmx simplifications
michael
parents: 11645
diff changeset
177 "punpckhbw %%mm4, %%mm2 \n\t"
11645
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
178
13720
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
179 "add $16, %%"REG_S" \n\t"
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
180 "add $8, %%"REG_a" \n\t"
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
181 "add $8, %%"REG_b" \n\t"
11645
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
182
13720
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
183 "movq %%mm1, 16(%%"REG_D") \n\t"
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
184 "movq %%mm2, 24(%%"REG_D") \n\t"
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
185 "add $32, %%"REG_D" \n\t"
11645
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
186
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
187 "decl %%ecx \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
188 "jnz .Lli0 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
189 "emms \n\t"
13720
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
190 "pop %%"REG_BP" \n\t"
11645
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
191 :
13720
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
192 : "S" (y), "D" (dst), "a" (u), "b" (v), "c" (w/16),
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
193 #ifdef ARCH_X86_64
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
194 "d" ((long)us), "r" ((long)vs)
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
195 #else
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
196 "d" (&us)
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
197 #endif
11645
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
198 : "memory"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
199 );
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
200 pack_li_0_C(dst, y, u, v, (w&15), us, vs);
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
201 }
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
202
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
203 static void pack_li_1_MMX(unsigned char *dst, unsigned char *y,
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
204 unsigned char *u, unsigned char *v, int w, int us, int vs)
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
205 {
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
206 asm volatile (""
13720
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
207 "push %%"REG_BP" \n\t"
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
208 #ifdef ARCH_X86_64
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
209 "mov %6, %%"REG_BP" \n\t"
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
210 #else
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
211 "movl 4(%%"REG_d"), %%"REG_BP" \n\t"
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
212 "movl (%%"REG_d"), %%"REG_d" \n\t"
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
213 #endif
11645
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
214 "pxor %%mm0, %%mm0 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
215
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
216 ".balign 16 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
217 ".Lli1: \n\t"
13720
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
218 "movq (%%"REG_S"), %%mm1 \n\t"
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
219 "movq (%%"REG_S"), %%mm2 \n\t"
11645
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
220
13720
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
221 "movq (%%"REG_a",%%"REG_d",2), %%mm4 \n\t"
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
222 "movq (%%"REG_b",%%"REG_BP",2), %%mm6 \n\t"
11645
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
223 "punpcklbw %%mm0, %%mm4 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
224 "punpcklbw %%mm0, %%mm6 \n\t"
13720
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
225 "movq (%%"REG_a"), %%mm3 \n\t"
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
226 "movq (%%"REG_b"), %%mm5 \n\t"
11645
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
227 "punpcklbw %%mm0, %%mm3 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
228 "punpcklbw %%mm0, %%mm5 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
229 "movq %%mm4, %%mm7 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
230 "paddw %%mm4, %%mm4 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
231 "paddw %%mm7, %%mm4 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
232 "movq %%mm6, %%mm7 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
233 "paddw %%mm6, %%mm6 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
234 "paddw %%mm7, %%mm6 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
235 "paddw %%mm3, %%mm4 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
236 "paddw %%mm5, %%mm6 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
237 "paddw %%mm3, %%mm4 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
238 "paddw %%mm5, %%mm6 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
239 "paddw %%mm3, %%mm4 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
240 "paddw %%mm5, %%mm6 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
241 "paddw %%mm3, %%mm4 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
242 "paddw %%mm5, %%mm6 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
243 "paddw %%mm3, %%mm4 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
244 "paddw %%mm5, %%mm6 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
245 "psrlw $3, %%mm4 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
246 "psrlw $3, %%mm6 \n\t"
11648
57372aa1d655 mmx simplifications
michael
parents: 11645
diff changeset
247 "packuswb %%mm4, %%mm4 \n\t"
57372aa1d655 mmx simplifications
michael
parents: 11645
diff changeset
248 "packuswb %%mm6, %%mm6 \n\t"
57372aa1d655 mmx simplifications
michael
parents: 11645
diff changeset
249 "punpcklbw %%mm6, %%mm4 \n\t"
57372aa1d655 mmx simplifications
michael
parents: 11645
diff changeset
250 "punpcklbw %%mm4, %%mm1 \n\t"
57372aa1d655 mmx simplifications
michael
parents: 11645
diff changeset
251 "punpckhbw %%mm4, %%mm2 \n\t"
11645
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
252
13720
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
253 "movq %%mm1, (%%"REG_D") \n\t"
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
254 "movq %%mm2, 8(%%"REG_D") \n\t"
11645
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
255
13720
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
256 "movq 8(%%"REG_S"), %%mm1 \n\t"
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
257 "movq 8(%%"REG_S"), %%mm2 \n\t"
11645
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
258
13720
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
259 "movq (%%"REG_a",%%"REG_d",2), %%mm4 \n\t"
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
260 "movq (%%"REG_b",%%"REG_BP",2), %%mm6 \n\t"
11645
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
261 "punpckhbw %%mm0, %%mm4 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
262 "punpckhbw %%mm0, %%mm6 \n\t"
13720
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
263 "movq (%%"REG_a"), %%mm3 \n\t"
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
264 "movq (%%"REG_b"), %%mm5 \n\t"
11645
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
265 "punpckhbw %%mm0, %%mm3 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
266 "punpckhbw %%mm0, %%mm5 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
267 "movq %%mm4, %%mm7 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
268 "paddw %%mm4, %%mm4 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
269 "paddw %%mm7, %%mm4 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
270 "movq %%mm6, %%mm7 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
271 "paddw %%mm6, %%mm6 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
272 "paddw %%mm7, %%mm6 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
273 "paddw %%mm3, %%mm4 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
274 "paddw %%mm5, %%mm6 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
275 "paddw %%mm3, %%mm4 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
276 "paddw %%mm5, %%mm6 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
277 "paddw %%mm3, %%mm4 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
278 "paddw %%mm5, %%mm6 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
279 "paddw %%mm3, %%mm4 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
280 "paddw %%mm5, %%mm6 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
281 "paddw %%mm3, %%mm4 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
282 "paddw %%mm5, %%mm6 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
283 "psrlw $3, %%mm4 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
284 "psrlw $3, %%mm6 \n\t"
11648
57372aa1d655 mmx simplifications
michael
parents: 11645
diff changeset
285 "packuswb %%mm4, %%mm4 \n\t"
57372aa1d655 mmx simplifications
michael
parents: 11645
diff changeset
286 "packuswb %%mm6, %%mm6 \n\t"
57372aa1d655 mmx simplifications
michael
parents: 11645
diff changeset
287 "punpcklbw %%mm6, %%mm4 \n\t"
57372aa1d655 mmx simplifications
michael
parents: 11645
diff changeset
288 "punpcklbw %%mm4, %%mm1 \n\t"
57372aa1d655 mmx simplifications
michael
parents: 11645
diff changeset
289 "punpckhbw %%mm4, %%mm2 \n\t"
11645
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
290
13720
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
291 "add $16, %%"REG_S" \n\t"
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
292 "add $8, %%"REG_a" \n\t"
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
293 "add $8, %%"REG_b" \n\t"
11645
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
294
13720
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
295 "movq %%mm1, 16(%%"REG_D") \n\t"
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
296 "movq %%mm2, 24(%%"REG_D") \n\t"
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
297 "add $32, %%"REG_D" \n\t"
11645
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
298
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
299 "decl %%ecx \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
300 "jnz .Lli1 \n\t"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
301 "emms \n\t"
13720
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
302 "pop %%"REG_BP" \n\t"
11645
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
303 :
13720
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
304 : "S" (y), "D" (dst), "a" (u), "b" (v), "c" (w/16),
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
305 #ifdef ARCH_X86_64
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
306 "d" ((long)us), "r" ((long)vs)
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
307 #else
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
308 "d" (&us)
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 11648
diff changeset
309 #endif
11645
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
310 : "memory"
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
311 );
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
312 pack_li_1_C(dst, y, u, v, (w&15), us, vs);
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
313 }
9933
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
314 #endif
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
315
11643
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
316 static pack_func_t *pack_nn;
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
317 static pack_func_t *pack_li_0;
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
318 static pack_func_t *pack_li_1;
9933
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
319
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
320 static void ilpack(unsigned char *dst, unsigned char *src[3],
11643
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
321 int dststride, int srcstride[3], int w, int h, pack_func_t *pack[2])
9933
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
322 {
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
323 int i;
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
324 unsigned char *y, *u, *v;
11643
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
325 int ys = srcstride[0], us = srcstride[1], vs = srcstride[2];
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
326 int a, b;
9933
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
327
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
328 y = src[0];
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
329 u = src[1];
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
330 v = src[2];
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
331
11643
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
332 pack_nn(dst, y, u, v, w, 0, 0);
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
333 y += ys; dst += dststride;
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
334 pack_nn(dst, y, u+us, v+vs, w, 0, 0);
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
335 y += ys; dst += dststride;
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
336 for (i=2; i<h-2; i++) {
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
337 a = (i&2) ? 1 : -1;
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
338 b = (i&1) ^ ((i&2)>>1);
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
339 pack[b](dst, y, u, v, w, us*a, vs*a);
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
340 y += ys;
9933
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
341 if ((i&3) == 1) {
11643
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
342 u -= us;
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
343 v -= vs;
9933
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
344 } else {
11643
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
345 u += us;
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
346 v += vs;
9933
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
347 }
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
348 dst += dststride;
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
349 }
11643
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
350 pack_nn(dst, y, u, v, w, 0, 0);
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
351 y += ys; dst += dststride; u += us; v += vs;
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
352 pack_nn(dst, y, u, v, w, 0, 0);
9933
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
353 }
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
354
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
355
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
356 static int put_image(struct vf_instance_s* vf, mp_image_t *mpi)
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
357 {
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
358 mp_image_t *dmpi;
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
359
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
360 // hope we'll get DR buffer:
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
361 dmpi=vf_get_image(vf->next, IMGFMT_YUY2,
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
362 MP_IMGTYPE_TEMP, MP_IMGFLAG_ACCEPT_STRIDE,
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
363 mpi->w, mpi->h);
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
364
11643
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
365 ilpack(dmpi->planes[0], mpi->planes, dmpi->stride[0], mpi->stride, mpi->w, mpi->h, vf->priv->pack);
9933
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
366
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
367 return vf_next_put_image(vf,dmpi);
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
368 }
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
369
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
370 static int config(struct vf_instance_s* vf,
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
371 int width, int height, int d_width, int d_height,
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
372 unsigned int flags, unsigned int outfmt)
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
373 {
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
374 /* FIXME - also support UYVY output? */
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
375 return vf_next_config(vf, width, height, d_width, d_height, flags, IMGFMT_YUY2);
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
376 }
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
377
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
378
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
379 static int query_format(struct vf_instance_s* vf, unsigned int fmt)
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
380 {
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
381 /* FIXME - really any YUV 4:2:0 input format should work */
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
382 switch (fmt) {
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
383 case IMGFMT_YV12:
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
384 case IMGFMT_IYUV:
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
385 case IMGFMT_I420:
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
386 return vf_next_query_format(vf,IMGFMT_YUY2);
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
387 }
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
388 return 0;
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
389 }
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
390
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
391 static int open(vf_instance_t *vf, char* args)
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
392 {
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
393 vf->config=config;
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
394 vf->query_format=query_format;
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
395 vf->put_image=put_image;
11643
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
396 vf->priv = calloc(1, sizeof(struct vf_priv_s));
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
397 vf->priv->mode = 1;
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
398 if (args) sscanf(args, "%d", &vf->priv->mode);
9933
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
399
11643
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
400 pack_nn = (pack_func_t *)pack_nn_C;
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
401 pack_li_0 = pack_li_0_C;
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
402 pack_li_1 = pack_li_1_C;
9933
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
403 #ifdef HAVE_MMX
11645
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
404 if(gCpuCaps.hasMMX) {
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
405 pack_nn = (pack_func_t *)pack_nn_MMX;
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
406 pack_li_0 = pack_li_0_MMX;
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
407 pack_li_1 = pack_li_1_MMX;
3837fd1bfa5b mmx optimizations
rfelker
parents: 11643
diff changeset
408 }
9933
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
409 #endif
11643
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
410
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
411 switch(vf->priv->mode) {
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
412 case 0:
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
413 vf->priv->pack[0] = vf->priv->pack[1] = pack_nn;
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
414 break;
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
415 default:
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
416 mp_msg(MSGT_VFILTER, MSGL_WARN,
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
417 "ilpack: unknown mode %d (fallback to linear)\n",
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
418 vf->priv->mode);
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
419 case 1:
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
420 vf->priv->pack[0] = pack_li_0;
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
421 vf->priv->pack[1] = pack_li_1;
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
422 break;
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
423 }
3ddfe9316ca9 big updates to ilpack: do proper interpolation rather than just
rfelker
parents: 9933
diff changeset
424
9933
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
425 return 1;
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
426 }
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
427
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
428 vf_info_t vf_info_ilpack = {
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
429 "4:2:0 planar -> 4:2:2 packed reinterlacer",
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
430 "ilpack",
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
431 "Richard Felker",
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
432 "",
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
433 open,
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
434 NULL
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
435 };
3548701a13fe 1. new alternate approach to inverse telecine! much better!
rfelker
parents:
diff changeset
436