annotate postproc/swscale.c @ 4467:9512d6832b38

YUY2, BGR24, BGR32 input support (no mmx yet)
author michael
date Fri, 01 Feb 2002 19:25:09 +0000
parents 5dd78b21afbc
children 76fb5d33e6eb
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
4295
67c56df76a44 copyright(swscaler) = GPL
michael
parents: 4294
diff changeset
1 /*
67c56df76a44 copyright(swscaler) = GPL
michael
parents: 4294
diff changeset
2 Copyright (C) 2001-2002 Michael Niedermayer <michaelni@gmx.at>
2216
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
3
4295
67c56df76a44 copyright(swscaler) = GPL
michael
parents: 4294
diff changeset
4 This program is free software; you can redistribute it and/or modify
67c56df76a44 copyright(swscaler) = GPL
michael
parents: 4294
diff changeset
5 it under the terms of the GNU General Public License as published by
67c56df76a44 copyright(swscaler) = GPL
michael
parents: 4294
diff changeset
6 the Free Software Foundation; either version 2 of the License, or
67c56df76a44 copyright(swscaler) = GPL
michael
parents: 4294
diff changeset
7 (at your option) any later version.
2216
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
8
4295
67c56df76a44 copyright(swscaler) = GPL
michael
parents: 4294
diff changeset
9 This program is distributed in the hope that it will be useful,
67c56df76a44 copyright(swscaler) = GPL
michael
parents: 4294
diff changeset
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
67c56df76a44 copyright(swscaler) = GPL
michael
parents: 4294
diff changeset
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
67c56df76a44 copyright(swscaler) = GPL
michael
parents: 4294
diff changeset
12 GNU General Public License for more details.
67c56df76a44 copyright(swscaler) = GPL
michael
parents: 4294
diff changeset
13
67c56df76a44 copyright(swscaler) = GPL
michael
parents: 4294
diff changeset
14 You should have received a copy of the GNU General Public License
67c56df76a44 copyright(swscaler) = GPL
michael
parents: 4294
diff changeset
15 along with this program; if not, write to the Free Software
67c56df76a44 copyright(swscaler) = GPL
michael
parents: 4294
diff changeset
16 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
67c56df76a44 copyright(swscaler) = GPL
michael
parents: 4294
diff changeset
17 */
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
18
4276
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
19 /*
4467
9512d6832b38 YUY2, BGR24, BGR32 input support (no mmx yet)
michael
parents: 4421
diff changeset
20 supported Input formats: YV12, I420, IYUV, YUY2, BGR32, BGR24 (grayscale soon too)
4419
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
21 supported output formats: YV12, I420, IYUV, BGR15, BGR16, BGR24, BGR32 (grayscale soon too)
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
22 BGR15/16 support dithering
4276
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
23 */
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
24
2232
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
25 #include <inttypes.h>
2476
a6c5a537f30a a few warning fixes (missing #include's)
pl
parents: 2469
diff changeset
26 #include <string.h>
3272
7e4399d1eb65 horizontal up/downscale linear & cubic
michael
parents: 3209
diff changeset
27 #include <math.h>
3344
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
28 #include <stdio.h>
2232
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
29 #include "../config.h"
4248
3cdb86beebce mangle for win32 in postproc
atmos4
parents: 3641
diff changeset
30 #include "../mangle.h"
3344
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
31 #ifdef HAVE_MALLOC_H
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
32 #include <malloc.h>
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
33 #endif
2316
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
34 #include "swscale.h"
3126
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
35 #include "../cpudetect.h"
4276
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
36 #include "../libvo/img_format.h"
2540
f2e70944d02a fixed a warning
michael
parents: 2534
diff changeset
37 #undef MOVNTQ
2680
e8a534509557 green line fix for dstw%8!=0
michael
parents: 2671
diff changeset
38 #undef PAVGB
2232
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
39
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
40 //#undef HAVE_MMX2
4281
michael
parents: 4276
diff changeset
41 //#define HAVE_3DNOW
2232
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
42 //#undef HAVE_MMX
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
43 //#undef ARCH_X86
2316
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
44 #define DITHER1XBPP
2232
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
45
4467
9512d6832b38 YUY2, BGR24, BGR32 input support (no mmx yet)
michael
parents: 4421
diff changeset
46 #define RET 0xC3 //near return opcode for X86
3344
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
47
4276
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
48 #ifdef MP_DEBUG
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
49 #define ASSERT(x) if(!(x)) { printf("ASSERT " #x " failed\n"); *((int*)0)=0; }
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
50 #else
3344
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
51 #define ASSERT(x) ;
4276
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
52 #endif
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
53
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
54 #ifdef M_PI
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
55 #define PI M_PI
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
56 #else
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
57 #define PI 3.14159265358979323846
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
58 #endif
3344
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
59
4419
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
60 //FIXME replace this with something faster
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
61 #define isPlanarYUV(x) ((x)==IMGFMT_YV12 || (x)==IMGFMT_I420 || (x)==IMGFMT_IYUV)
4467
9512d6832b38 YUY2, BGR24, BGR32 input support (no mmx yet)
michael
parents: 4421
diff changeset
62 #define isYUV(x) ((x)==IMGFMT_YUY2 || isPlanarYUV(x))
4419
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
63 #define isHalfChrV(x) ((x)==IMGFMT_YV12 || (x)==IMGFMT_I420 || (x)==IMGFMT_IYUV)
4467
9512d6832b38 YUY2, BGR24, BGR32 input support (no mmx yet)
michael
parents: 4421
diff changeset
64 #define isHalfChrH(x) ((x)==IMGFMT_YUY2 || (x)==IMGFMT_YV12 || (x)==IMGFMT_I420 || (x)==IMGFMT_IYUV)
9512d6832b38 YUY2, BGR24, BGR32 input support (no mmx yet)
michael
parents: 4421
diff changeset
65 #define isPacked(x) ((x)==IMGFMT_YUY2 || (x)==IMGFMT_BGR32|| (x)==IMGFMT_BGR24)
9512d6832b38 YUY2, BGR24, BGR32 input support (no mmx yet)
michael
parents: 4421
diff changeset
66
9512d6832b38 YUY2, BGR24, BGR32 input support (no mmx yet)
michael
parents: 4421
diff changeset
/*
 * Fixed-point RGB -> YUV conversion coefficients, scaled by 2^RGB2YUV_SHIFT.
 * Naming: the first letter is the input channel (B, G, R), the second letter
 * is the output component (Y, U, V).
 *
 * Every coefficient is rounded to the nearest integer. The cast to int
 * truncates toward zero, so the +0.5 rounding bias is only correct for
 * positive values; negative coefficients are therefore written as
 * -(int)(magnitude + 0.5). Adding 0.5 to the negative product instead would
 * leave each negative coefficient one step too close to zero.
 *
 * With RGB2YUV_SHIFT == 8 this gives BU+GU+RU == 0 and RV+GV+BV == 0, so the
 * chroma coefficients contribute no offset for gray (R == G == B) input.
 */
#define RGB2YUV_SHIFT 8
#define BY ( (int)(0.098*(1<<RGB2YUV_SHIFT)+0.5))
#define BV (-(int)(0.071*(1<<RGB2YUV_SHIFT)+0.5))
#define BU ( (int)(0.439*(1<<RGB2YUV_SHIFT)+0.5))
#define GY ( (int)(0.504*(1<<RGB2YUV_SHIFT)+0.5))
#define GV (-(int)(0.368*(1<<RGB2YUV_SHIFT)+0.5))
#define GU (-(int)(0.291*(1<<RGB2YUV_SHIFT)+0.5))
#define RY ( (int)(0.257*(1<<RGB2YUV_SHIFT)+0.5))
#define RV ( (int)(0.439*(1<<RGB2YUV_SHIFT)+0.5))
#define RU (-(int)(0.148*(1<<RGB2YUV_SHIFT)+0.5))
4419
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
77
3352
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
78 extern int verbose; // defined in mplayer.c
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
79 /*
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
80 NOTES
2216
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
81
2316
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
82 known BUGS with known cause (no bugreports please!, but patches are welcome :) )
3352
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
83 horizontal fast_bilinear MMX2 scaler reads 1-7 samples too many (might cause a sig11)
2316
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
84
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
85 Special versions: fast Y 1:1 scaling (no interpolation in y direction)
2216
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
86
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
87 TODO
2316
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
88 more intelligent misalignment avoidance for the horizontal scaler
2585
bd52b78f12dc c speedup
michael
parents: 2584
diff changeset
89 change the distance of the u & v buffer
3344
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
90 write special vertical cubic upscale version
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
91 Optimize C code (yv12 / minmax)
4401
8d00348d0d6b nearest neighbor / sdl emulation ;) scaling (-sws 4)
michael
parents: 4304
diff changeset
92 add support for packed pixel yuv input & output
8d00348d0d6b nearest neighbor / sdl emulation ;) scaling (-sws 4)
michael
parents: 4304
diff changeset
93 add support for Y8 input & output
8d00348d0d6b nearest neighbor / sdl emulation ;) scaling (-sws 4)
michael
parents: 4304
diff changeset
94 add BGR4 output support
4467
9512d6832b38 YUY2, BGR24, BGR32 input support (no mmx yet)
michael
parents: 4421
diff changeset
95 write special BGR->BGR scaler
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
96 */
2216
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
97
2316
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
98 #define ABS(a) ((a) > 0 ? (a) : (-(a)))
2469
03abc2743ed6 downscale
michael
parents: 2326
diff changeset
99 #define MIN(a,b) ((a) > (b) ? (b) : (a))
03abc2743ed6 downscale
michael
parents: 2326
diff changeset
100 #define MAX(a,b) ((a) < (b) ? (b) : (a))
2316
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
101
3126
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
102 #ifdef ARCH_X86
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
103 #define CAN_COMPILE_X86_ASM
2316
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
104 #endif
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
105
3126
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
106 #ifdef CAN_COMPILE_X86_ASM
2316
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
107 static uint64_t __attribute__((aligned(8))) yCoeff= 0x2568256825682568LL;
2503
d21d8d5f2e23 yuv2rgb bugfix
michael
parents: 2476
diff changeset
108 static uint64_t __attribute__((aligned(8))) vrCoeff= 0x3343334333433343LL;
d21d8d5f2e23 yuv2rgb bugfix
michael
parents: 2476
diff changeset
109 static uint64_t __attribute__((aligned(8))) ubCoeff= 0x40cf40cf40cf40cfLL;
d21d8d5f2e23 yuv2rgb bugfix
michael
parents: 2476
diff changeset
110 static uint64_t __attribute__((aligned(8))) vgCoeff= 0xE5E2E5E2E5E2E5E2LL;
d21d8d5f2e23 yuv2rgb bugfix
michael
parents: 2476
diff changeset
111 static uint64_t __attribute__((aligned(8))) ugCoeff= 0xF36EF36EF36EF36ELL;
2669
476b9b3b91be faster bgr15/16
michael
parents: 2638
diff changeset
112 static uint64_t __attribute__((aligned(8))) bF8= 0xF8F8F8F8F8F8F8F8LL;
476b9b3b91be faster bgr15/16
michael
parents: 2638
diff changeset
113 static uint64_t __attribute__((aligned(8))) bFC= 0xFCFCFCFCFCFCFCFCLL;
2316
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
114 static uint64_t __attribute__((aligned(8))) w400= 0x0400040004000400LL;
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
115 static uint64_t __attribute__((aligned(8))) w80= 0x0080008000800080LL;
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
116 static uint64_t __attribute__((aligned(8))) w10= 0x0010001000100010LL;
3272
7e4399d1eb65 horizontal up/downscale linear & cubic
michael
parents: 3209
diff changeset
117 static uint64_t __attribute__((aligned(8))) w02= 0x0002000200020002LL;
2316
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
118 static uint64_t __attribute__((aligned(8))) bm00001111=0x00000000FFFFFFFFLL;
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
119 static uint64_t __attribute__((aligned(8))) bm00000111=0x0000000000FFFFFFLL;
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
120 static uint64_t __attribute__((aligned(8))) bm11111000=0xFFFFFFFFFF000000LL;
2232
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
121
2750
9ef09e232505 gcc does optimize writes to non volatile variables away if it didnt know that they were read in between
michael
parents: 2748
diff changeset
122 static volatile uint64_t __attribute__((aligned(8))) b5Dither;
9ef09e232505 gcc does optimize writes to non volatile variables away if it didnt know that they were read in between
michael
parents: 2748
diff changeset
123 static volatile uint64_t __attribute__((aligned(8))) g5Dither;
9ef09e232505 gcc does optimize writes to non volatile variables away if it didnt know that they were read in between
michael
parents: 2748
diff changeset
124 static volatile uint64_t __attribute__((aligned(8))) g6Dither;
9ef09e232505 gcc does optimize writes to non volatile variables away if it didnt know that they were read in between
michael
parents: 2748
diff changeset
125 static volatile uint64_t __attribute__((aligned(8))) r5Dither;
2748
01dbf100b4f8 better dithering
michael
parents: 2730
diff changeset
126
01dbf100b4f8 better dithering
michael
parents: 2730
diff changeset
127 static uint64_t __attribute__((aligned(8))) dither4[2]={
01dbf100b4f8 better dithering
michael
parents: 2730
diff changeset
128 0x0103010301030103LL,
01dbf100b4f8 better dithering
michael
parents: 2730
diff changeset
129 0x0200020002000200LL,};
01dbf100b4f8 better dithering
michael
parents: 2730
diff changeset
130
01dbf100b4f8 better dithering
michael
parents: 2730
diff changeset
131 static uint64_t __attribute__((aligned(8))) dither8[2]={
01dbf100b4f8 better dithering
michael
parents: 2730
diff changeset
132 0x0602060206020602LL,
01dbf100b4f8 better dithering
michael
parents: 2730
diff changeset
133 0x0004000400040004LL,};
2232
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
134
2316
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
135 static uint64_t __attribute__((aligned(8))) b16Mask= 0x001F001F001F001FLL;
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
136 static uint64_t __attribute__((aligned(8))) g16Mask= 0x07E007E007E007E0LL;
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
137 static uint64_t __attribute__((aligned(8))) r16Mask= 0xF800F800F800F800LL;
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
138 static uint64_t __attribute__((aligned(8))) b15Mask= 0x001F001F001F001FLL;
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
139 static uint64_t __attribute__((aligned(8))) g15Mask= 0x03E003E003E003E0LL;
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
140 static uint64_t __attribute__((aligned(8))) r15Mask= 0x7C007C007C007C00LL;
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
141
2730
c483fc9bf0c4 faster bgr24 output
michael
parents: 2728
diff changeset
142 static uint64_t __attribute__((aligned(8))) M24A= 0x00FF0000FF0000FFLL;
c483fc9bf0c4 faster bgr24 output
michael
parents: 2728
diff changeset
143 static uint64_t __attribute__((aligned(8))) M24B= 0xFF0000FF0000FF00LL;
c483fc9bf0c4 faster bgr24 output
michael
parents: 2728
diff changeset
144 static uint64_t __attribute__((aligned(8))) M24C= 0x0000FF0000FF0000LL;
c483fc9bf0c4 faster bgr24 output
michael
parents: 2728
diff changeset
145
4276
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
146 // FIXME remove
2316
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
147 static uint64_t __attribute__((aligned(8))) asm_yalpha1;
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
148 static uint64_t __attribute__((aligned(8))) asm_uvalpha1;
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
149 #endif
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
150
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
151 // clipping helper table for C implementations:
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
152 static unsigned char clip_table[768];
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
153
2584
6d20d5d5829f 15/16bit in C speedup
michael
parents: 2576
diff changeset
154 static unsigned short clip_table16b[768];
6d20d5d5829f 15/16bit in C speedup
michael
parents: 2576
diff changeset
155 static unsigned short clip_table16g[768];
6d20d5d5829f 15/16bit in C speedup
michael
parents: 2576
diff changeset
156 static unsigned short clip_table16r[768];
6d20d5d5829f 15/16bit in C speedup
michael
parents: 2576
diff changeset
157 static unsigned short clip_table15b[768];
6d20d5d5829f 15/16bit in C speedup
michael
parents: 2576
diff changeset
158 static unsigned short clip_table15g[768];
6d20d5d5829f 15/16bit in C speedup
michael
parents: 2576
diff changeset
159 static unsigned short clip_table15r[768];
6d20d5d5829f 15/16bit in C speedup
michael
parents: 2576
diff changeset
160
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
161 // yuv->rgb conversion tables:
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
162 static int yuvtab_2568[256];
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
163 static int yuvtab_3343[256];
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
164 static int yuvtab_0c92[256];
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
165 static int yuvtab_1a1e[256];
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
166 static int yuvtab_40cf[256];
3344
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
167 // Needed for cubic scaler to catch overflows
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
168 static int clip_yuvtab_2568[768];
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
169 static int clip_yuvtab_3343[768];
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
170 static int clip_yuvtab_0c92[768];
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
171 static int clip_yuvtab_1a1e[768];
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
172 static int clip_yuvtab_40cf[768];
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
173
4276
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
174 //global sws_flags from the command line
4421
5dd78b21afbc -sws 2 is default now
michael
parents: 4419
diff changeset
175 int sws_flags=2;
3272
7e4399d1eb65 horizontal up/downscale linear & cubic
michael
parents: 3209
diff changeset
176
4294
21dbbbbd5479 a few filters (should be removed/merged when arpis videofilter stuff is finished)
michael
parents: 4290
diff changeset
177 //global srcFilter
21dbbbbd5479 a few filters (should be removed/merged when arpis videofilter stuff is finished)
michael
parents: 4290
diff changeset
178 SwsFilter src_filter= {NULL, NULL, NULL, NULL};
21dbbbbd5479 a few filters (should be removed/merged when arpis videofilter stuff is finished)
michael
parents: 4290
diff changeset
179
21dbbbbd5479 a few filters (should be removed/merged when arpis videofilter stuff is finished)
michael
parents: 4290
diff changeset
180 float sws_lum_gblur= 0.0;
21dbbbbd5479 a few filters (should be removed/merged when arpis videofilter stuff is finished)
michael
parents: 4290
diff changeset
181 float sws_chr_gblur= 0.0;
21dbbbbd5479 a few filters (should be removed/merged when arpis videofilter stuff is finished)
michael
parents: 4290
diff changeset
182 int sws_chr_vshift= 0;
21dbbbbd5479 a few filters (should be removed/merged when arpis videofilter stuff is finished)
michael
parents: 4290
diff changeset
183 int sws_chr_hshift= 0;
4297
29fef3982238 15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents: 4295
diff changeset
184 float sws_chr_sharpen= 0.0;
29fef3982238 15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents: 4295
diff changeset
185 float sws_lum_sharpen= 0.0;
4294
21dbbbbd5479 a few filters (should be removed/merged when arpis videofilter stuff is finished)
michael
parents: 4290
diff changeset
186
4276
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
187 /* cpuCaps combined from cpudetect and what is actually compiled in
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
188 (if there is no support for something compiled in it will not appear here) */
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
189 static CpuCaps cpuCaps;
2232
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
190
4276
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
191 void (*swScale)(SwsContext *context, uint8_t* src[], int srcStride[], int srcSliceY,
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
192 int srcSliceH, uint8_t* dst[], int dstStride[])=NULL;
2469
03abc2743ed6 downscale
michael
parents: 2326
diff changeset
193
4294
21dbbbbd5479 a few filters (should be removed/merged when arpis videofilter stuff is finished)
michael
parents: 4290
diff changeset
194 static SwsVector *getConvVec(SwsVector *a, SwsVector *b);
21dbbbbd5479 a few filters (should be removed/merged when arpis videofilter stuff is finished)
michael
parents: 4290
diff changeset
195
3126
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
196 #ifdef CAN_COMPILE_X86_ASM
2671
555cb027c7a7 fixed warnings
michael
parents: 2669
diff changeset
197 void in_asm_used_var_warning_killer()
555cb027c7a7 fixed warnings
michael
parents: 2669
diff changeset
198 {
3272
7e4399d1eb65 horizontal up/downscale linear & cubic
michael
parents: 3209
diff changeset
199 volatile int i= yCoeff+vrCoeff+ubCoeff+vgCoeff+ugCoeff+bF8+bFC+w400+w80+w10+
4276
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
200 bm00001111+bm00000111+bm11111000+b16Mask+g16Mask+r16Mask+b15Mask+g15Mask+r15Mask+asm_yalpha1+ asm_uvalpha1+
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
201 M24A+M24B+M24C+w02 + b5Dither+g5Dither+r5Dither+g6Dither+dither4[0]+dither8[0];
2671
555cb027c7a7 fixed warnings
michael
parents: 2669
diff changeset
202 if(i) i=0;
555cb027c7a7 fixed warnings
michael
parents: 2669
diff changeset
203 }
555cb027c7a7 fixed warnings
michael
parents: 2669
diff changeset
204 #endif
2316
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
205
3352
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
/*
 * Plain C vertical filter producing one line of planar output.
 *
 * Luma: for each of the dstW output positions, the samples at that position
 * in the lumFilterSize lines of lumSrc are multiplied by the matching
 * lumFilter coefficient and summed; the sum is shifted right by 19 bits and
 * clamped to 0..255 before being stored in dest.
 *
 * Chroma: only processed when uDest is not NULL (vDest is then written
 * unconditionally). dstW/2 positions are filtered the same way using
 * chrFilter/chrFilterSize; in every chrSrc line the U sample is read at
 * index i and the V sample at index i + 2048.
 *
 * FIXME: optimize (this was written quickly, not tuned).
 */
static inline void yuv2yuvXinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
				    int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
				    uint8_t *dest, uint8_t *uDest, uint8_t *vDest, int dstW)
{
	const int chrW = dstW >> 1;
	int x;
	int tap;

	/* luma line */
	for (x = 0; x < dstW; x++) {
		int acc = 0;

		for (tap = 0; tap < lumFilterSize; tap++)
			acc += lumSrc[tap][x] * lumFilter[tap];

		acc >>= 19;
		dest[x] = MIN(MAX(acc, 0), 255);
	}

	/* no chroma output requested */
	if (uDest == NULL)
		return;

	/* chroma lines: U and V share the same source line, V lives 2048 samples further */
	for (x = 0; x < chrW; x++) {
		int accU = 0;
		int accV = 0;

		for (tap = 0; tap < chrFilterSize; tap++) {
			const int16_t *line = chrSrc[tap];
			const int coeff = chrFilter[tap];

			accU += line[x] * coeff;
			accV += line[x + 2048] * coeff;
		}

		accU >>= 19;
		accV >>= 19;
		uDest[x] = MIN(MAX(accU, 0), 255);
		vDest[x] = MIN(MAX(accV, 0), 255);
	}
}
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
238
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
239 static inline void yuv2rgbXinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
240 int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
4276
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
241 uint8_t *dest, int dstW, int dstFormat)
3352
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
242 {
4276
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
243 if(dstFormat==IMGFMT_BGR32)
3352
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
244 {
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
245 int i;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
246 for(i=0; i<(dstW>>1); i++){
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
247 int j;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
248 int Y1=0;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
249 int Y2=0;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
250 int U=0;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
251 int V=0;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
252 int Cb, Cr, Cg;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
253 for(j=0; j<lumFilterSize; j++)
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
254 {
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
255 Y1 += lumSrc[j][2*i] * lumFilter[j];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
256 Y2 += lumSrc[j][2*i+1] * lumFilter[j];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
257 }
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
258 for(j=0; j<chrFilterSize; j++)
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
259 {
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
260 U += chrSrc[j][i] * chrFilter[j];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
261 V += chrSrc[j][i+2048] * chrFilter[j];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
262 }
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
263 Y1= clip_yuvtab_2568[ (Y1>>19) + 256 ];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
264 Y2= clip_yuvtab_2568[ (Y2>>19) + 256 ];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
265 U >>= 19;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
266 V >>= 19;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
267
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
268 Cb= clip_yuvtab_40cf[U+ 256];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
269 Cg= clip_yuvtab_1a1e[V+ 256] + yuvtab_0c92[U+ 256];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
270 Cr= clip_yuvtab_3343[V+ 256];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
271
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
272 dest[8*i+0]=clip_table[((Y1 + Cb) >>13)];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
273 dest[8*i+1]=clip_table[((Y1 + Cg) >>13)];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
274 dest[8*i+2]=clip_table[((Y1 + Cr) >>13)];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
275
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
276 dest[8*i+4]=clip_table[((Y2 + Cb) >>13)];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
277 dest[8*i+5]=clip_table[((Y2 + Cg) >>13)];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
278 dest[8*i+6]=clip_table[((Y2 + Cr) >>13)];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
279 }
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
280 }
4276
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
281 else if(dstFormat==IMGFMT_BGR24)
3352
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
282 {
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
283 int i;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
284 for(i=0; i<(dstW>>1); i++){
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
285 int j;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
286 int Y1=0;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
287 int Y2=0;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
288 int U=0;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
289 int V=0;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
290 int Cb, Cr, Cg;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
291 for(j=0; j<lumFilterSize; j++)
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
292 {
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
293 Y1 += lumSrc[j][2*i] * lumFilter[j];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
294 Y2 += lumSrc[j][2*i+1] * lumFilter[j];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
295 }
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
296 for(j=0; j<chrFilterSize; j++)
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
297 {
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
298 U += chrSrc[j][i] * chrFilter[j];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
299 V += chrSrc[j][i+2048] * chrFilter[j];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
300 }
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
301 Y1= clip_yuvtab_2568[ (Y1>>19) + 256 ];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
302 Y2= clip_yuvtab_2568[ (Y2>>19) + 256 ];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
303 U >>= 19;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
304 V >>= 19;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
305
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
306 Cb= clip_yuvtab_40cf[U+ 256];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
307 Cg= clip_yuvtab_1a1e[V+ 256] + yuvtab_0c92[U+ 256];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
308 Cr= clip_yuvtab_3343[V+ 256];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
309
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
310 dest[0]=clip_table[((Y1 + Cb) >>13)];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
311 dest[1]=clip_table[((Y1 + Cg) >>13)];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
312 dest[2]=clip_table[((Y1 + Cr) >>13)];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
313
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
314 dest[3]=clip_table[((Y2 + Cb) >>13)];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
315 dest[4]=clip_table[((Y2 + Cg) >>13)];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
316 dest[5]=clip_table[((Y2 + Cr) >>13)];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
317 dest+=6;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
318 }
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
319 }
4276
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
320 else if(dstFormat==IMGFMT_BGR16)
3352
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
321 {
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
322 int i;
4297
29fef3982238 15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents: 4295
diff changeset
323 #ifdef DITHER1XBPP
29fef3982238 15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents: 4295
diff changeset
324 static int ditherb1=1<<14;
29fef3982238 15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents: 4295
diff changeset
325 static int ditherg1=1<<13;
29fef3982238 15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents: 4295
diff changeset
326 static int ditherr1=2<<14;
29fef3982238 15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents: 4295
diff changeset
327 static int ditherb2=3<<14;
29fef3982238 15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents: 4295
diff changeset
328 static int ditherg2=3<<13;
29fef3982238 15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents: 4295
diff changeset
329 static int ditherr2=0<<14;
29fef3982238 15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents: 4295
diff changeset
330
29fef3982238 15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents: 4295
diff changeset
331 ditherb1 ^= (1^2)<<14;
29fef3982238 15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents: 4295
diff changeset
332 ditherg1 ^= (1^2)<<13;
29fef3982238 15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents: 4295
diff changeset
333 ditherr1 ^= (1^2)<<14;
29fef3982238 15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents: 4295
diff changeset
334 ditherb2 ^= (3^0)<<14;
29fef3982238 15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents: 4295
diff changeset
335 ditherg2 ^= (3^0)<<13;
29fef3982238 15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents: 4295
diff changeset
336 ditherr2 ^= (3^0)<<14;
29fef3982238 15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents: 4295
diff changeset
337 #else
29fef3982238 15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents: 4295
diff changeset
338 const int ditherb1=0;
29fef3982238 15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents: 4295
diff changeset
339 const int ditherg1=0;
29fef3982238 15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents: 4295
diff changeset
340 const int ditherr1=0;
29fef3982238 15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents: 4295
diff changeset
341 const int ditherb2=0;
29fef3982238 15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents: 4295
diff changeset
342 const int ditherg2=0;
29fef3982238 15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents: 4295
diff changeset
343 const int ditherr2=0;
29fef3982238 15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents: 4295
diff changeset
344 #endif
3352
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
345 for(i=0; i<(dstW>>1); i++){
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
346 int j;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
347 int Y1=0;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
348 int Y2=0;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
349 int U=0;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
350 int V=0;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
351 int Cb, Cr, Cg;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
352 for(j=0; j<lumFilterSize; j++)
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
353 {
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
354 Y1 += lumSrc[j][2*i] * lumFilter[j];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
355 Y2 += lumSrc[j][2*i+1] * lumFilter[j];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
356 }
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
357 for(j=0; j<chrFilterSize; j++)
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
358 {
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
359 U += chrSrc[j][i] * chrFilter[j];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
360 V += chrSrc[j][i+2048] * chrFilter[j];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
361 }
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
362 Y1= clip_yuvtab_2568[ (Y1>>19) + 256 ];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
363 Y2= clip_yuvtab_2568[ (Y2>>19) + 256 ];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
364 U >>= 19;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
365 V >>= 19;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
366
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
367 Cb= clip_yuvtab_40cf[U+ 256];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
368 Cg= clip_yuvtab_1a1e[V+ 256] + yuvtab_0c92[U+ 256];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
369 Cr= clip_yuvtab_3343[V+ 256];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
370
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
371 ((uint16_t*)dest)[2*i] =
4297
29fef3982238 15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents: 4295
diff changeset
372 clip_table16b[(Y1 + Cb + ditherb1) >>13] |
29fef3982238 15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents: 4295
diff changeset
373 clip_table16g[(Y1 + Cg + ditherg1) >>13] |
29fef3982238 15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents: 4295
diff changeset
374 clip_table16r[(Y1 + Cr + ditherr1) >>13];
3352
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
375
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
376 ((uint16_t*)dest)[2*i+1] =
4297
29fef3982238 15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents: 4295
diff changeset
377 clip_table16b[(Y2 + Cb + ditherb2) >>13] |
29fef3982238 15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents: 4295
diff changeset
378 clip_table16g[(Y2 + Cg + ditherg2) >>13] |
29fef3982238 15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents: 4295
diff changeset
379 clip_table16r[(Y2 + Cr + ditherr2) >>13];
3352
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
380 }
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
381 }
4276
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
382 else if(dstFormat==IMGFMT_BGR15)
3352
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
383 {
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
384 int i;
4297
29fef3982238 15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents: 4295
diff changeset
385 #ifdef DITHER1XBPP
29fef3982238 15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents: 4295
diff changeset
386 static int ditherb1=1<<14;
29fef3982238 15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents: 4295
diff changeset
387 static int ditherg1=1<<14;
29fef3982238 15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents: 4295
diff changeset
388 static int ditherr1=2<<14;
29fef3982238 15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents: 4295
diff changeset
389 static int ditherb2=3<<14;
29fef3982238 15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents: 4295
diff changeset
390 static int ditherg2=3<<14;
29fef3982238 15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents: 4295
diff changeset
391 static int ditherr2=0<<14;
29fef3982238 15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents: 4295
diff changeset
392
29fef3982238 15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents: 4295
diff changeset
393 ditherb1 ^= (1^2)<<14;
29fef3982238 15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents: 4295
diff changeset
394 ditherg1 ^= (1^2)<<14;
29fef3982238 15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents: 4295
diff changeset
395 ditherr1 ^= (1^2)<<14;
29fef3982238 15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents: 4295
diff changeset
396 ditherb2 ^= (3^0)<<14;
29fef3982238 15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents: 4295
diff changeset
397 ditherg2 ^= (3^0)<<14;
29fef3982238 15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents: 4295
diff changeset
398 ditherr2 ^= (3^0)<<14;
29fef3982238 15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents: 4295
diff changeset
399 #else
29fef3982238 15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents: 4295
diff changeset
400 const int ditherb1=0;
29fef3982238 15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents: 4295
diff changeset
401 const int ditherg1=0;
29fef3982238 15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents: 4295
diff changeset
402 const int ditherr1=0;
29fef3982238 15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents: 4295
diff changeset
403 const int ditherb2=0;
29fef3982238 15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents: 4295
diff changeset
404 const int ditherg2=0;
29fef3982238 15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents: 4295
diff changeset
405 const int ditherr2=0;
29fef3982238 15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents: 4295
diff changeset
406 #endif
3352
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
407 for(i=0; i<(dstW>>1); i++){
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
408 int j;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
409 int Y1=0;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
410 int Y2=0;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
411 int U=0;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
412 int V=0;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
413 int Cb, Cr, Cg;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
414 for(j=0; j<lumFilterSize; j++)
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
415 {
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
416 Y1 += lumSrc[j][2*i] * lumFilter[j];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
417 Y2 += lumSrc[j][2*i+1] * lumFilter[j];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
418 }
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
419 for(j=0; j<chrFilterSize; j++)
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
420 {
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
421 U += chrSrc[j][i] * chrFilter[j];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
422 V += chrSrc[j][i+2048] * chrFilter[j];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
423 }
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
424 Y1= clip_yuvtab_2568[ (Y1>>19) + 256 ];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
425 Y2= clip_yuvtab_2568[ (Y2>>19) + 256 ];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
426 U >>= 19;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
427 V >>= 19;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
428
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
429 Cb= clip_yuvtab_40cf[U+ 256];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
430 Cg= clip_yuvtab_1a1e[V+ 256] + yuvtab_0c92[U+ 256];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
431 Cr= clip_yuvtab_3343[V+ 256];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
432
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
433 ((uint16_t*)dest)[2*i] =
4297
29fef3982238 15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents: 4295
diff changeset
434 clip_table15b[(Y1 + Cb + ditherb1) >>13] |
29fef3982238 15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents: 4295
diff changeset
435 clip_table15g[(Y1 + Cg + ditherg1) >>13] |
29fef3982238 15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents: 4295
diff changeset
436 clip_table15r[(Y1 + Cr + ditherr1) >>13];
3352
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
437
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
438 ((uint16_t*)dest)[2*i+1] =
4297
29fef3982238 15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents: 4295
diff changeset
439 clip_table15b[(Y2 + Cb + ditherb2) >>13] |
29fef3982238 15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents: 4295
diff changeset
440 clip_table15g[(Y2 + Cg + ditherg2) >>13] |
29fef3982238 15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents: 4295
diff changeset
441 clip_table15r[(Y2 + Cr + ditherr2) >>13];
3352
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
442 }
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
443 }
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
444 }
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
445
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
446
3126
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
447 //Note: we have C, X86, MMX, MMX2 and 3DNOW versions; there is no 3DNOW+MMX2 one
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
448 //Plain C versions
3152
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
449 #if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT)
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
450 #define COMPILE_C
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
451 #endif
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
452
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
453 #ifdef CAN_COMPILE_X86_ASM
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
454
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
455 #if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
456 #define COMPILE_MMX
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
457 #endif
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
458
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
459 #if defined (HAVE_MMX2) || defined (RUNTIME_CPUDETECT)
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
460 #define COMPILE_MMX2
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
461 #endif
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
462
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
463 #if (defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
464 #define COMPILE_3DNOW
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
465 #endif
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
466 #endif //CAN_COMPILE_X86_ASM
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
467
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
468 #undef HAVE_MMX
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
469 #undef HAVE_MMX2
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
470 #undef HAVE_3DNOW
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
471
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
472 #ifdef COMPILE_C
3126
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
473 #undef HAVE_MMX
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
474 #undef HAVE_MMX2
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
475 #undef HAVE_3DNOW
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
476 #define RENAME(a) a ## _C
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
477 #include "swscale_template.c"
3152
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
478 #endif
2316
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
479
3126
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
480 #ifdef CAN_COMPILE_X86_ASM
2576
437ed06579d8 c optimizations
michael
parents: 2575
diff changeset
481
3126
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
482 //X86 versions
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
483 /*
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
484 #undef RENAME
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
485 #undef HAVE_MMX
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
486 #undef HAVE_MMX2
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
487 #undef HAVE_3DNOW
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
488 #define ARCH_X86
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
489 #define RENAME(a) a ## _X86
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
490 #include "swscale_template.c"
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
491 */
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
492 //MMX versions
3152
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
493 #ifdef COMPILE_MMX
3126
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
494 #undef RENAME
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
495 #define HAVE_MMX
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
496 #undef HAVE_MMX2
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
497 #undef HAVE_3DNOW
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
498 #define RENAME(a) a ## _MMX
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
499 #include "swscale_template.c"
3152
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
500 #endif
2316
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
501
3126
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
502 //MMX2 versions
3152
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
503 #ifdef COMPILE_MMX2
3126
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
504 #undef RENAME
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
505 #define HAVE_MMX
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
506 #define HAVE_MMX2
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
507 #undef HAVE_3DNOW
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
508 #define RENAME(a) a ## _MMX2
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
509 #include "swscale_template.c"
3152
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
510 #endif
2469
03abc2743ed6 downscale
michael
parents: 2326
diff changeset
511
3126
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
512 //3DNOW versions
3152
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
513 #ifdef COMPILE_3DNOW
3126
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
514 #undef RENAME
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
515 #define HAVE_MMX
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
516 #undef HAVE_MMX2
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
517 #define HAVE_3DNOW
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
518 #define RENAME(a) a ## _3DNow
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
519 #include "swscale_template.c"
3152
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
520 #endif
2469
03abc2743ed6 downscale
michael
parents: 2326
diff changeset
521
3126
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
522 #endif //CAN_COMPILE_X86_ASM
2469
03abc2743ed6 downscale
michael
parents: 2326
diff changeset
523
3126
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
524 // minor note: the HAVE_xyz macros are left in an inconsistent state after the per-CPU template includes above, so don't rely on them below this line
2316
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
525
2232
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
526
4419
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
527 // old global scaler, dont use for new code
4276
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
528 // will use sws_flags from the command line
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
529 void SwScale_YV12slice(unsigned char* src[], int srcStride[], int srcSliceY ,
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
530 int srcSliceH, uint8_t* dst[], int dstStride, int dstbpp,
3209
0b172eb639f1 swscaler cleanup
michael
parents: 3152
diff changeset
531 int srcW, int srcH, int dstW, int dstH){
2216
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
532
4276
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
533 static SwsContext *context=NULL;
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
534 int dstFormat;
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
535 int dstStride3[3]= {dstStride, dstStride>>1, dstStride>>1};
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
536
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
537 switch(dstbpp)
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
538 {
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
539 case 8 : dstFormat= IMGFMT_Y8; break;
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
540 case 12: dstFormat= IMGFMT_YV12; break;
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
541 case 15: dstFormat= IMGFMT_BGR15; break;
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
542 case 16: dstFormat= IMGFMT_BGR16; break;
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
543 case 24: dstFormat= IMGFMT_BGR24; break;
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
544 case 32: dstFormat= IMGFMT_BGR32; break;
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
545 default: return;
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
546 }
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
547
4419
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
548 if(!context) context=getSwsContextFromCmdLine(srcW, srcH, IMGFMT_YV12, dstW, dstH, dstFormat);
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
549
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
550 swScale(context, src, srcStride, srcSliceY, srcSliceH, dst, dstStride3);
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
551 }
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
552
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
553 // will use sws_flags & src_filter (from cmd line)
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
554 SwsContext *getSwsContextFromCmdLine(int srcW, int srcH, int srcFormat, int dstW, int dstH, int dstFormat)
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
555 {
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
556 int flags=0;
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
557 static int firstTime=1;
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
558
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
559 #ifdef ARCH_X86
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
560 if(gCpuCaps.hasMMX)
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
561 asm volatile("emms\n\t"::: "memory"); //FIXME this shouldnt be required but it IS (even for non mmx versions)
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
562 #endif
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
563 if(firstTime)
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
564 {
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
565 firstTime=0;
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
566 flags= SWS_PRINT_INFO;
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
567 }
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
568 else if(verbose>1) flags= SWS_PRINT_INFO;
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
569
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
570 if(src_filter.lumH) freeVec(src_filter.lumH);
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
571 if(src_filter.lumV) freeVec(src_filter.lumV);
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
572 if(src_filter.chrH) freeVec(src_filter.chrH);
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
573 if(src_filter.chrV) freeVec(src_filter.chrV);
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
574
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
575 if(sws_lum_gblur!=0.0){
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
576 src_filter.lumH= getGaussianVec(sws_lum_gblur, 3.0);
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
577 src_filter.lumV= getGaussianVec(sws_lum_gblur, 3.0);
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
578 }else{
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
579 src_filter.lumH= getIdentityVec();
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
580 src_filter.lumV= getIdentityVec();
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
581 }
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
582
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
583 if(sws_chr_gblur!=0.0){
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
584 src_filter.chrH= getGaussianVec(sws_chr_gblur, 3.0);
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
585 src_filter.chrV= getGaussianVec(sws_chr_gblur, 3.0);
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
586 }else{
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
587 src_filter.chrH= getIdentityVec();
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
588 src_filter.chrV= getIdentityVec();
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
589 }
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
590
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
591 if(sws_chr_sharpen!=0.0){
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
592 SwsVector *g= getConstVec(-1.0, 3);
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
593 SwsVector *id= getConstVec(10.0/sws_chr_sharpen, 1);
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
594 g->coeff[1]=2.0;
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
595 addVec(id, g);
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
596 convVec(src_filter.chrH, id);
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
597 convVec(src_filter.chrV, id);
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
598 freeVec(g);
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
599 freeVec(id);
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
600 }
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
601
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
602 if(sws_lum_sharpen!=0.0){
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
603 SwsVector *g= getConstVec(-1.0, 3);
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
604 SwsVector *id= getConstVec(10.0/sws_lum_sharpen, 1);
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
605 g->coeff[1]=2.0;
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
606 addVec(id, g);
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
607 convVec(src_filter.lumH, id);
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
608 convVec(src_filter.lumV, id);
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
609 freeVec(g);
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
610 freeVec(id);
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
611 }
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
612
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
613 if(sws_chr_hshift)
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
614 shiftVec(src_filter.chrH, sws_chr_hshift);
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
615
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
616 if(sws_chr_vshift)
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
617 shiftVec(src_filter.chrV, sws_chr_vshift);
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
618
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
619 normalizeVec(src_filter.chrH, 1.0);
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
620 normalizeVec(src_filter.chrV, 1.0);
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
621 normalizeVec(src_filter.lumH, 1.0);
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
622 normalizeVec(src_filter.lumV, 1.0);
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
623
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
624 if(verbose > 1) printVec(src_filter.chrH);
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
625 if(verbose > 1) printVec(src_filter.lumH);
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
626
4276
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
627 switch(sws_flags)
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
628 {
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
629 case 0: flags|= SWS_FAST_BILINEAR; break;
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
630 case 1: flags|= SWS_BILINEAR; break;
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
631 case 2: flags|= SWS_BICUBIC; break;
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
632 case 3: flags|= SWS_X; break;
4401
8d00348d0d6b nearest neighbor / sdl emulation ;) scaling (-sws 4)
michael
parents: 4304
diff changeset
633 case 4: flags|= SWS_POINT; break;
4402
67abbf501b02 area averageing scaling support (-sws 5) (is identical to bilinear for upscale)
michael
parents: 4401
diff changeset
634 case 5: flags|= SWS_AREA; break;
4276
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
635 default:flags|= SWS_BILINEAR; break;
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
636 }
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
637
4419
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
638 return getSwsContext(srcW, srcH, srcFormat, dstW, dstH, dstFormat, flags, &src_filter, NULL);
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
639 }
4276
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
640
2216
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
641
4290
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
642 static inline void initFilter(int16_t **outFilter, int16_t **filterPos, int *outFilterSize, int xInc,
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
643 int srcW, int dstW, int filterAlign, int one, int flags,
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
644 SwsVector *srcFilter, SwsVector *dstFilter)
4276
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
645 {
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
646 int i;
4290
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
647 int filterSize;
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
648 int filter2Size;
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
649 int minFilterSize;
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
650 double *filter=NULL;
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
651 double *filter2=NULL;
4276
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
652 #ifdef ARCH_X86
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
653 if(gCpuCaps.hasMMX)
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
654 asm volatile("emms\n\t"::: "memory"); //FIXME this shouldnt be required but it IS (even for non mmx versions)
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
655 #endif
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
656
4419
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
657 *filterPos = (int16_t*)memalign(8, (dstW+1)*sizeof(int16_t));
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
658 (*filterPos)[dstW]=0; // the MMX scaler will read over the end
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
659
4276
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
660 if(ABS(xInc - 0x10000) <10) // unscaled
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
661 {
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
662 int i;
4290
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
663 filterSize= 1;
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
664 filter= (double*)memalign(8, dstW*sizeof(double)*filterSize);
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
665 for(i=0; i<dstW*filterSize; i++) filter[i]=0;
4276
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
666
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
667 for(i=0; i<dstW; i++)
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
668 {
4290
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
669 filter[i*filterSize]=1;
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
670 (*filterPos)[i]=i;
4276
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
671 }
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
672
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
673 }
4401
8d00348d0d6b nearest neighbor / sdl emulation ;) scaling (-sws 4)
michael
parents: 4304
diff changeset
674 else if(flags&SWS_POINT) // lame looking point sampling mode
8d00348d0d6b nearest neighbor / sdl emulation ;) scaling (-sws 4)
michael
parents: 4304
diff changeset
675 {
8d00348d0d6b nearest neighbor / sdl emulation ;) scaling (-sws 4)
michael
parents: 4304
diff changeset
676 int i;
8d00348d0d6b nearest neighbor / sdl emulation ;) scaling (-sws 4)
michael
parents: 4304
diff changeset
677 int xDstInSrc;
8d00348d0d6b nearest neighbor / sdl emulation ;) scaling (-sws 4)
michael
parents: 4304
diff changeset
678 filterSize= 1;
8d00348d0d6b nearest neighbor / sdl emulation ;) scaling (-sws 4)
michael
parents: 4304
diff changeset
679 filter= (double*)memalign(8, dstW*sizeof(double)*filterSize);
8d00348d0d6b nearest neighbor / sdl emulation ;) scaling (-sws 4)
michael
parents: 4304
diff changeset
680
8d00348d0d6b nearest neighbor / sdl emulation ;) scaling (-sws 4)
michael
parents: 4304
diff changeset
681 xDstInSrc= xInc/2 - 0x8000;
8d00348d0d6b nearest neighbor / sdl emulation ;) scaling (-sws 4)
michael
parents: 4304
diff changeset
682 for(i=0; i<dstW; i++)
8d00348d0d6b nearest neighbor / sdl emulation ;) scaling (-sws 4)
michael
parents: 4304
diff changeset
683 {
8d00348d0d6b nearest neighbor / sdl emulation ;) scaling (-sws 4)
michael
parents: 4304
diff changeset
684 int xx= (xDstInSrc>>16) - (filterSize>>1) + 1;
8d00348d0d6b nearest neighbor / sdl emulation ;) scaling (-sws 4)
michael
parents: 4304
diff changeset
685
8d00348d0d6b nearest neighbor / sdl emulation ;) scaling (-sws 4)
michael
parents: 4304
diff changeset
686 (*filterPos)[i]= xx;
8d00348d0d6b nearest neighbor / sdl emulation ;) scaling (-sws 4)
michael
parents: 4304
diff changeset
687 filter[i]= 1.0;
8d00348d0d6b nearest neighbor / sdl emulation ;) scaling (-sws 4)
michael
parents: 4304
diff changeset
688 xDstInSrc+= xInc;
8d00348d0d6b nearest neighbor / sdl emulation ;) scaling (-sws 4)
michael
parents: 4304
diff changeset
689 }
8d00348d0d6b nearest neighbor / sdl emulation ;) scaling (-sws 4)
michael
parents: 4304
diff changeset
690 }
4276
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
691 else if(xInc <= (1<<16) || (flags&SWS_FAST_BILINEAR)) // upscale
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
692 {
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
693 int i;
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
694 int xDstInSrc;
4290
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
695 if (flags&SWS_BICUBIC) filterSize= 4;
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
696 else if(flags&SWS_X ) filterSize= 4;
4402
67abbf501b02 area averageing scaling support (-sws 5) (is identical to bilinear for upscale)
michael
parents: 4401
diff changeset
697 else filterSize= 2; // SWS_BILINEAR / SWS_AREA
4276
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
698 // printf("%d %d %d\n", filterSize, srcW, dstW);
4290
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
699 filter= (double*)memalign(8, dstW*sizeof(double)*filterSize);
4276
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
700
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
701 xDstInSrc= xInc/2 - 0x8000;
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
702 for(i=0; i<dstW; i++)
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
703 {
4290
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
704 int xx= (xDstInSrc>>16) - (filterSize>>1) + 1;
4276
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
705 int j;
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
706
4290
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
707 (*filterPos)[i]= xx;
4276
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
708 if((flags & SWS_BICUBIC) || (flags & SWS_X))
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
709 {
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
710 double d= ABS(((xx+1)<<16) - xDstInSrc)/(double)(1<<16);
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
711 double y1,y2,y3,y4;
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
712 double A= -0.6;
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
713 if(flags & SWS_BICUBIC){
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
714 // Equation is from VirtualDub
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
715 y1 = ( + A*d - 2.0*A*d*d + A*d*d*d);
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
716 y2 = (+ 1.0 - (A+3.0)*d*d + (A+2.0)*d*d*d);
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
717 y3 = ( - A*d + (2.0*A+3.0)*d*d - (A+2.0)*d*d*d);
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
718 y4 = ( + A*d*d - A*d*d*d);
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
719 }else{
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
720 // cubic interpolation (derived it myself)
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
721 y1 = ( -2.0*d + 3.0*d*d - 1.0*d*d*d)/6.0;
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
722 y2 = (6.0 -3.0*d - 6.0*d*d + 3.0*d*d*d)/6.0;
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
723 y3 = ( +6.0*d + 3.0*d*d - 3.0*d*d*d)/6.0;
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
724 y4 = ( -1.0*d + 1.0*d*d*d)/6.0;
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
725 }
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
726
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
727 // printf("%d %d %d \n", coeff, (int)d, xDstInSrc);
4290
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
728 filter[i*filterSize + 0]= y1;
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
729 filter[i*filterSize + 1]= y2;
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
730 filter[i*filterSize + 2]= y3;
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
731 filter[i*filterSize + 3]= y4;
4276
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
732 // printf("%1.3f %1.3f %1.3f %1.3f %1.3f\n",d , y1, y2, y3, y4);
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
733 }
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
734 else
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
735 {
4402
67abbf501b02 area averageing scaling support (-sws 5) (is identical to bilinear for upscale)
michael
parents: 4401
diff changeset
736 //Bilinear upscale / linear interpolate / Area averaging
4290
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
737 for(j=0; j<filterSize; j++)
4276
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
738 {
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
739 double d= ABS((xx<<16) - xDstInSrc)/(double)(1<<16);
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
740 double coeff= 1.0 - d;
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
741 if(coeff<0) coeff=0;
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
742 // printf("%d %d %d \n", coeff, (int)d, xDstInSrc);
4290
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
743 filter[i*filterSize + j]= coeff;
4276
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
744 xx++;
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
745 }
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
746 }
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
747 xDstInSrc+= xInc;
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
748 }
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
749 }
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
750 else // downscale
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
751 {
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
752 int xDstInSrc;
4402
67abbf501b02 area averageing scaling support (-sws 5) (is identical to bilinear for upscale)
michael
parents: 4401
diff changeset
753 if(flags&SWS_BICUBIC) filterSize= (int)ceil(1 + 4.0*srcW / (double)dstW);
67abbf501b02 area averageing scaling support (-sws 5) (is identical to bilinear for upscale)
michael
parents: 4401
diff changeset
754 else if(flags&SWS_X) filterSize= (int)ceil(1 + 4.0*srcW / (double)dstW);
67abbf501b02 area averageing scaling support (-sws 5) (is identical to bilinear for upscale)
michael
parents: 4401
diff changeset
755 else if(flags&SWS_AREA) filterSize= (int)ceil(1 + 1.0*srcW / (double)dstW);
67abbf501b02 area averageing scaling support (-sws 5) (is identical to bilinear for upscale)
michael
parents: 4401
diff changeset
756 else /* BILINEAR */ filterSize= (int)ceil(1 + 2.0*srcW / (double)dstW);
4276
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
757 // printf("%d %d %d\n", *filterSize, srcW, dstW);
4290
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
758 filter= (double*)memalign(8, dstW*sizeof(double)*filterSize);
4276
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
759
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
760 xDstInSrc= xInc/2 - 0x8000;
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
761 for(i=0; i<dstW; i++)
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
762 {
4290
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
763 int xx= (int)((double)xDstInSrc/(double)(1<<16) - (filterSize-1)*0.5 + 0.5);
4276
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
764 int j;
4290
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
765 (*filterPos)[i]= xx;
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
766 for(j=0; j<filterSize; j++)
4276
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
767 {
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
768 double d= ABS((xx<<16) - xDstInSrc)/(double)xInc;
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
769 double coeff;
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
770 if((flags & SWS_BICUBIC) || (flags & SWS_X))
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
771 {
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
772 double A= -0.75;
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
773 // d*=2;
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
774 // Equation is from VirtualDub
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
775 if(d<1.0)
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
776 coeff = (1.0 - (A+3.0)*d*d + (A+2.0)*d*d*d);
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
777 else if(d<2.0)
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
778 coeff = (-4.0*A + 8.0*A*d - 5.0*A*d*d + A*d*d*d);
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
779 else
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
780 coeff=0.0;
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
781 }
4402
67abbf501b02 area averageing scaling support (-sws 5) (is identical to bilinear for upscale)
michael
parents: 4401
diff changeset
782 else if(flags & SWS_AREA)
4276
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
783 {
4402
67abbf501b02 area averageing scaling support (-sws 5) (is identical to bilinear for upscale)
michael
parents: 4401
diff changeset
784 double srcPixelSize= (1<<16)/(double)xInc;
67abbf501b02 area averageing scaling support (-sws 5) (is identical to bilinear for upscale)
michael
parents: 4401
diff changeset
785 if(d + srcPixelSize/2 < 0.5) coeff= 1.0;
67abbf501b02 area averageing scaling support (-sws 5) (is identical to bilinear for upscale)
michael
parents: 4401
diff changeset
786 else if(d - srcPixelSize/2 < 0.5) coeff= (0.5-d)/srcPixelSize + 0.5;
67abbf501b02 area averageing scaling support (-sws 5) (is identical to bilinear for upscale)
michael
parents: 4401
diff changeset
787 else coeff=0.0;
67abbf501b02 area averageing scaling support (-sws 5) (is identical to bilinear for upscale)
michael
parents: 4401
diff changeset
788 }
4276
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
789 else
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
790 {
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
791 coeff= 1.0 - d;
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
792 if(coeff<0) coeff=0;
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
793 }
4402
67abbf501b02 area averageing scaling support (-sws 5) (is identical to bilinear for upscale)
michael
parents: 4401
diff changeset
794 // printf("%1.3f %2.3f %d \n", coeff, d, xDstInSrc);
4290
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
795 filter[i*filterSize + j]= coeff;
4276
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
796 xx++;
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
797 }
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
798 xDstInSrc+= xInc;
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
799 }
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
800 }
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
801
4290
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
802 /* apply src & dst Filter to filter -> filter2
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
803 free(filter);
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
804 */
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
805 filter2Size= filterSize;
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
806 if(srcFilter) filter2Size+= srcFilter->length - 1;
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
807 if(dstFilter) filter2Size+= dstFilter->length - 1;
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
808 filter2= (double*)memalign(8, filter2Size*dstW*sizeof(double));
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
809
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
810 for(i=0; i<dstW; i++)
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
811 {
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
812 int j;
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
813 SwsVector scaleFilter;
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
814 SwsVector *outVec;
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
815
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
816 scaleFilter.coeff= filter + i*filterSize;
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
817 scaleFilter.length= filterSize;
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
818
4294
21dbbbbd5479 a few filters (should be removed/merged when arpis videofilter stuff is finished)
michael
parents: 4290
diff changeset
819 if(srcFilter) outVec= getConvVec(srcFilter, &scaleFilter);
4290
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
820 else outVec= &scaleFilter;
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
821
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
822 ASSERT(outVec->length == filter2Size)
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
823 //FIXME dstFilter
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
824
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
825 for(j=0; j<outVec->length; j++)
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
826 {
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
827 filter2[i*filter2Size + j]= outVec->coeff[j];
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
828 }
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
829
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
830 (*filterPos)[i]+= (filterSize-1)/2 - (filter2Size-1)/2;
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
831
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
832 if(outVec != &scaleFilter) freeVec(outVec);
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
833 }
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
834 free(filter); filter=NULL;
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
835
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
836 /* try to reduce the filter-size (step1 find size and shift left) */
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
837 // Assume it's nearly normalized (*0.5 or *2.0 is ok but * 0.001 is not)
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
838 minFilterSize= 0;
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
839 for(i=dstW-1; i>=0; i--)
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
840 {
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
841 int min= filter2Size;
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
842 int j;
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
843 double cutOff=0.0;
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
844
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
845 /* get rid of near-zero elements on the left by shifting left */
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
846 for(j=0; j<filter2Size; j++)
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
847 {
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
848 int k;
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
849 cutOff += ABS(filter2[i*filter2Size]);
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
850
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
851 if(cutOff > SWS_MAX_REDUCE_CUTOFF) break;
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
852
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
853 /* preserve monotonicity because the core can't handle the filter otherwise */
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
854 if(i<dstW-1 && (*filterPos)[i] >= (*filterPos)[i+1]) break;
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
855
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
856 // Move filter coeffs left
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
857 for(k=1; k<filter2Size; k++)
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
858 filter2[i*filter2Size + k - 1]= filter2[i*filter2Size + k];
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
859 filter2[i*filter2Size + k - 1]= 0.0;
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
860 (*filterPos)[i]++;
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
861 }
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
862
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
863 cutOff=0.0;
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
864 /* count near zeros on the right */
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
865 for(j=filter2Size-1; j>0; j--)
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
866 {
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
867 cutOff += ABS(filter2[i*filter2Size + j]);
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
868
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
869 if(cutOff > SWS_MAX_REDUCE_CUTOFF) break;
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
870 min--;
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
871 }
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
872
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
873 if(min>minFilterSize) minFilterSize= min;
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
874 }
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
875
4419
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
876 filterSize= (minFilterSize +(filterAlign-1)) & (~(filterAlign-1));
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
877 filter= (double*)memalign(8, filterSize*dstW*sizeof(double));
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
878 *outFilterSize= filterSize;
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
879
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
880 if((flags&SWS_PRINT_INFO) && verbose)
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
881 printf("SwScaler: reducing / aligning filtersize %d -> %d\n", filter2Size, filterSize);
4290
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
882 /* try to reduce the filter-size (step2 reduce it) */
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
883 for(i=0; i<dstW; i++)
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
884 {
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
885 int j;
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
886
4419
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
887 for(j=0; j<filterSize; j++)
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
888 {
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
889 if(j>=filter2Size) filter[i*filterSize + j]= 0.0;
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
890 else filter[i*filterSize + j]= filter2[i*filter2Size + j];
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
891 }
4290
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
892 }
4419
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
893 free(filter2); filter2=NULL;
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
894
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
895 ASSERT(filterSize > 0)
4290
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
896
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
897 //FIXME try to align filterpos if possible
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
898
4276
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
899 //fix borders
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
900 for(i=0; i<dstW; i++)
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
901 {
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
902 int j;
4290
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
903 if((*filterPos)[i] < 0)
4276
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
904 {
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
905 // Move filter coeffs left to compensate for filterPos
4419
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
906 for(j=1; j<filterSize; j++)
4276
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
907 {
4290
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
908 int left= MAX(j + (*filterPos)[i], 0);
4419
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
909 filter[i*filterSize + left] += filter[i*filterSize + j];
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
910 filter[i*filterSize + j]=0;
4276
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
911 }
4290
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
912 (*filterPos)[i]= 0;
4276
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
913 }
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
914
4419
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
915 if((*filterPos)[i] + filterSize > srcW)
4276
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
916 {
4419
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
917 int shift= (*filterPos)[i] + filterSize - srcW;
4276
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
918 // Move filter coeffs right to compensate for filterPos
4419
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
919 for(j=filterSize-2; j>=0; j--)
4276
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
920 {
4419
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
921 int right= MIN(j + shift, filterSize-1);
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
922 filter[i*filterSize +right] += filter[i*filterSize +j];
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
923 filter[i*filterSize +j]=0;
4276
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
924 }
4419
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
925 (*filterPos)[i]= srcW - filterSize;
4276
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
926 }
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
927 }
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
928
4419
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
929 // Note the +1 is for the MMXscaler which reads over the end
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
930 *outFilter= (int16_t*)memalign(8, *outFilterSize*(dstW+1)*sizeof(int16_t));
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
931 memset(*outFilter, 0, *outFilterSize*(dstW+1)*sizeof(int16_t));
4290
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
932
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
933 /* Normalize & Store in outFilter */
4276
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
934 for(i=0; i<dstW; i++)
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
935 {
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
936 int j;
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
937 double sum=0;
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
938 double scale= one;
4419
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
939 for(j=0; j<filterSize; j++)
4276
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
940 {
4419
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
941 sum+= filter[i*filterSize + j];
4276
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
942 }
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
943 scale/= sum;
4419
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
944 for(j=0; j<filterSize; j++)
4276
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
945 {
4419
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
946 (*outFilter)[i*(*outFilterSize) + j]= (int)(filter[i*filterSize + j]*scale);
4276
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
947 }
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
948 }
4290
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
949
4419
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
950 free(filter);
4276
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
951 }
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
952
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
953 #ifdef ARCH_X86
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
954 static void initMMX2HScaler(int dstW, int xInc, uint8_t *funnyCode)
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
955 {
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
956 uint8_t *fragment;
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
957 int imm8OfPShufW1;
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
958 int imm8OfPShufW2;
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
959 int fragmentLength;
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
960
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
961 int xpos, i;
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
962
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
963 // create an optimized horizontal scaling routine
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
964
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
965 //code fragment
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
966
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
967 asm volatile(
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
968 "jmp 9f \n\t"
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
969 // Begin
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
970 "0: \n\t"
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
971 "movq (%%esi), %%mm0 \n\t" //FIXME Alignment
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
972 "movq %%mm0, %%mm1 \n\t"
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
973 "psrlq $8, %%mm0 \n\t"
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
974 "punpcklbw %%mm7, %%mm1 \n\t"
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
975 "movq %%mm2, %%mm3 \n\t"
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
976 "punpcklbw %%mm7, %%mm0 \n\t"
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
977 "addw %%bx, %%cx \n\t" //2*xalpha += (4*lumXInc)&0xFFFF
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
978 "pshufw $0xFF, %%mm1, %%mm1 \n\t"
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
979 "1: \n\t"
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
980 "adcl %%edx, %%esi \n\t" //xx+= (4*lumXInc)>>16 + carry
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
981 "pshufw $0xFF, %%mm0, %%mm0 \n\t"
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
982 "2: \n\t"
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
983 "psrlw $9, %%mm3 \n\t"
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
984 "psubw %%mm1, %%mm0 \n\t"
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
985 "pmullw %%mm3, %%mm0 \n\t"
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
986 "paddw %%mm6, %%mm2 \n\t" // 2*alpha += xpos&0xFFFF
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
987 "psllw $7, %%mm1 \n\t"
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
988 "paddw %%mm1, %%mm0 \n\t"
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
989
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
990 "movq %%mm0, (%%edi, %%eax) \n\t"
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
991
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
992 "addl $8, %%eax \n\t"
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
993 // End
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
994 "9: \n\t"
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
995 // "int $3\n\t"
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
996 "leal 0b, %0 \n\t"
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
997 "leal 1b, %1 \n\t"
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
998 "leal 2b, %2 \n\t"
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
999 "decl %1 \n\t"
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1000 "decl %2 \n\t"
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1001 "subl %0, %1 \n\t"
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1002 "subl %0, %2 \n\t"
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1003 "leal 9b, %3 \n\t"
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1004 "subl %0, %3 \n\t"
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1005 :"=r" (fragment), "=r" (imm8OfPShufW1), "=r" (imm8OfPShufW2),
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1006 "=r" (fragmentLength)
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1007 );
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1008
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1009 xpos= 0; //lumXInc/2 - 0x8000; // difference between pixel centers
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1010
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1011 for(i=0; i<dstW/8; i++)
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1012 {
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1013 int xx=xpos>>16;
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1014
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1015 if((i&3) == 0)
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1016 {
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1017 int a=0;
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1018 int b=((xpos+xInc)>>16) - xx;
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1019 int c=((xpos+xInc*2)>>16) - xx;
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1020 int d=((xpos+xInc*3)>>16) - xx;
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1021
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1022 memcpy(funnyCode + fragmentLength*i/4, fragment, fragmentLength);
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1023
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1024 funnyCode[fragmentLength*i/4 + imm8OfPShufW1]=
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1025 funnyCode[fragmentLength*i/4 + imm8OfPShufW2]=
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1026 a | (b<<2) | (c<<4) | (d<<6);
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1027
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1028 // if we don't need to read 8 bytes then don't :), reduces the chance of
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1029 // crossing a cache line
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1030 if(d<3) funnyCode[fragmentLength*i/4 + 1]= 0x6E;
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1031
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1032 funnyCode[fragmentLength*(i+4)/4]= RET;
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1033 }
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1034 xpos+=xInc;
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1035 }
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1036 }
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1037 #endif // ARCH_X86
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1038
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1039 //FIXME remove
2216
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
1040 void SwScale_Init(){
4276
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1041 }
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1042
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1043 static void globalInit(){
2216
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
1044 // generating tables:
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
1045 int i;
3344
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
1046 for(i=0; i<768; i++){
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
1047 int c= MIN(MAX(i-256, 0), 255);
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
1048 clip_table[i]=c;
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
1049 yuvtab_2568[c]= clip_yuvtab_2568[i]=(0x2568*(c-16))+(256<<13);
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
1050 yuvtab_3343[c]= clip_yuvtab_3343[i]=0x3343*(c-128);
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
1051 yuvtab_0c92[c]= clip_yuvtab_0c92[i]=-0x0c92*(c-128);
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
1052 yuvtab_1a1e[c]= clip_yuvtab_1a1e[i]=-0x1a1e*(c-128);
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
1053 yuvtab_40cf[c]= clip_yuvtab_40cf[i]=0x40cf*(c-128);
2216
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
1054 }
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
1055
2584
6d20d5d5829f 15/16bit in C speedup
michael
parents: 2576
diff changeset
1056 for(i=0; i<768; i++)
6d20d5d5829f 15/16bit in C speedup
michael
parents: 2576
diff changeset
1057 {
4276
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1058 int v= clip_table[i];
2584
6d20d5d5829f 15/16bit in C speedup
michael
parents: 2576
diff changeset
1059 clip_table16b[i]= v>>3;
6d20d5d5829f 15/16bit in C speedup
michael
parents: 2576
diff changeset
1060 clip_table16g[i]= (v<<3)&0x07E0;
6d20d5d5829f 15/16bit in C speedup
michael
parents: 2576
diff changeset
1061 clip_table16r[i]= (v<<8)&0xF800;
6d20d5d5829f 15/16bit in C speedup
michael
parents: 2576
diff changeset
1062 clip_table15b[i]= v>>3;
6d20d5d5829f 15/16bit in C speedup
michael
parents: 2576
diff changeset
1063 clip_table15g[i]= (v<<2)&0x03E0;
6d20d5d5829f 15/16bit in C speedup
michael
parents: 2576
diff changeset
1064 clip_table15r[i]= (v<<7)&0x7C00;
6d20d5d5829f 15/16bit in C speedup
michael
parents: 2576
diff changeset
1065 }
3344
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
1066
4276
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1067 cpuCaps= gCpuCaps;
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1068
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1069 #ifdef RUNTIME_CPUDETECT
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1070 #ifdef CAN_COMPILE_X86_ASM
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1071 // ordered by speed, fastest first
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1072 if(gCpuCaps.hasMMX2)
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1073 swScale= swScale_MMX2;
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1074 else if(gCpuCaps.has3DNow)
4281
michael
parents: 4276
diff changeset
1075 swScale= swScale_3DNow;
4276
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1076 else if(gCpuCaps.hasMMX)
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1077 swScale= swScale_MMX;
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1078 else
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1079 swScale= swScale_C;
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1080
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1081 #else
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1082 swScale= swScale_C;
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1083 cpuCaps.hasMMX2 = cpuCaps.hasMMX = cpuCaps.has3DNow = 0;
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1084 #endif
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1085 #else //RUNTIME_CPUDETECT
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1086 #ifdef HAVE_MMX2
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1087 swScale= swScale_MMX2;
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1088 cpuCaps.has3DNow = 0;
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1089 #elif defined (HAVE_3DNOW)
4281
michael
parents: 4276
diff changeset
1090 swScale= swScale_3DNow;
4276
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1091 cpuCaps.hasMMX2 = 0;
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1092 #elif defined (HAVE_MMX)
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1093 swScale= swScale_MMX;
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1094 cpuCaps.hasMMX2 = cpuCaps.has3DNow = 0;
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1095 #else
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1096 swScale= swScale_C;
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1097 cpuCaps.hasMMX2 = cpuCaps.hasMMX = cpuCaps.has3DNow = 0;
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1098 #endif
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1099 #endif //!RUNTIME_CPUDETECT
3126
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
1100 }
2584
6d20d5d5829f 15/16bit in C speedup
michael
parents: 2576
diff changeset
1101
4276
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1102
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1103 SwsContext *getSwsContext(int srcW, int srcH, int srcFormat, int dstW, int dstH, int dstFormat, int flags,
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1104 SwsFilter *srcFilter, SwsFilter *dstFilter){
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1105
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1106 SwsContext *c;
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1107 int i;
4290
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1108 SwsFilter dummyFilter= {NULL, NULL, NULL, NULL};
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1109
4294
21dbbbbd5479 a few filters (should be removed/merged when arpis videofilter stuff is finished)
michael
parents: 4290
diff changeset
1110 #ifdef ARCH_X86
21dbbbbd5479 a few filters (should be removed/merged when arpis videofilter stuff is finished)
michael
parents: 4290
diff changeset
1111 if(gCpuCaps.hasMMX)
21dbbbbd5479 a few filters (should be removed/merged when arpis videofilter stuff is finished)
michael
parents: 4290
diff changeset
1112 asm volatile("emms\n\t"::: "memory");
21dbbbbd5479 a few filters (should be removed/merged when arpis videofilter stuff is finished)
michael
parents: 4290
diff changeset
1113 #endif
21dbbbbd5479 a few filters (should be removed/merged when arpis videofilter stuff is finished)
michael
parents: 4290
diff changeset
1114
4276
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1115 if(swScale==NULL) globalInit();
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1116
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1117 /* sanity check */
4419
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
1118 if(srcW<4 || srcH<1 || dstW<8 || dstH<1) return NULL; //FIXME check if these are enough and try to lower them after fixing the relevant parts of the code
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
1119
4467
9512d6832b38 YUY2, BGR24, BGR32 input support (no mmx yet)
michael
parents: 4421
diff changeset
1120 // if(!isSupportedIn(srcFormat)) return NULL;
9512d6832b38 YUY2, BGR24, BGR32 input support (no mmx yet)
michael
parents: 4421
diff changeset
1121 // if(!isSupportedOut(dstFormat)) return NULL;
4276
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1122
4290
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1123 if(!dstFilter) dstFilter= &dummyFilter;
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1124 if(!srcFilter) srcFilter= &dummyFilter;
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1125
4276
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1126 c= memalign(64, sizeof(SwsContext));
4290
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1127 memset(c, 0, sizeof(SwsContext));
4276
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1128
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1129 c->srcW= srcW;
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1130 c->srcH= srcH;
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1131 c->dstW= dstW;
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1132 c->dstH= dstH;
4297
29fef3982238 15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents: 4295
diff changeset
1133 c->lumXInc= ((srcW<<16) + (dstW>>1))/dstW;
29fef3982238 15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents: 4295
diff changeset
1134 c->lumYInc= ((srcH<<16) + (dstH>>1))/dstH;
4276
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1135 c->flags= flags;
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1136 c->dstFormat= dstFormat;
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1137 c->srcFormat= srcFormat;
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1138
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1139 if(cpuCaps.hasMMX2)
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1140 {
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1141 c->canMMX2BeUsed= (dstW >=srcW && (dstW&31)==0 && (srcW&15)==0) ? 1 : 0;
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1142 if(!c->canMMX2BeUsed && dstW >=srcW && (srcW&15)==0 && (flags&SWS_FAST_BILINEAR))
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1143 {
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1144 if(flags&SWS_PRINT_INFO)
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1145 fprintf(stderr, "SwScaler: output Width is not a multiple of 32 -> no MMX2 scaler\n");
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1146 }
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1147 }
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1148 else
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1149 c->canMMX2BeUsed=0;
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1150
4467
9512d6832b38 YUY2, BGR24, BGR32 input support (no mmx yet)
michael
parents: 4421
diff changeset
1151
9512d6832b38 YUY2, BGR24, BGR32 input support (no mmx yet)
michael
parents: 4421
diff changeset
1152 /* don't use full vertical UV input/internally if the source doesn't even have it */
9512d6832b38 YUY2, BGR24, BGR32 input support (no mmx yet)
michael
parents: 4421
diff changeset
1153 if(isHalfChrV(srcFormat)) c->flags= flags= flags&(~SWS_FULL_CHR_V);
9512d6832b38 YUY2, BGR24, BGR32 input support (no mmx yet)
michael
parents: 4421
diff changeset
1154 /* don't use full horizontal UV input if the source doesn't even have it */
9512d6832b38 YUY2, BGR24, BGR32 input support (no mmx yet)
michael
parents: 4421
diff changeset
1155 if(isHalfChrH(srcFormat)) c->flags= flags= flags&(~SWS_FULL_CHR_H_INP);
9512d6832b38 YUY2, BGR24, BGR32 input support (no mmx yet)
michael
parents: 4421
diff changeset
1156 /* don't use full horizontal UV internally if the destination doesn't even have it */
9512d6832b38 YUY2, BGR24, BGR32 input support (no mmx yet)
michael
parents: 4421
diff changeset
1157 if(isHalfChrH(dstFormat)) c->flags= flags= flags&(~SWS_FULL_CHR_H_INT);
9512d6832b38 YUY2, BGR24, BGR32 input support (no mmx yet)
michael
parents: 4421
diff changeset
1158
9512d6832b38 YUY2, BGR24, BGR32 input support (no mmx yet)
michael
parents: 4421
diff changeset
1159 if(flags&SWS_FULL_CHR_H_INP) c->chrSrcW= srcW;
9512d6832b38 YUY2, BGR24, BGR32 input support (no mmx yet)
michael
parents: 4421
diff changeset
1160 else c->chrSrcW= (srcW+1)>>1;
9512d6832b38 YUY2, BGR24, BGR32 input support (no mmx yet)
michael
parents: 4421
diff changeset
1161
9512d6832b38 YUY2, BGR24, BGR32 input support (no mmx yet)
michael
parents: 4421
diff changeset
1162 if(flags&SWS_FULL_CHR_H_INT) c->chrDstW= dstW;
9512d6832b38 YUY2, BGR24, BGR32 input support (no mmx yet)
michael
parents: 4421
diff changeset
1163 else c->chrDstW= (dstW+1)>>1;
9512d6832b38 YUY2, BGR24, BGR32 input support (no mmx yet)
michael
parents: 4421
diff changeset
1164
9512d6832b38 YUY2, BGR24, BGR32 input support (no mmx yet)
michael
parents: 4421
diff changeset
1165 if(flags&SWS_FULL_CHR_V) c->chrSrcH= srcH;
9512d6832b38 YUY2, BGR24, BGR32 input support (no mmx yet)
michael
parents: 4421
diff changeset
1166 else c->chrSrcH= (srcH+1)>>1;
9512d6832b38 YUY2, BGR24, BGR32 input support (no mmx yet)
michael
parents: 4421
diff changeset
1167
9512d6832b38 YUY2, BGR24, BGR32 input support (no mmx yet)
michael
parents: 4421
diff changeset
1168 if(isHalfChrV(dstFormat)) c->chrDstH= (dstH+1)>>1;
9512d6832b38 YUY2, BGR24, BGR32 input support (no mmx yet)
michael
parents: 4421
diff changeset
1169 else c->chrDstH= dstH;
9512d6832b38 YUY2, BGR24, BGR32 input support (no mmx yet)
michael
parents: 4421
diff changeset
1170
9512d6832b38 YUY2, BGR24, BGR32 input support (no mmx yet)
michael
parents: 4421
diff changeset
1171 c->chrXInc= ((c->chrSrcW<<16) + (c->chrDstW>>1))/c->chrDstW;
9512d6832b38 YUY2, BGR24, BGR32 input support (no mmx yet)
michael
parents: 4421
diff changeset
1172 c->chrYInc= ((c->chrSrcH<<16) + (c->chrDstH>>1))/c->chrDstH;
9512d6832b38 YUY2, BGR24, BGR32 input support (no mmx yet)
michael
parents: 4421
diff changeset
1173
9512d6832b38 YUY2, BGR24, BGR32 input support (no mmx yet)
michael
parents: 4421
diff changeset
1174
4276
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1175 // match pixel 0 of the src to pixel 0 of dst and match pixel n-2 of src to pixel n-2 of dst
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1176 // but only for the FAST_BILINEAR mode otherwise do correct scaling
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1177 // n-2 is the last chrominance sample available
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1178 // this is not perfect, but no one should notice the difference, the more correct variant
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1179 // would be like the vertical one, but that would require some special code for the
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1180 // first and last pixel
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1181 if(flags&SWS_FAST_BILINEAR)
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1182 {
4467
9512d6832b38 YUY2, BGR24, BGR32 input support (no mmx yet)
michael
parents: 4421
diff changeset
1183 if(c->canMMX2BeUsed)
9512d6832b38 YUY2, BGR24, BGR32 input support (no mmx yet)
michael
parents: 4421
diff changeset
1184 {
9512d6832b38 YUY2, BGR24, BGR32 input support (no mmx yet)
michael
parents: 4421
diff changeset
1185 c->lumXInc+= 20;
9512d6832b38 YUY2, BGR24, BGR32 input support (no mmx yet)
michael
parents: 4421
diff changeset
1186 c->chrXInc+= 20;
9512d6832b38 YUY2, BGR24, BGR32 input support (no mmx yet)
michael
parents: 4421
diff changeset
1187 }
4276
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1188 //we don't use the x86asm scaler if mmx is available
4467
9512d6832b38 YUY2, BGR24, BGR32 input support (no mmx yet)
michael
parents: 4421
diff changeset
1189 else if(cpuCaps.hasMMX)
9512d6832b38 YUY2, BGR24, BGR32 input support (no mmx yet)
michael
parents: 4421
diff changeset
1190 {
9512d6832b38 YUY2, BGR24, BGR32 input support (no mmx yet)
michael
parents: 4421
diff changeset
1191 c->lumXInc = ((srcW-2)<<16)/(dstW-2) - 20;
9512d6832b38 YUY2, BGR24, BGR32 input support (no mmx yet)
michael
parents: 4421
diff changeset
1192 c->chrXInc = ((c->chrSrcW-2)<<16)/(c->chrDstW-2) - 20;
9512d6832b38 YUY2, BGR24, BGR32 input support (no mmx yet)
michael
parents: 4421
diff changeset
1193 }
4276
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1194 }
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1195
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1196 /* precalculate horizontal scaler filter coefficients */
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1197 {
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1198 const int filterAlign= cpuCaps.hasMMX ? 4 : 1;
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1199
4290
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1200 initFilter(&c->hLumFilter, &c->hLumFilterPos, &c->hLumFilterSize, c->lumXInc,
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1201 srcW , dstW, filterAlign, 1<<14, flags,
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1202 srcFilter->lumH, dstFilter->lumH);
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1203 initFilter(&c->hChrFilter, &c->hChrFilterPos, &c->hChrFilterSize, c->chrXInc,
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1204 (srcW+1)>>1, c->chrDstW, filterAlign, 1<<14, flags,
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1205 srcFilter->chrH, dstFilter->chrH);
4276
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1206
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1207 #ifdef ARCH_X86
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1208 // can't downscale !!!
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1209 if(c->canMMX2BeUsed && (flags & SWS_FAST_BILINEAR))
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1210 {
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1211 initMMX2HScaler( dstW, c->lumXInc, c->funnyYCode);
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1212 initMMX2HScaler(c->chrDstW, c->chrXInc, c->funnyUVCode);
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1213 }
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1214 #endif
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1215 } // Init Horizontal stuff
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1216
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1217
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1218
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1219 /* precalculate vertical scaler filter coefficients */
4290
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1220 initFilter(&c->vLumFilter, &c->vLumFilterPos, &c->vLumFilterSize, c->lumYInc,
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1221 srcH , dstH, 1, (1<<12)-4, flags,
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1222 srcFilter->lumV, dstFilter->lumV);
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1223 initFilter(&c->vChrFilter, &c->vChrFilterPos, &c->vChrFilterSize, c->chrYInc,
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1224 (srcH+1)>>1, c->chrDstH, 1, (1<<12)-4, flags,
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1225 srcFilter->chrV, dstFilter->chrV);
4276
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1226
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1227 // Calculate Buffer Sizes so that they won't run out while handling these damn slices
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1228 c->vLumBufSize= c->vLumFilterSize;
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1229 c->vChrBufSize= c->vChrFilterSize;
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1230 for(i=0; i<dstH; i++)
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1231 {
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1232 int chrI= i*c->chrDstH / dstH;
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1233 int nextSlice= MAX(c->vLumFilterPos[i ] + c->vLumFilterSize - 1,
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1234 ((c->vChrFilterPos[chrI] + c->vChrFilterSize - 1)<<1));
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1235 nextSlice&= ~1; // Slices start at even boundaries
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1236 if(c->vLumFilterPos[i ] + c->vLumBufSize < nextSlice)
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1237 c->vLumBufSize= nextSlice - c->vLumFilterPos[i ];
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1238 if(c->vChrFilterPos[chrI] + c->vChrBufSize < (nextSlice>>1))
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1239 c->vChrBufSize= (nextSlice>>1) - c->vChrFilterPos[chrI];
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1240 }
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1241
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1242 // allocate pixbufs (we use dynamic allocation because otherwise we would need to
4290
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1243 c->lumPixBuf= (int16_t**)memalign(4, c->vLumBufSize*2*sizeof(int16_t*));
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1244 c->chrPixBuf= (int16_t**)memalign(4, c->vChrBufSize*2*sizeof(int16_t*));
4419
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
1245 //Note we need at least one pixel more at the end because of the mmx code (just in case someone wanna replace the 4000/8000)
4276
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1246 for(i=0; i<c->vLumBufSize; i++)
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1247 c->lumPixBuf[i]= c->lumPixBuf[i+c->vLumBufSize]= (uint16_t*)memalign(8, 4000);
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1248 for(i=0; i<c->vChrBufSize; i++)
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1249 c->chrPixBuf[i]= c->chrPixBuf[i+c->vChrBufSize]= (uint16_t*)memalign(8, 8000);
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1250
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1251 //try to avoid drawing green stuff between the right end and the stride end
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1252 for(i=0; i<c->vLumBufSize; i++) memset(c->lumPixBuf[i], 0, 4000);
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1253 for(i=0; i<c->vChrBufSize; i++) memset(c->chrPixBuf[i], 64, 8000);
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1254
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1255 ASSERT(c->chrDstH <= dstH)
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1256
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1257 // pack filter data for mmx code
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1258 if(cpuCaps.hasMMX)
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1259 {
4290
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1260 c->lumMmxFilter= (int16_t*)memalign(8, c->vLumFilterSize* dstH*4*sizeof(int16_t));
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1261 c->chrMmxFilter= (int16_t*)memalign(8, c->vChrFilterSize*c->chrDstH*4*sizeof(int16_t));
4276
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1262 for(i=0; i<c->vLumFilterSize*dstH; i++)
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1263 c->lumMmxFilter[4*i]=c->lumMmxFilter[4*i+1]=c->lumMmxFilter[4*i+2]=c->lumMmxFilter[4*i+3]=
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1264 c->vLumFilter[i];
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1265 for(i=0; i<c->vChrFilterSize*c->chrDstH; i++)
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1266 c->chrMmxFilter[4*i]=c->chrMmxFilter[4*i+1]=c->chrMmxFilter[4*i+2]=c->chrMmxFilter[4*i+3]=
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1267 c->vChrFilter[i];
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1268 }
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1269
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1270 if(flags&SWS_PRINT_INFO)
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1271 {
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1272 #ifdef DITHER1XBPP
4297
29fef3982238 15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents: 4295
diff changeset
1273 char *dither= " dithered";
29fef3982238 15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents: 4295
diff changeset
1274 #else
29fef3982238 15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents: 4295
diff changeset
1275 char *dither= "";
4276
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1276 #endif
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1277 if(flags&SWS_FAST_BILINEAR)
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1278 fprintf(stderr, "\nSwScaler: FAST_BILINEAR scaler ");
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1279 else if(flags&SWS_BILINEAR)
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1280 fprintf(stderr, "\nSwScaler: BILINEAR scaler ");
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1281 else if(flags&SWS_BICUBIC)
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1282 fprintf(stderr, "\nSwScaler: BICUBIC scaler ");
4467
9512d6832b38 YUY2, BGR24, BGR32 input support (no mmx yet)
michael
parents: 4421
diff changeset
1283 else if(flags&SWS_X)
9512d6832b38 YUY2, BGR24, BGR32 input support (no mmx yet)
michael
parents: 4421
diff changeset
1284 fprintf(stderr, "\nSwScaler: Experimental scaler ");
4401
8d00348d0d6b nearest neighbor / sdl emulation ;) scaling (-sws 4)
michael
parents: 4304
diff changeset
1285 else if(flags&SWS_POINT)
4402
67abbf501b02 area averageing scaling support (-sws 5) (is identical to bilinear for upscale)
michael
parents: 4401
diff changeset
1286 fprintf(stderr, "\nSwScaler: Nearest Neighbor / POINT scaler ");
67abbf501b02 area averageing scaling support (-sws 5) (is identical to bilinear for upscale)
michael
parents: 4401
diff changeset
1287 else if(flags&SWS_AREA)
67abbf501b02 area averageing scaling support (-sws 5) (is identical to bilinear for upscale)
michael
parents: 4401
diff changeset
1288 fprintf(stderr, "\nSwScaler: Area Averageing scaler ");
4276
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1289 else
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1290 fprintf(stderr, "\nSwScaler: ehh flags invalid?! ");
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1291
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1292 if(dstFormat==IMGFMT_BGR15)
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1293 fprintf(stderr, "with%s BGR15 output ", dither);
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1294 else if(dstFormat==IMGFMT_BGR16)
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1295 fprintf(stderr, "with%s BGR16 output ", dither);
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1296 else if(dstFormat==IMGFMT_BGR24)
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1297 fprintf(stderr, "with BGR24 output ");
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1298 else if(dstFormat==IMGFMT_BGR32)
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1299 fprintf(stderr, "with BGR32 output ");
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1300 else if(dstFormat==IMGFMT_YV12)
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1301 fprintf(stderr, "with YV12 output ");
4419
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
1302 else if(dstFormat==IMGFMT_I420)
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
1303 fprintf(stderr, "with I420 output ");
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
1304 else if(dstFormat==IMGFMT_IYUV)
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
1305 fprintf(stderr, "with IYUV output ");
4276
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1306 else
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1307 fprintf(stderr, "without output ");
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1308
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1309 if(cpuCaps.hasMMX2)
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1310 fprintf(stderr, "using MMX2\n");
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1311 else if(cpuCaps.has3DNow)
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1312 fprintf(stderr, "using 3DNOW\n");
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1313 else if(cpuCaps.hasMMX)
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1314 fprintf(stderr, "using MMX\n");
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1315 else
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1316 fprintf(stderr, "using C\n");
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1317 }
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1318
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1319 if((flags & SWS_PRINT_INFO) && verbose)
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1320 {
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1321 if(cpuCaps.hasMMX)
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1322 {
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1323 if(c->canMMX2BeUsed && (flags&SWS_FAST_BILINEAR))
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1324 printf("SwScaler: using FAST_BILINEAR MMX2 scaler for horizontal scaling\n");
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1325 else
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1326 {
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1327 if(c->hLumFilterSize==4)
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1328 printf("SwScaler: using 4-tap MMX scaler for horizontal luminance scaling\n");
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1329 else if(c->hLumFilterSize==8)
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1330 printf("SwScaler: using 8-tap MMX scaler for horizontal luminance scaling\n");
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1331 else
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1332 printf("SwScaler: using n-tap MMX scaler for horizontal luminance scaling\n");
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1333
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1334 if(c->hChrFilterSize==4)
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1335 printf("SwScaler: using 4-tap MMX scaler for horizontal chrominance scaling\n");
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1336 else if(c->hChrFilterSize==8)
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1337 printf("SwScaler: using 8-tap MMX scaler for horizontal chrominance scaling\n");
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1338 else
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1339 printf("SwScaler: using n-tap MMX scaler for horizontal chrominance scaling\n");
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1340 }
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1341 }
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1342 else
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1343 {
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1344 #ifdef ARCH_X86
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1345 printf("SwScaler: using X86-Asm scaler for horizontal scaling\n");
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1346 #else
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1347 if(flags & SWS_FAST_BILINEAR)
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1348 printf("SwScaler: using FAST_BILINEAR C scaler for horizontal scaling\n");
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1349 else
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1350 printf("SwScaler: using C scaler for horizontal scaling\n");
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1351 #endif
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1352 }
4419
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
1353 if(isPlanarYUV(dstFormat))
4276
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1354 {
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1355 if(c->vLumFilterSize==1)
4419
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
1356 printf("SwScaler: using 1-tap %s \"scaler\" for vertical scaling (YV12 like)\n", cpuCaps.hasMMX ? "MMX" : "C");
4276
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1357 else
4419
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
1358 printf("SwScaler: using n-tap %s scaler for vertical scaling (YV12 like)\n", cpuCaps.hasMMX ? "MMX" : "C");
4276
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1359 }
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1360 else
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1361 {
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1362 if(c->vLumFilterSize==1 && c->vChrFilterSize==2)
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1363 printf("SwScaler: using 1-tap %s \"scaler\" for vertical luminance scaling (BGR)\n"
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1364 "SwScaler: 2-tap scaler for vertical chrominance scaling (BGR)\n",cpuCaps.hasMMX ? "MMX" : "C");
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1365 else if(c->vLumFilterSize==2 && c->vChrFilterSize==2)
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1366 printf("SwScaler: using 2-tap linear %s scaler for vertical scaling (BGR)\n", cpuCaps.hasMMX ? "MMX" : "C");
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1367 else
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1368 printf("SwScaler: using n-tap %s scaler for vertical scaling (BGR)\n", cpuCaps.hasMMX ? "MMX" : "C");
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1369 }
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1370
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1371 if(dstFormat==IMGFMT_BGR24)
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1372 printf("SwScaler: using %s YV12->BGR24 Converter\n",
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1373 cpuCaps.hasMMX2 ? "MMX2" : (cpuCaps.hasMMX ? "MMX" : "C"));
4304
329ecbb6309b minor bugfix in the -v output
michael
parents: 4297
diff changeset
1374 else if(dstFormat==IMGFMT_BGR32)
329ecbb6309b minor bugfix in the -v output
michael
parents: 4297
diff changeset
1375 printf("SwScaler: using %s YV12->BGR32 Converter\n", cpuCaps.hasMMX ? "MMX" : "C");
329ecbb6309b minor bugfix in the -v output
michael
parents: 4297
diff changeset
1376 else if(dstFormat==IMGFMT_BGR16)
329ecbb6309b minor bugfix in the -v output
michael
parents: 4297
diff changeset
1377 printf("SwScaler: using %s YV12->BGR16 Converter\n", cpuCaps.hasMMX ? "MMX" : "C");
329ecbb6309b minor bugfix in the -v output
michael
parents: 4297
diff changeset
1378 else if(dstFormat==IMGFMT_BGR15)
329ecbb6309b minor bugfix in the -v output
michael
parents: 4297
diff changeset
1379 printf("SwScaler: using %s YV12->BGR15 Converter\n", cpuCaps.hasMMX ? "MMX" : "C");
4276
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1380
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1381 printf("SwScaler: %dx%d -> %dx%d\n", srcW, srcH, dstW, dstH);
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1382 }
4467
9512d6832b38 YUY2, BGR24, BGR32 input support (no mmx yet)
michael
parents: 4421
diff changeset
1383 if((flags & SWS_PRINT_INFO) && verbose>1)
9512d6832b38 YUY2, BGR24, BGR32 input support (no mmx yet)
michael
parents: 4421
diff changeset
1384 {
9512d6832b38 YUY2, BGR24, BGR32 input support (no mmx yet)
michael
parents: 4421
diff changeset
1385 printf("SwScaler:Lum srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n",
9512d6832b38 YUY2, BGR24, BGR32 input support (no mmx yet)
michael
parents: 4421
diff changeset
1386 c->srcW, c->srcH, c->dstW, c->dstH, c->lumXInc, c->lumYInc);
9512d6832b38 YUY2, BGR24, BGR32 input support (no mmx yet)
michael
parents: 4421
diff changeset
1387 printf("SwScaler:Chr srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n",
9512d6832b38 YUY2, BGR24, BGR32 input support (no mmx yet)
michael
parents: 4421
diff changeset
1388 c->chrSrcW, c->chrSrcH, c->chrDstW, c->chrDstH, c->chrXInc, c->chrYInc);
9512d6832b38 YUY2, BGR24, BGR32 input support (no mmx yet)
michael
parents: 4421
diff changeset
1389 }
9512d6832b38 YUY2, BGR24, BGR32 input support (no mmx yet)
michael
parents: 4421
diff changeset
1390
4276
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1391 return c;
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1392 }
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1393
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1394 /**
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1395 * returns a normalized gaussian curve used to filter stuff
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1396 * quality=3 is high quality, lower is lower quality
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1397 */
4290
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1398
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1399 SwsVector *getGaussianVec(double variance, double quality){
4276
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1400 const int length= (int)(variance*quality + 0.5) | 1;
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1401 int i;
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1402 double *coeff= memalign(sizeof(double), length*sizeof(double));
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1403 double middle= (length-1)*0.5;
4290
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1404 SwsVector *vec= malloc(sizeof(SwsVector));
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1405
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1406 vec->coeff= coeff;
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1407 vec->length= length;
4276
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1408
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1409 for(i=0; i<length; i++)
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1410 {
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1411 double dist= i-middle;
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1412 coeff[i]= exp( -dist*dist/(2*variance*variance) ) / sqrt(2*variance*PI);
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1413 }
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1414
4290
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1415 normalizeVec(vec, 1.0);
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1416
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1417 return vec;
4276
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1418 }
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1419
4297
29fef3982238 15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents: 4295
diff changeset
1420 SwsVector *getConstVec(double c, int length){
29fef3982238 15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents: 4295
diff changeset
1421 int i;
29fef3982238 15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents: 4295
diff changeset
1422 double *coeff= memalign(sizeof(double), length*sizeof(double));
29fef3982238 15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents: 4295
diff changeset
1423 SwsVector *vec= malloc(sizeof(SwsVector));
29fef3982238 15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents: 4295
diff changeset
1424
29fef3982238 15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents: 4295
diff changeset
1425 vec->coeff= coeff;
29fef3982238 15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents: 4295
diff changeset
1426 vec->length= length;
29fef3982238 15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents: 4295
diff changeset
1427
29fef3982238 15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents: 4295
diff changeset
1428 for(i=0; i<length; i++)
29fef3982238 15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents: 4295
diff changeset
1429 coeff[i]= c;
29fef3982238 15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents: 4295
diff changeset
1430
29fef3982238 15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents: 4295
diff changeset
1431 return vec;
29fef3982238 15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents: 4295
diff changeset
1432 }
29fef3982238 15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents: 4295
diff changeset
1433
29fef3982238 15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents: 4295
diff changeset
1434
4290
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1435 SwsVector *getIdentityVec(void){
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1436 double *coeff= memalign(sizeof(double), sizeof(double));
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1437 SwsVector *vec= malloc(sizeof(SwsVector));
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1438 coeff[0]= 1.0;
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1439
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1440 vec->coeff= coeff;
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1441 vec->length= 1;
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1442
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1443 return vec;
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1444 }
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1445
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1446 void normalizeVec(SwsVector *a, double height){
4276
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1447 int i;
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1448 double sum=0;
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1449 double inv;
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1450
4290
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1451 for(i=0; i<a->length; i++)
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1452 sum+= a->coeff[i];
4276
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1453
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1454 inv= height/sum;
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1455
4290
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1456 for(i=0; i<a->length; i++)
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1457 a->coeff[i]*= height;
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1458 }
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1459
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1460 void scaleVec(SwsVector *a, double scalar){
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1461 int i;
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1462
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1463 for(i=0; i<a->length; i++)
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1464 a->coeff[i]*= scalar;
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1465 }
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1466
4294
21dbbbbd5479 a few filters (should be removed/merged when arpis videofilter stuff is finished)
michael
parents: 4290
diff changeset
1467 static SwsVector *getConvVec(SwsVector *a, SwsVector *b){
4290
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1468 int length= a->length + b->length - 1;
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1469 double *coeff= memalign(sizeof(double), length*sizeof(double));
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1470 int i, j;
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1471 SwsVector *vec= malloc(sizeof(SwsVector));
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1472
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1473 vec->coeff= coeff;
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1474 vec->length= length;
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1475
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1476 for(i=0; i<length; i++) coeff[i]= 0.0;
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1477
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1478 for(i=0; i<a->length; i++)
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1479 {
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1480 for(j=0; j<b->length; j++)
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1481 {
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1482 coeff[i+j]+= a->coeff[i]*b->coeff[j];
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1483 }
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1484 }
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1485
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1486 return vec;
4276
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1487 }
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1488
4294
21dbbbbd5479 a few filters (should be removed/merged when arpis videofilter stuff is finished)
michael
parents: 4290
diff changeset
1489 static SwsVector *sumVec(SwsVector *a, SwsVector *b){
4290
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1490 int length= MAX(a->length, b->length);
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1491 double *coeff= memalign(sizeof(double), length*sizeof(double));
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1492 int i;
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1493 SwsVector *vec= malloc(sizeof(SwsVector));
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1494
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1495 vec->coeff= coeff;
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1496 vec->length= length;
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1497
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1498 for(i=0; i<length; i++) coeff[i]= 0.0;
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1499
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1500 for(i=0; i<a->length; i++) coeff[i + (length-1)/2 - (a->length-1)/2]+= a->coeff[i];
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1501 for(i=0; i<b->length; i++) coeff[i + (length-1)/2 - (b->length-1)/2]+= b->coeff[i];
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1502
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1503 return vec;
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1504 }
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1505
4294
21dbbbbd5479 a few filters (should be removed/merged when arpis videofilter stuff is finished)
michael
parents: 4290
diff changeset
1506 static SwsVector *diffVec(SwsVector *a, SwsVector *b){
4290
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1507 int length= MAX(a->length, b->length);
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1508 double *coeff= memalign(sizeof(double), length*sizeof(double));
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1509 int i;
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1510 SwsVector *vec= malloc(sizeof(SwsVector));
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1511
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1512 vec->coeff= coeff;
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1513 vec->length= length;
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1514
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1515 for(i=0; i<length; i++) coeff[i]= 0.0;
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1516
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1517 for(i=0; i<a->length; i++) coeff[i + (length-1)/2 - (a->length-1)/2]+= a->coeff[i];
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1518 for(i=0; i<b->length; i++) coeff[i + (length-1)/2 - (b->length-1)/2]-= b->coeff[i];
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1519
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1520 return vec;
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1521 }
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1522
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1523 /* shift left / or right if "shift" is negative */
4294
21dbbbbd5479 a few filters (should be removed/merged when arpis videofilter stuff is finished)
michael
parents: 4290
diff changeset
1524 static SwsVector *getShiftedVec(SwsVector *a, int shift){
4290
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1525 int length= a->length + ABS(shift)*2;
4276
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1526 double *coeff= memalign(sizeof(double), length*sizeof(double));
4401
8d00348d0d6b nearest neighbor / sdl emulation ;) scaling (-sws 4)
michael
parents: 4304
diff changeset
1527 int i;
4290
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1528 SwsVector *vec= malloc(sizeof(SwsVector));
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1529
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1530 vec->coeff= coeff;
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1531 vec->length= length;
4276
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1532
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1533 for(i=0; i<length; i++) coeff[i]= 0.0;
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1534
4290
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1535 for(i=0; i<a->length; i++)
4276
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1536 {
4290
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1537 coeff[i + (length-1)/2 - (a->length-1)/2 - shift]= a->coeff[i];
4276
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1538 }
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1539
4290
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1540 return vec;
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1541 }
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1542
4294
21dbbbbd5479 a few filters (should be removed/merged when arpis videofilter stuff is finished)
michael
parents: 4290
diff changeset
1543 void shiftVec(SwsVector *a, int shift){
21dbbbbd5479 a few filters (should be removed/merged when arpis videofilter stuff is finished)
michael
parents: 4290
diff changeset
1544 SwsVector *shifted= getShiftedVec(a, shift);
21dbbbbd5479 a few filters (should be removed/merged when arpis videofilter stuff is finished)
michael
parents: 4290
diff changeset
1545 free(a->coeff);
21dbbbbd5479 a few filters (should be removed/merged when arpis videofilter stuff is finished)
michael
parents: 4290
diff changeset
1546 a->coeff= shifted->coeff;
21dbbbbd5479 a few filters (should be removed/merged when arpis videofilter stuff is finished)
michael
parents: 4290
diff changeset
1547 a->length= shifted->length;
21dbbbbd5479 a few filters (should be removed/merged when arpis videofilter stuff is finished)
michael
parents: 4290
diff changeset
1548 free(shifted);
21dbbbbd5479 a few filters (should be removed/merged when arpis videofilter stuff is finished)
michael
parents: 4290
diff changeset
1549 }
21dbbbbd5479 a few filters (should be removed/merged when arpis videofilter stuff is finished)
michael
parents: 4290
diff changeset
1550
21dbbbbd5479 a few filters (should be removed/merged when arpis videofilter stuff is finished)
michael
parents: 4290
diff changeset
1551 void addVec(SwsVector *a, SwsVector *b){
21dbbbbd5479 a few filters (should be removed/merged when arpis videofilter stuff is finished)
michael
parents: 4290
diff changeset
1552 SwsVector *sum= sumVec(a, b);
21dbbbbd5479 a few filters (should be removed/merged when arpis videofilter stuff is finished)
michael
parents: 4290
diff changeset
1553 free(a->coeff);
21dbbbbd5479 a few filters (should be removed/merged when arpis videofilter stuff is finished)
michael
parents: 4290
diff changeset
1554 a->coeff= sum->coeff;
21dbbbbd5479 a few filters (should be removed/merged when arpis videofilter stuff is finished)
michael
parents: 4290
diff changeset
1555 a->length= sum->length;
21dbbbbd5479 a few filters (should be removed/merged when arpis videofilter stuff is finished)
michael
parents: 4290
diff changeset
1556 free(sum);
21dbbbbd5479 a few filters (should be removed/merged when arpis videofilter stuff is finished)
michael
parents: 4290
diff changeset
1557 }
21dbbbbd5479 a few filters (should be removed/merged when arpis videofilter stuff is finished)
michael
parents: 4290
diff changeset
1558
21dbbbbd5479 a few filters (should be removed/merged when arpis videofilter stuff is finished)
michael
parents: 4290
diff changeset
1559 void subVec(SwsVector *a, SwsVector *b){
21dbbbbd5479 a few filters (should be removed/merged when arpis videofilter stuff is finished)
michael
parents: 4290
diff changeset
1560 SwsVector *diff= diffVec(a, b);
21dbbbbd5479 a few filters (should be removed/merged when arpis videofilter stuff is finished)
michael
parents: 4290
diff changeset
1561 free(a->coeff);
21dbbbbd5479 a few filters (should be removed/merged when arpis videofilter stuff is finished)
michael
parents: 4290
diff changeset
1562 a->coeff= diff->coeff;
21dbbbbd5479 a few filters (should be removed/merged when arpis videofilter stuff is finished)
michael
parents: 4290
diff changeset
1563 a->length= diff->length;
21dbbbbd5479 a few filters (should be removed/merged when arpis videofilter stuff is finished)
michael
parents: 4290
diff changeset
1564 free(diff);
21dbbbbd5479 a few filters (should be removed/merged when arpis videofilter stuff is finished)
michael
parents: 4290
diff changeset
1565 }
21dbbbbd5479 a few filters (should be removed/merged when arpis videofilter stuff is finished)
michael
parents: 4290
diff changeset
1566
21dbbbbd5479 a few filters (should be removed/merged when arpis videofilter stuff is finished)
michael
parents: 4290
diff changeset
1567 void convVec(SwsVector *a, SwsVector *b){
21dbbbbd5479 a few filters (should be removed/merged when arpis videofilter stuff is finished)
michael
parents: 4290
diff changeset
1568 SwsVector *conv= getConvVec(a, b);
21dbbbbd5479 a few filters (should be removed/merged when arpis videofilter stuff is finished)
michael
parents: 4290
diff changeset
1569 free(a->coeff);
21dbbbbd5479 a few filters (should be removed/merged when arpis videofilter stuff is finished)
michael
parents: 4290
diff changeset
1570 a->coeff= conv->coeff;
21dbbbbd5479 a few filters (should be removed/merged when arpis videofilter stuff is finished)
michael
parents: 4290
diff changeset
1571 a->length= conv->length;
21dbbbbd5479 a few filters (should be removed/merged when arpis videofilter stuff is finished)
michael
parents: 4290
diff changeset
1572 free(conv);
21dbbbbd5479 a few filters (should be removed/merged when arpis videofilter stuff is finished)
michael
parents: 4290
diff changeset
1573 }
21dbbbbd5479 a few filters (should be removed/merged when arpis videofilter stuff is finished)
michael
parents: 4290
diff changeset
1574
21dbbbbd5479 a few filters (should be removed/merged when arpis videofilter stuff is finished)
michael
parents: 4290
diff changeset
1575 SwsVector *cloneVec(SwsVector *a){
21dbbbbd5479 a few filters (should be removed/merged when arpis videofilter stuff is finished)
michael
parents: 4290
diff changeset
1576 double *coeff= memalign(sizeof(double), a->length*sizeof(double));
21dbbbbd5479 a few filters (should be removed/merged when arpis videofilter stuff is finished)
michael
parents: 4290
diff changeset
1577 int i;
21dbbbbd5479 a few filters (should be removed/merged when arpis videofilter stuff is finished)
michael
parents: 4290
diff changeset
1578 SwsVector *vec= malloc(sizeof(SwsVector));
21dbbbbd5479 a few filters (should be removed/merged when arpis videofilter stuff is finished)
michael
parents: 4290
diff changeset
1579
21dbbbbd5479 a few filters (should be removed/merged when arpis videofilter stuff is finished)
michael
parents: 4290
diff changeset
1580 vec->coeff= coeff;
21dbbbbd5479 a few filters (should be removed/merged when arpis videofilter stuff is finished)
michael
parents: 4290
diff changeset
1581 vec->length= a->length;
21dbbbbd5479 a few filters (should be removed/merged when arpis videofilter stuff is finished)
michael
parents: 4290
diff changeset
1582
21dbbbbd5479 a few filters (should be removed/merged when arpis videofilter stuff is finished)
michael
parents: 4290
diff changeset
1583 for(i=0; i<a->length; i++) coeff[i]= a->coeff[i];
21dbbbbd5479 a few filters (should be removed/merged when arpis videofilter stuff is finished)
michael
parents: 4290
diff changeset
1584
21dbbbbd5479 a few filters (should be removed/merged when arpis videofilter stuff is finished)
michael
parents: 4290
diff changeset
1585 return vec;
21dbbbbd5479 a few filters (should be removed/merged when arpis videofilter stuff is finished)
michael
parents: 4290
diff changeset
1586 }
21dbbbbd5479 a few filters (should be removed/merged when arpis videofilter stuff is finished)
michael
parents: 4290
diff changeset
1587
4290
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1588 void printVec(SwsVector *a){
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1589 int i;
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1590 double max=0;
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1591 double min=0;
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1592 double range;
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1593
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1594 for(i=0; i<a->length; i++)
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1595 if(a->coeff[i]>max) max= a->coeff[i];
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1596
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1597 for(i=0; i<a->length; i++)
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1598 if(a->coeff[i]<min) min= a->coeff[i];
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1599
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1600 range= max - min;
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1601
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1602 for(i=0; i<a->length; i++)
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1603 {
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1604 int x= (int)((a->coeff[i]-min)*60.0/range +0.5);
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1605 printf("%1.3f ", a->coeff[i]);
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1606 for(;x>0; x--) printf(" ");
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1607 printf("|\n");
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1608 }
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1609 }
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1610
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1611 void freeVec(SwsVector *a){
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1612 if(!a) return;
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1613 if(a->coeff) free(a->coeff);
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1614 a->coeff=NULL;
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1615 a->length=0;
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1616 free(a);
4276
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1617 }
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1618
4290
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1619 void freeSwsContext(SwsContext *c){
4276
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1620 int i;
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1621
4290
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1622 if(!c) return;
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1623
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1624 if(c->lumPixBuf)
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1625 {
4419
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
1626 for(i=0; i<c->vLumBufSize; i++)
4290
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1627 {
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1628 if(c->lumPixBuf[i]) free(c->lumPixBuf[i]);
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1629 c->lumPixBuf[i]=NULL;
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1630 }
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1631 free(c->lumPixBuf);
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1632 c->lumPixBuf=NULL;
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1633 }
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1634
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1635 if(c->chrPixBuf)
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1636 {
4419
f002407e298d YV12 like formats support (I420/IYUV)
michael
parents: 4402
diff changeset
1637 for(i=0; i<c->vChrBufSize; i++)
4290
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1638 {
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1639 if(c->chrPixBuf[i]) free(c->chrPixBuf[i]);
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1640 c->chrPixBuf[i]=NULL;
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1641 }
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1642 free(c->chrPixBuf);
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1643 c->chrPixBuf=NULL;
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1644 }
4276
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1645
4290
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1646 if(c->vLumFilter) free(c->vLumFilter);
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1647 c->vLumFilter = NULL;
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1648 if(c->vChrFilter) free(c->vChrFilter);
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1649 c->vChrFilter = NULL;
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1650 if(c->hLumFilter) free(c->hLumFilter);
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1651 c->hLumFilter = NULL;
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1652 if(c->hChrFilter) free(c->hChrFilter);
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1653 c->hChrFilter = NULL;
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1654
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1655 if(c->vLumFilterPos) free(c->vLumFilterPos);
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1656 c->vLumFilterPos = NULL;
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1657 if(c->vChrFilterPos) free(c->vChrFilterPos);
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1658 c->vChrFilterPos = NULL;
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1659 if(c->hLumFilterPos) free(c->hLumFilterPos);
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1660 c->hLumFilterPos = NULL;
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1661 if(c->hChrFilterPos) free(c->hChrFilterPos);
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1662 c->hChrFilterPos = NULL;
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1663
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1664 if(c->lumMmxFilter) free(c->lumMmxFilter);
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1665 c->lumMmxFilter = NULL;
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1666 if(c->chrMmxFilter) free(c->chrMmxFilter);
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1667 c->chrMmxFilter = NULL;
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1668
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1669 free(c);
4276
9199d15cb4e0 removed global vars so that multiple swscalers can be used
michael
parents: 4248
diff changeset
1670 }
4281
michael
parents: 4276
diff changeset
1671
4290
1f8ceb12284d general convolution filtering of the source picture
michael
parents: 4281
diff changeset
1672