annotate postproc/swscale.c @ 4271:2c7e6c87fb6f

reworked, picture moves when window moved, fullscreen working with gui, fixed some annoying bugs
author alex
date Sat, 19 Jan 2002 22:43:19 +0000
parents 3cdb86beebce
children 9199d15cb4e0
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2216
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
1
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
2 // Software scaling and colorspace conversion routines for MPlayer
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
3
2269
95c48204bcd9 (C) fixed
arpi
parents: 2267
diff changeset
4 // Original C implementation by A'rpi/ESP-team <arpi@thot.banki.hu>
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
5 // current version mostly by Michael Niedermayer (michaelni@gmx.at)
2316
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
6 // the parts written by michael are under GNU GPL
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
7
2232
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
8 #include <inttypes.h>
2476
a6c5a537f30a a few warning fixes (missing #include's)
pl
parents: 2469
diff changeset
9 #include <string.h>
3272
7e4399d1eb65 horizontal up/downscale linear & cubic
michael
parents: 3209
diff changeset
10 #include <math.h>
3344
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
11 #include <stdio.h>
2232
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
12 #include "../config.h"
4248
3cdb86beebce mangle for win32 in postproc
atmos4
parents: 3641
diff changeset
13 #include "../mangle.h"
3344
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
14 #ifdef HAVE_MALLOC_H
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
15 #include <malloc.h>
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
16 #endif
2316
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
17 #include "swscale.h"
3126
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
18 #include "../cpudetect.h"
2540
f2e70944d02a fixed a warning
michael
parents: 2534
diff changeset
19 #undef MOVNTQ
2680
e8a534509557 green line fix for dstw%8!=0
michael
parents: 2671
diff changeset
20 #undef PAVGB
2232
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
21
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
22 //#undef HAVE_MMX2
2232
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
23 //#undef HAVE_MMX
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
24 //#undef ARCH_X86
2316
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
25 #define DITHER1XBPP
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
26 int fullUVIpol=0;
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
27 //disables the unscaled height version
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
28 int allwaysIpol=0;
2232
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
29
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
30 #define RET 0xC3 //near return opcode
3344
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
31
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
32 //#define ASSERT(x) if(!(x)) { printf("ASSERT " #x " failed\n"); *((int*)0)=0; }
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
33 #define ASSERT(x) ;
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
34
3352
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
35 extern int verbose; // defined in mplayer.c
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
36 /*
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
37 NOTES
2216
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
38
2316
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
39 known BUGS with known cause (no bugreports please!, but patches are welcome :) )
3352
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
40 horizontal fast_bilinear MMX2 scaler reads 1-7 samples too many (might cause a sig11)
2316
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
41
3352
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
42 Supported output formats BGR15 BGR16 BGR24 BGR32 YV12
2316
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
43 BGR15 & BGR16 MMX versions support dithering
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
44 Special versions: fast Y 1:1 scaling (no interpolation in y direction)
2216
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
45
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
46 TODO
2316
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
47 more intelligent misalignment avoidance for the horizontal scaler
2585
bd52b78f12dc c speedup
michael
parents: 2584
diff changeset
48 dither in C
bd52b78f12dc c speedup
michael
parents: 2584
diff changeset
49 change the distance of the u & v buffer
3344
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
50 Move static / global vars into a struct so multiple scalers can be used
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
51 write special vertical cubic upscale version
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
52 Optimize C code (yv12 / minmax)
3352
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
53 dstStride[3]
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
54 */
2216
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
55
2316
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
56 #define ABS(a) ((a) > 0 ? (a) : (-(a)))
2469
03abc2743ed6 downscale
michael
parents: 2326
diff changeset
57 #define MIN(a,b) ((a) > (b) ? (b) : (a))
03abc2743ed6 downscale
michael
parents: 2326
diff changeset
58 #define MAX(a,b) ((a) < (b) ? (b) : (a))
2316
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
59
3126
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
60 #ifdef ARCH_X86
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
61 #define CAN_COMPILE_X86_ASM
2316
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
62 #endif
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
63
3126
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
64 #ifdef CAN_COMPILE_X86_ASM
2316
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
65 static uint64_t __attribute__((aligned(8))) yCoeff= 0x2568256825682568LL;
2503
d21d8d5f2e23 yuv2rgb bugfix
michael
parents: 2476
diff changeset
66 static uint64_t __attribute__((aligned(8))) vrCoeff= 0x3343334333433343LL;
d21d8d5f2e23 yuv2rgb bugfix
michael
parents: 2476
diff changeset
67 static uint64_t __attribute__((aligned(8))) ubCoeff= 0x40cf40cf40cf40cfLL;
d21d8d5f2e23 yuv2rgb bugfix
michael
parents: 2476
diff changeset
68 static uint64_t __attribute__((aligned(8))) vgCoeff= 0xE5E2E5E2E5E2E5E2LL;
d21d8d5f2e23 yuv2rgb bugfix
michael
parents: 2476
diff changeset
69 static uint64_t __attribute__((aligned(8))) ugCoeff= 0xF36EF36EF36EF36ELL;
2669
476b9b3b91be faster bgr15/16
michael
parents: 2638
diff changeset
70 static uint64_t __attribute__((aligned(8))) bF8= 0xF8F8F8F8F8F8F8F8LL;
476b9b3b91be faster bgr15/16
michael
parents: 2638
diff changeset
71 static uint64_t __attribute__((aligned(8))) bFC= 0xFCFCFCFCFCFCFCFCLL;
2316
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
72 static uint64_t __attribute__((aligned(8))) w400= 0x0400040004000400LL;
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
73 static uint64_t __attribute__((aligned(8))) w80= 0x0080008000800080LL;
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
74 static uint64_t __attribute__((aligned(8))) w10= 0x0010001000100010LL;
3272
7e4399d1eb65 horizontal up/downscale linear & cubic
michael
parents: 3209
diff changeset
75 static uint64_t __attribute__((aligned(8))) w02= 0x0002000200020002LL;
2316
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
76 static uint64_t __attribute__((aligned(8))) bm00001111=0x00000000FFFFFFFFLL;
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
77 static uint64_t __attribute__((aligned(8))) bm00000111=0x0000000000FFFFFFLL;
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
78 static uint64_t __attribute__((aligned(8))) bm11111000=0xFFFFFFFFFF000000LL;
2232
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
79
2750
9ef09e232505 gcc does optimize writes to non volatile variables away if it didnt know that they were read in between
michael
parents: 2748
diff changeset
80 static volatile uint64_t __attribute__((aligned(8))) b5Dither;
9ef09e232505 gcc does optimize writes to non volatile variables away if it didnt know that they were read in between
michael
parents: 2748
diff changeset
81 static volatile uint64_t __attribute__((aligned(8))) g5Dither;
9ef09e232505 gcc does optimize writes to non volatile variables away if it didnt know that they were read in between
michael
parents: 2748
diff changeset
82 static volatile uint64_t __attribute__((aligned(8))) g6Dither;
9ef09e232505 gcc does optimize writes to non volatile variables away if it didnt know that they were read in between
michael
parents: 2748
diff changeset
83 static volatile uint64_t __attribute__((aligned(8))) r5Dither;
2748
01dbf100b4f8 better dithering
michael
parents: 2730
diff changeset
84
01dbf100b4f8 better dithering
michael
parents: 2730
diff changeset
85 static uint64_t __attribute__((aligned(8))) dither4[2]={
01dbf100b4f8 better dithering
michael
parents: 2730
diff changeset
86 0x0103010301030103LL,
01dbf100b4f8 better dithering
michael
parents: 2730
diff changeset
87 0x0200020002000200LL,};
01dbf100b4f8 better dithering
michael
parents: 2730
diff changeset
88
01dbf100b4f8 better dithering
michael
parents: 2730
diff changeset
89 static uint64_t __attribute__((aligned(8))) dither8[2]={
01dbf100b4f8 better dithering
michael
parents: 2730
diff changeset
90 0x0602060206020602LL,
01dbf100b4f8 better dithering
michael
parents: 2730
diff changeset
91 0x0004000400040004LL,};
2232
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
92
2316
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
93 static uint64_t __attribute__((aligned(8))) b16Mask= 0x001F001F001F001FLL;
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
94 static uint64_t __attribute__((aligned(8))) g16Mask= 0x07E007E007E007E0LL;
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
95 static uint64_t __attribute__((aligned(8))) r16Mask= 0xF800F800F800F800LL;
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
96 static uint64_t __attribute__((aligned(8))) b15Mask= 0x001F001F001F001FLL;
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
97 static uint64_t __attribute__((aligned(8))) g15Mask= 0x03E003E003E003E0LL;
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
98 static uint64_t __attribute__((aligned(8))) r15Mask= 0x7C007C007C007C00LL;
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
99
2730
c483fc9bf0c4 faster bgr24 output
michael
parents: 2728
diff changeset
100 static uint64_t __attribute__((aligned(8))) M24A= 0x00FF0000FF0000FFLL;
c483fc9bf0c4 faster bgr24 output
michael
parents: 2728
diff changeset
101 static uint64_t __attribute__((aligned(8))) M24B= 0xFF0000FF0000FF00LL;
c483fc9bf0c4 faster bgr24 output
michael
parents: 2728
diff changeset
102 static uint64_t __attribute__((aligned(8))) M24C= 0x0000FF0000FF0000LL;
c483fc9bf0c4 faster bgr24 output
michael
parents: 2728
diff changeset
103
2316
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
104 static uint64_t __attribute__((aligned(8))) temp0;
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
105 static uint64_t __attribute__((aligned(8))) asm_yalpha1;
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
106 static uint64_t __attribute__((aligned(8))) asm_uvalpha1;
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
107
3344
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
108 static int16_t __attribute__((aligned(8))) *lumPixBuf[2000];
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
109 static int16_t __attribute__((aligned(8))) *chrPixBuf[2000];
3272
7e4399d1eb65 horizontal up/downscale linear & cubic
michael
parents: 3209
diff changeset
110 static int16_t __attribute__((aligned(8))) hLumFilter[8000];
7e4399d1eb65 horizontal up/downscale linear & cubic
michael
parents: 3209
diff changeset
111 static int16_t __attribute__((aligned(8))) hLumFilterPos[2000];
7e4399d1eb65 horizontal up/downscale linear & cubic
michael
parents: 3209
diff changeset
112 static int16_t __attribute__((aligned(8))) hChrFilter[8000];
7e4399d1eb65 horizontal up/downscale linear & cubic
michael
parents: 3209
diff changeset
113 static int16_t __attribute__((aligned(8))) hChrFilterPos[2000];
3344
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
114 static int16_t __attribute__((aligned(8))) vLumFilter[8000];
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
115 static int16_t __attribute__((aligned(8))) vLumFilterPos[2000];
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
116 static int16_t __attribute__((aligned(8))) vChrFilter[8000];
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
117 static int16_t __attribute__((aligned(8))) vChrFilterPos[2000];
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
118
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
119 // These simply contain the values from v(Lum|Chr)Filter, nicely packed for MMX
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
120 //FIXME these are very likely too small / 8000 caused problems with 480x480
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
121 static int16_t __attribute__((aligned(8))) lumMmxFilter[16000];
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
122 static int16_t __attribute__((aligned(8))) chrMmxFilter[16000];
2316
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
123 #else
3344
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
124 static int16_t *lumPixBuf[2000];
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
125 static int16_t *chrPixBuf[2000];
3272
7e4399d1eb65 horizontal up/downscale linear & cubic
michael
parents: 3209
diff changeset
126 static int16_t hLumFilter[8000];
7e4399d1eb65 horizontal up/downscale linear & cubic
michael
parents: 3209
diff changeset
127 static int16_t hLumFilterPos[2000];
7e4399d1eb65 horizontal up/downscale linear & cubic
michael
parents: 3209
diff changeset
128 static int16_t hChrFilter[8000];
7e4399d1eb65 horizontal up/downscale linear & cubic
michael
parents: 3209
diff changeset
129 static int16_t hChrFilterPos[2000];
3344
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
130 static int16_t vLumFilter[8000];
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
131 static int16_t vLumFilterPos[2000];
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
132 static int16_t vChrFilter[8000];
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
133 static int16_t vChrFilterPos[2000];
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
134 //FIXME just dummy vars
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
135 static int16_t lumMmxFilter[1];
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
136 static int16_t chrMmxFilter[1];
2316
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
137 #endif
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
138
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
139 // clipping helper table for C implementations:
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
140 static unsigned char clip_table[768];
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
141
2584
6d20d5d5829f 15/16bit in C speedup
michael
parents: 2576
diff changeset
142 static unsigned short clip_table16b[768];
6d20d5d5829f 15/16bit in C speedup
michael
parents: 2576
diff changeset
143 static unsigned short clip_table16g[768];
6d20d5d5829f 15/16bit in C speedup
michael
parents: 2576
diff changeset
144 static unsigned short clip_table16r[768];
6d20d5d5829f 15/16bit in C speedup
michael
parents: 2576
diff changeset
145 static unsigned short clip_table15b[768];
6d20d5d5829f 15/16bit in C speedup
michael
parents: 2576
diff changeset
146 static unsigned short clip_table15g[768];
6d20d5d5829f 15/16bit in C speedup
michael
parents: 2576
diff changeset
147 static unsigned short clip_table15r[768];
6d20d5d5829f 15/16bit in C speedup
michael
parents: 2576
diff changeset
148
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
149 // yuv->rgb conversion tables:
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
150 static int yuvtab_2568[256];
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
151 static int yuvtab_3343[256];
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
152 static int yuvtab_0c92[256];
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
153 static int yuvtab_1a1e[256];
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
154 static int yuvtab_40cf[256];
3344
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
155 // Needed for cubic scaler to catch overflows
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
156 static int clip_yuvtab_2568[768];
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
157 static int clip_yuvtab_3343[768];
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
158 static int clip_yuvtab_0c92[768];
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
159 static int clip_yuvtab_1a1e[768];
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
160 static int clip_yuvtab_40cf[768];
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
161
3344
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
162 static int hLumFilterSize=0;
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
163 static int hChrFilterSize=0;
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
164 static int vLumFilterSize=0;
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
165 static int vChrFilterSize=0;
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
166 static int vLumBufSize=0;
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
167 static int vChrBufSize=0;
3272
7e4399d1eb65 horizontal up/downscale linear & cubic
michael
parents: 3209
diff changeset
168
7e4399d1eb65 horizontal up/downscale linear & cubic
michael
parents: 3209
diff changeset
169 int sws_flags=0;
7e4399d1eb65 horizontal up/downscale linear & cubic
michael
parents: 3209
diff changeset
170
3126
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
171 #ifdef CAN_COMPILE_X86_ASM
2232
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
172 static uint8_t funnyYCode[10000];
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
173 static uint8_t funnyUVCode[10000];
2671
555cb027c7a7 fixed warnings
michael
parents: 2669
diff changeset
174 #endif
2232
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
175
2469
03abc2743ed6 downscale
michael
parents: 2326
diff changeset
176 static int canMMX2BeUsed=0;
03abc2743ed6 downscale
michael
parents: 2326
diff changeset
177
3126
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
178 #ifdef CAN_COMPILE_X86_ASM
2671
555cb027c7a7 fixed warnings
michael
parents: 2669
diff changeset
179 void in_asm_used_var_warning_killer()
555cb027c7a7 fixed warnings
michael
parents: 2669
diff changeset
180 {
3272
7e4399d1eb65 horizontal up/downscale linear & cubic
michael
parents: 3209
diff changeset
181 volatile int i= yCoeff+vrCoeff+ubCoeff+vgCoeff+ugCoeff+bF8+bFC+w400+w80+w10+
2748
01dbf100b4f8 better dithering
michael
parents: 2730
diff changeset
182 bm00001111+bm00000111+bm11111000+b16Mask+g16Mask+r16Mask+b15Mask+g15Mask+r15Mask+temp0+asm_yalpha1+ asm_uvalpha1+
3272
7e4399d1eb65 horizontal up/downscale linear & cubic
michael
parents: 3209
diff changeset
183 M24A+M24B+M24C+w02 + funnyYCode[0]+ funnyUVCode[0]+b5Dither+g5Dither+r5Dither+g6Dither+dither4[0]+dither8[0];
2671
555cb027c7a7 fixed warnings
michael
parents: 2669
diff changeset
184 if(i) i=0;
555cb027c7a7 fixed warnings
michael
parents: 2669
diff changeset
185 }
555cb027c7a7 fixed warnings
michael
parents: 2669
diff changeset
186 #endif
2316
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
187
3352
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
188 static inline void yuv2yuvXinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
189 int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
190 uint8_t *dest, uint8_t *uDest, uint8_t *vDest, int dstW)
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
191 {
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
192 //FIXME Optimize (just quickly written, not optimized)
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
193 int i;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
194 for(i=0; i<dstW; i++)
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
195 {
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
196 int val=0;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
197 int j;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
198 for(j=0; j<lumFilterSize; j++)
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
199 val += lumSrc[j][i] * lumFilter[j];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
200
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
201 dest[i]= MIN(MAX(val>>19, 0), 255);
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
202 }
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
203
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
204 if(uDest != NULL)
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
205 for(i=0; i<(dstW>>1); i++)
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
206 {
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
207 int u=0;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
208 int v=0;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
209 int j;
3641
33c560ffd3dc minor bugfixes (noone noticed them)
michael
parents: 3352
diff changeset
210 for(j=0; j<chrFilterSize; j++)
3352
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
211 {
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
212 u += chrSrc[j][i] * chrFilter[j];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
213 v += chrSrc[j][i + 2048] * chrFilter[j];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
214 }
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
215
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
216 uDest[i]= MIN(MAX(u>>19, 0), 255);
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
217 vDest[i]= MIN(MAX(v>>19, 0), 255);
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
218 }
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
219 }
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
220
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
221 static inline void yuv2rgbXinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
222 int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
223 uint8_t *dest, int dstW, int dstbpp)
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
224 {
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
225 if(dstbpp==32)
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
226 {
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
227 int i;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
228 for(i=0; i<(dstW>>1); i++){
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
229 int j;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
230 int Y1=0;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
231 int Y2=0;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
232 int U=0;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
233 int V=0;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
234 int Cb, Cr, Cg;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
235 for(j=0; j<lumFilterSize; j++)
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
236 {
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
237 Y1 += lumSrc[j][2*i] * lumFilter[j];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
238 Y2 += lumSrc[j][2*i+1] * lumFilter[j];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
239 }
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
240 for(j=0; j<chrFilterSize; j++)
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
241 {
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
242 U += chrSrc[j][i] * chrFilter[j];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
243 V += chrSrc[j][i+2048] * chrFilter[j];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
244 }
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
245 Y1= clip_yuvtab_2568[ (Y1>>19) + 256 ];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
246 Y2= clip_yuvtab_2568[ (Y2>>19) + 256 ];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
247 U >>= 19;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
248 V >>= 19;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
249
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
250 Cb= clip_yuvtab_40cf[U+ 256];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
251 Cg= clip_yuvtab_1a1e[V+ 256] + yuvtab_0c92[U+ 256];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
252 Cr= clip_yuvtab_3343[V+ 256];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
253
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
254 dest[8*i+0]=clip_table[((Y1 + Cb) >>13)];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
255 dest[8*i+1]=clip_table[((Y1 + Cg) >>13)];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
256 dest[8*i+2]=clip_table[((Y1 + Cr) >>13)];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
257
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
258 dest[8*i+4]=clip_table[((Y2 + Cb) >>13)];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
259 dest[8*i+5]=clip_table[((Y2 + Cg) >>13)];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
260 dest[8*i+6]=clip_table[((Y2 + Cr) >>13)];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
261 }
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
262 }
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
263 else if(dstbpp==24)
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
264 {
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
265 int i;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
266 for(i=0; i<(dstW>>1); i++){
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
267 int j;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
268 int Y1=0;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
269 int Y2=0;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
270 int U=0;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
271 int V=0;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
272 int Cb, Cr, Cg;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
273 for(j=0; j<lumFilterSize; j++)
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
274 {
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
275 Y1 += lumSrc[j][2*i] * lumFilter[j];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
276 Y2 += lumSrc[j][2*i+1] * lumFilter[j];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
277 }
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
278 for(j=0; j<chrFilterSize; j++)
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
279 {
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
280 U += chrSrc[j][i] * chrFilter[j];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
281 V += chrSrc[j][i+2048] * chrFilter[j];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
282 }
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
283 Y1= clip_yuvtab_2568[ (Y1>>19) + 256 ];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
284 Y2= clip_yuvtab_2568[ (Y2>>19) + 256 ];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
285 U >>= 19;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
286 V >>= 19;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
287
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
288 Cb= clip_yuvtab_40cf[U+ 256];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
289 Cg= clip_yuvtab_1a1e[V+ 256] + yuvtab_0c92[U+ 256];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
290 Cr= clip_yuvtab_3343[V+ 256];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
291
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
292 dest[0]=clip_table[((Y1 + Cb) >>13)];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
293 dest[1]=clip_table[((Y1 + Cg) >>13)];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
294 dest[2]=clip_table[((Y1 + Cr) >>13)];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
295
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
296 dest[3]=clip_table[((Y2 + Cb) >>13)];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
297 dest[4]=clip_table[((Y2 + Cg) >>13)];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
298 dest[5]=clip_table[((Y2 + Cr) >>13)];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
299 dest+=6;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
300 }
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
301 }
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
302 else if(dstbpp==16)
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
303 {
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
304 int i;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
305 for(i=0; i<(dstW>>1); i++){
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
306 int j;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
307 int Y1=0;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
308 int Y2=0;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
309 int U=0;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
310 int V=0;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
311 int Cb, Cr, Cg;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
312 for(j=0; j<lumFilterSize; j++)
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
313 {
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
314 Y1 += lumSrc[j][2*i] * lumFilter[j];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
315 Y2 += lumSrc[j][2*i+1] * lumFilter[j];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
316 }
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
317 for(j=0; j<chrFilterSize; j++)
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
318 {
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
319 U += chrSrc[j][i] * chrFilter[j];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
320 V += chrSrc[j][i+2048] * chrFilter[j];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
321 }
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
322 Y1= clip_yuvtab_2568[ (Y1>>19) + 256 ];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
323 Y2= clip_yuvtab_2568[ (Y2>>19) + 256 ];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
324 U >>= 19;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
325 V >>= 19;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
326
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
327 Cb= clip_yuvtab_40cf[U+ 256];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
328 Cg= clip_yuvtab_1a1e[V+ 256] + yuvtab_0c92[U+ 256];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
329 Cr= clip_yuvtab_3343[V+ 256];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
330
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
331 ((uint16_t*)dest)[2*i] =
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
332 clip_table16b[(Y1 + Cb) >>13] |
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
333 clip_table16g[(Y1 + Cg) >>13] |
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
334 clip_table16r[(Y1 + Cr) >>13];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
335
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
336 ((uint16_t*)dest)[2*i+1] =
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
337 clip_table16b[(Y2 + Cb) >>13] |
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
338 clip_table16g[(Y2 + Cg) >>13] |
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
339 clip_table16r[(Y2 + Cr) >>13];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
340 }
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
341 }
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
342 else if(dstbpp==15)
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
343 {
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
344 int i;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
345 for(i=0; i<(dstW>>1); i++){
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
346 int j;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
347 int Y1=0;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
348 int Y2=0;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
349 int U=0;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
350 int V=0;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
351 int Cb, Cr, Cg;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
352 for(j=0; j<lumFilterSize; j++)
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
353 {
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
354 Y1 += lumSrc[j][2*i] * lumFilter[j];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
355 Y2 += lumSrc[j][2*i+1] * lumFilter[j];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
356 }
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
357 for(j=0; j<chrFilterSize; j++)
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
358 {
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
359 U += chrSrc[j][i] * chrFilter[j];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
360 V += chrSrc[j][i+2048] * chrFilter[j];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
361 }
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
362 Y1= clip_yuvtab_2568[ (Y1>>19) + 256 ];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
363 Y2= clip_yuvtab_2568[ (Y2>>19) + 256 ];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
364 U >>= 19;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
365 V >>= 19;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
366
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
367 Cb= clip_yuvtab_40cf[U+ 256];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
368 Cg= clip_yuvtab_1a1e[V+ 256] + yuvtab_0c92[U+ 256];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
369 Cr= clip_yuvtab_3343[V+ 256];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
370
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
371 ((uint16_t*)dest)[2*i] =
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
372 clip_table15b[(Y1 + Cb) >>13] |
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
373 clip_table15g[(Y1 + Cg) >>13] |
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
374 clip_table15r[(Y1 + Cr) >>13];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
375
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
376 ((uint16_t*)dest)[2*i+1] =
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
377 clip_table15b[(Y2 + Cb) >>13] |
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
378 clip_table15g[(Y2 + Cg) >>13] |
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
379 clip_table15r[(Y2 + Cr) >>13];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
380 }
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
381 }
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
382 }
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
383
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
384
3126
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
385 //Note: we have C, X86, MMX, MMX2, 3DNOW versions; there is no 3DNOW+MMX2 one
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
386 //Plain C versions
3152
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
387 #if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT)
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
388 #define COMPILE_C
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
389 #endif
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
390
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
391 #ifdef CAN_COMPILE_X86_ASM
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
392
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
393 #if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
394 #define COMPILE_MMX
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
395 #endif
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
396
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
397 #if defined (HAVE_MMX2) || defined (RUNTIME_CPUDETECT)
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
398 #define COMPILE_MMX2
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
399 #endif
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
400
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
401 #if (defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
402 #define COMPILE_3DNOW
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
403 #endif
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
404 #endif //CAN_COMPILE_X86_ASM
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
405
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
406 #undef HAVE_MMX
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
407 #undef HAVE_MMX2
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
408 #undef HAVE_3DNOW
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
409 #undef ARCH_X86
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
410
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
411 #ifdef COMPILE_C
3126
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
412 #undef HAVE_MMX
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
413 #undef HAVE_MMX2
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
414 #undef HAVE_3DNOW
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
415 #undef ARCH_X86
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
416 #define RENAME(a) a ## _C
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
417 #include "swscale_template.c"
3152
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
418 #endif
2316
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
419
3126
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
420 #ifdef CAN_COMPILE_X86_ASM
2576
437ed06579d8 c optimizations
michael
parents: 2575
diff changeset
421
3126
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
422 //X86 versions
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
423 /*
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
424 #undef RENAME
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
425 #undef HAVE_MMX
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
426 #undef HAVE_MMX2
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
427 #undef HAVE_3DNOW
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
428 #define ARCH_X86
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
429 #define RENAME(a) a ## _X86
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
430 #include "swscale_template.c"
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
431 */
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
432 //MMX versions
3152
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
433 #ifdef COMPILE_MMX
3126
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
434 #undef RENAME
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
435 #define HAVE_MMX
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
436 #undef HAVE_MMX2
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
437 #undef HAVE_3DNOW
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
438 #define ARCH_X86
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
439 #define RENAME(a) a ## _MMX
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
440 #include "swscale_template.c"
3152
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
441 #endif
2316
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
442
3126
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
443 //MMX2 versions
3152
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
444 #ifdef COMPILE_MMX2
3126
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
445 #undef RENAME
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
446 #define HAVE_MMX
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
447 #define HAVE_MMX2
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
448 #undef HAVE_3DNOW
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
449 #define ARCH_X86
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
450 #define RENAME(a) a ## _MMX2
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
451 #include "swscale_template.c"
3152
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
452 #endif
2469
03abc2743ed6 downscale
michael
parents: 2326
diff changeset
453
3126
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
454 //3DNOW versions
3152
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
455 #ifdef COMPILE_3DNOW
3126
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
456 #undef RENAME
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
457 #define HAVE_MMX
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
458 #undef HAVE_MMX2
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
459 #define HAVE_3DNOW
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
460 #define ARCH_X86
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
461 #define RENAME(a) a ## _3DNow
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
462 #include "swscale_template.c"
3152
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
463 #endif
2469
03abc2743ed6 downscale
michael
parents: 2326
diff changeset
464
3126
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
465 #endif //CAN_COMPILE_X86_ASM
2469
03abc2743ed6 downscale
michael
parents: 2326
diff changeset
466
3126
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
467 // minor note: the HAVE_xyz macros are messed up after this line, so don't use them
2316
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
468
2232
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
469
2519
6f3fa9bc3b27 yv12 to yv12 scaler
michael
parents: 2503
diff changeset
470 // *** bilinear scaling and yuv->rgb or yuv->yuv conversion of yv12 slices:
2216
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
471 // *** Note: it's called multiple times while decoding a frame, first time y==0
3126
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
472 // switching the cpu type during a sliced drawing can have bad effects, like sig11
3209
0b172eb639f1 swscaler cleanup
michael
parents: 3152
diff changeset
473 void SwScale_YV12slice(unsigned char* srcptr[],int stride[], int srcSliceY ,
0b172eb639f1 swscaler cleanup
michael
parents: 3152
diff changeset
474 int srcSliceH, uint8_t* dstptr[], int dststride, int dstbpp,
0b172eb639f1 swscaler cleanup
michael
parents: 3152
diff changeset
475 int srcW, int srcH, int dstW, int dstH){
2216
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
476 // Pure dispatcher: forwards all arguments unchanged to one of the SwScale_YV12slice_<variant> functions generated from swscale_template.c via RENAME()
3152
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
477 #ifdef RUNTIME_CPUDETECT
3126
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
478 #ifdef CAN_COMPILE_X86_ASM
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
479 // variant is chosen at run time from gCpuCaps; checks are ordered by speed, fastest first
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
480 if(gCpuCaps.hasMMX2)
3209
0b172eb639f1 swscaler cleanup
michael
parents: 3152
diff changeset
481 SwScale_YV12slice_MMX2(srcptr, stride, srcSliceY, srcSliceH, dstptr, dststride, dstbpp, srcW, srcH, dstW, dstH);
3126
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
482 else if(gCpuCaps.has3DNow)
3209
0b172eb639f1 swscaler cleanup
michael
parents: 3152
diff changeset
483 SwScale_YV12slice_3DNow(srcptr, stride, srcSliceY, srcSliceH, dstptr, dststride, dstbpp, srcW, srcH, dstW, dstH);
3126
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
484 else if(gCpuCaps.hasMMX)
3209
0b172eb639f1 swscaler cleanup
michael
parents: 3152
diff changeset
485 SwScale_YV12slice_MMX(srcptr, stride, srcSliceY, srcSliceH, dstptr, dststride, dstbpp, srcW, srcH, dstW, dstH);
3126
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
486 else
3209
0b172eb639f1 swscaler cleanup
michael
parents: 3152
diff changeset
487 SwScale_YV12slice_C(srcptr, stride, srcSliceY, srcSliceH, dstptr, dststride, dstbpp, srcW, srcH, dstW, dstH);
3126
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
488 #else //!CAN_COMPILE_X86_ASM: the MMX/MMX2/3DNow variants are not compiled, only the C one
3209
0b172eb639f1 swscaler cleanup
michael
parents: 3152
diff changeset
489 SwScale_YV12slice_C(srcptr, stride, srcSliceY, srcSliceH, dstptr, dststride, dstbpp, srcW, srcH, dstW, dstH);
2270
56ca174d8169 vertical lines bugfix
michael
parents: 2269
diff changeset
490 #endif //CAN_COMPILE_X86_ASM
3152
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
491 #else //RUNTIME_CPUDETECT -- variant is fixed at compile time; NOTE(review): HAVE_* were #undef'd and re-#define'd around the template includes above, so these tests see what that section left defined
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
492 #ifdef HAVE_MMX2
3209
0b172eb639f1 swscaler cleanup
michael
parents: 3152
diff changeset
493 SwScale_YV12slice_MMX2(srcptr, stride, srcSliceY, srcSliceH, dstptr, dststride, dstbpp, srcW, srcH, dstW, dstH);
3152
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
494 #elif defined (HAVE_3DNOW)
3209
0b172eb639f1 swscaler cleanup
michael
parents: 3152
diff changeset
495 SwScale_YV12slice_3DNow(srcptr, stride, srcSliceY, srcSliceH, dstptr, dststride, dstbpp, srcW, srcH, dstW, dstH);
3152
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
496 #elif defined (HAVE_MMX)
3209
0b172eb639f1 swscaler cleanup
michael
parents: 3152
diff changeset
497 SwScale_YV12slice_MMX(srcptr, stride, srcSliceY, srcSliceH, dstptr, dststride, dstbpp, srcW, srcH, dstW, dstH);
3152
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
498 #else
3209
0b172eb639f1 swscaler cleanup
michael
parents: 3152
diff changeset
499 SwScale_YV12slice_C(srcptr, stride, srcSliceY, srcSliceH, dstptr, dststride, dstbpp, srcW, srcH, dstW, dstH);
3152
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
500 #endif
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
501 #endif //!RUNTIME_CPUDETECT
2270
56ca174d8169 vertical lines bugfix
michael
parents: 2269
diff changeset
502
2216
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
503 }
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
504
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
505 void SwScale_Init(){
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
506 // Fills the lookup tables (clip_table, yuvtab_*/clip_yuvtab_*, clip_table15*/16*) that the C yuv->rgb code in this file indexes
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
507 int i;
3344
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
508 for(i=0; i<768; i++){
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
509 int c= MIN(MAX(i-256, 0), 255); // i-256 clamped to 0..255, i.e. index i carries a +256 offset
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
510 clip_table[i]=c;
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
511 yuvtab_2568[c]= clip_yuvtab_2568[i]=(0x2568*(c-16))+(256<<13); // the 256<<13 bias makes (Y + C)>>13 land 256 entries into clip_table
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
512 yuvtab_3343[c]= clip_yuvtab_3343[i]=0x3343*(c-128);
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
513 yuvtab_0c92[c]= clip_yuvtab_0c92[i]=-0x0c92*(c-128);
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
514 yuvtab_1a1e[c]= clip_yuvtab_1a1e[i]=-0x1a1e*(c-128);
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
515 yuvtab_40cf[c]= clip_yuvtab_40cf[i]=0x40cf*(c-128);
2216
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
516 }
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
517
2584
6d20d5d5829f 15/16bit in C speedup
michael
parents: 2576
diff changeset
518 for(i=0; i<768; i++) // per-channel bit fields for 16bpp (masks 0xF800/0x07E0) and 15bpp (masks 0x7C00/0x03E0), pre-shifted so callers can OR them together
6d20d5d5829f 15/16bit in C speedup
michael
parents: 2576
diff changeset
519 {
6d20d5d5829f 15/16bit in C speedup
michael
parents: 2576
diff changeset
520 int v= clip_table[i]; // already clamped to 0..255 by the loop above
6d20d5d5829f 15/16bit in C speedup
michael
parents: 2576
diff changeset
521 clip_table16b[i]= v>>3;
6d20d5d5829f 15/16bit in C speedup
michael
parents: 2576
diff changeset
522 clip_table16g[i]= (v<<3)&0x07E0;
6d20d5d5829f 15/16bit in C speedup
michael
parents: 2576
diff changeset
523 clip_table16r[i]= (v<<8)&0xF800;
6d20d5d5829f 15/16bit in C speedup
michael
parents: 2576
diff changeset
524 clip_table15b[i]= v>>3;
6d20d5d5829f 15/16bit in C speedup
michael
parents: 2576
diff changeset
525 clip_table15g[i]= (v<<2)&0x03E0;
6d20d5d5829f 15/16bit in C speedup
michael
parents: 2576
diff changeset
526 clip_table15r[i]= (v<<7)&0x7C00;
6d20d5d5829f 15/16bit in C speedup
michael
parents: 2576
diff changeset
527 }
3344
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
528
3126
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
529 }
2584
6d20d5d5829f 15/16bit in C speedup
michael
parents: 2576
diff changeset
530