annotate postproc/swscale.c @ 3603:baa8b0c0ff30

Removed unnecessary check after the protocol autodetection. Now it will try to start streaming even if the autodetection failed. This will allow to work with web server that doesn't report a proper mime-type.
author bertrand
date Wed, 19 Dec 2001 09:02:52 +0000
parents 64121e8a43f5
children 33c560ffd3dc
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2216
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
1
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
2 // Software scaling and colorspace conversion routines for MPlayer
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
3
2269
95c48204bcd9 (C) fixed
arpi
parents: 2267
diff changeset
4 // Original C implementation by A'rpi/ESP-team <arpi@thot.banki.hu>
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
5 // current version mostly by Michael Niedermayer (michaelni@gmx.at)
2316
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
6 // the parts written by michael are under GNU GPL
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
7
2232
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
8 #include <inttypes.h>
2476
a6c5a537f30a a few warning fixes (missing #include's)
pl
parents: 2469
diff changeset
9 #include <string.h>
3272
7e4399d1eb65 horizontal up/downscale linear & cubic
michael
parents: 3209
diff changeset
10 #include <math.h>
3344
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
11 #include <stdio.h>
2232
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
12 #include "../config.h"
3344
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
13 #ifdef HAVE_MALLOC_H
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
14 #include <malloc.h>
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
15 #endif
2316
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
16 #include "swscale.h"
3126
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
17 #include "../cpudetect.h"
2540
f2e70944d02a fixed a warning
michael
parents: 2534
diff changeset
18 #undef MOVNTQ
2680
e8a534509557 green line fix for dstw%8!=0
michael
parents: 2671
diff changeset
19 #undef PAVGB
2232
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
20
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
21 //#undef HAVE_MMX2
2232
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
22 //#undef HAVE_MMX
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
23 //#undef ARCH_X86
2316
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
24 #define DITHER1XBPP
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
25 int fullUVIpol=0;
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
26 //disables the unscaled height version
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
27 int allwaysIpol=0;
2232
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
28
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
29 #define RET 0xC3 //near return opcode
3344
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
30
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
31 //#define ASSERT(x) if(!(x)) { printf("ASSERT " #x " failed\n"); *((int*)0)=0; }
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
32 #define ASSERT(x) ;
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
33
3352
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
34 extern int verbose; // defined in mplayer.c
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
35 /*
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
36 NOTES
2216
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
37
2316
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
38 known BUGS with known cause (no bugreports please!, but patches are welcome :) )
3352
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
39 horizontal fast_bilinear MMX2 scaler reads 1-7 samples too many (might cause a sig11)
2316
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
40
3352
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
41 Supported output formats BGR15 BGR16 BGR24 BGR32 YV12
2316
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
42 BGR15 & BGR16 MMX versions support dithering
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
43 Special versions: fast Y 1:1 scaling (no interpolation in y direction)
2216
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
44
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
45 TODO
2316
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
46 more intelligent misalignment avoidance for the horizontal scaler
2585
bd52b78f12dc c speedup
michael
parents: 2584
diff changeset
47 dither in C
bd52b78f12dc c speedup
michael
parents: 2584
diff changeset
48 change the distance of the u & v buffer
3344
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
49 Move static / global vars into a struct so multiple scalers can be used
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
50 write special vertical cubic upscale version
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
51 Optimize C code (yv12 / minmax)
3352
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
52 dstStride[3]
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
53 */
2216
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
54
2316
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
55 #define ABS(a) ((a) > 0 ? (a) : (-(a)))
2469
03abc2743ed6 downscale
michael
parents: 2326
diff changeset
56 #define MIN(a,b) ((a) > (b) ? (b) : (a))
03abc2743ed6 downscale
michael
parents: 2326
diff changeset
57 #define MAX(a,b) ((a) < (b) ? (b) : (a))
2316
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
58
3126
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
59 #ifdef ARCH_X86
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
60 #define CAN_COMPILE_X86_ASM
2316
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
61 #endif
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
62
3126
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
63 #ifdef CAN_COMPILE_X86_ASM
2316
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
64 static uint64_t __attribute__((aligned(8))) yCoeff= 0x2568256825682568LL;
2503
d21d8d5f2e23 yuv2rgb bugfix
michael
parents: 2476
diff changeset
65 static uint64_t __attribute__((aligned(8))) vrCoeff= 0x3343334333433343LL;
d21d8d5f2e23 yuv2rgb bugfix
michael
parents: 2476
diff changeset
66 static uint64_t __attribute__((aligned(8))) ubCoeff= 0x40cf40cf40cf40cfLL;
d21d8d5f2e23 yuv2rgb bugfix
michael
parents: 2476
diff changeset
67 static uint64_t __attribute__((aligned(8))) vgCoeff= 0xE5E2E5E2E5E2E5E2LL;
d21d8d5f2e23 yuv2rgb bugfix
michael
parents: 2476
diff changeset
68 static uint64_t __attribute__((aligned(8))) ugCoeff= 0xF36EF36EF36EF36ELL;
2669
476b9b3b91be faster bgr15/16
michael
parents: 2638
diff changeset
69 static uint64_t __attribute__((aligned(8))) bF8= 0xF8F8F8F8F8F8F8F8LL;
476b9b3b91be faster bgr15/16
michael
parents: 2638
diff changeset
70 static uint64_t __attribute__((aligned(8))) bFC= 0xFCFCFCFCFCFCFCFCLL;
2316
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
71 static uint64_t __attribute__((aligned(8))) w400= 0x0400040004000400LL;
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
72 static uint64_t __attribute__((aligned(8))) w80= 0x0080008000800080LL;
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
73 static uint64_t __attribute__((aligned(8))) w10= 0x0010001000100010LL;
3272
7e4399d1eb65 horizontal up/downscale linear & cubic
michael
parents: 3209
diff changeset
74 static uint64_t __attribute__((aligned(8))) w02= 0x0002000200020002LL;
2316
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
75 static uint64_t __attribute__((aligned(8))) bm00001111=0x00000000FFFFFFFFLL;
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
76 static uint64_t __attribute__((aligned(8))) bm00000111=0x0000000000FFFFFFLL;
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
77 static uint64_t __attribute__((aligned(8))) bm11111000=0xFFFFFFFFFF000000LL;
2232
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
78
2750
9ef09e232505 gcc does optimize writes to non volatile variables away if it didnt know that they were read in between
michael
parents: 2748
diff changeset
79 static volatile uint64_t __attribute__((aligned(8))) b5Dither;
9ef09e232505 gcc does optimize writes to non volatile variables away if it didnt know that they were read in between
michael
parents: 2748
diff changeset
80 static volatile uint64_t __attribute__((aligned(8))) g5Dither;
9ef09e232505 gcc does optimize writes to non volatile variables away if it didnt know that they were read in between
michael
parents: 2748
diff changeset
81 static volatile uint64_t __attribute__((aligned(8))) g6Dither;
9ef09e232505 gcc does optimize writes to non volatile variables away if it didnt know that they were read in between
michael
parents: 2748
diff changeset
82 static volatile uint64_t __attribute__((aligned(8))) r5Dither;
2748
01dbf100b4f8 better dithering
michael
parents: 2730
diff changeset
83
01dbf100b4f8 better dithering
michael
parents: 2730
diff changeset
84 static uint64_t __attribute__((aligned(8))) dither4[2]={
01dbf100b4f8 better dithering
michael
parents: 2730
diff changeset
85 0x0103010301030103LL,
01dbf100b4f8 better dithering
michael
parents: 2730
diff changeset
86 0x0200020002000200LL,};
01dbf100b4f8 better dithering
michael
parents: 2730
diff changeset
87
01dbf100b4f8 better dithering
michael
parents: 2730
diff changeset
88 static uint64_t __attribute__((aligned(8))) dither8[2]={
01dbf100b4f8 better dithering
michael
parents: 2730
diff changeset
89 0x0602060206020602LL,
01dbf100b4f8 better dithering
michael
parents: 2730
diff changeset
90 0x0004000400040004LL,};
2232
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
91
2316
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
92 static uint64_t __attribute__((aligned(8))) b16Mask= 0x001F001F001F001FLL;
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
93 static uint64_t __attribute__((aligned(8))) g16Mask= 0x07E007E007E007E0LL;
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
94 static uint64_t __attribute__((aligned(8))) r16Mask= 0xF800F800F800F800LL;
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
95 static uint64_t __attribute__((aligned(8))) b15Mask= 0x001F001F001F001FLL;
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
96 static uint64_t __attribute__((aligned(8))) g15Mask= 0x03E003E003E003E0LL;
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
97 static uint64_t __attribute__((aligned(8))) r15Mask= 0x7C007C007C007C00LL;
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
98
2730
c483fc9bf0c4 faster bgr24 output
michael
parents: 2728
diff changeset
99 static uint64_t __attribute__((aligned(8))) M24A= 0x00FF0000FF0000FFLL;
c483fc9bf0c4 faster bgr24 output
michael
parents: 2728
diff changeset
100 static uint64_t __attribute__((aligned(8))) M24B= 0xFF0000FF0000FF00LL;
c483fc9bf0c4 faster bgr24 output
michael
parents: 2728
diff changeset
101 static uint64_t __attribute__((aligned(8))) M24C= 0x0000FF0000FF0000LL;
c483fc9bf0c4 faster bgr24 output
michael
parents: 2728
diff changeset
102
2316
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
103 static uint64_t __attribute__((aligned(8))) temp0;
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
104 static uint64_t __attribute__((aligned(8))) asm_yalpha1;
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
105 static uint64_t __attribute__((aligned(8))) asm_uvalpha1;
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
106
3344
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
107 static int16_t __attribute__((aligned(8))) *lumPixBuf[2000];
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
108 static int16_t __attribute__((aligned(8))) *chrPixBuf[2000];
3272
7e4399d1eb65 horizontal up/downscale linear & cubic
michael
parents: 3209
diff changeset
109 static int16_t __attribute__((aligned(8))) hLumFilter[8000];
7e4399d1eb65 horizontal up/downscale linear & cubic
michael
parents: 3209
diff changeset
110 static int16_t __attribute__((aligned(8))) hLumFilterPos[2000];
7e4399d1eb65 horizontal up/downscale linear & cubic
michael
parents: 3209
diff changeset
111 static int16_t __attribute__((aligned(8))) hChrFilter[8000];
7e4399d1eb65 horizontal up/downscale linear & cubic
michael
parents: 3209
diff changeset
112 static int16_t __attribute__((aligned(8))) hChrFilterPos[2000];
3344
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
113 static int16_t __attribute__((aligned(8))) vLumFilter[8000];
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
114 static int16_t __attribute__((aligned(8))) vLumFilterPos[2000];
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
115 static int16_t __attribute__((aligned(8))) vChrFilter[8000];
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
116 static int16_t __attribute__((aligned(8))) vChrFilterPos[2000];
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
117
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
118 // Contain simply the values from v(Lum|Chr)Filter just nicely packed for mmx
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
119 //FIXME these are very likely too small / 8000 caused problems with 480x480
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
120 static int16_t __attribute__((aligned(8))) lumMmxFilter[16000];
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
121 static int16_t __attribute__((aligned(8))) chrMmxFilter[16000];
2316
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
122 #else
3344
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
123 static int16_t *lumPixBuf[2000];
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
124 static int16_t *chrPixBuf[2000];
3272
7e4399d1eb65 horizontal up/downscale linear & cubic
michael
parents: 3209
diff changeset
125 static int16_t hLumFilter[8000];
7e4399d1eb65 horizontal up/downscale linear & cubic
michael
parents: 3209
diff changeset
126 static int16_t hLumFilterPos[2000];
7e4399d1eb65 horizontal up/downscale linear & cubic
michael
parents: 3209
diff changeset
127 static int16_t hChrFilter[8000];
7e4399d1eb65 horizontal up/downscale linear & cubic
michael
parents: 3209
diff changeset
128 static int16_t hChrFilterPos[2000];
3344
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
129 static int16_t vLumFilter[8000];
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
130 static int16_t vLumFilterPos[2000];
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
131 static int16_t vChrFilter[8000];
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
132 static int16_t vChrFilterPos[2000];
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
133 //FIXME just dummy vars
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
134 static int16_t lumMmxFilter[1];
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
135 static int16_t chrMmxFilter[1];
2316
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
136 #endif
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
137
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
138 // clipping helper table for C implementations:
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
139 static unsigned char clip_table[768];
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
140
2584
6d20d5d5829f 15/16bit in C speedup
michael
parents: 2576
diff changeset
141 static unsigned short clip_table16b[768];
6d20d5d5829f 15/16bit in C speedup
michael
parents: 2576
diff changeset
142 static unsigned short clip_table16g[768];
6d20d5d5829f 15/16bit in C speedup
michael
parents: 2576
diff changeset
143 static unsigned short clip_table16r[768];
6d20d5d5829f 15/16bit in C speedup
michael
parents: 2576
diff changeset
144 static unsigned short clip_table15b[768];
6d20d5d5829f 15/16bit in C speedup
michael
parents: 2576
diff changeset
145 static unsigned short clip_table15g[768];
6d20d5d5829f 15/16bit in C speedup
michael
parents: 2576
diff changeset
146 static unsigned short clip_table15r[768];
6d20d5d5829f 15/16bit in C speedup
michael
parents: 2576
diff changeset
147
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
148 // yuv->rgb conversion tables:
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
149 static int yuvtab_2568[256];
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
150 static int yuvtab_3343[256];
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
151 static int yuvtab_0c92[256];
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
152 static int yuvtab_1a1e[256];
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
153 static int yuvtab_40cf[256];
3344
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
154 // Needed for cubic scaler to catch overflows
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
155 static int clip_yuvtab_2568[768];
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
156 static int clip_yuvtab_3343[768];
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
157 static int clip_yuvtab_0c92[768];
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
158 static int clip_yuvtab_1a1e[768];
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
159 static int clip_yuvtab_40cf[768];
2264
7851375ea156 increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents: 2237
diff changeset
160
3344
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
161 static int hLumFilterSize=0;
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
162 static int hChrFilterSize=0;
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
163 static int vLumFilterSize=0;
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
164 static int vChrFilterSize=0;
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
165 static int vLumBufSize=0;
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
166 static int vChrBufSize=0;
3272
7e4399d1eb65 horizontal up/downscale linear & cubic
michael
parents: 3209
diff changeset
167
7e4399d1eb65 horizontal up/downscale linear & cubic
michael
parents: 3209
diff changeset
168 int sws_flags=0;
7e4399d1eb65 horizontal up/downscale linear & cubic
michael
parents: 3209
diff changeset
169
3126
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
170 #ifdef CAN_COMPILE_X86_ASM
2232
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
171 static uint8_t funnyYCode[10000];
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
172 static uint8_t funnyUVCode[10000];
2671
555cb027c7a7 fixed warnings
michael
parents: 2669
diff changeset
173 #endif
2232
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
174
2469
03abc2743ed6 downscale
michael
parents: 2326
diff changeset
175 static int canMMX2BeUsed=0;
03abc2743ed6 downscale
michael
parents: 2326
diff changeset
176
3126
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
177 #ifdef CAN_COMPILE_X86_ASM
2671
555cb027c7a7 fixed warnings
michael
parents: 2669
diff changeset
178 void in_asm_used_var_warning_killer()
555cb027c7a7 fixed warnings
michael
parents: 2669
diff changeset
179 {
3272
7e4399d1eb65 horizontal up/downscale linear & cubic
michael
parents: 3209
diff changeset
180 volatile int i= yCoeff+vrCoeff+ubCoeff+vgCoeff+ugCoeff+bF8+bFC+w400+w80+w10+
2748
01dbf100b4f8 better dithering
michael
parents: 2730
diff changeset
181 bm00001111+bm00000111+bm11111000+b16Mask+g16Mask+r16Mask+b15Mask+g15Mask+r15Mask+temp0+asm_yalpha1+ asm_uvalpha1+
3272
7e4399d1eb65 horizontal up/downscale linear & cubic
michael
parents: 3209
diff changeset
182 M24A+M24B+M24C+w02 + funnyYCode[0]+ funnyUVCode[0]+b5Dither+g5Dither+r5Dither+g6Dither+dither4[0]+dither8[0];
2671
555cb027c7a7 fixed warnings
michael
parents: 2669
diff changeset
183 if(i) i=0;
555cb027c7a7 fixed warnings
michael
parents: 2669
diff changeset
184 }
555cb027c7a7 fixed warnings
michael
parents: 2669
diff changeset
185 #endif
2316
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
186
3352
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
/* Clamp a filtered sample to the 0..255 range of an 8-bit output sample. */
static inline uint8_t yuv2yuvX_clip8(int v)
{
	if(v < 0)   return 0;
	if(v > 255) return 255;
	return (uint8_t)v;
}

/**
 * Vertical filtering to planar YUV output, plain C version.
 *
 * Every output sample is the sum of products of one column of the source
 * lines with the vertical filter coefficients, shifted right by 19 bits and
 * clamped to 0..255.
 * NOTE(review): the shift of 19 presumably removes the fixed-point fraction
 * bits of the intermediate samples plus those of the coefficients - confirm
 * against the code that fills the line buffers and filters.
 *
 * @param lumFilter     lumFilterSize luma filter coefficients
 * @param lumSrc        lumFilterSize pointers to luma source lines
 * @param lumFilterSize number of luma filter taps
 * @param chrFilter     chrFilterSize chroma filter coefficients
 * @param chrSrc        chrFilterSize pointers to chroma source lines; the U
 *                      samples start at index 0 and the V samples at index
 *                      2048 of each line
 * @param chrFilterSize number of chroma filter taps
 * @param dest          luma output, dstW samples are written
 * @param uDest         U output, dstW/2 samples are written; if NULL no
 *                      chroma is written at all (vDest is not touched either)
 * @param vDest         V output, dstW/2 samples are written
 * @param dstW          width of the output line in luma samples
 */
static inline void yuv2yuvXinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
				    int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
				    uint8_t *dest, uint8_t *uDest, uint8_t *vDest, int dstW)
{
	//FIXME Optimize (just quickly written not opti..)
	int i;
	for(i=0; i<dstW; i++)
	{
		int val=0;
		int j;
		for(j=0; j<lumFilterSize; j++)
			val += lumSrc[j][i] * lumFilter[j];

		dest[i]= yuv2yuvX_clip8(val>>19);
	}

	if(uDest != NULL)
		for(i=0; i<(dstW>>1); i++)
		{
			int u=0;
			int v=0;
			int j;
			// chroma has its own tap count; using lumFilterSize here would
			// drop taps or read past chrFilter/chrSrc when the sizes differ
			for(j=0; j<chrFilterSize; j++)
			{
				u += chrSrc[j][i] * chrFilter[j];
				v += chrSrc[j][i + 2048] * chrFilter[j];
			}

			uDest[i]= yuv2yuvX_clip8(u>>19);
			vDest[i]= yuv2yuvX_clip8(v>>19);
		}
}
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
219
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
220 static inline void yuv2rgbXinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
221 int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
222 uint8_t *dest, int dstW, int dstbpp)
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
223 {
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
224 if(dstbpp==32)
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
225 {
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
226 int i;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
227 for(i=0; i<(dstW>>1); i++){
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
228 int j;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
229 int Y1=0;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
230 int Y2=0;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
231 int U=0;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
232 int V=0;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
233 int Cb, Cr, Cg;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
234 for(j=0; j<lumFilterSize; j++)
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
235 {
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
236 Y1 += lumSrc[j][2*i] * lumFilter[j];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
237 Y2 += lumSrc[j][2*i+1] * lumFilter[j];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
238 }
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
239 for(j=0; j<chrFilterSize; j++)
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
240 {
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
241 U += chrSrc[j][i] * chrFilter[j];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
242 V += chrSrc[j][i+2048] * chrFilter[j];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
243 }
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
244 Y1= clip_yuvtab_2568[ (Y1>>19) + 256 ];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
245 Y2= clip_yuvtab_2568[ (Y2>>19) + 256 ];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
246 U >>= 19;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
247 V >>= 19;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
248
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
249 Cb= clip_yuvtab_40cf[U+ 256];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
250 Cg= clip_yuvtab_1a1e[V+ 256] + yuvtab_0c92[U+ 256];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
251 Cr= clip_yuvtab_3343[V+ 256];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
252
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
253 dest[8*i+0]=clip_table[((Y1 + Cb) >>13)];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
254 dest[8*i+1]=clip_table[((Y1 + Cg) >>13)];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
255 dest[8*i+2]=clip_table[((Y1 + Cr) >>13)];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
256
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
257 dest[8*i+4]=clip_table[((Y2 + Cb) >>13)];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
258 dest[8*i+5]=clip_table[((Y2 + Cg) >>13)];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
259 dest[8*i+6]=clip_table[((Y2 + Cr) >>13)];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
260 }
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
261 }
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
262 else if(dstbpp==24)
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
263 {
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
264 int i;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
265 for(i=0; i<(dstW>>1); i++){
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
266 int j;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
267 int Y1=0;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
268 int Y2=0;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
269 int U=0;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
270 int V=0;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
271 int Cb, Cr, Cg;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
272 for(j=0; j<lumFilterSize; j++)
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
273 {
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
274 Y1 += lumSrc[j][2*i] * lumFilter[j];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
275 Y2 += lumSrc[j][2*i+1] * lumFilter[j];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
276 }
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
277 for(j=0; j<chrFilterSize; j++)
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
278 {
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
279 U += chrSrc[j][i] * chrFilter[j];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
280 V += chrSrc[j][i+2048] * chrFilter[j];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
281 }
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
282 Y1= clip_yuvtab_2568[ (Y1>>19) + 256 ];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
283 Y2= clip_yuvtab_2568[ (Y2>>19) + 256 ];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
284 U >>= 19;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
285 V >>= 19;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
286
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
287 Cb= clip_yuvtab_40cf[U+ 256];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
288 Cg= clip_yuvtab_1a1e[V+ 256] + yuvtab_0c92[U+ 256];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
289 Cr= clip_yuvtab_3343[V+ 256];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
290
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
291 dest[0]=clip_table[((Y1 + Cb) >>13)];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
292 dest[1]=clip_table[((Y1 + Cg) >>13)];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
293 dest[2]=clip_table[((Y1 + Cr) >>13)];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
294
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
295 dest[3]=clip_table[((Y2 + Cb) >>13)];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
296 dest[4]=clip_table[((Y2 + Cg) >>13)];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
297 dest[5]=clip_table[((Y2 + Cr) >>13)];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
298 dest+=6;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
299 }
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
300 }
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
301 else if(dstbpp==16)
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
302 {
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
303 int i;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
304 for(i=0; i<(dstW>>1); i++){
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
305 int j;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
306 int Y1=0;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
307 int Y2=0;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
308 int U=0;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
309 int V=0;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
310 int Cb, Cr, Cg;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
311 for(j=0; j<lumFilterSize; j++)
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
312 {
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
313 Y1 += lumSrc[j][2*i] * lumFilter[j];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
314 Y2 += lumSrc[j][2*i+1] * lumFilter[j];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
315 }
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
316 for(j=0; j<chrFilterSize; j++)
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
317 {
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
318 U += chrSrc[j][i] * chrFilter[j];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
319 V += chrSrc[j][i+2048] * chrFilter[j];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
320 }
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
321 Y1= clip_yuvtab_2568[ (Y1>>19) + 256 ];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
322 Y2= clip_yuvtab_2568[ (Y2>>19) + 256 ];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
323 U >>= 19;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
324 V >>= 19;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
325
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
326 Cb= clip_yuvtab_40cf[U+ 256];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
327 Cg= clip_yuvtab_1a1e[V+ 256] + yuvtab_0c92[U+ 256];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
328 Cr= clip_yuvtab_3343[V+ 256];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
329
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
330 ((uint16_t*)dest)[2*i] =
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
331 clip_table16b[(Y1 + Cb) >>13] |
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
332 clip_table16g[(Y1 + Cg) >>13] |
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
333 clip_table16r[(Y1 + Cr) >>13];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
334
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
335 ((uint16_t*)dest)[2*i+1] =
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
336 clip_table16b[(Y2 + Cb) >>13] |
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
337 clip_table16g[(Y2 + Cg) >>13] |
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
338 clip_table16r[(Y2 + Cr) >>13];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
339 }
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
340 }
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
341 else if(dstbpp==15)
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
342 {
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
343 int i;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
344 for(i=0; i<(dstW>>1); i++){
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
345 int j;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
346 int Y1=0;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
347 int Y2=0;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
348 int U=0;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
349 int V=0;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
350 int Cb, Cr, Cg;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
351 for(j=0; j<lumFilterSize; j++)
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
352 {
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
353 Y1 += lumSrc[j][2*i] * lumFilter[j];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
354 Y2 += lumSrc[j][2*i+1] * lumFilter[j];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
355 }
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
356 for(j=0; j<chrFilterSize; j++)
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
357 {
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
358 U += chrSrc[j][i] * chrFilter[j];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
359 V += chrSrc[j][i+2048] * chrFilter[j];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
360 }
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
361 Y1= clip_yuvtab_2568[ (Y1>>19) + 256 ];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
362 Y2= clip_yuvtab_2568[ (Y2>>19) + 256 ];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
363 U >>= 19;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
364 V >>= 19;
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
365
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
366 Cb= clip_yuvtab_40cf[U+ 256];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
367 Cg= clip_yuvtab_1a1e[V+ 256] + yuvtab_0c92[U+ 256];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
368 Cr= clip_yuvtab_3343[V+ 256];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
369
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
370 ((uint16_t*)dest)[2*i] =
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
371 clip_table15b[(Y1 + Cb) >>13] |
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
372 clip_table15g[(Y1 + Cg) >>13] |
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
373 clip_table15r[(Y1 + Cr) >>13];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
374
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
375 ((uint16_t*)dest)[2*i+1] =
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
376 clip_table15b[(Y2 + Cb) >>13] |
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
377 clip_table15g[(Y2 + Cg) >>13] |
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
378 clip_table15r[(Y2 + Cr) >>13];
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
379 }
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
380 }
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
381 }
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
382
64121e8a43f5 print more info if -v
michael
parents: 3344
diff changeset
383
3126
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
384 //Note: we have C, X86, MMX, MMX2, 3DNOW version therse no 3DNOW+MMX2 one
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
385 //Plain C versions
3152
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
386 #if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT)
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
387 #define COMPILE_C
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
388 #endif
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
389
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
390 #ifdef CAN_COMPILE_X86_ASM
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
391
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
392 #if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
393 #define COMPILE_MMX
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
394 #endif
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
395
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
396 #if defined (HAVE_MMX2) || defined (RUNTIME_CPUDETECT)
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
397 #define COMPILE_MMX2
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
398 #endif
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
399
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
400 #if (defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
401 #define COMPILE_3DNOW
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
402 #endif
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
403 #endif //CAN_COMPILE_X86_ASM
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
404
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
405 #undef HAVE_MMX
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
406 #undef HAVE_MMX2
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
407 #undef HAVE_3DNOW
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
408 #undef ARCH_X86
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
409
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
410 #ifdef COMPILE_C
3126
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
411 #undef HAVE_MMX
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
412 #undef HAVE_MMX2
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
413 #undef HAVE_3DNOW
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
414 #undef ARCH_X86
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
415 #define RENAME(a) a ## _C
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
416 #include "swscale_template.c"
3152
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
417 #endif
2316
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
418
3126
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
419 #ifdef CAN_COMPILE_X86_ASM
2576
437ed06579d8 c optimizations
michael
parents: 2575
diff changeset
420
3126
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
421 //X86 versions
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
422 /*
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
423 #undef RENAME
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
424 #undef HAVE_MMX
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
425 #undef HAVE_MMX2
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
426 #undef HAVE_3DNOW
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
427 #define ARCH_X86
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
428 #define RENAME(a) a ## _X86
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
429 #include "swscale_template.c"
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
430 */
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
431 //MMX versions
3152
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
432 #ifdef COMPILE_MMX
3126
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
433 #undef RENAME
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
434 #define HAVE_MMX
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
435 #undef HAVE_MMX2
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
436 #undef HAVE_3DNOW
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
437 #define ARCH_X86
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
438 #define RENAME(a) a ## _MMX
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
439 #include "swscale_template.c"
3152
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
440 #endif
2316
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
441
3126
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
442 //MMX2 versions
3152
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
443 #ifdef COMPILE_MMX2
3126
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
444 #undef RENAME
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
445 #define HAVE_MMX
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
446 #define HAVE_MMX2
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
447 #undef HAVE_3DNOW
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
448 #define ARCH_X86
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
449 #define RENAME(a) a ## _MMX2
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
450 #include "swscale_template.c"
3152
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
451 #endif
2469
03abc2743ed6 downscale
michael
parents: 2326
diff changeset
452
3126
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
453 //3DNOW versions
3152
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
454 #ifdef COMPILE_3DNOW
3126
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
455 #undef RENAME
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
456 #define HAVE_MMX
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
457 #undef HAVE_MMX2
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
458 #define HAVE_3DNOW
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
459 #define ARCH_X86
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
460 #define RENAME(a) a ## _3DNow
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
461 #include "swscale_template.c"
3152
54710806be56 runtime cpu detection optional (compiles faster)
michael
parents: 3136
diff changeset
462 #endif
2469
03abc2743ed6 downscale
michael
parents: 2326
diff changeset
463
3126
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
464 #endif //CAN_COMPILE_X86_ASM
2469
03abc2743ed6 downscale
michael
parents: 2326
diff changeset
465
3126
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
466 // minor note: the HAVE_xyz is messed up after that line so dont use it
2316
bcb229557e9b fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents: 2297
diff changeset
467
2232
65996b3467d7 MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents: 2230
diff changeset
468
2519
6f3fa9bc3b27 yv12 to yv12 scaler
michael
parents: 2503
diff changeset
// *** Bilinear scaling and yuv->rgb or yuv->yuv conversion of yv12 slices.
// *** Note: this is called multiple times while decoding a frame; the first time y==0.
//
// This function is only a dispatcher: it forwards all of its arguments, unchanged,
// to exactly one of the SwScale_YV12slice_{MMX2,3DNow,MMX,C} variants.
//  - With RUNTIME_CPUDETECT the variant is chosen per call from gCpuCaps.
//    Switching the cpu type during a sliced drawing can have bad effects, like sig11.
//  - Otherwise the variant is fixed at compile time from the HAVE_* macros as they
//    stand at this point in the file.
void SwScale_YV12slice(unsigned char* srcptr[], int stride[], int srcSliceY,
		       int srcSliceH, uint8_t* dstptr[], int dststride, int dstbpp,
		       int srcW, int srcH, int dstW, int dstH){

#if defined(RUNTIME_CPUDETECT) && defined(CAN_COMPILE_X86_ASM)
	// runtime selection, ordered by speed, fastest first
	if(gCpuCaps.hasMMX2){
		SwScale_YV12slice_MMX2(srcptr, stride, srcSliceY, srcSliceH, dstptr, dststride, dstbpp, srcW, srcH, dstW, dstH);
	}else if(gCpuCaps.has3DNow){
		SwScale_YV12slice_3DNow(srcptr, stride, srcSliceY, srcSliceH, dstptr, dststride, dstbpp, srcW, srcH, dstW, dstH);
	}else if(gCpuCaps.hasMMX){
		SwScale_YV12slice_MMX(srcptr, stride, srcSliceY, srcSliceH, dstptr, dststride, dstbpp, srcW, srcH, dstW, dstH);
	}else{
		SwScale_YV12slice_C(srcptr, stride, srcSliceY, srcSliceH, dstptr, dststride, dstbpp, srcW, srcH, dstW, dstH);
	}
#elif defined(RUNTIME_CPUDETECT)
	// runtime detection requested, but no x86 asm can be compiled
	SwScale_YV12slice_C(srcptr, stride, srcSliceY, srcSliceH, dstptr, dststride, dstbpp, srcW, srcH, dstW, dstH);
#elif defined(HAVE_MMX2)
	SwScale_YV12slice_MMX2(srcptr, stride, srcSliceY, srcSliceH, dstptr, dststride, dstbpp, srcW, srcH, dstW, dstH);
#elif defined(HAVE_3DNOW)
	SwScale_YV12slice_3DNow(srcptr, stride, srcSliceY, srcSliceH, dstptr, dststride, dstbpp, srcW, srcH, dstW, dstH);
#elif defined(HAVE_MMX)
	SwScale_YV12slice_MMX(srcptr, stride, srcSliceY, srcSliceH, dstptr, dststride, dstbpp, srcW, srcH, dstW, dstH);
#else
	SwScale_YV12slice_C(srcptr, stride, srcSliceY, srcSliceH, dstptr, dststride, dstbpp, srcW, srcH, dstW, dstH);
#endif

}
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
503
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
504 void SwScale_Init(){
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
505 // generating tables:
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
506 int i;
3344
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
507 for(i=0; i<768; i++){
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
508 int c= MIN(MAX(i-256, 0), 255);
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
509 clip_table[i]=c;
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
510 yuvtab_2568[c]= clip_yuvtab_2568[i]=(0x2568*(c-16))+(256<<13);
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
511 yuvtab_3343[c]= clip_yuvtab_3343[i]=0x3343*(c-128);
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
512 yuvtab_0c92[c]= clip_yuvtab_0c92[i]=-0x0c92*(c-128);
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
513 yuvtab_1a1e[c]= clip_yuvtab_1a1e[i]=-0x1a1e*(c-128);
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
514 yuvtab_40cf[c]= clip_yuvtab_40cf[i]=0x40cf*(c-128);
2216
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
515 }
9da2a0515184 software yv12->rgb scaler - separated from fsdga
arpi
parents:
diff changeset
516
2584
6d20d5d5829f 15/16bit in C speedup
michael
parents: 2576
diff changeset
517 for(i=0; i<768; i++)
6d20d5d5829f 15/16bit in C speedup
michael
parents: 2576
diff changeset
518 {
6d20d5d5829f 15/16bit in C speedup
michael
parents: 2576
diff changeset
519 int v= clip_table[i];
6d20d5d5829f 15/16bit in C speedup
michael
parents: 2576
diff changeset
520 clip_table16b[i]= v>>3;
6d20d5d5829f 15/16bit in C speedup
michael
parents: 2576
diff changeset
521 clip_table16g[i]= (v<<3)&0x07E0;
6d20d5d5829f 15/16bit in C speedup
michael
parents: 2576
diff changeset
522 clip_table16r[i]= (v<<8)&0xF800;
6d20d5d5829f 15/16bit in C speedup
michael
parents: 2576
diff changeset
523 clip_table15b[i]= v>>3;
6d20d5d5829f 15/16bit in C speedup
michael
parents: 2576
diff changeset
524 clip_table15g[i]= (v<<2)&0x03E0;
6d20d5d5829f 15/16bit in C speedup
michael
parents: 2576
diff changeset
525 clip_table15r[i]= (v<<7)&0x7C00;
6d20d5d5829f 15/16bit in C speedup
michael
parents: 2576
diff changeset
526 }
3344
e87c59969d17 vertical cubic/linear scaling
michael
parents: 3272
diff changeset
527
3126
e71ae0213431 runtime cpu detection
michael
parents: 2800
diff changeset
528 }
2584
6d20d5d5829f 15/16bit in C speedup
michael
parents: 2576
diff changeset
529