comparison postproc/swscale.c @ 3344:e87c59969d17

vertical cubic/linear scaling prints some info (MMX, dstbpp, ...) mainloop rewritten
author michael
date Thu, 06 Dec 2001 00:10:42 +0000
parents 7e4399d1eb65
children 64121e8a43f5
comparison
equal deleted inserted replaced
3343:58ab1490a0be 3344:e87c59969d17
6 // the parts written by michael are under GNU GPL 6 // the parts written by michael are under GNU GPL
7 7
8 #include <inttypes.h> 8 #include <inttypes.h>
9 #include <string.h> 9 #include <string.h>
10 #include <math.h> 10 #include <math.h>
11 //#include <stdio.h> //FOR DEBUG ONLY 11 #include <stdio.h>
12 #include "../config.h" 12 #include "../config.h"
13 #ifdef HAVE_MALLOC_H
14 #include <malloc.h>
15 #endif
13 #include "swscale.h" 16 #include "swscale.h"
14 #include "../cpudetect.h" 17 #include "../cpudetect.h"
15 #undef MOVNTQ 18 #undef MOVNTQ
16 #undef PAVGB 19 #undef PAVGB
17 20
22 int fullUVIpol=0; 25 int fullUVIpol=0;
23 //disables the unscaled height version 26 //disables the unscaled height version
24 int allwaysIpol=0; 27 int allwaysIpol=0;
25 28
26 #define RET 0xC3 //near return opcode 29 #define RET 0xC3 //near return opcode
30
31 //#define ASSERT(x) if(!(x)) { printf("ASSERT " #x " failed\n"); *((int*)0)=0; }
32 #define ASSERT(x) ;
33
34
27 /* 35 /*
28 NOTES 36 NOTES
29 37
30 known BUGS with known cause (no bugreports please!, but patches are welcome :) ) 38 known BUGS with known cause (no bugreports please!, but patches are welcome :) )
31 horizontal MMX2 scaler reads 1-7 samples too many (might cause a sig11) 39 horizontal MMX2 scaler reads 1-7 samples too many (might cause a sig11)
32 40
33 Supported output formats BGR15 BGR16 BGR24 BGR32 41 Supported output formats BGR15 BGR16 BGR24 BGR32, YV12
34 BGR15 & BGR16 MMX versions support dithering 42 BGR15 & BGR16 MMX versions support dithering
35 Special versions: fast Y 1:1 scaling (no interpolation in y direction) 43 Special versions: fast Y 1:1 scaling (no interpolation in y direction)
36 44
37 TODO 45 TODO
38 more intelligent misalignment avoidance for the horizontal scaler 46 more intelligent misalignment avoidance for the horizontal scaler
39 bicubic scaler
40 dither in C 47 dither in C
41 change the distance of the u & v buffer 48 change the distance of the u & v buffer
42 how to differentiate between x86 and C at runtime ?! (using C for now) 49 Move static / global vars into a struct so multiple scalers can be used
50 write special vertical cubic upscale version
51 Optimize C code (yv12 / minmax)
43 */ 52 */
44 53
45 #define ABS(a) ((a) > 0 ? (a) : (-(a))) 54 #define ABS(a) ((a) > 0 ? (a) : (-(a)))
46 #define MIN(a,b) ((a) > (b) ? (b) : (a)) 55 #define MIN(a,b) ((a) > (b) ? (b) : (a))
47 #define MAX(a,b) ((a) < (b) ? (b) : (a)) 56 #define MAX(a,b) ((a) < (b) ? (b) : (a))
92 101
93 static uint64_t __attribute__((aligned(8))) temp0; 102 static uint64_t __attribute__((aligned(8))) temp0;
94 static uint64_t __attribute__((aligned(8))) asm_yalpha1; 103 static uint64_t __attribute__((aligned(8))) asm_yalpha1;
95 static uint64_t __attribute__((aligned(8))) asm_uvalpha1; 104 static uint64_t __attribute__((aligned(8))) asm_uvalpha1;
96 105
97 // temporary storage for 4 yuv lines: 106 static int16_t __attribute__((aligned(8))) *lumPixBuf[2000];
98 // 16bit for now (mmx likes it more compact) 107 static int16_t __attribute__((aligned(8))) *chrPixBuf[2000];
99 static uint16_t __attribute__((aligned(8))) pix_buf_y[4][2048];
100 static uint16_t __attribute__((aligned(8))) pix_buf_uv[2][2048*2];
101 static int16_t __attribute__((aligned(8))) hLumFilter[8000]; 108 static int16_t __attribute__((aligned(8))) hLumFilter[8000];
102 static int16_t __attribute__((aligned(8))) hLumFilterPos[2000]; 109 static int16_t __attribute__((aligned(8))) hLumFilterPos[2000];
103 static int16_t __attribute__((aligned(8))) hChrFilter[8000]; 110 static int16_t __attribute__((aligned(8))) hChrFilter[8000];
104 static int16_t __attribute__((aligned(8))) hChrFilterPos[2000]; 111 static int16_t __attribute__((aligned(8))) hChrFilterPos[2000];
112 static int16_t __attribute__((aligned(8))) vLumFilter[8000];
113 static int16_t __attribute__((aligned(8))) vLumFilterPos[2000];
114 static int16_t __attribute__((aligned(8))) vChrFilter[8000];
115 static int16_t __attribute__((aligned(8))) vChrFilterPos[2000];
116
117 // Contain simply the values from v(Lum|Chr)Filter just nicely packed for mmx
118 //FIXME these are very likely too small / 8000 caused problems with 480x480
119 static int16_t __attribute__((aligned(8))) lumMmxFilter[16000];
120 static int16_t __attribute__((aligned(8))) chrMmxFilter[16000];
105 #else 121 #else
106 static uint16_t pix_buf_y[4][2048]; 122 static int16_t *lumPixBuf[2000];
107 static uint16_t pix_buf_uv[2][2048*2]; 123 static int16_t *chrPixBuf[2000];
108 static int16_t hLumFilter[8000]; 124 static int16_t hLumFilter[8000];
109 static int16_t hLumFilterPos[2000]; 125 static int16_t hLumFilterPos[2000];
110 static int16_t hChrFilter[8000]; 126 static int16_t hChrFilter[8000];
111 static int16_t hChrFilterPos[2000]; 127 static int16_t hChrFilterPos[2000];
128 static int16_t vLumFilter[8000];
129 static int16_t vLumFilterPos[2000];
130 static int16_t vChrFilter[8000];
131 static int16_t vChrFilterPos[2000];
132 //FIXME just dummy vars
133 static int16_t lumMmxFilter[1];
134 static int16_t chrMmxFilter[1];
112 #endif 135 #endif
113 136
114 // clipping helper table for C implementations: 137 // clipping helper table for C implementations:
115 static unsigned char clip_table[768]; 138 static unsigned char clip_table[768];
116 139
125 static int yuvtab_2568[256]; 148 static int yuvtab_2568[256];
126 static int yuvtab_3343[256]; 149 static int yuvtab_3343[256];
127 static int yuvtab_0c92[256]; 150 static int yuvtab_0c92[256];
128 static int yuvtab_1a1e[256]; 151 static int yuvtab_1a1e[256];
129 static int yuvtab_40cf[256]; 152 static int yuvtab_40cf[256];
130 153 // Needed for cubic scaler to catch overflows
131 static int hLumFilterSize; 154 static int clip_yuvtab_2568[768];
132 static int hChrFilterSize; 155 static int clip_yuvtab_3343[768];
156 static int clip_yuvtab_0c92[768];
157 static int clip_yuvtab_1a1e[768];
158 static int clip_yuvtab_40cf[768];
159
160 static int hLumFilterSize=0;
161 static int hChrFilterSize=0;
162 static int vLumFilterSize=0;
163 static int vChrFilterSize=0;
164 static int vLumBufSize=0;
165 static int vChrBufSize=0;
133 166
134 int sws_flags=0; 167 int sws_flags=0;
135 168
136 #ifdef CAN_COMPILE_X86_ASM 169 #ifdef CAN_COMPILE_X86_ASM
137 static uint8_t funnyYCode[10000]; 170 static uint8_t funnyYCode[10000];
272 } 305 }
273 306
274 void SwScale_Init(){ 307 void SwScale_Init(){
275 // generating tables: 308 // generating tables:
276 int i; 309 int i;
277 for(i=0;i<256;i++){ 310 for(i=0; i<768; i++){
278 clip_table[i]=0; 311 int c= MIN(MAX(i-256, 0), 255);
279 clip_table[i+256]=i; 312 clip_table[i]=c;
280 clip_table[i+512]=255; 313 yuvtab_2568[c]= clip_yuvtab_2568[i]=(0x2568*(c-16))+(256<<13);
281 yuvtab_2568[i]=(0x2568*(i-16))+(256<<13); 314 yuvtab_3343[c]= clip_yuvtab_3343[i]=0x3343*(c-128);
282 yuvtab_3343[i]=0x3343*(i-128); 315 yuvtab_0c92[c]= clip_yuvtab_0c92[i]=-0x0c92*(c-128);
283 yuvtab_0c92[i]=-0x0c92*(i-128); 316 yuvtab_1a1e[c]= clip_yuvtab_1a1e[i]=-0x1a1e*(c-128);
284 yuvtab_1a1e[i]=-0x1a1e*(i-128); 317 yuvtab_40cf[c]= clip_yuvtab_40cf[i]=0x40cf*(c-128);
285 yuvtab_40cf[i]=0x40cf*(i-128);
286 } 318 }
287 319
288 for(i=0; i<768; i++) 320 for(i=0; i<768; i++)
289 { 321 {
290 int v= clip_table[i]; 322 int v= clip_table[i];
293 clip_table16r[i]= (v<<8)&0xF800; 325 clip_table16r[i]= (v<<8)&0xF800;
294 clip_table15b[i]= v>>3; 326 clip_table15b[i]= v>>3;
295 clip_table15g[i]= (v<<2)&0x03E0; 327 clip_table15g[i]= (v<<2)&0x03E0;
296 clip_table15r[i]= (v<<7)&0x7C00; 328 clip_table15r[i]= (v<<7)&0x7C00;
297 } 329 }
330
298 } 331 }
299 332