Mercurial > mplayer.hg
comparison postproc/swscale.c @ 4276:9199d15cb4e0
removed global vars so that multiple swscalers can be used
experimental upscaling mode (-sws 3)
general convolution filters support (unfinished)
bugfix for bicubic upscaling
assertion checking if MP_DEBUG is defined
checking of the input/output size instead of segfaulting if it's very large
author | michael |
---|---|
date | Sun, 20 Jan 2002 05:30:23 +0000 |
parents | 3cdb86beebce |
children | a7c8f1aec34a |
comparison
equal
deleted
inserted
replaced
4275:818be6ba8758 | 4276:9199d15cb4e0 |
---|---|
2 // Software scaling and colorspace conversion routines for MPlayer | 2 // Software scaling and colorspace conversion routines for MPlayer |
3 | 3 |
4 // Orginal C implementation by A'rpi/ESP-team <arpi@thot.banki.hu> | 4 // Orginal C implementation by A'rpi/ESP-team <arpi@thot.banki.hu> |
5 // current version mostly by Michael Niedermayer (michaelni@gmx.at) | 5 // current version mostly by Michael Niedermayer (michaelni@gmx.at) |
6 // the parts written by michael are under GNU GPL | 6 // the parts written by michael are under GNU GPL |
7 | |
8 /* | |
9 supported Input formats: YV12 (grayscale soon too) | |
10 supported output formats: YV12, BGR15, BGR16, BGR24, BGR32 (grayscale soon too) | |
11 */ | |
7 | 12 |
8 #include <inttypes.h> | 13 #include <inttypes.h> |
9 #include <string.h> | 14 #include <string.h> |
10 #include <math.h> | 15 #include <math.h> |
11 #include <stdio.h> | 16 #include <stdio.h> |
14 #ifdef HAVE_MALLOC_H | 19 #ifdef HAVE_MALLOC_H |
15 #include <malloc.h> | 20 #include <malloc.h> |
16 #endif | 21 #endif |
17 #include "swscale.h" | 22 #include "swscale.h" |
18 #include "../cpudetect.h" | 23 #include "../cpudetect.h" |
24 #include "../libvo/img_format.h" | |
19 #undef MOVNTQ | 25 #undef MOVNTQ |
20 #undef PAVGB | 26 #undef PAVGB |
21 | 27 |
22 //#undef HAVE_MMX2 | 28 //#undef HAVE_MMX2 |
23 //#undef HAVE_MMX | 29 //#undef HAVE_MMX |
24 //#undef ARCH_X86 | 30 //#undef ARCH_X86 |
25 #define DITHER1XBPP | 31 #define DITHER1XBPP |
26 int fullUVIpol=0; | |
27 //disables the unscaled height version | |
28 int allwaysIpol=0; | |
29 | 32 |
30 #define RET 0xC3 //near return opcode | 33 #define RET 0xC3 //near return opcode |
31 | 34 |
32 //#define ASSERT(x) if(!(x)) { printf("ASSERT " #x " failed\n"); *((int*)0)=0; } | 35 #ifdef MP_DEBUG |
36 #define ASSERT(x) if(!(x)) { printf("ASSERT " #x " failed\n"); *((int*)0)=0; } | |
37 #else | |
33 #define ASSERT(x) ; | 38 #define ASSERT(x) ; |
39 #endif | |
40 | |
41 #ifdef M_PI | |
42 #define PI M_PI | |
43 #else | |
44 #define PI 3.14159265358979323846 | |
45 #endif | |
34 | 46 |
35 extern int verbose; // defined in mplayer.c | 47 extern int verbose; // defined in mplayer.c |
36 /* | 48 /* |
37 NOTES | 49 NOTES |
38 | 50 |
48 dither in C | 60 dither in C |
49 change the distance of the u & v buffer | 61 change the distance of the u & v buffer |
50 Move static / global vars into a struct so multiple scalers can be used | 62 Move static / global vars into a struct so multiple scalers can be used |
51 write special vertical cubic upscale version | 63 write special vertical cubic upscale version |
52 Optimize C code (yv12 / minmax) | 64 Optimize C code (yv12 / minmax) |
53 dstStride[3] | |
54 */ | 65 */ |
55 | 66 |
56 #define ABS(a) ((a) > 0 ? (a) : (-(a))) | 67 #define ABS(a) ((a) > 0 ? (a) : (-(a))) |
57 #define MIN(a,b) ((a) > (b) ? (b) : (a)) | 68 #define MIN(a,b) ((a) > (b) ? (b) : (a)) |
58 #define MAX(a,b) ((a) < (b) ? (b) : (a)) | 69 #define MAX(a,b) ((a) < (b) ? (b) : (a)) |
99 | 110 |
100 static uint64_t __attribute__((aligned(8))) M24A= 0x00FF0000FF0000FFLL; | 111 static uint64_t __attribute__((aligned(8))) M24A= 0x00FF0000FF0000FFLL; |
101 static uint64_t __attribute__((aligned(8))) M24B= 0xFF0000FF0000FF00LL; | 112 static uint64_t __attribute__((aligned(8))) M24B= 0xFF0000FF0000FF00LL; |
102 static uint64_t __attribute__((aligned(8))) M24C= 0x0000FF0000FF0000LL; | 113 static uint64_t __attribute__((aligned(8))) M24C= 0x0000FF0000FF0000LL; |
103 | 114 |
104 static uint64_t __attribute__((aligned(8))) temp0; | 115 // FIXME remove |
105 static uint64_t __attribute__((aligned(8))) asm_yalpha1; | 116 static uint64_t __attribute__((aligned(8))) asm_yalpha1; |
106 static uint64_t __attribute__((aligned(8))) asm_uvalpha1; | 117 static uint64_t __attribute__((aligned(8))) asm_uvalpha1; |
107 | |
108 static int16_t __attribute__((aligned(8))) *lumPixBuf[2000]; | |
109 static int16_t __attribute__((aligned(8))) *chrPixBuf[2000]; | |
110 static int16_t __attribute__((aligned(8))) hLumFilter[8000]; | |
111 static int16_t __attribute__((aligned(8))) hLumFilterPos[2000]; | |
112 static int16_t __attribute__((aligned(8))) hChrFilter[8000]; | |
113 static int16_t __attribute__((aligned(8))) hChrFilterPos[2000]; | |
114 static int16_t __attribute__((aligned(8))) vLumFilter[8000]; | |
115 static int16_t __attribute__((aligned(8))) vLumFilterPos[2000]; | |
116 static int16_t __attribute__((aligned(8))) vChrFilter[8000]; | |
117 static int16_t __attribute__((aligned(8))) vChrFilterPos[2000]; | |
118 | |
119 // Contain simply the values from v(Lum|Chr)Filter just nicely packed for mmx | |
120 //FIXME these are very likely too small / 8000 caused problems with 480x480 | |
121 static int16_t __attribute__((aligned(8))) lumMmxFilter[16000]; | |
122 static int16_t __attribute__((aligned(8))) chrMmxFilter[16000]; | |
123 #else | |
124 static int16_t *lumPixBuf[2000]; | |
125 static int16_t *chrPixBuf[2000]; | |
126 static int16_t hLumFilter[8000]; | |
127 static int16_t hLumFilterPos[2000]; | |
128 static int16_t hChrFilter[8000]; | |
129 static int16_t hChrFilterPos[2000]; | |
130 static int16_t vLumFilter[8000]; | |
131 static int16_t vLumFilterPos[2000]; | |
132 static int16_t vChrFilter[8000]; | |
133 static int16_t vChrFilterPos[2000]; | |
134 //FIXME just dummy vars | |
135 static int16_t lumMmxFilter[1]; | |
136 static int16_t chrMmxFilter[1]; | |
137 #endif | 118 #endif |
138 | 119 |
139 // clipping helper table for C implementations: | 120 // clipping helper table for C implementations: |
140 static unsigned char clip_table[768]; | 121 static unsigned char clip_table[768]; |
141 | 122 |
157 static int clip_yuvtab_3343[768]; | 138 static int clip_yuvtab_3343[768]; |
158 static int clip_yuvtab_0c92[768]; | 139 static int clip_yuvtab_0c92[768]; |
159 static int clip_yuvtab_1a1e[768]; | 140 static int clip_yuvtab_1a1e[768]; |
160 static int clip_yuvtab_40cf[768]; | 141 static int clip_yuvtab_40cf[768]; |
161 | 142 |
162 static int hLumFilterSize=0; | 143 //global sws_flags from the command line |
163 static int hChrFilterSize=0; | |
164 static int vLumFilterSize=0; | |
165 static int vChrFilterSize=0; | |
166 static int vLumBufSize=0; | |
167 static int vChrBufSize=0; | |
168 | |
169 int sws_flags=0; | 144 int sws_flags=0; |
170 | 145 |
171 #ifdef CAN_COMPILE_X86_ASM | 146 /* cpuCaps combined from cpudetect and whats actually compiled in |
172 static uint8_t funnyYCode[10000]; | 147 (if there is no support for something compiled in it wont appear here) */ |
173 static uint8_t funnyUVCode[10000]; | 148 static CpuCaps cpuCaps; |
174 #endif | 149 |
175 | 150 void (*swScale)(SwsContext *context, uint8_t* src[], int srcStride[], int srcSliceY, |
176 static int canMMX2BeUsed=0; | 151 int srcSliceH, uint8_t* dst[], int dstStride[])=NULL; |
177 | 152 |
178 #ifdef CAN_COMPILE_X86_ASM | 153 #ifdef CAN_COMPILE_X86_ASM |
179 void in_asm_used_var_warning_killer() | 154 void in_asm_used_var_warning_killer() |
180 { | 155 { |
181 volatile int i= yCoeff+vrCoeff+ubCoeff+vgCoeff+ugCoeff+bF8+bFC+w400+w80+w10+ | 156 volatile int i= yCoeff+vrCoeff+ubCoeff+vgCoeff+ugCoeff+bF8+bFC+w400+w80+w10+ |
182 bm00001111+bm00000111+bm11111000+b16Mask+g16Mask+r16Mask+b15Mask+g15Mask+r15Mask+temp0+asm_yalpha1+ asm_uvalpha1+ | 157 bm00001111+bm00000111+bm11111000+b16Mask+g16Mask+r16Mask+b15Mask+g15Mask+r15Mask+asm_yalpha1+ asm_uvalpha1+ |
183 M24A+M24B+M24C+w02 + funnyYCode[0]+ funnyUVCode[0]+b5Dither+g5Dither+r5Dither+g6Dither+dither4[0]+dither8[0]; | 158 M24A+M24B+M24C+w02 + b5Dither+g5Dither+r5Dither+g6Dither+dither4[0]+dither8[0]; |
184 if(i) i=0; | 159 if(i) i=0; |
185 } | 160 } |
186 #endif | 161 #endif |
187 | 162 |
188 static inline void yuv2yuvXinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize, | 163 static inline void yuv2yuvXinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize, |
218 } | 193 } |
219 } | 194 } |
220 | 195 |
221 static inline void yuv2rgbXinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize, | 196 static inline void yuv2rgbXinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize, |
222 int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize, | 197 int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize, |
223 uint8_t *dest, int dstW, int dstbpp) | 198 uint8_t *dest, int dstW, int dstFormat) |
224 { | 199 { |
225 if(dstbpp==32) | 200 if(dstFormat==IMGFMT_BGR32) |
226 { | 201 { |
227 int i; | 202 int i; |
228 for(i=0; i<(dstW>>1); i++){ | 203 for(i=0; i<(dstW>>1); i++){ |
229 int j; | 204 int j; |
230 int Y1=0; | 205 int Y1=0; |
258 dest[8*i+4]=clip_table[((Y2 + Cb) >>13)]; | 233 dest[8*i+4]=clip_table[((Y2 + Cb) >>13)]; |
259 dest[8*i+5]=clip_table[((Y2 + Cg) >>13)]; | 234 dest[8*i+5]=clip_table[((Y2 + Cg) >>13)]; |
260 dest[8*i+6]=clip_table[((Y2 + Cr) >>13)]; | 235 dest[8*i+6]=clip_table[((Y2 + Cr) >>13)]; |
261 } | 236 } |
262 } | 237 } |
263 else if(dstbpp==24) | 238 else if(dstFormat==IMGFMT_BGR24) |
264 { | 239 { |
265 int i; | 240 int i; |
266 for(i=0; i<(dstW>>1); i++){ | 241 for(i=0; i<(dstW>>1); i++){ |
267 int j; | 242 int j; |
268 int Y1=0; | 243 int Y1=0; |
297 dest[4]=clip_table[((Y2 + Cg) >>13)]; | 272 dest[4]=clip_table[((Y2 + Cg) >>13)]; |
298 dest[5]=clip_table[((Y2 + Cr) >>13)]; | 273 dest[5]=clip_table[((Y2 + Cr) >>13)]; |
299 dest+=6; | 274 dest+=6; |
300 } | 275 } |
301 } | 276 } |
302 else if(dstbpp==16) | 277 else if(dstFormat==IMGFMT_BGR16) |
303 { | 278 { |
304 int i; | 279 int i; |
305 for(i=0; i<(dstW>>1); i++){ | 280 for(i=0; i<(dstW>>1); i++){ |
306 int j; | 281 int j; |
307 int Y1=0; | 282 int Y1=0; |
337 clip_table16b[(Y2 + Cb) >>13] | | 312 clip_table16b[(Y2 + Cb) >>13] | |
338 clip_table16g[(Y2 + Cg) >>13] | | 313 clip_table16g[(Y2 + Cg) >>13] | |
339 clip_table16r[(Y2 + Cr) >>13]; | 314 clip_table16r[(Y2 + Cr) >>13]; |
340 } | 315 } |
341 } | 316 } |
342 else if(dstbpp==15) | 317 else if(dstFormat==IMGFMT_BGR15) |
343 { | 318 { |
344 int i; | 319 int i; |
345 for(i=0; i<(dstW>>1); i++){ | 320 for(i=0; i<(dstW>>1); i++){ |
346 int j; | 321 int j; |
347 int Y1=0; | 322 int Y1=0; |
465 #endif //CAN_COMPILE_X86_ASM | 440 #endif //CAN_COMPILE_X86_ASM |
466 | 441 |
467 // minor note: the HAVE_xyz is messed up after that line so dont use it | 442 // minor note: the HAVE_xyz is messed up after that line so dont use it |
468 | 443 |
469 | 444 |
470 // *** bilinear scaling and yuv->rgb or yuv->yuv conversion of yv12 slices: | 445 // old global scaler, dont use for new code |
471 // *** Note: it's called multiple times while decoding a frame, first time y==0 | 446 // will use sws_flags from the command line |
472 // switching the cpu type during a sliced drawing can have bad effects, like sig11 | 447 void SwScale_YV12slice(unsigned char* src[], int srcStride[], int srcSliceY , |
473 void SwScale_YV12slice(unsigned char* srcptr[],int stride[], int srcSliceY , | 448 int srcSliceH, uint8_t* dst[], int dstStride, int dstbpp, |
474 int srcSliceH, uint8_t* dstptr[], int dststride, int dstbpp, | |
475 int srcW, int srcH, int dstW, int dstH){ | 449 int srcW, int srcH, int dstW, int dstH){ |
476 | 450 |
477 #ifdef RUNTIME_CPUDETECT | 451 static SwsContext *context=NULL; |
478 #ifdef CAN_COMPILE_X86_ASM | 452 int dstFormat; |
479 // ordered per speed fasterst first | 453 int flags=0; |
480 if(gCpuCaps.hasMMX2) | 454 static int firstTime=1; |
481 SwScale_YV12slice_MMX2(srcptr, stride, srcSliceY, srcSliceH, dstptr, dststride, dstbpp, srcW, srcH, dstW, dstH); | 455 int dstStride3[3]= {dstStride, dstStride>>1, dstStride>>1}; |
482 else if(gCpuCaps.has3DNow) | 456 |
483 SwScale_YV12slice_3DNow(srcptr, stride, srcSliceY, srcSliceH, dstptr, dststride, dstbpp, srcW, srcH, dstW, dstH); | 457 if(firstTime) |
484 else if(gCpuCaps.hasMMX) | 458 { |
485 SwScale_YV12slice_MMX(srcptr, stride, srcSliceY, srcSliceH, dstptr, dststride, dstbpp, srcW, srcH, dstW, dstH); | 459 flags= SWS_PRINT_INFO; |
486 else | 460 firstTime=0; |
487 SwScale_YV12slice_C(srcptr, stride, srcSliceY, srcSliceH, dstptr, dststride, dstbpp, srcW, srcH, dstW, dstH); | 461 } |
488 #else | 462 |
489 SwScale_YV12slice_C(srcptr, stride, srcSliceY, srcSliceH, dstptr, dststride, dstbpp, srcW, srcH, dstW, dstH); | 463 switch(dstbpp) |
490 #endif | 464 { |
491 #else //RUNTIME_CPUDETECT | 465 case 8 : dstFormat= IMGFMT_Y8; break; |
492 #ifdef HAVE_MMX2 | 466 case 12: dstFormat= IMGFMT_YV12; break; |
493 SwScale_YV12slice_MMX2(srcptr, stride, srcSliceY, srcSliceH, dstptr, dststride, dstbpp, srcW, srcH, dstW, dstH); | 467 case 15: dstFormat= IMGFMT_BGR15; break; |
494 #elif defined (HAVE_3DNOW) | 468 case 16: dstFormat= IMGFMT_BGR16; break; |
495 SwScale_YV12slice_3DNow(srcptr, stride, srcSliceY, srcSliceH, dstptr, dststride, dstbpp, srcW, srcH, dstW, dstH); | 469 case 24: dstFormat= IMGFMT_BGR24; break; |
496 #elif defined (HAVE_MMX) | 470 case 32: dstFormat= IMGFMT_BGR32; break; |
497 SwScale_YV12slice_MMX(srcptr, stride, srcSliceY, srcSliceH, dstptr, dststride, dstbpp, srcW, srcH, dstW, dstH); | 471 default: return; |
498 #else | 472 } |
499 SwScale_YV12slice_C(srcptr, stride, srcSliceY, srcSliceH, dstptr, dststride, dstbpp, srcW, srcH, dstW, dstH); | 473 |
500 #endif | 474 switch(sws_flags) |
501 #endif //!RUNTIME_CPUDETECT | 475 { |
502 | 476 case 0: flags|= SWS_FAST_BILINEAR; break; |
503 } | 477 case 1: flags|= SWS_BILINEAR; break; |
504 | 478 case 2: flags|= SWS_BICUBIC; break; |
479 case 3: flags|= SWS_X; break; | |
480 default:flags|= SWS_BILINEAR; break; | |
481 } | |
482 | |
483 if(!context) context=getSwsContext(srcW, srcH, IMGFMT_YV12, dstW, dstH, dstFormat, flags, NULL, NULL); | |
484 | |
485 | |
486 swScale(context, src, srcStride, srcSliceY, srcSliceH, dst, dstStride3); | |
487 } | |
488 | |
489 static inline void initFilter(int16_t *dstFilter, int16_t *filterPos, int *filterSize, int xInc, | |
490 int srcW, int dstW, int filterAlign, int one, int flags) | |
491 { | |
492 int i; | |
493 double filter[10000]; | |
494 #ifdef ARCH_X86 | |
495 if(gCpuCaps.hasMMX) | |
496 asm volatile("emms\n\t"::: "memory"); //FIXME this shouldnt be required but it IS (even for non mmx versions) | |
497 #endif | |
498 | |
499 if(ABS(xInc - 0x10000) <10) // unscaled | |
500 { | |
501 int i; | |
502 *filterSize= (1 +(filterAlign-1)) & (~(filterAlign-1)); // 1 or 4 normaly | |
503 for(i=0; i<dstW*(*filterSize); i++) filter[i]=0; | |
504 | |
505 for(i=0; i<dstW; i++) | |
506 { | |
507 filter[i*(*filterSize)]=1; | |
508 filterPos[i]=i; | |
509 } | |
510 | |
511 } | |
512 else if(xInc <= (1<<16) || (flags&SWS_FAST_BILINEAR)) // upscale | |
513 { | |
514 int i; | |
515 int xDstInSrc; | |
516 if (flags&SWS_BICUBIC) *filterSize= 4; | |
517 else if(flags&SWS_X ) *filterSize= 4; | |
518 else *filterSize= 2; | |
519 // printf("%d %d %d\n", filterSize, srcW, dstW); | |
520 *filterSize= (*filterSize +(filterAlign-1)) & (~(filterAlign-1)); | |
521 | |
522 xDstInSrc= xInc/2 - 0x8000; | |
523 for(i=0; i<dstW; i++) | |
524 { | |
525 int xx= (xDstInSrc>>16) - (*filterSize>>1) + 1; | |
526 int j; | |
527 | |
528 filterPos[i]= xx; | |
529 if((flags & SWS_BICUBIC) || (flags & SWS_X)) | |
530 { | |
531 double d= ABS(((xx+1)<<16) - xDstInSrc)/(double)(1<<16); | |
532 double y1,y2,y3,y4; | |
533 double A= -0.6; | |
534 if(flags & SWS_BICUBIC){ | |
535 // Equation is from VirtualDub | |
536 y1 = ( + A*d - 2.0*A*d*d + A*d*d*d); | |
537 y2 = (+ 1.0 - (A+3.0)*d*d + (A+2.0)*d*d*d); | |
538 y3 = ( - A*d + (2.0*A+3.0)*d*d - (A+2.0)*d*d*d); | |
539 y4 = ( + A*d*d - A*d*d*d); | |
540 }else{ | |
541 // cubic interpolation (derived it myself) | |
542 y1 = ( -2.0*d + 3.0*d*d - 1.0*d*d*d)/6.0; | |
543 y2 = (6.0 -3.0*d - 6.0*d*d + 3.0*d*d*d)/6.0; | |
544 y3 = ( +6.0*d + 3.0*d*d - 3.0*d*d*d)/6.0; | |
545 y4 = ( -1.0*d + 1.0*d*d*d)/6.0; | |
546 } | |
547 | |
548 // printf("%d %d %d \n", coeff, (int)d, xDstInSrc); | |
549 filter[i*(*filterSize) + 0]= y1; | |
550 filter[i*(*filterSize) + 1]= y2; | |
551 filter[i*(*filterSize) + 2]= y3; | |
552 filter[i*(*filterSize) + 3]= y4; | |
553 // printf("%1.3f %1.3f %1.3f %1.3f %1.3f\n",d , y1, y2, y3, y4); | |
554 } | |
555 else | |
556 { | |
557 for(j=0; j<*filterSize; j++) | |
558 { | |
559 double d= ABS((xx<<16) - xDstInSrc)/(double)(1<<16); | |
560 double coeff= 1.0 - d; | |
561 if(coeff<0) coeff=0; | |
562 // printf("%d %d %d \n", coeff, (int)d, xDstInSrc); | |
563 filter[i*(*filterSize) + j]= coeff; | |
564 xx++; | |
565 } | |
566 } | |
567 xDstInSrc+= xInc; | |
568 } | |
569 } | |
570 else // downscale | |
571 { | |
572 int xDstInSrc; | |
573 if(flags&SWS_BICUBIC) *filterSize= (int)ceil(1 + 4.0*srcW / (double)dstW); | |
574 else if(flags&SWS_X) *filterSize= (int)ceil(1 + 4.0*srcW / (double)dstW); | |
575 else *filterSize= (int)ceil(1 + 2.0*srcW / (double)dstW); | |
576 // printf("%d %d %d\n", *filterSize, srcW, dstW); | |
577 *filterSize= (*filterSize +(filterAlign-1)) & (~(filterAlign-1)); | |
578 | |
579 xDstInSrc= xInc/2 - 0x8000; | |
580 for(i=0; i<dstW; i++) | |
581 { | |
582 int xx= (int)((double)xDstInSrc/(double)(1<<16) - ((*filterSize)-1)*0.5 + 0.5); | |
583 int j; | |
584 filterPos[i]= xx; | |
585 for(j=0; j<*filterSize; j++) | |
586 { | |
587 double d= ABS((xx<<16) - xDstInSrc)/(double)xInc; | |
588 double coeff; | |
589 if((flags & SWS_BICUBIC) || (flags & SWS_X)) | |
590 { | |
591 double A= -0.75; | |
592 // d*=2; | |
593 // Equation is from VirtualDub | |
594 if(d<1.0) | |
595 coeff = (1.0 - (A+3.0)*d*d + (A+2.0)*d*d*d); | |
596 else if(d<2.0) | |
597 coeff = (-4.0*A + 8.0*A*d - 5.0*A*d*d + A*d*d*d); | |
598 else | |
599 coeff=0.0; | |
600 } | |
601 /* else if(flags & SWS_X) | |
602 { | |
603 }*/ | |
604 else | |
605 { | |
606 coeff= 1.0 - d; | |
607 if(coeff<0) coeff=0; | |
608 } | |
609 // printf("%1.3f %d %d \n", coeff, (int)d, xDstInSrc); | |
610 filter[i*(*filterSize) + j]= coeff; | |
611 xx++; | |
612 } | |
613 xDstInSrc+= xInc; | |
614 } | |
615 } | |
616 | |
617 //fix borders | |
618 for(i=0; i<dstW; i++) | |
619 { | |
620 int j; | |
621 if(filterPos[i] < 0) | |
622 { | |
623 // Move filter coeffs left to compensate for filterPos | |
624 for(j=1; j<*filterSize; j++) | |
625 { | |
626 int left= MAX(j + filterPos[i], 0); | |
627 filter[i*(*filterSize) + left] += filter[i*(*filterSize) + j]; | |
628 filter[i*(*filterSize) + j]=0; | |
629 } | |
630 filterPos[i]= 0; | |
631 } | |
632 | |
633 if(filterPos[i] + (*filterSize) > srcW) | |
634 { | |
635 int shift= filterPos[i] + (*filterSize) - srcW; | |
636 // Move filter coeffs right to compensate for filterPos | |
637 for(j=(*filterSize)-2; j>=0; j--) | |
638 { | |
639 int right= MIN(j + shift, (*filterSize)-1); | |
640 filter[i*(*filterSize) +right] += filter[i*(*filterSize) +j]; | |
641 filter[i*(*filterSize) +j]=0; | |
642 } | |
643 filterPos[i]= srcW - (*filterSize); | |
644 } | |
645 } | |
646 | |
647 //FIXME try to align filterpos if possible / try to shift filterpos to put zeros at the end | |
648 // and skip these than later | |
649 | |
650 //Normalize | |
651 for(i=0; i<dstW; i++) | |
652 { | |
653 int j; | |
654 double sum=0; | |
655 double scale= one; | |
656 for(j=0; j<*filterSize; j++) | |
657 { | |
658 sum+= filter[i*(*filterSize) + j]; | |
659 } | |
660 scale/= sum; | |
661 for(j=0; j<*filterSize; j++) | |
662 { | |
663 dstFilter[i*(*filterSize) + j]= (int)(filter[i*(*filterSize) + j]*scale); | |
664 } | |
665 } | |
666 } | |
667 | |
668 #ifdef ARCH_X86 | |
669 static void initMMX2HScaler(int dstW, int xInc, uint8_t *funnyCode) | |
670 { | |
671 uint8_t *fragment; | |
672 int imm8OfPShufW1; | |
673 int imm8OfPShufW2; | |
674 int fragmentLength; | |
675 | |
676 int xpos, i; | |
677 | |
678 // create an optimized horizontal scaling routine | |
679 | |
680 //code fragment | |
681 | |
682 asm volatile( | |
683 "jmp 9f \n\t" | |
684 // Begin | |
685 "0: \n\t" | |
686 "movq (%%esi), %%mm0 \n\t" //FIXME Alignment | |
687 "movq %%mm0, %%mm1 \n\t" | |
688 "psrlq $8, %%mm0 \n\t" | |
689 "punpcklbw %%mm7, %%mm1 \n\t" | |
690 "movq %%mm2, %%mm3 \n\t" | |
691 "punpcklbw %%mm7, %%mm0 \n\t" | |
692 "addw %%bx, %%cx \n\t" //2*xalpha += (4*lumXInc)&0xFFFF | |
693 "pshufw $0xFF, %%mm1, %%mm1 \n\t" | |
694 "1: \n\t" | |
695 "adcl %%edx, %%esi \n\t" //xx+= (4*lumXInc)>>16 + carry | |
696 "pshufw $0xFF, %%mm0, %%mm0 \n\t" | |
697 "2: \n\t" | |
698 "psrlw $9, %%mm3 \n\t" | |
699 "psubw %%mm1, %%mm0 \n\t" | |
700 "pmullw %%mm3, %%mm0 \n\t" | |
701 "paddw %%mm6, %%mm2 \n\t" // 2*alpha += xpos&0xFFFF | |
702 "psllw $7, %%mm1 \n\t" | |
703 "paddw %%mm1, %%mm0 \n\t" | |
704 | |
705 "movq %%mm0, (%%edi, %%eax) \n\t" | |
706 | |
707 "addl $8, %%eax \n\t" | |
708 // End | |
709 "9: \n\t" | |
710 // "int $3\n\t" | |
711 "leal 0b, %0 \n\t" | |
712 "leal 1b, %1 \n\t" | |
713 "leal 2b, %2 \n\t" | |
714 "decl %1 \n\t" | |
715 "decl %2 \n\t" | |
716 "subl %0, %1 \n\t" | |
717 "subl %0, %2 \n\t" | |
718 "leal 9b, %3 \n\t" | |
719 "subl %0, %3 \n\t" | |
720 :"=r" (fragment), "=r" (imm8OfPShufW1), "=r" (imm8OfPShufW2), | |
721 "=r" (fragmentLength) | |
722 ); | |
723 | |
724 xpos= 0; //lumXInc/2 - 0x8000; // difference between pixel centers | |
725 | |
726 for(i=0; i<dstW/8; i++) | |
727 { | |
728 int xx=xpos>>16; | |
729 | |
730 if((i&3) == 0) | |
731 { | |
732 int a=0; | |
733 int b=((xpos+xInc)>>16) - xx; | |
734 int c=((xpos+xInc*2)>>16) - xx; | |
735 int d=((xpos+xInc*3)>>16) - xx; | |
736 | |
737 memcpy(funnyCode + fragmentLength*i/4, fragment, fragmentLength); | |
738 | |
739 funnyCode[fragmentLength*i/4 + imm8OfPShufW1]= | |
740 funnyCode[fragmentLength*i/4 + imm8OfPShufW2]= | |
741 a | (b<<2) | (c<<4) | (d<<6); | |
742 | |
743 // if we dont need to read 8 bytes than dont :), reduces the chance of | |
744 // crossing a cache line | |
745 if(d<3) funnyCode[fragmentLength*i/4 + 1]= 0x6E; | |
746 | |
747 funnyCode[fragmentLength*(i+4)/4]= RET; | |
748 } | |
749 xpos+=xInc; | |
750 } | |
751 } | |
752 #endif // ARCH_X86 | |
753 | |
754 //FIXME remove | |
505 void SwScale_Init(){ | 755 void SwScale_Init(){ |
756 } | |
757 | |
758 static void globalInit(){ | |
506 // generating tables: | 759 // generating tables: |
507 int i; | 760 int i; |
508 for(i=0; i<768; i++){ | 761 for(i=0; i<768; i++){ |
509 int c= MIN(MAX(i-256, 0), 255); | 762 int c= MIN(MAX(i-256, 0), 255); |
510 clip_table[i]=c; | 763 clip_table[i]=c; |
515 yuvtab_40cf[c]= clip_yuvtab_40cf[i]=0x40cf*(c-128); | 768 yuvtab_40cf[c]= clip_yuvtab_40cf[i]=0x40cf*(c-128); |
516 } | 769 } |
517 | 770 |
518 for(i=0; i<768; i++) | 771 for(i=0; i<768; i++) |
519 { | 772 { |
520 int v= clip_table[i]; | 773 int v= clip_table[i]; |
521 clip_table16b[i]= v>>3; | 774 clip_table16b[i]= v>>3; |
522 clip_table16g[i]= (v<<3)&0x07E0; | 775 clip_table16g[i]= (v<<3)&0x07E0; |
523 clip_table16r[i]= (v<<8)&0xF800; | 776 clip_table16r[i]= (v<<8)&0xF800; |
524 clip_table15b[i]= v>>3; | 777 clip_table15b[i]= v>>3; |
525 clip_table15g[i]= (v<<2)&0x03E0; | 778 clip_table15g[i]= (v<<2)&0x03E0; |
526 clip_table15r[i]= (v<<7)&0x7C00; | 779 clip_table15r[i]= (v<<7)&0x7C00; |
527 } | 780 } |
528 | 781 |
529 } | 782 cpuCaps= gCpuCaps; |
530 | 783 |
784 #ifdef RUNTIME_CPUDETECT | |
785 #ifdef CAN_COMPILE_X86_ASM | |
786 // ordered per speed fasterst first | |
787 if(gCpuCaps.hasMMX2) | |
788 swScale= swScale_MMX2; | |
789 else if(gCpuCaps.has3DNow) | |
790 swScale= swScale_3DNOW; | |
791 else if(gCpuCaps.hasMMX) | |
792 swScale= swScale_MMX; | |
793 else | |
794 swScale= swScale_C; | |
795 | |
796 #else | |
797 swScale= swScale_C; | |
798 cpuCaps.hasMMX2 = cpuCaps.hasMMX = cpuCaps.has3DNow = 0; | |
799 #endif | |
800 #else //RUNTIME_CPUDETECT | |
801 #ifdef HAVE_MMX2 | |
802 swScale= swScale_MMX2; | |
803 cpuCaps.has3DNow = 0; | |
804 #elif defined (HAVE_3DNOW) | |
805 swScale= swScale_3DNOW; | |
806 cpuCaps.hasMMX2 = 0; | |
807 #elif defined (HAVE_MMX) | |
808 swScale= swScale_MMX; | |
809 cpuCaps.hasMMX2 = cpuCaps.has3DNow = 0; | |
810 #else | |
811 swScale= swScale_C; | |
812 cpuCaps.hasMMX2 = cpuCaps.hasMMX = cpuCaps.has3DNow = 0; | |
813 #endif | |
814 #endif //!RUNTIME_CPUDETECT | |
815 } | |
816 | |
817 | |
818 SwsContext *getSwsContext(int srcW, int srcH, int srcFormat, int dstW, int dstH, int dstFormat, int flags, | |
819 SwsFilter *srcFilter, SwsFilter *dstFilter){ | |
820 | |
821 const int widthAlign= dstFormat==IMGFMT_YV12 ? 16 : 8; | |
822 SwsContext *c; | |
823 int i; | |
824 //const int bytespp= (dstbpp+1)/8; //(12->1, 15&16->2, 24->3, 32->4) | |
825 //const int over= dstFormat==IMGFMT_YV12 ? (((dstW+15)&(~15))) - dststride | |
826 // : (((dstW+7)&(~7)))*bytespp - dststride; | |
827 if(swScale==NULL) globalInit(); | |
828 | |
829 /* sanity check */ | |
830 if(srcW<1 || srcH<1 || dstW<1 || dstH<1) return NULL; | |
831 if(srcW>=SWS_MAX_SIZE || dstW>=SWS_MAX_SIZE || srcH>=SWS_MAX_SIZE || dstH>=SWS_MAX_SIZE) | |
832 { | |
833 fprintf(stderr, "size is too large, increase SWS_MAX_SIZE\n"); | |
834 return NULL; | |
835 } | |
836 | |
837 /* FIXME | |
838 if(dstStride[0]%widthAlign !=0 ) | |
839 { | |
840 if(flags & SWS_PRINT_INFO) | |
841 fprintf(stderr, "SwScaler: Warning: dstStride is not a multiple of %d!\n" | |
842 "SwScaler: ->cannot do aligned memory acesses anymore\n", | |
843 widthAlign); | |
844 } | |
845 */ | |
846 c= memalign(64, sizeof(SwsContext)); | |
847 | |
848 c->srcW= srcW; | |
849 c->srcH= srcH; | |
850 c->dstW= dstW; | |
851 c->dstH= dstH; | |
852 c->lumXInc= ((srcW<<16) + (1<<15))/dstW; | |
853 c->lumYInc= ((srcH<<16) + (1<<15))/dstH; | |
854 c->flags= flags; | |
855 c->dstFormat= dstFormat; | |
856 c->srcFormat= srcFormat; | |
857 | |
858 if(cpuCaps.hasMMX2) | |
859 { | |
860 c->canMMX2BeUsed= (dstW >=srcW && (dstW&31)==0 && (srcW&15)==0) ? 1 : 0; | |
861 if(!c->canMMX2BeUsed && dstW >=srcW && (srcW&15)==0 && (flags&SWS_FAST_BILINEAR)) | |
862 { | |
863 if(flags&SWS_PRINT_INFO) | |
864 fprintf(stderr, "SwScaler: output Width is not a multiple of 32 -> no MMX2 scaler\n"); | |
865 } | |
866 } | |
867 else | |
868 c->canMMX2BeUsed=0; | |
869 | |
870 // match pixel 0 of the src to pixel 0 of dst and match pixel n-2 of src to pixel n-2 of dst | |
871 // but only for the FAST_BILINEAR mode otherwise do correct scaling | |
872 // n-2 is the last chrominance sample available | |
873 // this is not perfect, but noone shuld notice the difference, the more correct variant | |
874 // would be like the vertical one, but that would require some special code for the | |
875 // first and last pixel | |
876 if(flags&SWS_FAST_BILINEAR) | |
877 { | |
878 if(c->canMMX2BeUsed) c->lumXInc+= 20; | |
879 //we dont use the x86asm scaler if mmx is available | |
880 else if(cpuCaps.hasMMX) c->lumXInc = ((srcW-2)<<16)/(dstW-2) - 20; | |
881 } | |
882 | |
883 /* set chrXInc & chrDstW */ | |
884 if((flags&SWS_FULL_UV_IPOL) && dstFormat!=IMGFMT_YV12) | |
885 c->chrXInc= c->lumXInc>>1, c->chrDstW= dstW; | |
886 else | |
887 c->chrXInc= c->lumXInc, c->chrDstW= (dstW+1)>>1; | |
888 | |
889 /* set chrYInc & chrDstH */ | |
890 if(dstFormat==IMGFMT_YV12) c->chrYInc= c->lumYInc, c->chrDstH= (dstH+1)>>1; | |
891 else c->chrYInc= c->lumYInc>>1, c->chrDstH= dstH; | |
892 | |
893 /* precalculate horizontal scaler filter coefficients */ | |
894 { | |
895 const int filterAlign= cpuCaps.hasMMX ? 4 : 1; | |
896 | |
897 initFilter(c->hLumFilter, c->hLumFilterPos, &c->hLumFilterSize, c->lumXInc, | |
898 srcW , dstW, filterAlign, 1<<14, flags); | |
899 initFilter(c->hChrFilter, c->hChrFilterPos, &c->hChrFilterSize, c->chrXInc, | |
900 (srcW+1)>>1, c->chrDstW, filterAlign, 1<<14, flags); | |
901 | |
902 #ifdef ARCH_X86 | |
903 // cant downscale !!! | |
904 if(c->canMMX2BeUsed && (flags & SWS_FAST_BILINEAR)) | |
905 { | |
906 initMMX2HScaler( dstW, c->lumXInc, c->funnyYCode); | |
907 initMMX2HScaler(c->chrDstW, c->chrXInc, c->funnyUVCode); | |
908 } | |
909 #endif | |
910 } // Init Horizontal stuff | |
911 | |
912 | |
913 | |
914 /* precalculate vertical scaler filter coefficients */ | |
915 initFilter(c->vLumFilter, c->vLumFilterPos, &c->vLumFilterSize, c->lumYInc, | |
916 srcH , dstH, 1, (1<<12)-4, flags); | |
917 initFilter(c->vChrFilter, c->vChrFilterPos, &c->vChrFilterSize, c->chrYInc, | |
918 (srcH+1)>>1, c->chrDstH, 1, (1<<12)-4, flags); | |
919 | |
920 // Calculate Buffer Sizes so that they wont run out while handling these damn slices | |
921 c->vLumBufSize= c->vLumFilterSize; | |
922 c->vChrBufSize= c->vChrFilterSize; | |
923 for(i=0; i<dstH; i++) | |
924 { | |
925 int chrI= i*c->chrDstH / dstH; | |
926 int nextSlice= MAX(c->vLumFilterPos[i ] + c->vLumFilterSize - 1, | |
927 ((c->vChrFilterPos[chrI] + c->vChrFilterSize - 1)<<1)); | |
928 nextSlice&= ~1; // Slices start at even boundaries | |
929 if(c->vLumFilterPos[i ] + c->vLumBufSize < nextSlice) | |
930 c->vLumBufSize= nextSlice - c->vLumFilterPos[i ]; | |
931 if(c->vChrFilterPos[chrI] + c->vChrBufSize < (nextSlice>>1)) | |
932 c->vChrBufSize= (nextSlice>>1) - c->vChrFilterPos[chrI]; | |
933 } | |
934 | |
935 // allocate pixbufs (we use dynamic allocation because otherwise we would need to | |
936 // allocate several megabytes to handle all possible cases) | |
937 for(i=0; i<c->vLumBufSize; i++) | |
938 c->lumPixBuf[i]= c->lumPixBuf[i+c->vLumBufSize]= (uint16_t*)memalign(8, 4000); | |
939 for(i=0; i<c->vChrBufSize; i++) | |
940 c->chrPixBuf[i]= c->chrPixBuf[i+c->vChrBufSize]= (uint16_t*)memalign(8, 8000); | |
941 | |
942 //try to avoid drawing green stuff between the right end and the stride end | |
943 for(i=0; i<c->vLumBufSize; i++) memset(c->lumPixBuf[i], 0, 4000); | |
944 for(i=0; i<c->vChrBufSize; i++) memset(c->chrPixBuf[i], 64, 8000); | |
945 | |
946 ASSERT(c->chrDstH <= dstH) | |
947 ASSERT(c->vLumFilterSize* dstH*4 <= SWS_MAX_SIZE*20) | |
948 ASSERT(c->vChrFilterSize*c->chrDstH*4 <= SWS_MAX_SIZE*20) | |
949 | |
950 // pack filter data for mmx code | |
951 if(cpuCaps.hasMMX) | |
952 { | |
953 for(i=0; i<c->vLumFilterSize*dstH; i++) | |
954 c->lumMmxFilter[4*i]=c->lumMmxFilter[4*i+1]=c->lumMmxFilter[4*i+2]=c->lumMmxFilter[4*i+3]= | |
955 c->vLumFilter[i]; | |
956 for(i=0; i<c->vChrFilterSize*c->chrDstH; i++) | |
957 c->chrMmxFilter[4*i]=c->chrMmxFilter[4*i+1]=c->chrMmxFilter[4*i+2]=c->chrMmxFilter[4*i+3]= | |
958 c->vChrFilter[i]; | |
959 } | |
960 | |
961 if(flags&SWS_PRINT_INFO) | |
962 { | |
963 #ifdef DITHER1XBPP | |
964 char *dither= cpuCaps.hasMMX ? " dithered" : ""; | |
965 #endif | |
966 if(flags&SWS_FAST_BILINEAR) | |
967 fprintf(stderr, "\nSwScaler: FAST_BILINEAR scaler "); | |
968 else if(flags&SWS_BILINEAR) | |
969 fprintf(stderr, "\nSwScaler: BILINEAR scaler "); | |
970 else if(flags&SWS_BICUBIC) | |
971 fprintf(stderr, "\nSwScaler: BICUBIC scaler "); | |
972 else | |
973 fprintf(stderr, "\nSwScaler: ehh flags invalid?! "); | |
974 | |
975 if(dstFormat==IMGFMT_BGR15) | |
976 fprintf(stderr, "with%s BGR15 output ", dither); | |
977 else if(dstFormat==IMGFMT_BGR16) | |
978 fprintf(stderr, "with%s BGR16 output ", dither); | |
979 else if(dstFormat==IMGFMT_BGR24) | |
980 fprintf(stderr, "with BGR24 output "); | |
981 else if(dstFormat==IMGFMT_BGR32) | |
982 fprintf(stderr, "with BGR32 output "); | |
983 else if(dstFormat==IMGFMT_YV12) | |
984 fprintf(stderr, "with YV12 output "); | |
985 else | |
986 fprintf(stderr, "without output "); | |
987 | |
988 if(cpuCaps.hasMMX2) | |
989 fprintf(stderr, "using MMX2\n"); | |
990 else if(cpuCaps.has3DNow) | |
991 fprintf(stderr, "using 3DNOW\n"); | |
992 else if(cpuCaps.hasMMX) | |
993 fprintf(stderr, "using MMX\n"); | |
994 else | |
995 fprintf(stderr, "using C\n"); | |
996 } | |
997 | |
998 if((flags & SWS_PRINT_INFO) && verbose) | |
999 { | |
1000 if(cpuCaps.hasMMX) | |
1001 { | |
1002 if(c->canMMX2BeUsed && (flags&SWS_FAST_BILINEAR)) | |
1003 printf("SwScaler: using FAST_BILINEAR MMX2 scaler for horizontal scaling\n"); | |
1004 else | |
1005 { | |
1006 if(c->hLumFilterSize==4) | |
1007 printf("SwScaler: using 4-tap MMX scaler for horizontal luminance scaling\n"); | |
1008 else if(c->hLumFilterSize==8) | |
1009 printf("SwScaler: using 8-tap MMX scaler for horizontal luminance scaling\n"); | |
1010 else | |
1011 printf("SwScaler: using n-tap MMX scaler for horizontal luminance scaling\n"); | |
1012 | |
1013 if(c->hChrFilterSize==4) | |
1014 printf("SwScaler: using 4-tap MMX scaler for horizontal chrominance scaling\n"); | |
1015 else if(c->hChrFilterSize==8) | |
1016 printf("SwScaler: using 8-tap MMX scaler for horizontal chrominance scaling\n"); | |
1017 else | |
1018 printf("SwScaler: using n-tap MMX scaler for horizontal chrominance scaling\n"); | |
1019 } | |
1020 } | |
1021 else | |
1022 { | |
1023 #ifdef ARCH_X86 | |
1024 printf("SwScaler: using X86-Asm scaler for horizontal scaling\n"); | |
1025 #else | |
1026 if(flags & SWS_FAST_BILINEAR) | |
1027 printf("SwScaler: using FAST_BILINEAR C scaler for horizontal scaling\n"); | |
1028 else | |
1029 printf("SwScaler: using C scaler for horizontal scaling\n"); | |
1030 #endif | |
1031 } | |
1032 if(dstFormat==IMGFMT_YV12) | |
1033 { | |
1034 if(c->vLumFilterSize==1) | |
1035 printf("SwScaler: using 1-tap %s \"scaler\" for vertical scaling (YV12)\n", cpuCaps.hasMMX ? "MMX" : "C"); | |
1036 else | |
1037 printf("SwScaler: using n-tap %s scaler for vertical scaling (YV12)\n", cpuCaps.hasMMX ? "MMX" : "C"); | |
1038 } | |
1039 else | |
1040 { | |
1041 if(c->vLumFilterSize==1 && c->vChrFilterSize==2) | |
1042 printf("SwScaler: using 1-tap %s \"scaler\" for vertical luminance scaling (BGR)\n" | |
1043 "SwScaler: 2-tap scaler for vertical chrominance scaling (BGR)\n",cpuCaps.hasMMX ? "MMX" : "C"); | |
1044 else if(c->vLumFilterSize==2 && c->vChrFilterSize==2) | |
1045 printf("SwScaler: using 2-tap linear %s scaler for vertical scaling (BGR)\n", cpuCaps.hasMMX ? "MMX" : "C"); | |
1046 else | |
1047 printf("SwScaler: using n-tap %s scaler for vertical scaling (BGR)\n", cpuCaps.hasMMX ? "MMX" : "C"); | |
1048 } | |
1049 | |
1050 if(dstFormat==IMGFMT_BGR24) | |
1051 printf("SwScaler: using %s YV12->BGR24 Converter\n", | |
1052 cpuCaps.hasMMX2 ? "MMX2" : (cpuCaps.hasMMX ? "MMX" : "C")); | |
1053 else | |
1054 printf("SwScaler: using %s YV12->BGR Converter\n", cpuCaps.hasMMX ? "MMX" : "C");//FIXME print format | |
1055 | |
1056 printf("SwScaler: %dx%d -> %dx%d\n", srcW, srcH, dstW, dstH); | |
1057 } | |
1058 | |
1059 return c; | |
1060 } | |
1061 | |
1062 /** | |
1063 * returns a normalized gaussian curve used to filter stuff | |
1064 * quality=3 is high quality, lowwer is lowwer quality | |
1065 */ | |
1066 double *getGaussian(double variance, double quality){ | |
1067 const int length= (int)(variance*quality + 0.5) | 1; | |
1068 int i; | |
1069 double *coeff= memalign(sizeof(double), length*sizeof(double)); | |
1070 double middle= (length-1)*0.5; | |
1071 | |
1072 for(i=0; i<length; i++) | |
1073 { | |
1074 double dist= i-middle; | |
1075 coeff[i]= exp( -dist*dist/(2*variance*variance) ) / sqrt(2*variance*PI); | |
1076 } | |
1077 | |
1078 normalize(coeff, length, 1.0); | |
1079 return coeff; | |
1080 } | |
1081 | |
/**
 * Rescales coeff[0..length-1] in place so that the elements sum to height.
 *
 * Every element is multiplied by height/sum, where sum is the total of the
 * current coefficients. No guard is applied when that total is zero: the
 * scale factor is then not finite and so are the resulting coefficients.
 *
 * @param coeff  coefficient vector, modified in place
 * @param length number of elements in coeff
 * @param height sum the coefficients should have afterwards
 */
void normalize(double *coeff, int length, double height){
	int i;
	double sum=0;
	double inv;

	for(i=0; i<length; i++)
		sum+= coeff[i];

	// factor that maps the current sum onto the requested one
	inv= height/sum;

	// scale by inv (not by height), otherwise the sum is never normalized
	for(i=0; i<length; i++)
		coeff[i]*= inv;
}
1095 | |
/**
 * Discrete convolution of two coefficient vectors.
 *
 * Returns a newly allocated vector of aLength + bLength - 1 elements in
 * which element k holds the sum of a[i]*b[j] over all i+j == k.
 * The inputs are only read. The buffer comes from memalign() and the
 * allocation result is not checked here.
 */
double *conv(double *a, int aLength, double *b, int bLength){
	const int outLength= aLength + bLength - 1;
	double *out= memalign(sizeof(double), outLength*sizeof(double));
	int ai, bi;
	int k= 0;

	// start from an all-zero accumulator
	while(k < outLength)
		out[k++]= 0.0;

	// scatter every product a[ai]*b[bi] into output tap ai+bi
	for(ai=0; ai<aLength; ai++)
		for(bi=0; bi<bLength; bi++)
			out[ai+bi]+= a[ai]*b[bi];

	return out;
}
1113 | |
1114 /* | |
1115 double *sum(double *a, int aLength, double *b, int bLength){ | |
1116 int length= MAX(aLength, bLength); | |
1117 double *coeff= memalign(sizeof(double), length*sizeof(double)); | |
1118 int i; | |
1119 | |
1120 for(i=0; i<length; i++) coeff[i]= 0.0; | |
1121 | |
1122 for(i=0; i<aLength; i++) coeff[i]+= a[i]; | |
1123 } | |
1124 */ |