comparison libmpcodecs/vf_eq.c @ 7078:ba46984544bb

optimization
author michael
date Fri, 23 Aug 2002 21:55:57 +0000
parents 7685130ba4bd
children ebc1cd8014a5
comparison
Legend: equal | deleted | inserted | replaced
Left column (old): 7077:a24f91c247cd — Right column (new): 7078:ba46984544bb
29 int pel; 29 int pel;
30 int dstep = dstride-w; 30 int dstep = dstride-w;
31 int sstep = sstride-w; 31 int sstep = sstride-w;
32 short brvec[4]; 32 short brvec[4];
33 short contvec[4]; 33 short contvec[4];
34 short centvec[4] = { -128, -128, -128, -128 }; 34
35 35 contrast = ((contrast+100)*256*16)/100;
36 brightness = ((brightness+100)*511)/200-128; 36 brightness = ((brightness+100)*511)/200-128 - contrast/32;
37 contrast = ((contrast+100)*256)/200;
38 37
39 brvec[0] = brvec[1] = brvec[2] = brvec[3] = brightness; 38 brvec[0] = brvec[1] = brvec[2] = brvec[3] = brightness;
40 contvec[0] = contvec[1] = contvec[2] = contvec[3] = contrast; 39 contvec[0] = contvec[1] = contvec[2] = contvec[3] = contrast;
41 40
42 while (h--) { 41 while (h--) {
43 asm ( 42 asm volatile (
44 "movq (%5), %%mm3 \n\t" 43 "movq (%5), %%mm3 \n\t"
45 "movq (%6), %%mm4 \n\t" 44 "movq (%6), %%mm4 \n\t"
46 "movq (%7), %%mm5 \n\t"
47 "pxor %%mm0, %%mm0 \n\t" 45 "pxor %%mm0, %%mm0 \n\t"
46 "movl %4, %%eax\n\t"
48 ".align 16 \n\t" 47 ".align 16 \n\t"
49 "1: \n\t" 48 "1: \n\t"
50 "movq (%0), %%mm1 \n\t" 49 "movq (%0), %%mm1 \n\t"
51 "movq (%0), %%mm2 \n\t" 50 "movq (%0), %%mm2 \n\t"
52 "punpcklbw %%mm0, %%mm1 \n\t" 51 "punpcklbw %%mm0, %%mm1 \n\t"
53 "punpckhbw %%mm0, %%mm2 \n\t" 52 "punpckhbw %%mm0, %%mm2 \n\t"
54 "paddw %%mm5, %%mm1 \n\t" 53 "psllw $4, %%mm1 \n\t"
55 "paddw %%mm5, %%mm2 \n\t" 54 "psllw $4, %%mm2 \n\t"
56 "pmullw %%mm4, %%mm1 \n\t" 55 "pmulhw %%mm4, %%mm1 \n\t"
57 "pmullw %%mm4, %%mm2 \n\t" 56 "pmulhw %%mm4, %%mm2 \n\t"
58 "psraw $7, %%mm1 \n\t" 57 "paddw %%mm3, %%mm1 \n\t"
59 "psraw $7, %%mm2 \n\t" 58 "paddw %%mm3, %%mm2 \n\t"
60 "paddsw %%mm3, %%mm1 \n\t"
61 "paddsw %%mm3, %%mm2 \n\t"
62 "packuswb %%mm2, %%mm1 \n\t" 59 "packuswb %%mm2, %%mm1 \n\t"
63 "addl $8, %0 \n\t" 60 "addl $8, %0 \n\t"
64 "movq %%mm1, (%1) \n\t" 61 "movq %%mm1, (%1) \n\t"
65 "addl $8, %1 \n\t" 62 "addl $8, %1 \n\t"
66 "decl %4 \n\t" 63 "decl %%eax \n\t"
67 "jnz 1b \n\t" 64 "jnz 1b \n\t"
68 : "=r" (src), "=r" (dest) 65 : "=r" (src), "=r" (dest)
69 : "0" (src), "1" (dest), "r" (w/8), "r" (brvec), "r" (contvec), "r" (centvec) 66 : "0" (src), "1" (dest), "r" (w>>3), "r" (brvec), "r" (contvec)
67 : "%eax"
70 ); 68 );
69
71 for (i = w&7; i; i--) 70 for (i = w&7; i; i--)
72 { 71 {
73 pel = ((*src++ - 128) * contrast)/256 + brightness; 72 pel = ((*src++* contrast)>>12) + brightness;
74 *dest++ = pel > 255 ? 255 : (pel < 0 ? 0 : pel); 73 if(pel&768) pel = (-pel)>>31;
75 } 74 *dest++ = pel;
75 }
76
76 src += sstep; 77 src += sstep;
77 dest += dstep; 78 dest += dstep;
78 } 79 }
79 asm volatile ( "emms \n\t" ::: "memory" ); 80 asm volatile ( "emms \n\t" ::: "memory" );
80 } 81 }
86 int i; 87 int i;
87 int pel; 88 int pel;
88 int dstep = dstride-w; 89 int dstep = dstride-w;
89 int sstep = sstride-w; 90 int sstep = sstride-w;
90 91
91 brightness = ((brightness+100)*511)/200-128; 92 contrast = ((contrast+100)*256*256)/100;
92 contrast = ((contrast+100)*256)/200; 93 brightness = ((brightness+100)*511)/200-128 - contrast/512;
93 94
94 while (h--) { 95 while (h--) {
95 for (i = w; i; i--) 96 for (i = w; i; i--)
96 { 97 {
97 pel = ((*src++ - 128) * contrast)/128 + brightness; 98 pel = ((*src++* contrast)>>16) + brightness;
98 *dest++ = pel > 255 ? 255 : (pel < 0 ? 0 : pel); 99 if(pel&768) pel = (-pel)>>31;
100 *dest++ = pel;
99 } 101 }
100 src += sstep; 102 src += sstep;
101 dest += dstep; 103 dest += dstep;
102 } 104 }
103 } 105 }