Mercurial > mplayer.hg
comparison TOOLS/subfont-c/subfont.c @ 7023:273e374311c6
speedup of gauss
author | arpi |
---|---|
date | Fri, 16 Aug 2002 21:02:50 +0000 |
parents | ef9f4c1a3345 |
children | 9d4273713562 |
comparison
legend: equal, deleted, inserted, replaced
7022:ef9f4c1a3345 | 7023:273e374311c6 |
---|---|
624 | 624 |
625 int x, y; | 625 int x, y; |
626 | 626 |
627 unsigned char *s = buffer - r; | 627 unsigned char *s = buffer - r; |
628 unsigned char *t = tmp; | 628 unsigned char *t = tmp; |
629 | |
630 int *m_end=m+256*mwidth; | |
631 | |
629 for (y = 0; y<height; ++y) { | 632 for (y = 0; y<height; ++y) { |
630 for (x = 0; x<width; ++x, ++s, ++t) { | 633 for (x = 0; x<width; ++x, ++s, ++t) { |
631 unsigned sum = 0; | 634 unsigned sum = 65536/2; |
632 int x1 = (x<r) ? r-x:0; | 635 int x1 = (x<r) ? r-x:0; |
633 int x2 = (x+r>=width) ? (r+width-x):mwidth; | 636 int x2 = (x+r>=width) ? (r+width-x):mwidth; |
637 unsigned* mp = m + 256*x1; | |
634 int mx; | 638 int mx; |
635 for (mx = x1; mx<x2; ++mx) | 639 for (mx = x1; mx<x2; ++mx, mp+=256) sum+= mp[s[mx]]; |
636 sum+= s[mx] * m[mx]; | 640 *t = sum>>16; |
637 *t = (sum + volume/2) / volume; | |
638 //*t = sum; | |
639 } | 641 } |
640 } | 642 } |
641 tmp -= r*width; | 643 tmp -= r*width; |
644 | |
642 for (x = 0; x<width; ++x, ++tmp, ++buffer) { | 645 for (x = 0; x<width; ++x, ++tmp, ++buffer) { |
646 int y1max=(r<height)?r:height; | |
647 int y2min=height-r; | |
648 if(y2min<y1max) y2min=y1max; | |
643 s = tmp; | 649 s = tmp; |
644 t = buffer; | 650 t = buffer; |
651 #if 0 | |
645 for (y = 0; y<height; ++y, s+= width, t+= width) { | 652 for (y = 0; y<height; ++y, s+= width, t+= width) { |
646 unsigned sum = 0; | 653 unsigned sum = 65536/2; |
647 int y1 = (y<r) ? r-y:0; | 654 int y1 = (y<r) ? r-y:0; |
648 int y2 = (y+r>=height) ? (r+height-y):mwidth; | 655 int y2 = (y+r>=height) ? (r+height-y):mwidth; |
656 register unsigned *mp = m + 256*y1; | |
657 register unsigned char *smy = s + y1*width; | |
658 int my; | |
659 for (my = y1; my<y2; ++my, smy+= width, mp+=256) | |
660 sum+= mp[*smy]; | |
661 *t = sum>>16; | |
662 } | |
663 #else | |
664 // pass 1: 0..r | |
665 for (y = 0; y<y1max; ++y, s+= width, t+= width) { | |
666 unsigned sum = 65536/2; | |
667 int y1 = r-y; | |
668 int my = y1; | |
669 int y2 = (y+r>=height) ? (r+height-y):mwidth; | |
649 unsigned char *smy = s + y1*width; | 670 unsigned char *smy = s + y1*width; |
650 int my; | 671 unsigned* mp = m + 256*y1; |
651 for (my = y1; my<y2; ++my, smy+= width) | 672 for (; my<y2; ++my, smy+= width, mp+=256) sum+=mp[*smy]; |
652 sum+= *smy * m[my]; | 673 *t = sum>>16; |
653 *t = (sum + volume/2) / volume; | 674 } |
654 } | 675 // pass 2: r..(height-r) |
676 for (; y<y2min; ++y, s+= width, t+= width) { | |
677 unsigned sum = 65536/2; | |
678 unsigned char *smy = s; | |
679 unsigned* mp = m; | |
680 // int my=0; | |
681 // for (; my<mwidth; ++my, smy+=width, mp+=256) sum+=mp[*smy]; | |
682 for (; mp<m_end; smy+=width, mp+=256) sum+=mp[*smy]; | |
683 *t = sum>>16; | |
684 } | |
685 // pass 3: (height-r)..height | |
686 for (; y<height; ++y, s+= width, t+= width) { | |
687 unsigned sum = 65536/2; | |
688 int y2 = r+height-y; | |
689 unsigned char *smy = s; | |
690 unsigned* mp = m; | |
691 int my=0; | |
692 for (; my<y2; ++my, smy+= width, mp+=256) sum+=mp[*smy]; | |
693 *t = sum>>16; | |
694 } | |
695 #endif | |
655 } | 696 } |
656 } | 697 } |
657 | 698 |
658 | 699 |
659 // Gaussian matrix | 700 // Gaussian matrix |
689 | 730 |
690 int mx, my, i; | 731 int mx, my, i; |
691 unsigned volume = 0; // volume under Gaussian area is exactly -pi*base/A | 732 unsigned volume = 0; // volume under Gaussian area is exactly -pi*base/A |
692 | 733 |
693 unsigned *g = (unsigned*)malloc(g_w * sizeof(unsigned)); | 734 unsigned *g = (unsigned*)malloc(g_w * sizeof(unsigned)); |
735 unsigned *gt = (unsigned*)malloc(256 * g_w * sizeof(unsigned)); | |
694 unsigned *om = (unsigned*)malloc(o_w*o_w * sizeof(unsigned)); | 736 unsigned *om = (unsigned*)malloc(o_w*o_w * sizeof(unsigned)); |
695 unsigned char *omt = malloc(o_size*256); | 737 unsigned char *omt = malloc(o_size*256); |
696 unsigned char *omtp = omt; | 738 unsigned char *omtp = omt; |
697 | 739 |
698 if (g==NULL || om==NULL || omt==NULL) ERROR("malloc failed."); | 740 if (g==NULL || gt==NULL || om==NULL || omt==NULL) ERROR("malloc failed."); |
699 | 741 |
700 // gaussian curve | 742 // gaussian curve |
701 for (i = 0; i<g_w; ++i) { | 743 for (i = 0; i<g_w; ++i) { |
702 g[i] = (unsigned)(exp(A * (i-g_r)*(i-g_r)) * base + .5); | 744 g[i] = (unsigned)(exp(A * (i-g_r)*(i-g_r)) * base + .5); |
703 volume+= g[i]; | 745 volume+= g[i]; |
704 if (DEBUG) eprintf("%3i ", g[i]); | 746 if (DEBUG) eprintf("%3i ", g[i]); |
705 } | 747 } |
706 //volume *= volume; | 748 //volume *= volume; |
707 if (DEBUG) eprintf("\n"); | 749 if (DEBUG) eprintf("\n"); |
750 | |
751 // gauss table: | |
752 for(mx=0;mx<g_w;mx++){ | |
753 for(i=0;i<256;i++) | |
754 gt[256*mx+i] = (i*g[mx]*65536+(volume/2))/volume; | |
755 } | |
708 | 756 |
709 /* outline matrix */ | 757 /* outline matrix */ |
710 for (my = 0; my<o_w; ++my) { | 758 for (my = 0; my<o_w; ++my) { |
711 for (mx = 0; mx<o_w; ++mx) { | 759 for (mx = 0; mx<o_w; ++mx) { |
712 // antialiased circle would be perfect here, but this one is good enough | 760 // antialiased circle would be perfect here, but this one is good enough |
731 //outline(bbuffer, abuffer, width, height, gm, g_r, g_w); // Gaussian outline | 779 //outline(bbuffer, abuffer, width, height, gm, g_r, g_w); // Gaussian outline |
732 ttime=GetTimer()-ttime; | 780 ttime=GetTimer()-ttime; |
733 printf("outline: %7d us\n",ttime); | 781 printf("outline: %7d us\n",ttime); |
734 | 782 |
735 ttime=GetTimer(); | 783 ttime=GetTimer(); |
736 blur(abuffer, bbuffer, width, height, g, g_r, g_w, volume); | 784 // blur(abuffer, bbuffer, width, height, g, g_r, g_w, volume); |
785 blur(abuffer, bbuffer, width, height, gt, g_r, g_w, volume); | |
737 ttime=GetTimer()-ttime; | 786 ttime=GetTimer()-ttime; |
738 printf("gauss: %7d us\n",ttime); | 787 printf("gauss: %7d us\n",ttime); |
739 | 788 |
740 free(g); | 789 free(g); |
741 free(om); | 790 free(om); |