comparison TOOLS/subfont-c/subfont.c @ 7028:9d4273713562

40% faster blur
author michael
date Fri, 16 Aug 2002 22:36:54 +0000
parents 273e374311c6
children 660a8439c679
comparison
equal deleted inserted replaced
7027:c9a4dfaa9868 7028:9d4273713562
612 612
613 613
614 // gaussian blur 614 // gaussian blur
615 void blur( 615 void blur(
616 unsigned char *buffer, 616 unsigned char *buffer,
617 unsigned char *tmp, 617 unsigned short *tmp2,
618 int width, 618 int width,
619 int height, 619 int height,
620 int *m, 620 int *m,
621 int *m2,
621 int r, 622 int r,
622 int mwidth, 623 int mwidth,
623 unsigned volume) { 624 unsigned volume) {
624 625
625 int x, y; 626 int x, y;
626 627
628 #if 0
629 unsigned char *s = buffer;
630 unsigned short *t = tmp2+1;
631 for(y=0; y<height; y++){
632 memset(t, 0, (width+1)*sizeof(short));
633 // for(x=0; x<width+1; x++)
634 // t[x]= 128;
635
636 for(x=0; x<r; x++){
637 const int src= s[x];
638 if(src){
639 register unsigned short *dstp= t + x-r;
640 int mx;
641 unsigned *m3= m2 + src*mwidth;
642 for(mx=r-x; mx<mwidth; mx++){
643 dstp[mx]+= m3[mx];
644 }
645 }
646 }
647 for(; x<width-r; x++){
648 const int src= s[x];
649 if(src){
650 register unsigned short *dstp= t + x-r;
651 int mx;
652 unsigned *m3= m2 + src*mwidth;
653 for(mx=0; mx<mwidth; mx++){
654 dstp[mx]+= m3[mx];
655 }
656 }
657 }
658 for(; x<width; x++){
659 const int src= s[x];
660 if(src){
661 register unsigned short *dstp= t + x-r;
662 int mx;
663 const int x2= r+width -x;
664 const int off= src*mwidth;
665 unsigned *m3= m2 + src*mwidth;
666 for(mx=0; mx<x2; mx++){
667 dstp[mx]+= m3[mx];
668 }
669 }
670 }
671 s+= width;
672 t+= width + 1;
673 }
674
675 t = tmp2;
676 for(x=0; x<width; x++){
677 for(y=0; y<r; y++){
678 unsigned short *srcp= t + y*(width+1) + 1;
679 int src= *srcp;
680 if(src){
681 register unsigned short *dstp= srcp - 1 + width+1;
682 const int src2= (src + 128)>>8;
683 unsigned *m3= m2 + src2*mwidth;
684
685 int mx;
686 *srcp= 128;
687 for(mx=r-1; mx<mwidth; mx++){
688 *dstp += m3[mx];
689 dstp+= width+1;
690 }
691 }
692 }
693 for(; y<height-r; y++){
694 unsigned short *srcp= t + y*(width+1) + 1;
695 int src= *srcp;
696 if(src){
697 register unsigned short *dstp= srcp - 1 - r*(width+1);
698 const int src2= (src + 128)>>8;
699 unsigned *m3= m2 + src2*mwidth;
700
701 int mx;
702 *srcp= 128;
703 for(mx=0; mx<mwidth; mx++){
704 *dstp += m3[mx];
705 dstp+= width+1;
706 }
707 }
708 }
709 for(; y<height; y++){
710 unsigned short *srcp= t + y*(width+1) + 1;
711 int src= *srcp;
712 if(src){
713 const int y2=r+height-y;
714 register unsigned short *dstp= srcp - 1 - r*(width+1);
715 const int src2= (src + 128)>>8;
716 unsigned *m3= m2 + src2*mwidth;
717
718 int mx;
719 *srcp= 128;
720 for(mx=0; mx<y2; mx++){
721 *dstp += m3[mx];
722 dstp+= width+1;
723 }
724 }
725 }
726 t++;
727 }
728
729 t = tmp2;
730 s = buffer;
731 for(y=0; y<height; y++){
732 for(x=0; x<width; x++){
733 s[x]= t[x]>>8;
734 }
735 s+= width;
736 t+= width + 1;
737 }
738 #else
739 unsigned char *tmp = (unsigned char*)tmp2;
627 unsigned char *s = buffer - r; 740 unsigned char *s = buffer - r;
628 unsigned char *t = tmp; 741 unsigned char *t = tmp;
629 742
630 int *m_end=m+256*mwidth; 743 int *m_end=m+256*mwidth;
631 744
634 unsigned sum = 65536/2; 747 unsigned sum = 65536/2;
635 int x1 = (x<r) ? r-x:0; 748 int x1 = (x<r) ? r-x:0;
636 int x2 = (x+r>=width) ? (r+width-x):mwidth; 749 int x2 = (x+r>=width) ? (r+width-x):mwidth;
637 unsigned* mp = m + 256*x1; 750 unsigned* mp = m + 256*x1;
638 int mx; 751 int mx;
752
639 for (mx = x1; mx<x2; ++mx, mp+=256) sum+= mp[s[mx]]; 753 for (mx = x1; mx<x2; ++mx, mp+=256) sum+= mp[s[mx]];
640 *t = sum>>16; 754 *t = sum>>16;
641 } 755 }
642 } 756 }
757
643 tmp -= r*width; 758 tmp -= r*width;
644
645 for (x = 0; x<width; ++x, ++tmp, ++buffer) { 759 for (x = 0; x<width; ++x, ++tmp, ++buffer) {
646 int y1max=(r<height)?r:height; 760 int y1max=(r<height)?r:height;
647 int y2min=height-r; 761 int y2min=height-r;
648 if(y2min<y1max) y2min=y1max; 762 if(y2min<y1max) y2min=y1max;
649 s = tmp; 763 s = tmp;
692 for (; my<y2; ++my, smy+= width, mp+=256) sum+=mp[*smy]; 806 for (; my<y2; ++my, smy+= width, mp+=256) sum+=mp[*smy];
693 *t = sum>>16; 807 *t = sum>>16;
694 } 808 }
695 #endif 809 #endif
696 } 810 }
811 #endif
697 } 812 }
698 813
699 814
700 // Gaussian matrix 815 // Gaussian matrix
701 // Maybe for future use. 816 // Maybe for future use.
725 int const o_r = ceil(thickness); 840 int const o_r = ceil(thickness);
726 int const g_w = 2*g_r+1; // matrix size 841 int const g_w = 2*g_r+1; // matrix size
727 int const o_w = 2*o_r+1; // matrix size 842 int const o_w = 2*o_r+1; // matrix size
728 int const o_size = o_w * o_w; 843 int const o_size = o_w * o_w;
729 double const A = log(1.0/base)/(radius*radius*2); 844 double const A = log(1.0/base)/(radius*radius*2);
845 double volume_factor=0.0;
846 double volume_diff;
730 847
731 int mx, my, i; 848 int mx, my, i;
732 unsigned volume = 0; // volume under Gaussian area is exactly -pi*base/A 849 unsigned volume = 0; // volume under Gaussian area is exactly -pi*base/A
733 850
734 unsigned *g = (unsigned*)malloc(g_w * sizeof(unsigned)); 851 unsigned *g = (unsigned*)malloc(g_w * sizeof(unsigned));
735 unsigned *gt = (unsigned*)malloc(256 * g_w * sizeof(unsigned)); 852 unsigned *gt = (unsigned*)malloc(256 * g_w * sizeof(unsigned));
853 unsigned *gt2 = (unsigned*)malloc(256 * g_w * sizeof(unsigned));
736 unsigned *om = (unsigned*)malloc(o_w*o_w * sizeof(unsigned)); 854 unsigned *om = (unsigned*)malloc(o_w*o_w * sizeof(unsigned));
737 unsigned char *omt = malloc(o_size*256); 855 unsigned char *omt = malloc(o_size*256);
738 unsigned char *omtp = omt; 856 unsigned char *omtp = omt;
739 857 unsigned short *tmp = malloc((width+1)*height*sizeof(short));
740 if (g==NULL || gt==NULL || om==NULL || omt==NULL) ERROR("malloc failed."); 858
741 859 if (g==NULL || gt==NULL || gt2==NULL || om==NULL || omt==NULL) ERROR("malloc failed.");
742 // gaussian curve 860
861 // gaussian curve with volume = 256
862 for (volume_diff=10000000; volume_diff>0.0000001; volume_diff*=0.5){
863 volume_factor+= volume_diff;
864 volume=0;
865 for (i = 0; i<g_w; ++i) {
866 g[i] = (unsigned)(exp(A * (i-g_r)*(i-g_r)) * volume_factor + .5);
867 volume+= g[i];
868 }
869 if(volume>256) volume_factor-= volume_diff;
870 }
871 volume=0;
743 for (i = 0; i<g_w; ++i) { 872 for (i = 0; i<g_w; ++i) {
744 g[i] = (unsigned)(exp(A * (i-g_r)*(i-g_r)) * base + .5); 873 g[i] = (unsigned)(exp(A * (i-g_r)*(i-g_r)) * volume_factor + .5);
745 volume+= g[i]; 874 volume+= g[i];
746 if (DEBUG) eprintf("%3i ", g[i]); 875 if (DEBUG) eprintf("%3i ", g[i]);
747 } 876 }
877
748 //volume *= volume; 878 //volume *= volume;
749 if (DEBUG) eprintf("\n"); 879 if (DEBUG) eprintf("\n");
750 880
751 // gauss table: 881 // gauss table:
752 for(mx=0;mx<g_w;mx++){ 882 for(mx=0;mx<g_w;mx++){
753 for(i=0;i<256;i++) 883 for(i=0;i<256;i++){
754 gt[256*mx+i] = (i*g[mx]*65536+(volume/2))/volume; 884 gt[256*mx+i] = (i*g[mx]*65536+(volume/2))/volume;
885 gt2[mx+i*g_w] = i*g[mx];
886 }
755 } 887 }
756 888
757 /* outline matrix */ 889 /* outline matrix */
758 for (my = 0; my<o_w; ++my) { 890 for (my = 0; my<o_w; ++my) {
759 for (mx = 0; mx<o_w; ++mx) { 891 for (mx = 0; mx<o_w; ++mx) {
780 ttime=GetTimer()-ttime; 912 ttime=GetTimer()-ttime;
781 printf("outline: %7d us\n",ttime); 913 printf("outline: %7d us\n",ttime);
782 914
783 ttime=GetTimer(); 915 ttime=GetTimer();
784 // blur(abuffer, bbuffer, width, height, g, g_r, g_w, volume); 916 // blur(abuffer, bbuffer, width, height, g, g_r, g_w, volume);
785 blur(abuffer, bbuffer, width, height, gt, g_r, g_w, volume); 917 blur(abuffer, tmp, width, height, gt, gt2, g_r, g_w, volume);
786 ttime=GetTimer()-ttime; 918 ttime=GetTimer()-ttime;
787 printf("gauss: %7d us\n",ttime); 919 printf("gauss: %7d us\n",ttime);
788 920
789 free(g); 921 free(g);
790 free(om); 922 free(om);