comparison h264pred.c @ 8373:5961d2deb35f libavcodec

1.6x faster pred*_vertical_add_c
author lorenm
date Thu, 18 Dec 2008 01:05:06 +0000
parents 920ded20b469
children e9d9d946f213
comparison
legend: equal | deleted | inserted | replaced
8372:5917f39590e3 8373:5961d2deb35f
987 #undef PL 987 #undef PL
988 #undef SRC 988 #undef SRC
989 989
990 static void pred4x4_vertical_add_c(uint8_t *pix, const DCTELEM *block, int stride){ 990 static void pred4x4_vertical_add_c(uint8_t *pix, const DCTELEM *block, int stride){
991 int i; 991 int i;
992 pix -= stride;
992 for(i=0; i<4; i++){ 993 for(i=0; i<4; i++){
993 pix[0]= pix[0-stride] + block[0]; 994 uint8_t v = pix[0];
994 pix[1]= pix[1-stride] + block[1]; 995 pix[1*stride]= v += block[0];
995 pix[2]= pix[2-stride] + block[2]; 996 pix[2*stride]= v += block[4];
996 pix[3]= pix[3-stride] + block[3]; 997 pix[3*stride]= v += block[8];
998 pix[4*stride]= v += block[12];
999 pix++;
1000 block++;
1001 }
1002 }
1003
1004 static void pred4x4_horizontal_add_c(uint8_t *pix, const DCTELEM *block, int stride){
1005 int i;
1006 for(i=0; i<4; i++){
1007 uint8_t v = pix[-1];
1008 pix[0]= v += block[0];
1009 pix[1]= v += block[1];
1010 pix[2]= v += block[2];
1011 pix[3]= v += block[3];
997 pix+= stride; 1012 pix+= stride;
998 block+= 4; 1013 block+= 4;
999 } 1014 }
1000 } 1015 }
1001 1016
1002 static void pred4x4_horizontal_add_c(uint8_t *pix, const DCTELEM *block, int stride){
1003 int i;
1004 for(i=0; i<4; i++){
1005 pix[0]= pix[-1] + block[0];
1006 pix[1]= pix[ 0] + block[1];
1007 pix[2]= pix[ 1] + block[2];
1008 pix[3]= pix[ 2] + block[3];
1009 pix+= stride;
1010 block+= 4;
1011 }
1012 }
1013
1014 static void pred8x8l_vertical_add_c(uint8_t *pix, const DCTELEM *block, int stride){ 1017 static void pred8x8l_vertical_add_c(uint8_t *pix, const DCTELEM *block, int stride){
1015 int i; 1018 int i;
1019 pix -= stride;
1016 for(i=0; i<8; i++){ 1020 for(i=0; i<8; i++){
1017 pix[0]= pix[0-stride] + block[0]; 1021 uint8_t v = pix[0];
1018 pix[1]= pix[1-stride] + block[1]; 1022 pix[1*stride]= v += block[0];
1019 pix[2]= pix[2-stride] + block[2]; 1023 pix[2*stride]= v += block[8];
1020 pix[3]= pix[3-stride] + block[3]; 1024 pix[3*stride]= v += block[16];
1021 pix[4]= pix[4-stride] + block[4]; 1025 pix[4*stride]= v += block[24];
1022 pix[5]= pix[5-stride] + block[5]; 1026 pix[5*stride]= v += block[32];
1023 pix[6]= pix[6-stride] + block[6]; 1027 pix[6*stride]= v += block[40];
1024 pix[7]= pix[7-stride] + block[7]; 1028 pix[7*stride]= v += block[48];
1025 pix+= stride; 1029 pix[8*stride]= v += block[56];
1026 block+= 8; 1030 pix++;
1031 block++;
1027 } 1032 }
1028 } 1033 }
1029 1034
1030 static void pred8x8l_horizontal_add_c(uint8_t *pix, const DCTELEM *block, int stride){ 1035 static void pred8x8l_horizontal_add_c(uint8_t *pix, const DCTELEM *block, int stride){
1031 int i; 1036 int i;
1032 for(i=0; i<8; i++){ 1037 for(i=0; i<8; i++){
1033 pix[0]= pix[-1] + block[0]; 1038 uint8_t v = pix[-1];
1034 pix[1]= pix[ 0] + block[1]; 1039 pix[0]= v += block[0];
1035 pix[2]= pix[ 1] + block[2]; 1040 pix[1]= v += block[1];
1036 pix[3]= pix[ 2] + block[3]; 1041 pix[2]= v += block[2];
1037 pix[4]= pix[ 3] + block[4]; 1042 pix[3]= v += block[3];
1038 pix[5]= pix[ 4] + block[5]; 1043 pix[4]= v += block[4];
1039 pix[6]= pix[ 5] + block[6]; 1044 pix[5]= v += block[5];
1040 pix[7]= pix[ 6] + block[7]; 1045 pix[6]= v += block[6];
1046 pix[7]= v += block[7];
1041 pix+= stride; 1047 pix+= stride;
1042 block+= 8; 1048 block+= 8;
1043 } 1049 }
1044 } 1050 }
1045 1051