Mercurial > libavcodec.hg
comparison h264pred.c @ 8373:5961d2deb35f libavcodec
1.6x faster pred*_vertical_add_c
author | lorenm |
---|---|
date | Thu, 18 Dec 2008 01:05:06 +0000 |
parents | 920ded20b469 |
children | e9d9d946f213 |
comparison
equal
deleted
inserted
replaced
8372:5917f39590e3 | 8373:5961d2deb35f |
---|---|
987 #undef PL | 987 #undef PL |
988 #undef SRC | 988 #undef SRC |
989 | 989 |
990 static void pred4x4_vertical_add_c(uint8_t *pix, const DCTELEM *block, int stride){ | 990 static void pred4x4_vertical_add_c(uint8_t *pix, const DCTELEM *block, int stride){ |
991 int i; | 991 int i; |
| | 992 pix -= stride; |
992 for(i=0; i<4; i++){ | 993 for(i=0; i<4; i++){ |
993 pix[0]= pix[0-stride] + block[0]; | 994 uint8_t v = pix[0]; |
994 pix[1]= pix[1-stride] + block[1]; | 995 pix[1*stride]= v += block[0]; |
995 pix[2]= pix[2-stride] + block[2]; | 996 pix[2*stride]= v += block[4]; |
996 pix[3]= pix[3-stride] + block[3]; | 997 pix[3*stride]= v += block[8]; |
| | 998 pix[4*stride]= v += block[12]; |
| | 999 pix++; |
| | 1000 block++; |
| | 1001 } |
| | 1002 } |
| | 1003 |
| | 1004 static void pred4x4_horizontal_add_c(uint8_t *pix, const DCTELEM *block, int stride){ |
| | 1005 int i; |
| | 1006 for(i=0; i<4; i++){ |
| | 1007 uint8_t v = pix[-1]; |
| | 1008 pix[0]= v += block[0]; |
| | 1009 pix[1]= v += block[1]; |
| | 1010 pix[2]= v += block[2]; |
| | 1011 pix[3]= v += block[3]; |
997 pix+= stride; | 1012 pix+= stride; |
998 block+= 4; | 1013 block+= 4; |
999 } | 1014 } |
1000 } | 1015 } |
1001 | 1016 |
1002 static void pred4x4_horizontal_add_c(uint8_t *pix, const DCTELEM *block, int stride){ | |
1003 int i; | |
1004 for(i=0; i<4; i++){ | |
1005 pix[0]= pix[-1] + block[0]; | |
1006 pix[1]= pix[ 0] + block[1]; | |
1007 pix[2]= pix[ 1] + block[2]; | |
1008 pix[3]= pix[ 2] + block[3]; | |
1009 pix+= stride; | |
1010 block+= 4; | |
1011 } | |
1012 } | |
1013 | |
1014 static void pred8x8l_vertical_add_c(uint8_t *pix, const DCTELEM *block, int stride){ | 1017 static void pred8x8l_vertical_add_c(uint8_t *pix, const DCTELEM *block, int stride){ |
1015 int i; | 1018 int i; |
| | 1019 pix -= stride; |
1016 for(i=0; i<8; i++){ | 1020 for(i=0; i<8; i++){ |
1017 pix[0]= pix[0-stride] + block[0]; | 1021 uint8_t v = pix[0]; |
1018 pix[1]= pix[1-stride] + block[1]; | 1022 pix[1*stride]= v += block[0]; |
1019 pix[2]= pix[2-stride] + block[2]; | 1023 pix[2*stride]= v += block[8]; |
1020 pix[3]= pix[3-stride] + block[3]; | 1024 pix[3*stride]= v += block[16]; |
1021 pix[4]= pix[4-stride] + block[4]; | 1025 pix[4*stride]= v += block[24]; |
1022 pix[5]= pix[5-stride] + block[5]; | 1026 pix[5*stride]= v += block[32]; |
1023 pix[6]= pix[6-stride] + block[6]; | 1027 pix[6*stride]= v += block[40]; |
1024 pix[7]= pix[7-stride] + block[7]; | 1028 pix[7*stride]= v += block[48]; |
1025 pix+= stride; | 1029 pix[8*stride]= v += block[56]; |
1026 block+= 8; | 1030 pix++; |
| | 1031 block++; |
1027 } | 1032 } |
1028 } | 1033 } |
1029 | 1034 |
1030 static void pred8x8l_horizontal_add_c(uint8_t *pix, const DCTELEM *block, int stride){ | 1035 static void pred8x8l_horizontal_add_c(uint8_t *pix, const DCTELEM *block, int stride){ |
1031 int i; | 1036 int i; |
1032 for(i=0; i<8; i++){ | 1037 for(i=0; i<8; i++){ |
1033 pix[0]= pix[-1] + block[0]; | 1038 uint8_t v = pix[-1]; |
1034 pix[1]= pix[ 0] + block[1]; | 1039 pix[0]= v += block[0]; |
1035 pix[2]= pix[ 1] + block[2]; | 1040 pix[1]= v += block[1]; |
1036 pix[3]= pix[ 2] + block[3]; | 1041 pix[2]= v += block[2]; |
1037 pix[4]= pix[ 3] + block[4]; | 1042 pix[3]= v += block[3]; |
1038 pix[5]= pix[ 4] + block[5]; | 1043 pix[4]= v += block[4]; |
1039 pix[6]= pix[ 5] + block[6]; | 1044 pix[5]= v += block[5]; |
1040 pix[7]= pix[ 6] + block[7]; | 1045 pix[6]= v += block[6]; |
| | 1046 pix[7]= v += block[7]; |
1041 pix+= stride; | 1047 pix+= stride; |
1042 block+= 8; | 1048 block+= 8; |
1043 } | 1049 } |
1044 } | 1050 } |
1045 | 1051 |