Mercurial > libavcodec.hg
comparison h264.c @ 11002:1c8892d7a090 libavcodec
H.264: Use 64-/128-bit write-combining macros for copies
2-3% faster decode on x86-32 core2.
author | astrange |
---|---|
date | Mon, 25 Jan 2010 00:30:44 +0000 |
parents | 79f2e73f3714 |
children | 72c026446d67 |
comparison legend: equal | deleted | inserted | replaced
11001:621268959a5c | 11002:1c8892d7a090 |
---|---|
943 return 0; | 943 return 0; |
944 } | 944 } |
945 | 945 |
946 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){ | 946 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){ |
947 MpegEncContext * const s = &h->s; | 947 MpegEncContext * const s = &h->s; |
948 uint8_t *top_border; | |
948 int top_idx = 1; | 949 int top_idx = 1; |
949 | 950 |
950 src_y -= linesize; | 951 src_y -= linesize; |
951 src_cb -= uvlinesize; | 952 src_cb -= uvlinesize; |
952 src_cr -= uvlinesize; | 953 src_cr -= uvlinesize; |
953 | 954 |
954 if(!simple && FRAME_MBAFF){ | 955 if(!simple && FRAME_MBAFF){ |
955 if(s->mb_y&1){ | 956 if(s->mb_y&1){ |
956 if(!MB_MBAFF){ | 957 if(!MB_MBAFF){ |
957 *(uint64_t*)(h->top_borders[0][s->mb_x]+ 0)= *(uint64_t*)(src_y + 15*linesize); | 958 top_border = h->top_borders[0][s->mb_x]; |
958 *(uint64_t*)(h->top_borders[0][s->mb_x]+ 8)= *(uint64_t*)(src_y +8+15*linesize); | 959 AV_COPY128(top_border, src_y + 15*linesize); |
959 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){ | 960 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){ |
960 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+7*uvlinesize); | 961 AV_COPY64(top_border+16, src_cb+7*uvlinesize); |
961 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+7*uvlinesize); | 962 AV_COPY64(top_border+24, src_cr+7*uvlinesize); |
962 } | 963 } |
963 } | 964 } |
964 }else if(MB_MBAFF){ | 965 }else if(MB_MBAFF){ |
965 top_idx = 0; | 966 top_idx = 0; |
966 }else | 967 }else |
967 return; | 968 return; |
968 } | 969 } |
969 | 970 |
971 top_border = h->top_borders[top_idx][s->mb_x]; | |
970 // There are two lines saved, the line above the the top macroblock of a pair, | 972 // There are two lines saved, the line above the the top macroblock of a pair, |
971 // and the line above the bottom macroblock | 973 // and the line above the bottom macroblock |
972 | 974 AV_COPY128(top_border, src_y + 16*linesize); |
973 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0)= *(uint64_t*)(src_y + 16*linesize); | |
974 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize); | |
975 | 975 |
976 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){ | 976 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){ |
977 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize); | 977 AV_COPY64(top_border+16, src_cb+8*uvlinesize); |
978 *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize); | 978 AV_COPY64(top_border+24, src_cr+8*uvlinesize); |
979 } | 979 } |
980 } | 980 } |
981 | 981 |
982 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){ | 982 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){ |
983 MpegEncContext * const s = &h->s; | 983 MpegEncContext * const s = &h->s; |
985 uint64_t temp64; | 985 uint64_t temp64; |
986 int deblock_left; | 986 int deblock_left; |
987 int deblock_top; | 987 int deblock_top; |
988 int mb_xy; | 988 int mb_xy; |
989 int top_idx = 1; | 989 int top_idx = 1; |
990 uint8_t *top_border_m1 = h->top_borders[top_idx][s->mb_x-1]; | |
991 uint8_t *top_border = h->top_borders[top_idx][s->mb_x]; | |
990 | 992 |
991 if(!simple && FRAME_MBAFF){ | 993 if(!simple && FRAME_MBAFF){ |
992 if(s->mb_y&1){ | 994 if(s->mb_y&1){ |
993 if(!MB_MBAFF) | 995 if(!MB_MBAFF) |
994 return; | 996 return; |
1008 | 1010 |
1009 src_y -= linesize + 1; | 1011 src_y -= linesize + 1; |
1010 src_cb -= uvlinesize + 1; | 1012 src_cb -= uvlinesize + 1; |
1011 src_cr -= uvlinesize + 1; | 1013 src_cr -= uvlinesize + 1; |
1012 | 1014 |
1013 #define XCHG(a,b,t,xchg)\ | 1015 #define XCHG(a,b,xchg)\ |
1014 t= a;\ | 1016 if (xchg) AV_SWAP64(b,a);\ |
1015 if(xchg)\ | 1017 else AV_COPY64(b,a); |
1016 a= b;\ | |
1017 b= t; | |
1018 | 1018 |
1019 if(deblock_top){ | 1019 if(deblock_top){ |
1020 if(deblock_left){ | 1020 if(deblock_left){ |
1021 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x-1]+8), *(uint64_t*)(src_y -7), temp64, 1); | 1021 XCHG(top_border_m1+8, src_y -7, 1); |
1022 } | 1022 } |
1023 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg); | 1023 XCHG(top_border+0, src_y +1, xchg); |
1024 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1); | 1024 XCHG(top_border+8, src_y +9, 1); |
1025 if(s->mb_x+1 < s->mb_width){ | 1025 if(s->mb_x+1 < s->mb_width){ |
1026 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1); | 1026 XCHG(h->top_borders[top_idx][s->mb_x+1], src_y +17, 1); |
1027 } | 1027 } |
1028 } | 1028 } |
1029 | 1029 |
1030 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){ | 1030 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){ |
1031 if(deblock_top){ | 1031 if(deblock_top){ |
1032 if(deblock_left){ | 1032 if(deblock_left){ |
1033 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x-1]+16), *(uint64_t*)(src_cb -7), temp64, 1); | 1033 XCHG(top_border_m1+16, src_cb -7, 1); |
1034 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x-1]+24), *(uint64_t*)(src_cr -7), temp64, 1); | 1034 XCHG(top_border_m1+24, src_cr -7, 1); |
1035 } | 1035 } |
1036 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1); | 1036 XCHG(top_border+16, src_cb+1, 1); |
1037 XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1); | 1037 XCHG(top_border+24, src_cr+1, 1); |
1038 } | 1038 } |
1039 } | 1039 } |
1040 } | 1040 } |
1041 | 1041 |
1042 static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){ | 1042 static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){ |