Mercurial > libavcodec.hg
comparison vp8.c @ 11989:176c5deb6756 libavcodec
Optimize split MC, so we don't always do 4x4 blocks of 4x4 pixels each, but
we apply them as 16x8/8x16/8x8 subblocks where possible. Since this allows
us to use width=8/16 instead of width=4 MC functions, we can now take more
advantage of SSE2/SSSE3 optimizations, leading to a total speedup for splitMV
filter of about 10%.
author | rbultje |
---|---|
date | Mon, 28 Jun 2010 13:50:55 +0000 |
parents | 356b20a6566d |
children | 3c51d7ac41c9 |
comparison
equal
deleted
inserted
replaced
11988:e382860b855f | 11989:176c5deb6756 |
---|---|
941 } | 941 } |
942 | 942 |
943 mc_func[my_idx][mx_idx](dst, linesize, src, linesize, block_h, mx, my); | 943 mc_func[my_idx][mx_idx](dst, linesize, src, linesize, block_h, mx, my); |
944 } | 944 } |
945 | 945 |
946 static inline void vp8_mc_part(VP8Context *s, uint8_t *dst[3], | |
947 AVFrame *ref_frame, int x_off, int y_off, | |
948 int bx_off, int by_off, | |
949 int block_w, int block_h, | |
950 int width, int height, VP56mv *mv) | |
951 { | |
952 VP56mv uvmv = *mv; | |
953 | |
954 /* Y */ | |
955 vp8_mc(s, 1, dst[0] + by_off * s->linesize + bx_off, | |
956 ref_frame->data[0], mv, x_off + bx_off, y_off + by_off, | |
957 block_w, block_h, width, height, s->linesize, | |
958 s->put_pixels_tab[block_w == 8]); | |
959 | |
960 /* U/V */ | |
961 if (s->profile == 3) { | |
962 uvmv.x &= ~7; | |
963 uvmv.y &= ~7; | |
964 } | |
965 x_off >>= 1; y_off >>= 1; | |
966 bx_off >>= 1; by_off >>= 1; | |
967 width >>= 1; height >>= 1; | |
968 block_w >>= 1; block_h >>= 1; | |
969 vp8_mc(s, 0, dst[1] + by_off * s->uvlinesize + bx_off, | |
970 ref_frame->data[1], &uvmv, x_off + bx_off, y_off + by_off, | |
971 block_w, block_h, width, height, s->uvlinesize, | |
972 s->put_pixels_tab[1 + (block_w == 4)]); | |
973 vp8_mc(s, 0, dst[2] + by_off * s->uvlinesize + bx_off, | |
974 ref_frame->data[2], &uvmv, x_off + bx_off, y_off + by_off, | |
975 block_w, block_h, width, height, s->uvlinesize, | |
976 s->put_pixels_tab[1 + (block_w == 4)]); | |
977 } | |
978 | |
946 /** | 979 /** |
947 * Apply motion vectors to prediction buffer, chapter 18. | 980 * Apply motion vectors to prediction buffer, chapter 18. |
948 */ | 981 */ |
949 static void inter_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb, | 982 static void inter_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb, |
950 int mb_x, int mb_y) | 983 int mb_x, int mb_y) |
951 { | 984 { |
952 int x_off = mb_x << 4, y_off = mb_y << 4; | 985 int x_off = mb_x << 4, y_off = mb_y << 4; |
953 int width = 16*s->mb_width, height = 16*s->mb_height; | 986 int width = 16*s->mb_width, height = 16*s->mb_height; |
954 VP56mv uvmv; | |
955 | 987 |
956 if (mb->mode < VP8_MVMODE_SPLIT) { | 988 if (mb->mode < VP8_MVMODE_SPLIT) { |
957 /* Y */ | 989 vp8_mc_part(s, dst, s->framep[mb->ref_frame], x_off, y_off, |
958 vp8_mc(s, 1, dst[0], s->framep[mb->ref_frame]->data[0], &mb->mv, | 990 0, 0, 16, 16, width, height, &mb->mv); |
959 x_off, y_off, 16, 16, width, height, s->linesize, | 991 } else switch (mb->partitioning) { |
960 s->put_pixels_tab[0]); | 992 case VP8_SPLITMVMODE_4x4: { |
961 | |
962 /* U/V */ | |
963 uvmv = mb->mv; | |
964 if (s->profile == 3) { | |
965 uvmv.x &= ~7; | |
966 uvmv.y &= ~7; | |
967 } | |
968 x_off >>= 1; y_off >>= 1; width >>= 1; height >>= 1; | |
969 vp8_mc(s, 0, dst[1], s->framep[mb->ref_frame]->data[1], &uvmv, | |
970 x_off, y_off, 8, 8, width, height, s->uvlinesize, | |
971 s->put_pixels_tab[1]); | |
972 vp8_mc(s, 0, dst[2], s->framep[mb->ref_frame]->data[2], &uvmv, | |
973 x_off, y_off, 8, 8, width, height, s->uvlinesize, | |
974 s->put_pixels_tab[1]); | |
975 } else { | |
976 int x, y; | 993 int x, y; |
994 VP56mv uvmv; | |
977 | 995 |
978 /* Y */ | 996 /* Y */ |
979 for (y = 0; y < 4; y++) { | 997 for (y = 0; y < 4; y++) { |
980 for (x = 0; x < 4; x++) { | 998 for (x = 0; x < 4; x++) { |
981 vp8_mc(s, 1, dst[0] + 4*y*s->linesize + x*4, | 999 vp8_mc(s, 1, dst[0] + 4*y*s->linesize + x*4, |
1014 4*x + x_off, 4*y + y_off, 4, 4, | 1032 4*x + x_off, 4*y + y_off, 4, 4, |
1015 width, height, s->uvlinesize, | 1033 width, height, s->uvlinesize, |
1016 s->put_pixels_tab[2]); | 1034 s->put_pixels_tab[2]); |
1017 } | 1035 } |
1018 } | 1036 } |
1037 break; | |
1038 } | |
1039 case VP8_SPLITMVMODE_16x8: | |
1040 vp8_mc_part(s, dst, s->framep[mb->ref_frame], x_off, y_off, | |
1041 0, 0, 16, 8, width, height, &mb->bmv[0]); | |
1042 vp8_mc_part(s, dst, s->framep[mb->ref_frame], x_off, y_off, | |
1043 0, 8, 16, 8, width, height, &mb->bmv[8]); | |
1044 break; | |
1045 case VP8_SPLITMVMODE_8x16: | |
1046 vp8_mc_part(s, dst, s->framep[mb->ref_frame], x_off, y_off, | |
1047 0, 0, 8, 16, width, height, &mb->bmv[0]); | |
1048 vp8_mc_part(s, dst, s->framep[mb->ref_frame], x_off, y_off, | |
1049 8, 0, 8, 16, width, height, &mb->bmv[2]); | |
1050 break; | |
1051 case VP8_SPLITMVMODE_8x8: | |
1052 vp8_mc_part(s, dst, s->framep[mb->ref_frame], x_off, y_off, | |
1053 0, 0, 8, 8, width, height, &mb->bmv[0]); | |
1054 vp8_mc_part(s, dst, s->framep[mb->ref_frame], x_off, y_off, | |
1055 8, 0, 8, 8, width, height, &mb->bmv[2]); | |
1056 vp8_mc_part(s, dst, s->framep[mb->ref_frame], x_off, y_off, | |
1057 0, 8, 8, 8, width, height, &mb->bmv[8]); | |
1058 vp8_mc_part(s, dst, s->framep[mb->ref_frame], x_off, y_off, | |
1059 8, 8, 8, 8, width, height, &mb->bmv[10]); | |
1060 break; | |
1019 } | 1061 } |
1020 } | 1062 } |
1021 | 1063 |
1022 static void idct_mb(VP8Context *s, uint8_t *y_dst, uint8_t *u_dst, uint8_t *v_dst, | 1064 static void idct_mb(VP8Context *s, uint8_t *y_dst, uint8_t *u_dst, uint8_t *v_dst, |
1023 VP8Macroblock *mb) | 1065 VP8Macroblock *mb) |