comparison vp8.c @ 11989:176c5deb6756 libavcodec

Optimize split MC so that we don't always process a 4x4 grid of 4x4-pixel blocks, but instead apply MC to 16x8/8x16/8x8 subblocks where possible. Since this allows us to use the width=8/16 MC functions instead of the width=4 ones, we can now take more advantage of the SSE2/SSSE3 optimizations, leading to a total speedup of about 10% for the splitMV filter.
author rbultje
date Mon, 28 Jun 2010 13:50:55 +0000
parents 356b20a6566d
children 3c51d7ac41c9
comparison
equal deleted inserted replaced
11988:e382860b855f 11989:176c5deb6756
941 } 941 }
942 942
943 mc_func[my_idx][mx_idx](dst, linesize, src, linesize, block_h, mx, my); 943 mc_func[my_idx][mx_idx](dst, linesize, src, linesize, block_h, mx, my);
944 } 944 }
945 945
946 static inline void vp8_mc_part(VP8Context *s, uint8_t *dst[3],
947 AVFrame *ref_frame, int x_off, int y_off,
948 int bx_off, int by_off,
949 int block_w, int block_h,
950 int width, int height, VP56mv *mv)
951 {
952 VP56mv uvmv = *mv;
953
954 /* Y */
955 vp8_mc(s, 1, dst[0] + by_off * s->linesize + bx_off,
956 ref_frame->data[0], mv, x_off + bx_off, y_off + by_off,
957 block_w, block_h, width, height, s->linesize,
958 s->put_pixels_tab[block_w == 8]);
959
960 /* U/V */
961 if (s->profile == 3) {
962 uvmv.x &= ~7;
963 uvmv.y &= ~7;
964 }
965 x_off >>= 1; y_off >>= 1;
966 bx_off >>= 1; by_off >>= 1;
967 width >>= 1; height >>= 1;
968 block_w >>= 1; block_h >>= 1;
969 vp8_mc(s, 0, dst[1] + by_off * s->uvlinesize + bx_off,
970 ref_frame->data[1], &uvmv, x_off + bx_off, y_off + by_off,
971 block_w, block_h, width, height, s->uvlinesize,
972 s->put_pixels_tab[1 + (block_w == 4)]);
973 vp8_mc(s, 0, dst[2] + by_off * s->uvlinesize + bx_off,
974 ref_frame->data[2], &uvmv, x_off + bx_off, y_off + by_off,
975 block_w, block_h, width, height, s->uvlinesize,
976 s->put_pixels_tab[1 + (block_w == 4)]);
977 }
978
946 /** 979 /**
947 * Apply motion vectors to prediction buffer, chapter 18. 980 * Apply motion vectors to prediction buffer, chapter 18.
948 */ 981 */
949 static void inter_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb, 982 static void inter_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb,
950 int mb_x, int mb_y) 983 int mb_x, int mb_y)
951 { 984 {
952 int x_off = mb_x << 4, y_off = mb_y << 4; 985 int x_off = mb_x << 4, y_off = mb_y << 4;
953 int width = 16*s->mb_width, height = 16*s->mb_height; 986 int width = 16*s->mb_width, height = 16*s->mb_height;
954 VP56mv uvmv;
955 987
956 if (mb->mode < VP8_MVMODE_SPLIT) { 988 if (mb->mode < VP8_MVMODE_SPLIT) {
957 /* Y */ 989 vp8_mc_part(s, dst, s->framep[mb->ref_frame], x_off, y_off,
958 vp8_mc(s, 1, dst[0], s->framep[mb->ref_frame]->data[0], &mb->mv, 990 0, 0, 16, 16, width, height, &mb->mv);
959 x_off, y_off, 16, 16, width, height, s->linesize, 991 } else switch (mb->partitioning) {
960 s->put_pixels_tab[0]); 992 case VP8_SPLITMVMODE_4x4: {
961
962 /* U/V */
963 uvmv = mb->mv;
964 if (s->profile == 3) {
965 uvmv.x &= ~7;
966 uvmv.y &= ~7;
967 }
968 x_off >>= 1; y_off >>= 1; width >>= 1; height >>= 1;
969 vp8_mc(s, 0, dst[1], s->framep[mb->ref_frame]->data[1], &uvmv,
970 x_off, y_off, 8, 8, width, height, s->uvlinesize,
971 s->put_pixels_tab[1]);
972 vp8_mc(s, 0, dst[2], s->framep[mb->ref_frame]->data[2], &uvmv,
973 x_off, y_off, 8, 8, width, height, s->uvlinesize,
974 s->put_pixels_tab[1]);
975 } else {
976 int x, y; 993 int x, y;
994 VP56mv uvmv;
977 995
978 /* Y */ 996 /* Y */
979 for (y = 0; y < 4; y++) { 997 for (y = 0; y < 4; y++) {
980 for (x = 0; x < 4; x++) { 998 for (x = 0; x < 4; x++) {
981 vp8_mc(s, 1, dst[0] + 4*y*s->linesize + x*4, 999 vp8_mc(s, 1, dst[0] + 4*y*s->linesize + x*4,
1014 4*x + x_off, 4*y + y_off, 4, 4, 1032 4*x + x_off, 4*y + y_off, 4, 4,
1015 width, height, s->uvlinesize, 1033 width, height, s->uvlinesize,
1016 s->put_pixels_tab[2]); 1034 s->put_pixels_tab[2]);
1017 } 1035 }
1018 } 1036 }
1037 break;
1038 }
1039 case VP8_SPLITMVMODE_16x8:
1040 vp8_mc_part(s, dst, s->framep[mb->ref_frame], x_off, y_off,
1041 0, 0, 16, 8, width, height, &mb->bmv[0]);
1042 vp8_mc_part(s, dst, s->framep[mb->ref_frame], x_off, y_off,
1043 0, 8, 16, 8, width, height, &mb->bmv[8]);
1044 break;
1045 case VP8_SPLITMVMODE_8x16:
1046 vp8_mc_part(s, dst, s->framep[mb->ref_frame], x_off, y_off,
1047 0, 0, 8, 16, width, height, &mb->bmv[0]);
1048 vp8_mc_part(s, dst, s->framep[mb->ref_frame], x_off, y_off,
1049 8, 0, 8, 16, width, height, &mb->bmv[2]);
1050 break;
1051 case VP8_SPLITMVMODE_8x8:
1052 vp8_mc_part(s, dst, s->framep[mb->ref_frame], x_off, y_off,
1053 0, 0, 8, 8, width, height, &mb->bmv[0]);
1054 vp8_mc_part(s, dst, s->framep[mb->ref_frame], x_off, y_off,
1055 8, 0, 8, 8, width, height, &mb->bmv[2]);
1056 vp8_mc_part(s, dst, s->framep[mb->ref_frame], x_off, y_off,
1057 0, 8, 8, 8, width, height, &mb->bmv[8]);
1058 vp8_mc_part(s, dst, s->framep[mb->ref_frame], x_off, y_off,
1059 8, 8, 8, 8, width, height, &mb->bmv[10]);
1060 break;
1019 } 1061 }
1020 } 1062 }
1021 1063
1022 static void idct_mb(VP8Context *s, uint8_t *y_dst, uint8_t *u_dst, uint8_t *v_dst, 1064 static void idct_mb(VP8Context *s, uint8_t *y_dst, uint8_t *u_dst, uint8_t *v_dst,
1023 VP8Macroblock *mb) 1065 VP8Macroblock *mb)