comparison dnxhdenc.c @ 10214:97f38ca4ed14 libavcodec

Use a custom radix sort implementation instead of qsort in dnxhd encoder. This is mainly to avoid test failures due to implementation-defined behaviour of qsort when elements are equal, giving different results for each of FreeBSD, Linux/glibc and Solaris. In addition it is about 35 % faster, effect on overall speed is minimal though (< 2%). Regression tests are unchanged (i.e. identical to Linux/glibc).
author reimar
date Mon, 21 Sep 2009 10:28:31 +0000
parents f01415f3f9a8
children 59ec306245a4
comparison
equal deleted inserted replaced
10213:cebf6e3381e0 10214:97f38ca4ed14
649 //dprintf(ctx->m.avctx, "out qscale %d\n", qscale); 649 //dprintf(ctx->m.avctx, "out qscale %d\n", qscale);
650 ctx->qscale = qscale; 650 ctx->qscale = qscale;
651 return 0; 651 return 0;
652 } 652 }
653 653
654 static int dnxhd_rc_cmp(const void *a, const void *b) 654 #define BUCKET_BITS 8
655 { 655 #define RADIX_PASSES 4
656 return ((const RCCMPEntry *)b)->value - ((const RCCMPEntry *)a)->value; 656 #define NBUCKETS (1 << BUCKET_BITS)
657
658 static inline int get_bucket(int value, int shift)
659 {
660 value >>= shift;
661 value &= NBUCKETS - 1;
662 return NBUCKETS - 1 - value;
663 }
664
665 static void radix_count(const RCCMPEntry *data, int size, int buckets[RADIX_PASSES][NBUCKETS])
666 {
667 int i, j;
668 memset(buckets, 0, sizeof(buckets[0][0]) * RADIX_PASSES * NBUCKETS);
669 for (i = 0; i < size; i++) {
670 int v = data[i].value;
671 for (j = 0; j < RADIX_PASSES; j++) {
672 buckets[j][get_bucket(v, 0)]++;
673 v >>= BUCKET_BITS;
674 }
675 assert(!v);
676 }
677 for (j = 0; j < RADIX_PASSES; j++) {
678 int offset = size;
679 for (i = NBUCKETS - 1; i >= 0; i--)
680 buckets[j][i] = offset -= buckets[j][i];
681 assert(!buckets[j][0]);
682 }
683 }
684
685 static void radix_sort_pass(RCCMPEntry *dst, const RCCMPEntry *data, int size, int buckets[NBUCKETS], int pass)
686 {
687 int shift = pass * BUCKET_BITS;
688 int i;
689 for (i = 0; i < size; i++) {
690 int v = get_bucket(data[i].value, shift);
691 int pos = buckets[v]++;
692 dst[pos] = data[i];
693 }
694 }
695
696 static void radix_sort(RCCMPEntry *data, int size)
697 {
698 int buckets[RADIX_PASSES][NBUCKETS];
699 RCCMPEntry *tmp = av_malloc(sizeof(*tmp) * size);
700 radix_count(data, size, buckets);
701 radix_sort_pass(tmp, data, size, buckets[0], 0);
702 radix_sort_pass(data, tmp, size, buckets[1], 1);
703 if (buckets[2][NBUCKETS - 1] || buckets[3][NBUCKETS - 1]) {
704 radix_sort_pass(tmp, data, size, buckets[2], 2);
705 radix_sort_pass(data, tmp, size, buckets[3], 3);
706 }
707 av_free(tmp);
657 } 708 }
658 709
659 static int dnxhd_encode_fast(AVCodecContext *avctx, DNXHDEncContext *ctx) 710 static int dnxhd_encode_fast(AVCodecContext *avctx, DNXHDEncContext *ctx)
660 { 711 {
661 int max_bits = 0; 712 int max_bits = 0;
680 max_bits += 31; //worst padding 731 max_bits += 31; //worst padding
681 } 732 }
682 if (!ret) { 733 if (!ret) {
683 if (RC_VARIANCE) 734 if (RC_VARIANCE)
684 avctx->execute(avctx, dnxhd_mb_var_thread, (void**)&ctx->thread[0], NULL, avctx->thread_count, sizeof(void*)); 735 avctx->execute(avctx, dnxhd_mb_var_thread, (void**)&ctx->thread[0], NULL, avctx->thread_count, sizeof(void*));
685 qsort(ctx->mb_cmp, ctx->m.mb_num, sizeof(RCEntry), dnxhd_rc_cmp); 736 radix_sort(ctx->mb_cmp, ctx->m.mb_num);
686 for (x = 0; x < ctx->m.mb_num && max_bits > ctx->frame_bits; x++) { 737 for (x = 0; x < ctx->m.mb_num && max_bits > ctx->frame_bits; x++) {
687 int mb = ctx->mb_cmp[x].mb; 738 int mb = ctx->mb_cmp[x].mb;
688 max_bits -= ctx->mb_rc[ctx->qscale][mb].bits - ctx->mb_rc[ctx->qscale+1][mb].bits; 739 max_bits -= ctx->mb_rc[ctx->qscale][mb].bits - ctx->mb_rc[ctx->qscale+1][mb].bits;
689 ctx->mb_qscale[mb] = ctx->qscale+1; 740 ctx->mb_qscale[mb] = ctx->qscale+1;
690 ctx->mb_bits[mb] = ctx->mb_rc[ctx->qscale+1][mb].bits; 741 ctx->mb_bits[mb] = ctx->mb_rc[ctx->qscale+1][mb].bits;