Mercurial > libavcodec.hg
comparison dnxhdenc.c @ 10214:97f38ca4ed14 libavcodec
Use a custom radix sort implementation instead of qsort in dnxhd encoder.
This is mainly to avoid test failures due to implementation-defined
behaviour of qsort when elements are equal, giving different results
for each of FreeBSD, Linux/glibc and Solaris.
In addition it is about 35 % faster, effect on overall speed is minimal
though (< 2%).
Regression tests are unchanged (i.e. identical to Linux/glibc).
author | reimar |
---|---|
date | Mon, 21 Sep 2009 10:28:31 +0000 |
parents | f01415f3f9a8 |
children | 59ec306245a4 |
comparison
equal
deleted
inserted
replaced
10213:cebf6e3381e0 | 10214:97f38ca4ed14 |
---|---|
649 //dprintf(ctx->m.avctx, "out qscale %d\n", qscale); | 649 //dprintf(ctx->m.avctx, "out qscale %d\n", qscale); |
650 ctx->qscale = qscale; | 650 ctx->qscale = qscale; |
651 return 0; | 651 return 0; |
652 } | 652 } |
653 | 653 |
654 static int dnxhd_rc_cmp(const void *a, const void *b) | 654 #define BUCKET_BITS 8 |
655 { | 655 #define RADIX_PASSES 4 |
656 return ((const RCCMPEntry *)b)->value - ((const RCCMPEntry *)a)->value; | 656 #define NBUCKETS (1 << BUCKET_BITS) |
657 | |
658 static inline int get_bucket(int value, int shift) | |
659 { | |
660 value >>= shift; | |
661 value &= NBUCKETS - 1; | |
662 return NBUCKETS - 1 - value; | |
663 } | |
664 | |
665 static void radix_count(const RCCMPEntry *data, int size, int buckets[RADIX_PASSES][NBUCKETS]) | |
666 { | |
667 int i, j; | |
668 memset(buckets, 0, sizeof(buckets[0][0]) * RADIX_PASSES * NBUCKETS); | |
669 for (i = 0; i < size; i++) { | |
670 int v = data[i].value; | |
671 for (j = 0; j < RADIX_PASSES; j++) { | |
672 buckets[j][get_bucket(v, 0)]++; | |
673 v >>= BUCKET_BITS; | |
674 } | |
675 assert(!v); | |
676 } | |
677 for (j = 0; j < RADIX_PASSES; j++) { | |
678 int offset = size; | |
679 for (i = NBUCKETS - 1; i >= 0; i--) | |
680 buckets[j][i] = offset -= buckets[j][i]; | |
681 assert(!buckets[j][0]); | |
682 } | |
683 } | |
684 | |
685 static void radix_sort_pass(RCCMPEntry *dst, const RCCMPEntry *data, int size, int buckets[NBUCKETS], int pass) | |
686 { | |
687 int shift = pass * BUCKET_BITS; | |
688 int i; | |
689 for (i = 0; i < size; i++) { | |
690 int v = get_bucket(data[i].value, shift); | |
691 int pos = buckets[v]++; | |
692 dst[pos] = data[i]; | |
693 } | |
694 } | |
695 | |
696 static void radix_sort(RCCMPEntry *data, int size) | |
697 { | |
698 int buckets[RADIX_PASSES][NBUCKETS]; | |
699 RCCMPEntry *tmp = av_malloc(sizeof(*tmp) * size); | |
700 radix_count(data, size, buckets); | |
701 radix_sort_pass(tmp, data, size, buckets[0], 0); | |
702 radix_sort_pass(data, tmp, size, buckets[1], 1); | |
703 if (buckets[2][NBUCKETS - 1] || buckets[3][NBUCKETS - 1]) { | |
704 radix_sort_pass(tmp, data, size, buckets[2], 2); | |
705 radix_sort_pass(data, tmp, size, buckets[3], 3); | |
706 } | |
707 av_free(tmp); | |
657 } | 708 } |
658 | 709 |
659 static int dnxhd_encode_fast(AVCodecContext *avctx, DNXHDEncContext *ctx) | 710 static int dnxhd_encode_fast(AVCodecContext *avctx, DNXHDEncContext *ctx) |
660 { | 711 { |
661 int max_bits = 0; | 712 int max_bits = 0; |
680 max_bits += 31; //worst padding | 731 max_bits += 31; //worst padding |
681 } | 732 } |
682 if (!ret) { | 733 if (!ret) { |
683 if (RC_VARIANCE) | 734 if (RC_VARIANCE) |
684 avctx->execute(avctx, dnxhd_mb_var_thread, (void**)&ctx->thread[0], NULL, avctx->thread_count, sizeof(void*)); | 735 avctx->execute(avctx, dnxhd_mb_var_thread, (void**)&ctx->thread[0], NULL, avctx->thread_count, sizeof(void*)); |
685 qsort(ctx->mb_cmp, ctx->m.mb_num, sizeof(RCEntry), dnxhd_rc_cmp); | 736 radix_sort(ctx->mb_cmp, ctx->m.mb_num); |
686 for (x = 0; x < ctx->m.mb_num && max_bits > ctx->frame_bits; x++) { | 737 for (x = 0; x < ctx->m.mb_num && max_bits > ctx->frame_bits; x++) { |
687 int mb = ctx->mb_cmp[x].mb; | 738 int mb = ctx->mb_cmp[x].mb; |
688 max_bits -= ctx->mb_rc[ctx->qscale][mb].bits - ctx->mb_rc[ctx->qscale+1][mb].bits; | 739 max_bits -= ctx->mb_rc[ctx->qscale][mb].bits - ctx->mb_rc[ctx->qscale+1][mb].bits; |
689 ctx->mb_qscale[mb] = ctx->qscale+1; | 740 ctx->mb_qscale[mb] = ctx->qscale+1; |
690 ctx->mb_bits[mb] = ctx->mb_rc[ctx->qscale+1][mb].bits; | 741 ctx->mb_bits[mb] = ctx->mb_rc[ctx->qscale+1][mb].bits; |