# HG changeset patch
# User reimar
# Date 1137957012 0
# Node ID a2f611d6c34dd65f1dfac0fd0a067ed3f86b27aa
# Parent 61b4cc04298864038f57f7fad54dbf53d5add169
faster copy functions for lzo decoder that also need padding

diff -r 61b4cc042988 -r a2f611d6c34d cscd.c
--- a/cscd.c Sat Jan 21 18:19:47 2006 +0000
+++ b/cscd.c Sun Jan 22 19:10:12 2006 +0000
@@ -232,7 +232,7 @@
     c->linelen = avctx->width * avctx->bits_per_sample / 8;
     c->height = avctx->height;
     c->decomp_size = c->height * c->linelen;
-    c->decomp_buf = av_malloc(c->decomp_size);
+    c->decomp_buf = av_malloc(c->decomp_size + LZO_OUTPUT_PADDING);
     if (!c->decomp_buf) {
         av_log(avctx, AV_LOG_ERROR, "Can't allocate decompression buffer.\n");
         return 1;
diff -r 61b4cc042988 -r a2f611d6c34d lzo.c
--- a/lzo.c Sat Jan 21 18:19:47 2006 +0000
+++ b/lzo.c Sun Jan 22 19:10:12 2006 +0000
@@ -17,12 +17,18 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 #include "common.h"
+//! avoid e.g. MPlayers fast_memcpy, it slows things down here
+#undef memcpy
+#include <string.h>
 #include "lzo.h"
 
+//! define if we may write up to 12 bytes beyond the output buffer
+#define OUTBUF_PADDED 1
+//! define if we may read up to 4 bytes beyond the input buffer
+#define INBUF_PADDED 1
 typedef struct LZOContext {
     uint8_t *in, *in_end;
-    uint8_t *out, *out_end;
-    int out_size;
+    uint8_t *out_start, *out, *out_end;
     int error;
 } LZOContext;
 
@@ -57,17 +63,29 @@
  * \param cnt number of bytes to copy, must be > 0
  */
 static inline void copy(LZOContext *c, int cnt) {
-    if (c->in + cnt > c->in_end) {
-        cnt = c->in_end - c->in;
+    register uint8_t *src = c->in;
+    register uint8_t *dst = c->out;
+    if (src + cnt > c->in_end) {
+        cnt = c->in_end - src;
         c->error |= LZO_INPUT_DEPLETED;
     }
-    if (c->out + cnt > c->out_end) {
-        cnt = c->out_end - c->out;
+    if (dst + cnt > c->out_end) {
+        cnt = c->out_end - dst;
         c->error |= LZO_OUTPUT_FULL;
     }
-    do {
-        *c->out++ = *c->in++;
-    } while (--cnt);
+#if defined(INBUF_PADDED) && defined(OUTBUF_PADDED)
+    dst[0] = src[0];
+    dst[1] = src[1];
+    dst[2] = src[2];
+    dst[3] = src[3];
+    src += 4;
+    dst += 4;
+    cnt -= 4;
+    if (cnt > 0)
+#endif
+        memcpy(dst, src, cnt);
+    c->in = src + cnt;
+    c->out = dst + cnt;
 }
 
 /**
@@ -75,20 +93,59 @@
  * \param back how many bytes back we start
  * \param cnt number of bytes to copy, must be > 0
  *
- * cnt > back is valid, this will copy the bytes we just copied.
+ * cnt > back is valid, this will copy the bytes we just copied,
+ * thus creating a repeating pattern with a period length of back.
  */
 static inline void copy_backptr(LZOContext *c, int back, int cnt) {
-    if (c->out - back < c->out_end - c->out_size) {
+    register uint8_t *src = &c->out[-back];
+    register uint8_t *dst = c->out;
+    if (src < c->out_start) {
         c->error |= LZO_INVALID_BACKPTR;
         return;
     }
-    if (c->out + cnt > c->out_end) {
-        cnt = c->out_end - c->out;
+    if (dst + cnt > c->out_end) {
+        cnt = c->out_end - dst;
         c->error |= LZO_OUTPUT_FULL;
     }
-    do {
-        *c->out++ = c->out[-back];
-    } while (--cnt);
+    if (back == 1) {
+        memset(dst, *src, cnt);
+        dst += cnt;
+    } else {
+#ifdef OUTBUF_PADDED
+        dst[0] = src[0];
+        dst[1] = src[1];
+        dst[2] = src[2];
+        dst[3] = src[3];
+        src += 4;
+        dst += 4;
+        cnt -= 4;
+        if (cnt > 0) {
+            dst[0] = src[0];
+            dst[1] = src[1];
+            dst[2] = src[2];
+            dst[3] = src[3];
+            dst[4] = src[4];
+            dst[5] = src[5];
+            dst[6] = src[6];
+            dst[7] = src[7];
+            src += 8;
+            dst += 8;
+            cnt -= 8;
+        }
+#endif
+        if (cnt > 0) {
+            int blocklen = back;
+            while (cnt > blocklen) {
+                memcpy(dst, src, blocklen);
+                dst += blocklen;
+                cnt -= blocklen;
+                blocklen <<= 1;
+            }
+            memcpy(dst, src, cnt);
+        }
+        dst += cnt;
+    }
+    c->out = dst;
 }
 
 /**
@@ -98,6 +155,9 @@
  * \param in input buffer
  * \param inlen size of input buffer, number of bytes left are returned here
  * \return 0 on success, otherwise error flags, see lzo.h
+ *
+ * make sure all buffers are appropriately padded, in must provide
+ * LZO_INPUT_PADDING, out must provide LZO_OUTPUT_PADDING additional bytes
  */
 int lzo1x_decode(void *out, int *outlen, void *in, int *inlen) {
     enum {COPY, BACKPTR} state = COPY;
@@ -105,9 +165,8 @@
     LZOContext c;
     c.in = in;
     c.in_end = in + *inlen;
-    c.out = out;
+    c.out = c.out_start = out;
     c.out_end = out + * outlen;
-    c.out_size = *outlen;
     c.error = 0;
     x = get_byte(&c);
     if (x > 17) {
diff -r 61b4cc042988 -r a2f611d6c34d lzo.h
--- a/lzo.h Sat Jan 21 18:19:47 2006 +0000
+++ b/lzo.h Sun Jan 22 19:10:12 2006 +0000
@@ -6,6 +6,9 @@
 #define LZO_INVALID_BACKPTR 4
 #define LZO_ERROR 8
 
+#define LZO_INPUT_PADDING 4
+#define LZO_OUTPUT_PADDING 12
+
 int lzo1x_decode(void *out, int *outlen, void *in, int *inlen);
 
 #endif