view lzo.c @ 728:1fa3820b1a84 libavutil

ARM asm for AV_RN*() ARMv6 and later support unaligned loads and stores for single word/halfword but not double/multiple. GCC is ignorant of this and will always use bytewise accesses for unaligned data. Casting to an int32_t pointer is dangerous since a load/store double or multiple instruction might be used (this happens with some code in FFmpeg). Implementing the AV_[RW]* macros with inline asm using only supported instructions gives fast and safe unaligned accesses. ARM RVCT does the right thing with generic code. This gives an overall speedup of up to 10%.
author mru
date Sat, 18 Apr 2009 00:00:28 +0000
parents 0a1867c7ec5e
children
line wrap: on
line source

/*
 * LZO 1x decompression
 * Copyright (c) 2006 Reimar Doeffinger
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "avutil.h"
#include "common.h"
//! Avoid e.g. MPlayers fast_memcpy, it slows things down here.
#undef memcpy
#include <string.h>
#include "lzo.h"

//! Define if we may write up to 12 bytes beyond the output buffer.
#define OUTBUF_PADDED 1
//! Define if we may read up to 8 bytes beyond the input buffer.
#define INBUF_PADDED 1
typedef struct LZOContext {
    const uint8_t *in, *in_end;
    uint8_t *out_start, *out, *out_end;
    int error;
} LZOContext;

/**
 * \brief Reads one byte from the input buffer, avoiding an overrun.
 * \return byte read
 */
static inline int get_byte(LZOContext *c) {
    if (c->in < c->in_end)
        return *c->in++;
    c->error |= AV_LZO_INPUT_DEPLETED;
    return 1;
}

#ifdef INBUF_PADDED
#define GETB(c) (*(c).in++)
#else
#define GETB(c) get_byte(&(c))
#endif

/**
 * \brief Decodes a length value in the coding used by lzo.
 * \param x previous byte value
 * \param mask bits used from x
 * \return decoded length value
 */
static inline int get_len(LZOContext *c, int x, int mask) {
    int cnt = x & mask;
    if (!cnt) {
        while (!(x = get_byte(c))) cnt += 255;
        cnt += mask + x;
    }
    return cnt;
}

//#define UNALIGNED_LOADSTORE
#define BUILTIN_MEMCPY
#ifdef UNALIGNED_LOADSTORE
#define COPY2(d, s) *(uint16_t *)(d) = *(uint16_t *)(s);
#define COPY4(d, s) *(uint32_t *)(d) = *(uint32_t *)(s);
#elif defined(BUILTIN_MEMCPY)
#define COPY2(d, s) memcpy(d, s, 2);
#define COPY4(d, s) memcpy(d, s, 4);
#else
#define COPY2(d, s) (d)[0] = (s)[0]; (d)[1] = (s)[1];
#define COPY4(d, s) (d)[0] = (s)[0]; (d)[1] = (s)[1]; (d)[2] = (s)[2]; (d)[3] = (s)[3];
#endif

/**
 * \brief Copies bytes from input to output buffer with checking.
 * \param cnt number of bytes to copy, must be >= 0
 */
static inline void copy(LZOContext *c, int cnt) {
    register const uint8_t *src = c->in;
    register uint8_t *dst = c->out;
    if (cnt > c->in_end - src) {
        cnt = FFMAX(c->in_end - src, 0);
        c->error |= AV_LZO_INPUT_DEPLETED;
    }
    if (cnt > c->out_end - dst) {
        cnt = FFMAX(c->out_end - dst, 0);
        c->error |= AV_LZO_OUTPUT_FULL;
    }
#if defined(INBUF_PADDED) && defined(OUTBUF_PADDED)
    COPY4(dst, src);
    src += 4;
    dst += 4;
    cnt -= 4;
    if (cnt > 0)
#endif
        memcpy(dst, src, cnt);
    c->in = src + cnt;
    c->out = dst + cnt;
}

static inline void memcpy_backptr(uint8_t *dst, int back, int cnt);

/**
 * \brief Copies previously decoded bytes to current position.
 * \param back how many bytes back we start
 * \param cnt number of bytes to copy, must be >= 0
 *
 * cnt > back is valid, this will copy the bytes we just copied,
 * thus creating a repeating pattern with a period length of back.
 */
static inline void copy_backptr(LZOContext *c, int back, int cnt) {
    register const uint8_t *src = &c->out[-back];
    register uint8_t *dst = c->out;
    if (src < c->out_start || src > dst) {
        c->error |= AV_LZO_INVALID_BACKPTR;
        return;
    }
    if (cnt > c->out_end - dst) {
        cnt = FFMAX(c->out_end - dst, 0);
        c->error |= AV_LZO_OUTPUT_FULL;
    }
    memcpy_backptr(dst, back, cnt);
    c->out = dst + cnt;
}

static inline void memcpy_backptr(uint8_t *dst, int back, int cnt) {
    const uint8_t *src = &dst[-back];
    if (back == 1) {
        memset(dst, *src, cnt);
    } else {
#ifdef OUTBUF_PADDED
        COPY2(dst, src);
        COPY2(dst + 2, src + 2);
        src += 4;
        dst += 4;
        cnt -= 4;
        if (cnt > 0) {
            COPY2(dst, src);
            COPY2(dst + 2, src + 2);
            COPY2(dst + 4, src + 4);
            COPY2(dst + 6, src + 6);
            src += 8;
            dst += 8;
            cnt -= 8;
        }
#endif
        if (cnt > 0) {
            int blocklen = back;
            while (cnt > blocklen) {
                memcpy(dst, src, blocklen);
                dst += blocklen;
                cnt -= blocklen;
                blocklen <<= 1;
            }
            memcpy(dst, src, cnt);
        }
    }
}

void av_memcpy_backptr(uint8_t *dst, int back, int cnt) {
    memcpy_backptr(dst, back, cnt);
}

int av_lzo1x_decode(void *out, int *outlen, const void *in, int *inlen) {
    int state= 0;
    int x;
    LZOContext c;
    c.in = in;
    c.in_end = (const uint8_t *)in + *inlen;
    c.out = c.out_start = out;
    c.out_end = (uint8_t *)out + * outlen;
    c.error = 0;
    x = GETB(c);
    if (x > 17) {
        copy(&c, x - 17);
        x = GETB(c);
        if (x < 16) c.error |= AV_LZO_ERROR;
    }
    if (c.in > c.in_end)
        c.error |= AV_LZO_INPUT_DEPLETED;
    while (!c.error) {
        int cnt, back;
        if (x > 15) {
            if (x > 63) {
                cnt = (x >> 5) - 1;
                back = (GETB(c) << 3) + ((x >> 2) & 7) + 1;
            } else if (x > 31) {
                cnt = get_len(&c, x, 31);
                x = GETB(c);
                back = (GETB(c) << 6) + (x >> 2) + 1;
            } else {
                cnt = get_len(&c, x, 7);
                back = (1 << 14) + ((x & 8) << 11);
                x = GETB(c);
                back += (GETB(c) << 6) + (x >> 2);
                if (back == (1 << 14)) {
                    if (cnt != 1)
                        c.error |= AV_LZO_ERROR;
                    break;
                }
            }
        } else if(!state){
                cnt = get_len(&c, x, 15);
                copy(&c, cnt + 3);
                x = GETB(c);
                if (x > 15)
                    continue;
                cnt = 1;
                back = (1 << 11) + (GETB(c) << 2) + (x >> 2) + 1;
        } else {
                cnt = 0;
                back = (GETB(c) << 2) + (x >> 2) + 1;
        }
        copy_backptr(&c, back, cnt + 2);
        state=
        cnt = x & 3;
        copy(&c, cnt);
        x = GETB(c);
    }
    *inlen = c.in_end - c.in;
    if (c.in > c.in_end)
        *inlen = 0;
    *outlen = c.out_end - c.out;
    return c.error;
}

#ifdef TEST
#include <stdio.h>
#include <lzo/lzo1x.h>
#include "log.h"
#define MAXSZ (10*1024*1024)

/* Define one of these to 1 if you wish to benchmark liblzo
 * instead of our native implementation. */
#define BENCHMARK_LIBLZO_SAFE   0
#define BENCHMARK_LIBLZO_UNSAFE 0

int main(int argc, char *argv[]) {
    FILE *in = fopen(argv[1], "rb");
    uint8_t *orig = av_malloc(MAXSZ + 16);
    uint8_t *comp = av_malloc(2*MAXSZ + 16);
    uint8_t *decomp = av_malloc(MAXSZ + 16);
    size_t s = fread(orig, 1, MAXSZ, in);
    lzo_uint clen = 0;
    long tmp[LZO1X_MEM_COMPRESS];
    int inlen, outlen;
    int i;
    av_log_set_level(AV_LOG_DEBUG);
    lzo1x_999_compress(orig, s, comp, &clen, tmp);
    for (i = 0; i < 300; i++) {
START_TIMER
        inlen = clen; outlen = MAXSZ;
#if BENCHMARK_LIBLZO_SAFE
        if (lzo1x_decompress_safe(comp, inlen, decomp, &outlen, NULL))
#elif BENCHMARK_LIBLZO_UNSAFE
        if (lzo1x_decompress(comp, inlen, decomp, &outlen, NULL))
#else
        if (av_lzo1x_decode(decomp, &outlen, comp, &inlen))
#endif
            av_log(NULL, AV_LOG_ERROR, "decompression error\n");
STOP_TIMER("lzod")
    }
    if (memcmp(orig, decomp, s))
        av_log(NULL, AV_LOG_ERROR, "decompression incorrect\n");
    else
        av_log(NULL, AV_LOG_ERROR, "decompression OK\n");
    return 0;
}
#endif