alpha/motion_est_mvi_asm.S @ changeset 10061:09f2db2d7c90 (libavcodec)
Fix a bug caused by the difference between stride and picture width.

When a frame is allocated by the libschroedinger routines, the frame data size
does not match the actual frame size if the width is not a multiple of 16, so
we cannot do a single straightforward memcpy of the frame returned by
libschroedinger into the FFmpeg picture: the stride differs from the width.

Fix this by allocating the libschroedinger frame within libavcodec, using the
dimensions from AVCodecContext, and passing that frame to libschroedinger.
patch by Anuradha Suraparaju, anuradha rd.bbc.co uk
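
For context, the failure mode described above is the classic stride-vs-width
mismatch: when a buffer's stride is padded past the visible width, a single
whole-frame memcpy smears rows across the padding, so the data would have to
be copied row by row instead. A minimal sketch of such a stride-aware copy,
using hypothetical names (copy_plane, dst, src) that do not come from the
actual patch:

    #include <stdint.h>
    #include <string.h>

    /* Hypothetical illustration, not code from the patch: copy a plane
     * between buffers whose strides differ, e.g. because one of them is
     * padded up to a multiple of 16. */
    static void copy_plane(uint8_t *dst, int dst_stride,
                           const uint8_t *src, int src_stride,
                           int width, int height)
    {
        int y;
        for (y = 0; y < height; y++) {
            memcpy(dst, src, width); /* copy only the visible pixels */
            dst += dst_stride;       /* then step over the padding */
            src += src_stride;
        }
    }

The patch sidesteps even this per-row copy: the frame is allocated inside
libavcodec with the AVCodecContext dimensions and handed to libschroedinger
to decode into, so no copy between mismatched layouts is needed.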
author:   diego
date:     Sat, 15 Aug 2009 11:59:53 +0000
parents:  6f1b210e58d1
children: 58c2da0a371b
/*
 * Alpha optimized DSP utils
 * Copyright (c) 2002 Falk Hueffner <falk@debian.org>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "regdef.h"

/* Some nicer register names. */
#define ta t10
#define tb t11
#define tc t12
#define td AT
/* Danger: these overlap with the argument list and the return value */
#define te a5
#define tf a4
#define tg a3
#define th v0

        .set noat
        .set noreorder
        .arch pca56
        .text

/*****************************************************************************
 * int pix_abs16x16_mvi_asm(void *v, uint8_t *pix1, uint8_t *pix2,
 *                          int line_size, int h)
 *
 * Arguments arrive in a0..a4; a0 (v) is unused and is clobbered as a
 * scratch register in the aligned loop.
 *
 * This code is written with a pca56 in mind. For ev6, one should
 * really take the increased latency of 3 cycles for MVI instructions
 * into account.
 *
 * It is important to keep the loading and first use of a register as
 * far apart as possible, because if a register is accessed before it
 * has been fetched from memory, the CPU will stall.
 */
        .align 4
        .globl pix_abs16x16_mvi_asm
        .ent pix_abs16x16_mvi_asm
pix_abs16x16_mvi_asm:
        .frame sp, 0, ra, 0
        .prologue 0

#if CONFIG_GPROF
        lda     AT, _mcount
        jsr     AT, (AT), _mcount
#endif

        and     a2, 7, t0
        clr     v0
        beq     t0, $aligned

        .align 4
$unaligned:
        /* Registers:
           line 0:
           t0:  left_u -> left lo -> left
           t1:  mid
           t2:  right_u -> right hi -> right
           t3:  ref left
           t4:  ref right
           line 1:
           t5:  left_u -> left lo -> left
           t6:  mid
           t7:  right_u -> right hi -> right
           t8:  ref left
           t9:  ref right
           temp:
           ta:  left hi
           tb:  right lo
           tc:  error left
           td:  error right  */

        /* load line 0 */
        ldq_u   t0, 0(a2)       # left_u
        ldq_u   t1, 8(a2)       # mid
        ldq_u   t2, 16(a2)      # right_u
        ldq     t3, 0(a1)       # ref left
        ldq     t4, 8(a1)       # ref right
        addq    a1, a3, a1      # pix1
        addq    a2, a3, a2      # pix2
        /* load line 1 */
        ldq_u   t5, 0(a2)       # left_u
        ldq_u   t6, 8(a2)       # mid
        ldq_u   t7, 16(a2)      # right_u
        ldq     t8, 0(a1)       # ref left
        ldq     t9, 8(a1)       # ref right
        addq    a1, a3, a1      # pix1
        addq    a2, a3, a2      # pix2
        /* calc line 0 */
        extql   t0, a2, t0      # left lo
        extqh   t1, a2, ta      # left hi
        extql   t1, a2, tb      # right lo
        or      t0, ta, t0      # left
        extqh   t2, a2, t2      # right hi
        perr    t3, t0, tc      # error left
        or      t2, tb, t2      # right
        perr    t4, t2, td      # error right
        addq    v0, tc, v0      # add error left
        addq    v0, td, v0      # add error right
        /* calc line 1 */
        extql   t5, a2, t5      # left lo
        extqh   t6, a2, ta      # left hi
        extql   t6, a2, tb      # right lo
        or      t5, ta, t5      # left
        extqh   t7, a2, t7      # right hi
        perr    t8, t5, tc      # error left
        or      t7, tb, t7      # right
        perr    t9, t7, td      # error right
        addq    v0, tc, v0      # add error left
        addq    v0, td, v0      # add error right
        /* loop */
        subq    a4, 2, a4       # h -= 2
        bne     a4, $unaligned
        ret

        .align 4
$aligned:
        /* load line 0 */
        ldq     t0, 0(a2)       # left
        ldq     t1, 8(a2)       # right
        addq    a2, a3, a2      # pix2
        ldq     t2, 0(a1)       # ref left
        ldq     t3, 8(a1)       # ref right
        addq    a1, a3, a1      # pix1
        /* load line 1 */
        ldq     t4, 0(a2)       # left
        ldq     t5, 8(a2)       # right
        addq    a2, a3, a2      # pix2
        ldq     t6, 0(a1)       # ref left
        ldq     t7, 8(a1)       # ref right
        addq    a1, a3, a1      # pix1
        /* load line 2 */
        ldq     t8, 0(a2)       # left
        ldq     t9, 8(a2)       # right
        addq    a2, a3, a2      # pix2
        ldq     ta, 0(a1)       # ref left
        ldq     tb, 8(a1)       # ref right
        addq    a1, a3, a1      # pix1
        /* load line 3 */
        ldq     tc, 0(a2)       # left
        ldq     td, 8(a2)       # right
        addq    a2, a3, a2      # pix2
        ldq     te, 0(a1)       # ref left
        ldq     a0, 8(a1)       # ref right (clobbers the unused arg a0)
        /* calc line 0 */
        perr    t0, t2, t0      # error left
        addq    a1, a3, a1      # pix1
        perr    t1, t3, t1      # error right
        addq    v0, t0, v0      # add error left
        /* calc line 1 */
        perr    t4, t6, t0      # error left
        addq    v0, t1, v0      # add error right
        perr    t5, t7, t1      # error right
        addq    v0, t0, v0      # add error left
        /* calc line 2 */
        perr    t8, ta, t0      # error left
        addq    v0, t1, v0      # add error right
        perr    t9, tb, t1      # error right
        addq    v0, t0, v0      # add error left
        /* calc line 3 */
        perr    tc, te, t0      # error left
        addq    v0, t1, v0      # add error right
        perr    td, a0, t1      # error right
        addq    v0, t0, v0      # add error left
        addq    v0, t1, v0      # add error right
        /* loop */
        subq    a4, 4, a4       # h -= 4
        bne     a4, $aligned
        ret

        .end pix_abs16x16_mvi_asm
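
In C terms, the routine above computes a plain sum of absolute differences
(SAD) over a 16-pixel-wide block: each perr (pixel error) MVI instruction
sums |a_i - b_i| over the eight byte pairs of two 64-bit registers, so two
perr per line cover the 16-pixel width. A reference sketch of the same
computation, with a hypothetical name pix_abs16x16_ref (this is not FFmpeg's
actual C fallback):

    #include <stdint.h>
    #include <stdlib.h>

    /* Reference model of pix_abs16x16_mvi_asm: SAD of a 16-pixel-wide,
     * h-line-tall block.  The unused first argument mirrors the asm's
     * (v, pix1, pix2, line_size, h) calling convention, in which a0/v
     * is ignored. */
    static int pix_abs16x16_ref(void *v, const uint8_t *pix1,
                                const uint8_t *pix2, int line_size, int h)
    {
        int sum = 0, i;
        while (h--) {
            for (i = 0; i < 16; i++)
                sum += abs(pix1[i] - pix2[i]); /* perr does 8 of these at once */
            pix1 += line_size;
            pix2 += line_size;
        }
        return sum;
    }

The $unaligned path exists because pix2 need not be 8-byte aligned: ldq_u
fetches the enclosing aligned quadwords, and extql/extqh shift and merge them
into properly aligned 64-bit values before perr compares them against the
aligned reference rows from pix1.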