Mercurial > libavcodec.hg
view alpha/motion_est_mvi_asm.S @ 2601:c31a28f27d9a libavcodec
increasing precision of the quantization parameter
this is needed because the quantization step size for each subband is also stored at this precision; otherwise insignificant changes to the wavelet, like scaling its coefficients slightly differently, would lead to wildly varying PSNR and bitrate
note: an encoder could also simply choose to leave the least significant bits of the quantization parameters zero, which would give exactly the previous behaviour except for a very tiny number of extra bits in the header
author | michael |
---|---|
date | Sat, 09 Apr 2005 22:15:48 +0000 |
parents | 503496800167 |
children | ef2149182f1c |
line wrap: on
line source
/*
 * Alpha optimized DSP utils
 * Copyright (c) 2002 Falk Hueffner <falk@debian.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

#include "regdef.h"

/* Some nicer register names: extra temporaries beyond t0..t9.
   td aliases AT, which is why ".set noat" is used below.  */
#define ta t10
#define tb t11
#define tc t12
#define td AT
/* Danger: these overlap with the argument list and the return value.
   te/tf (a5/a4) are used as temporaries in the aligned loop.
   tg aliases a3, which this function uses as its row counter, and
   th aliases v0, which is the error accumulator / return value;
   neither tg nor th is referenced in the code below.  */
#define te a5
#define tf a4
#define tg a3
#define th v0

        .set noat
        .set noreorder
        .arch pca56
        .text

/*****************************************************************************
 * int pix_abs16x16_mvi_asm(uint8_t *pix1, uint8_t *pix2, int line_size)
 *
 * Accumulates the MVI "perr" (pixel error) results of two quadwords
 * (16 bytes) per row over 16 rows and returns the total.
 *
 * In:      a0 = pix1 (reference block), a1 = pix2, a2 = line_size
 * Out:     v0 = accumulated error
 * Clobber: t0-t9, t10-t12 (ta-tc), AT (td), a0, a1, a3, a4, a5
 * Stack:   none (leaf function, frame size 0)
 *
 * The row count is hard-coded to 16 (loaded into a3).
 *
 * Alignment is tested once, on pix2 only: if (pix2 & 7) == 0 the
 * $aligned loop (4 rows per iteration, plain ldq) is used, otherwise
 * the $unaligned loop (2 rows per iteration, ldq_u + extql/extqh).
 * pix1 is read with ldq in both loops.
 * NOTE(review): this presumably requires pix1 to be 8-byte aligned and
 * line_size to be a multiple of 8 so that the alignment of both
 * pointers is the same on every row -- confirm against callers.
 *
 * This code is written with a pca56 in mind. For ev6, one should
 * really take the increased latency of 3 cycles for MVI instructions
 * into account.
 *
 * It is important to keep the loading and first use of a register as
 * far apart as possible, because if a register is accessed before it
 * has been fetched from memory, the CPU will stall.  The instruction
 * order below is therefore deliberate; do not reorder casually.
 */
        .align 4
        .globl pix_abs16x16_mvi_asm
        .ent pix_abs16x16_mvi_asm
pix_abs16x16_mvi_asm:
        .frame sp, 0, ra, 0
        .prologue 0

        /* Profiling hook: call _mcount when built with HAVE_GPROF.  */
#ifdef HAVE_GPROF
        lda     AT, _mcount
        jsr     AT, (AT), _mcount
#endif

        and     a1, 7, t0               # t0 = pix2 & 7 (byte misalignment)
        clr     v0                      # error accumulator = 0
        lda     a3, 16                  # h = 16 rows remaining
        beq     t0, $aligned            # pix2 quadword-aligned: fast path

        .align 4
$unaligned:
        /* Two rows per iteration; a3 = rows remaining (even, > 0 here).

           Registers:

           line 0:
           t0:  left_u -> left lo -> left
           t1:  mid
           t2:  right_u -> right hi -> right
           t3:  ref left
           t4:  ref right

           line 1:
           t5:  left_u -> left lo -> left
           t6:  mid
           t7:  right_u -> right hi -> right
           t8:  ref left
           t9:  ref right

           temp:
           ta:  left hi
           tb:  right lo
           tc:  error left
           td:  error right  */

        /* load line 0: three quadwords covering the 16 unaligned bytes
           of pix2, plus the two quadwords of pix1 */
        ldq_u   t0, 0(a1)               # left_u
        ldq_u   t1, 8(a1)               # mid
        ldq_u   t2, 16(a1)              # right_u
        ldq     t3, 0(a0)               # ref left
        ldq     t4, 8(a0)               # ref right
        addq    a0, a2, a0              # pix1 += line_size
        addq    a1, a2, a1              # pix2 += line_size

        /* load line 1 */
        ldq_u   t5, 0(a1)               # left_u
        ldq_u   t6, 8(a1)               # mid
        ldq_u   t7, 16(a1)              # right_u
        ldq     t8, 0(a0)               # ref left
        ldq     t9, 8(a0)               # ref right
        addq    a0, a2, a0              # pix1 += line_size
        addq    a1, a2, a1              # pix2 += line_size

        /* calc line 0
           NOTE(review): a1 has already been advanced past both rows, so
           the extql/extqh byte offsets taken from it match the rows
           loaded above only if line_size keeps the low 3 bits of a1
           unchanged (presumably line_size % 8 == 0) -- confirm.  */
        extql   t0, a1, t0              # left lo
        extqh   t1, a1, ta              # left hi
        extql   t1, a1, tb              # right lo
        or      t0, ta, t0              # left
        extqh   t2, a1, t2              # right hi
        perr    t3, t0, tc              # error left
        or      t2, tb, t2              # right
        perr    t4, t2, td              # error right
        addq    v0, tc, v0              # add error left
        addq    v0, td, v0              # add error right

        /* calc line 1 (same sequence on t5..t9) */
        extql   t5, a1, t5              # left lo
        extqh   t6, a1, ta              # left hi
        extql   t6, a1, tb              # right lo
        or      t5, ta, t5              # left
        extqh   t7, a1, t7              # right hi
        perr    t8, t5, tc              # error left
        or      t7, tb, t7              # right
        perr    t9, t7, td              # error right
        addq    v0, tc, v0              # add error left
        addq    v0, td, v0              # add error right

        /* loop */
        subq    a3, 2, a3               # h -= 2
        bne     a3, $unaligned

        ret

        .align 4
$aligned:
        /* Four rows per iteration; a3 = rows remaining (multiple of 4,
           > 0 here).  All eight pix2 quadwords and eight pix1 quadwords
           are loaded first, then the perr/addq pairs are interleaved so
           each addq consumes the perr result from an earlier step.  */

        /* load line 0 */
        ldq     t0, 0(a1)               # left
        ldq     t1, 8(a1)               # right
        addq    a1, a2, a1              # pix2 += line_size
        ldq     t2, 0(a0)               # ref left
        ldq     t3, 8(a0)               # ref right
        addq    a0, a2, a0              # pix1 += line_size

        /* load line 1 */
        ldq     t4, 0(a1)               # left
        ldq     t5, 8(a1)               # right
        addq    a1, a2, a1              # pix2 += line_size
        ldq     t6, 0(a0)               # ref left
        ldq     t7, 8(a0)               # ref right
        addq    a0, a2, a0              # pix1 += line_size

        /* load line 2 */
        ldq     t8, 0(a1)               # left
        ldq     t9, 8(a1)               # right
        addq    a1, a2, a1              # pix2 += line_size
        ldq     ta, 0(a0)               # ref left
        ldq     tb, 8(a0)               # ref right
        addq    a0, a2, a0              # pix1 += line_size

        /* load line 3 (the pix1 advance for this row is issued in
           "calc line 0" below) */
        ldq     tc, 0(a1)               # left
        ldq     td, 8(a1)               # right
        addq    a1, a2, a1              # pix2 += line_size
        ldq     te, 0(a0)               # ref left
        ldq     tf, 8(a0)               # ref right

        /* calc line 0 */
        perr    t0, t2, t0              # error left
        addq    a0, a2, a0              # pix1 += line_size (for line 3)
        perr    t1, t3, t1              # error right
        addq    v0, t0, v0              # add error left (line 0)

        /* calc line 1 */
        perr    t4, t6, t0              # error left
        addq    v0, t1, v0              # add error right (line 0)
        perr    t5, t7, t1              # error right
        addq    v0, t0, v0              # add error left (line 1)

        /* calc line 2 */
        perr    t8, ta, t0              # error left
        addq    v0, t1, v0              # add error right (line 1)
        perr    t9, tb, t1              # error right
        addq    v0, t0, v0              # add error left (line 2)

        /* calc line 3 */
        perr    tc, te, t0              # error left
        addq    v0, t1, v0              # add error right (line 2)
        perr    td, tf, t1              # error right
        addq    v0, t0, v0              # add error left (line 3)
        addq    v0, t1, v0              # add error right (line 3)

        /* loop */
        subq    a3, 4, a3               # h -= 4
        bne     a3, $aligned

        ret
        .end pix_abs16x16_mvi_asm