view arm/mpegvideo_neon.S @ 12530:63edd10ad4bc libavcodec tip

Try to fix crashes introduced by r25218. r25218 made assumptions about the existence of past reference frames that weren't necessarily true.
author darkshikari
date Tue, 28 Sep 2010 09:06:22 +0000
parents fda5ab5d31bb
children
line wrap: on
line source

/*
 * Copyright (c) 2010 Mans Rullgard
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "asm.S"
#include "asm-offsets.h"

/*
 * ff_dct_unquantize_h263_inter_neon
 *
 * Entry stub that derives the register arguments expected by
 * ff_dct_unquantize_h263_neon and then runs straight into it: there is
 * no return or branch before endfunc, so this relies on
 * ff_dct_unquantize_h263_neon being placed immediately after this stub.
 * NOTE(review): assumes the function/endfunc macros from asm.S emit no
 * padding or code between the two bodies -- confirm.
 *
 * In:   r0 = base address that the BLOCK_LAST_INDEX and
 *            INTER_SCANTAB_RASTER_END offsets are relative to
 *            (presumably the codec context -- confirm against caller)
 *       r1 = not touched here; used by the routine below as the
 *            coefficient block pointer
 *       r2 = index into the array of 32-bit words at BLOCK_LAST_INDEX
 *       r3 = value that the multiplier and offset are derived from
 *            (presumably qscale -- confirm)
 * Out (as consumed by ff_dct_unquantize_h263_neon):
 *       r0 = r3 * 2                 (qmul)
 *       r2 = (r3 - 1) | 1           (qadd)
 *       r3 = raster-end byte + 1    (coefficient count)
 * Clobbers: r12
 */
function ff_dct_unquantize_h263_inter_neon, export=1
        @ Both offsets are reached in two steps through a 0x2200 bias.
        @ NOTE(review): presumably because the full offsets cannot be
        @ encoded as a single immediate -- confirm against asm-offsets.h.
        add             r0,  r0,  #0x2200
        @ r12 = address of the BLOCK_LAST_INDEX array
        add             r12, r0,  #BLOCK_LAST_INDEX-0x2200
        @ r12 = 32-bit entry number r2 of that array
        ldr             r12, [r12, r2, lsl #2]
        @ r0 = address of the INTER_SCANTAB_RASTER_END byte table
        add             r0,  r0,  #INTER_SCANTAB_RASTER_END-0x2200
        @ r12 = table byte selected by the entry loaded above
        ldrb            r12, [r0, r12]
        @ r2 = (r3 - 1) | 1, completed by the orr below; the index held
        @ in r2 has already been consumed by the ldr above
        sub             r2,  r3,  #1
        @ r0 = r3 * 2; the table base in r0 is no longer needed
        lsl             r0,  r3,  #1
        orr             r2,  r2,  #1
        @ r3 = number of coefficients to process (table byte + 1)
        add             r3,  r12, #1
        @ no return here: execution continues into the next function
endfunc

/*
 * ff_dct_unquantize_h263_neon
 *
 * Shared kernel. For every signed 16-bit coefficient x in the block:
 *     x == 0  ->  left as 0
 *     x  > 0  ->  x * qmul + qadd
 *     x  < 0  ->  x * qmul - qadd
 * Arithmetic is done in 16-bit lanes (vmul.s16 / vadd.s16), so results
 * are truncated to 16 bits.
 *
 * In:   r0 = qmul (low 16 bits are used)
 *       r1 = pointer to the coefficients; accessed with a 128-bit
 *            alignment hint in the main loop and a 64-bit hint in the
 *            tail
 *       r2 = qadd (low 16 bits are used)
 *       r3 = number of coefficients to process
 * Out:  coefficients rewritten in place
 * Clobbers: r0, r1, r3, flags, q0-q3, q8-q11, q13-q15
 * r2, r4-r12 and lr are not written; ff_dct_unquantize_h263_intra_neon
 * in this file relies on r4 and r5 surviving a call to this routine.
 *
 * Work is done in chunks: 16 coefficients per main-loop iteration and
 * one final chunk of 4 in the tail, so more coefficients than r3 may
 * be touched. Zero coefficients are left unchanged by the selection
 * logic.
 */
function ff_dct_unquantize_h263_neon, export=1
        vdup.16         q15, r0                 @ q15 = qmul in every lane
        vdup.16         q14, r2                 @ q14 = qadd in every lane
        vneg.s16        q13, q14                @ q13 = -qadd in every lane
        cmp             r3,  #4
        @ r0 becomes the read pointer, r1 stays the write pointer; mov
        @ leaves the flags from the cmp above intact for the ble
        mov             r0,  r1
        @ 4 or fewer coefficients: only the tail is needed
        ble             2f
1:
        @ Main loop: two q registers = 16 coefficients per iteration.
        @ The two halves (q0.. and q8..) are interleaved.
        vld1.16         {q0},     [r0,:128]!
        vclt.s16        q3,  q0,  #0            @ q3  = mask of negative lanes
        vld1.16         {q8},     [r0,:128]!
        vceq.s16        q1,  q0,  #0            @ q1  = mask of zero lanes
        vmul.s16        q2,  q0,  q15           @ q2  = x * qmul
        vclt.s16        q11, q8,  #0            @ q11 = mask of negative lanes
        vmul.s16        q10, q8,  q15           @ q10 = x * qmul
        vbsl            q3,  q13, q14           @ q3  = negative ? -qadd : qadd
        vbsl            q11, q13, q14           @ q11 = negative ? -qadd : qadd
        vadd.s16        q2,  q2,  q3            @ q2  = x * qmul +/- qadd
        vceq.s16        q9,  q8,  #0            @ q9  = mask of zero lanes
        vadd.s16        q10, q10, q11           @ q10 = x * qmul +/- qadd
        vbif            q0,  q2,  q1            @ take result where x is nonzero
        vbif            q8,  q10, q9            @ take result where x is nonzero
        @ r3 = coefficients still to do; the flags set here are consumed
        @ by bxle below (the two stores do not change them)
        subs            r3,  r3,  #16
        vst1.16         {q0},     [r1,:128]!
        vst1.16         {q8},     [r1,:128]!
        @ nothing left: return
        bxle            lr
        @ more than 8 left: run another 16-wide iteration
        cmp             r3,  #8
        bgt             1b
2:
        @ Tail: one d register = 4 coefficients, same computation as the
        @ main loop. r0 and r1 point at the same address here because
        @ both were advanced by 32 bytes per main-loop iteration.
        @ NOTE(review): a remainder of 5..8 after the main loop also
        @ lands here and only 4 coefficients are processed; presumably
        @ the callers never produce such a count -- confirm.
        vld1.16         {d0},     [r0,:64]
        vclt.s16        d3,  d0,  #0            @ d3 = mask of negative lanes
        vceq.s16        d1,  d0,  #0            @ d1 = mask of zero lanes
        vmul.s16        d2,  d0,  d30           @ d30 = low half of q15 (qmul)
        vbsl            d3,  d26, d28           @ d26/d28 = low halves of q13/q14
        vadd.s16        d2,  d2,  d3            @ d2 = x * qmul +/- qadd
        vbif            d0,  d2,  d1            @ take result where x is nonzero
        vst1.16         {d0},     [r1,:64]
        bx              lr
endfunc

/*
 * ff_dct_unquantize_h263_intra_neon
 *
 * Intra variant: works out the coefficient count, qmul and qadd,
 * computes the value for the first coefficient separately, calls
 * ff_dct_unquantize_h263_neon on the whole block and finally
 * overwrites the first coefficient with the separately computed value.
 *
 * In:   r0 = base address that the AC_PRED, H263_AIC, Y_DC_SCALE,
 *            BLOCK_LAST_INDEX and INTER_SCANTAB_RASTER_END offsets are
 *            relative to (presumably the codec context -- confirm)
 *       r1 = pointer to the 16-bit coefficient block
 *       r2 = block number: indexes the BLOCK_LAST_INDEX array and
 *            selects which scale word is used (r2 < 4 or r2 >= 4)
 *       r3 = value that qmul and qadd are derived from
 *            (presumably qscale -- confirm)
 * Out:  block rewritten in place
 * Stack: pushes r4-r6 and lr (16 bytes), restored on return.
 *
 * Register roles across the call:
 *       r4  = value to store back into the first coefficient
 *       r5  = saved block pointer
 *       r12 = index of the last coefficient to process
 */
function ff_dct_unquantize_h263_intra_neon, export=1
        push            {r4-r6,lr}
        ldr             r6,  [r0, #AC_PRED]
        @ r5 = biased base used for the H263_AIC load at label 1
        @ NOTE(review): the 0x2700 and 0x2200 biases presumably exist
        @ because the full offsets cannot be encoded directly -- confirm.
        add             r5,  r0,  #0x2700
        cmp             r6,  #0
        @ AC_PRED word nonzero: process the whole block (last index 63)
        movne           r12, #63
        bne             1f
        @ otherwise look the last index up:
        @ r12 = RASTER_END byte [ BLOCK_LAST_INDEX word [r2] ]
        add             lr,  r0,  #0x2200
        add             r12, lr,  #BLOCK_LAST_INDEX-0x2200
        add             lr,  lr,  #INTER_SCANTAB_RASTER_END-0x2200
        ldr             r12, [r12, r2, lsl #2]
        ldrb            r12, [lr, r12]
        @ r5 = H263_AIC word
1:      ldr             r5,  [r5, #H263_AIC-0x2700]
        @ r4 = first coefficient, sign-extended
        ldrsh           r4,  [r1]
        cmp             r5,  #0
        @ r5 = block pointer, kept for the final store; mov does not
        @ disturb the flags from the cmp above
        mov             r5,  r1
        @ H263_AIC word nonzero: qadd = 0 and the first coefficient is
        @ restored unscaled after the call
        movne           r2,  #0
        bne             2f
        @ r2 >= 4: step r0 by one word so the load below reads the word
        @ after Y_DC_SCALE (presumably the chroma DC scale -- confirm)
        cmp             r2,  #4
        addge           r0,  r0,  #4
        @ r2 = qadd = (r3 - 1) | 1, completed by the orr below
        sub             r2,  r3,  #1
        ldr             r6,  [r0, #Y_DC_SCALE]
        orr             r2,  r2,  #1
        @ r4 = first coefficient * scale (16 x 16 -> 32 bit multiply of
        @ the low halfwords)
        smulbb          r4,  r4,  r6
        @ r0 = qmul = r3 * 2
2:      lsl             r0,  r3,  #1
        @ r3 = coefficient count = last index + 1
        add             r3,  r12, #1
        bl              ff_dct_unquantize_h263_neon
        @ Replace whatever the shared routine wrote to the first
        @ coefficient with the low 16 bits of r4
        vmov.16         d0[0], r4
        vst1.16         {d0[0]},  [r5]
        pop             {r4-r6,pc}
endfunc