Mercurial > libavcodec.hg
view x86/h264_idct_sse2.asm @ 8969:9218ef5d5afb libavcodec
Clarify get_ue_golomb_31() behavior with >31.
author | michael |
---|---|
date | Wed, 18 Feb 2009 11:57:11 +0000 |
parents | cea216e44ee3 |
children | e5c9a3a813ea |
line wrap: on
line source
;*****************************************************************************
;* dct-a.asm: h264 encoder library
;*****************************************************************************
;* Copyright (C) 2003-2008 x264 project
;*
;* Authors: Laurent Aimar <fenrir@via.ecp.fr>
;*          Loren Merritt <lorenm@u.washington.edu>
;*          Holger Lubitz <hal@duncan.ol.sub.de>
;*          Min Chen <chenm001.163.com>
;*
;* This program is free software; you can redistribute it and/or modify
;* it under the terms of the GNU General Public License as published by
;* the Free Software Foundation; either version 2 of the License, or
;* (at your option) any later version.
;*
;* This program is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;* GNU General Public License for more details.
;*
;* You should have received a copy of the GNU General Public License
;* along with this program; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
;*****************************************************************************

; Syntax: NASM/yasm, using the macro layer pulled in by the two includes below.
; cglobal, INIT_XMM, SWAP, RET, GLOBAL, SECTION_RODATA, the rN/mN register
; aliases, and the SUMSUB*/TRANSPOSE*/STORE_DIFF helpers are not defined in
; this file; they are presumably provided by these includes.
%include "x86inc.asm"
%include "x86util.asm"

SECTION_RODATA

; Eight 16-bit words, each 32: one full 128-bit vector of the constant.
pw_32: times 8 dw 32

SECTION .text

;-----------------------------------------------------------------------------
; IDCT4_1D a, b, c, d, t0, t1
;
; One 1-D pass of the 4-point inverse transform, built from three butterfly
; helper macros followed by a SWAP.
;   %1-%4 : register indices of the four vectors being transformed
;   %5,%6 : two further register indices handed to the helpers
;           (presumably scratch -- confirm against x86util.asm)
; All arguments are bare indices; the macro prepends "m" itself.
; The trailing SWAP permutes the indices %1,%2,%5,%4,%3.
; NOTE(review): SWAP is assumed to be the x86inc register-renaming macro,
; which would leave the four results named m%1..m%4 again; the caller below
; relies on that by reusing indices 0-3 after each invocation.
;-----------------------------------------------------------------------------
%macro IDCT4_1D 6
    SUMSUB_BA m%3, m%1
    SUMSUBD2_AB m%2, m%4, m%6, m%5
    SUMSUB_BADC m%2, m%3, m%5, m%1
    SWAP %1, %2, %5, %4, %3
%endmacro

INIT_XMM
;-----------------------------------------------------------------------------
; x264_add8x4_idct_sse2
;
; Loads 64 bytes from [r1], runs IDCT4_1D / transpose / IDCT4_1D over them,
; and hands the four result vectors to STORE_DIFF for rows at r0, r0+r2,
; r0+2*r2 and r0+3*r2.
;
; Register use as visible in this routine:
;   r0 : destination address passed to STORE_DIFF (advanced by 2*r2 mid-way)
;   r1 : source address, read at offsets 0..63
;   r2 : distance between consecutive destination rows
;   m0-m3 : data vectors, m4/m5 : extra registers given to the helper macros,
;   m7 : zeroed and passed to STORE_DIFF
;
; "3,3" are the cglobal parameters; presumably 3 arguments and 3 general
; purpose registers -- confirm against x86inc.asm.
; Presumably called from C as (uint8_t *dst, int16_t *block, int stride);
; the prototype is not visible here -- TODO confirm.
;
; NOTE(review): r2 is used at full register width in address calculations.
;   If the C argument is a 32-bit int, confirm that its upper half is
;   sign-extended on x86-64 before this point.
; NOTE(review): m7 is written; with INIT_XMM that is presumably xmm7, which
;   is callee-saved under the Microsoft x64 ABI -- confirm whether Win64 is
;   a supported target for this file.
;-----------------------------------------------------------------------------
cglobal x264_add8x4_idct_sse2, 3,3
    ; Low 8 bytes of m0..m3 come from [r1+0..31], high 8 bytes from
    ; [r1+32..63]: each register pairs an 8-byte group from the first
    ; 32-byte half with the matching group from the second half.
    ; (Presumably two adjacent 4x4 blocks of 16-bit coefficients, one row of
    ; each per register -- confirm against the caller's layout.)
    movq m0, [r1+ 0]
    movq m1, [r1+ 8]
    movq m2, [r1+16]
    movq m3, [r1+24]
    movhps m0, [r1+32]
    movhps m1, [r1+40]
    movhps m2, [r1+48]
    movhps m3, [r1+56]
    ; First 1-D pass, then transpose so the second pass runs along the other
    ; dimension.
    IDCT4_1D 0,1,2,3,4,5
    TRANSPOSE2x4x4W 0,1,2,3,4
    ; Add 32 to every word of m0 before the second pass. Presumably the
    ; rounding bias for a later right shift by 6 inside STORE_DIFF -- confirm.
    ; GLOBAL is presumably the x86inc PIC-addressing helper.
    paddw m0, [pw_32 GLOBAL]
    IDCT4_1D 0,1,2,3,4,5
    ; m7 = 0, supplied to STORE_DIFF as its third operand.
    pxor m7, m7
    ; Rows 0 and 1.
    STORE_DIFF m0, m4, m7, [r0]
    STORE_DIFF m1, m4, m7, [r0+r2]
    ; Step two rows down; lea leaves the flags untouched.
    lea r0, [r0+r2*2]
    ; Rows 2 and 3.
    STORE_DIFF m2, m4, m7, [r0]
    STORE_DIFF m3, m4, m7, [r0+r2]
    RET