Mercurial > libavcodec.hg
diff x86/h264_idct_sse2.asm @ 9006:37ac731fe32c libavcodec
Convert x264 asm files to proper unix line breaks
author | darkshikari |
---|---|
date | Sun, 22 Feb 2009 11:35:32 +0000 |
parents | e5c9a3a813ea |
children | c08ca946c80a |
line wrap: on
line diff
;* NOTE(review): the unified diff below was flattened during extraction. The
;* file header, the hunk header and every "-"/"+" diff line are joined by
;* single spaces onto three physical lines, so it cannot be applied with a
;* patch tool as-is and the original indentation/alignment is not recoverable.
;*
;* Hunk "@@ -1,61 +1,61 @@": the removed ("-") and added ("+") sides shown
;* here are textually identical, which is consistent with the changeset
;* description (conversion of the file to unix line breaks).
;*
;* What the file contains on either side of the hunk:
;*   pw_32
;*     Constant placed after SECTION_RODATA: 8 words, each with value 32.
;*   IDCT4_1D (macro, 6 arguments)
;*     The arguments are register numbers; they are used as m%N. Expands to
;*     SUMSUB_BA, SUMSUBD2_AB and SUMSUB_BADC, then SWAP renumbers the
;*     registers. These helper macros are not defined in this file
;*     (presumably they come from the included x86util.asm -- confirm).
;*   x264_add8x4_idct_sse2 (declared with "cglobal ..., 3,3" after INIT_XMM)
;*     - Reads 64 bytes starting at r1 in 8-byte pieces: movq loads offsets
;*       0/8/16/24 into the low halves of m0..m3, movhps loads offsets
;*       32/40/48/56 into the high halves of the same registers.
;*     - Runs IDCT4_1D 0,1,2,3,4,5, then TRANSPOSE2x4x4W 0,1,2,3,4, adds
;*       pw_32 to m0 with paddw, and runs IDCT4_1D a second time.
;*       NOTE(review): the +32 looks like a rounding bias for a later shift
;*       inside STORE_DIFF -- confirm against x86util.asm.
;*     - Clears m7 with pxor, then invokes STORE_DIFF for m0..m3 with m4 and
;*       m7 as extra operands, targeting [r0] and [r0+r2], advancing r0 by
;*       r2*2 between the second and third store.
;*     - NOTE(review): presumably r0 = destination pixels, r1 = coefficient
;*       block, r2 = destination stride -- verify against the C prototype.
--- a/x86/h264_idct_sse2.asm Sun Feb 22 11:33:09 2009 +0000 +++ b/x86/h264_idct_sse2.asm Sun Feb 22 11:35:32 2009 +0000 @@ -1,61 +1,61 @@ -;***************************************************************************** -;* SSE2-optimized H.264 iDCT -;***************************************************************************** -;* Copyright (C) 2003-2008 x264 project -;* -;* Authors: Laurent Aimar <fenrir@via.ecp.fr> -;* Loren Merritt <lorenm@u.washington.edu> -;* Holger Lubitz <hal@duncan.ol.sub.de> -;* Min Chen <chenm001.163.com> -;* -;* This program is free software; you can redistribute it and/or modify -;* it under the terms of the GNU General Public License as published by -;* the Free Software Foundation; either version 2 of the License, or -;* (at your option) any later version. -;* -;* This program is distributed in the hope that it will be useful, -;* but WITHOUT ANY WARRANTY; without even the implied warranty of -;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -;* GNU General Public License for more details. -;* -;* You should have received a copy of the GNU General Public License -;* along with this program; if not, write to the Free Software -;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. 
-;***************************************************************************** - -%include "x86inc.asm" -%include "x86util.asm" - -SECTION_RODATA -pw_32: times 8 dw 32 - -SECTION .text - -%macro IDCT4_1D 6 - SUMSUB_BA m%3, m%1 - SUMSUBD2_AB m%2, m%4, m%6, m%5 - SUMSUB_BADC m%2, m%3, m%5, m%1 - SWAP %1, %2, %5, %4, %3 -%endmacro - -INIT_XMM -cglobal x264_add8x4_idct_sse2, 3,3 - movq m0, [r1+ 0] - movq m1, [r1+ 8] - movq m2, [r1+16] - movq m3, [r1+24] - movhps m0, [r1+32] - movhps m1, [r1+40] - movhps m2, [r1+48] - movhps m3, [r1+56] - IDCT4_1D 0,1,2,3,4,5 - TRANSPOSE2x4x4W 0,1,2,3,4 - paddw m0, [pw_32 GLOBAL] - IDCT4_1D 0,1,2,3,4,5 - pxor m7, m7 - STORE_DIFF m0, m4, m7, [r0] - STORE_DIFF m1, m4, m7, [r0+r2] - lea r0, [r0+r2*2] - STORE_DIFF m2, m4, m7, [r0] - STORE_DIFF m3, m4, m7, [r0+r2] - RET +;***************************************************************************** +;* SSE2-optimized H.264 iDCT +;***************************************************************************** +;* Copyright (C) 2003-2008 x264 project +;* +;* Authors: Laurent Aimar <fenrir@via.ecp.fr> +;* Loren Merritt <lorenm@u.washington.edu> +;* Holger Lubitz <hal@duncan.ol.sub.de> +;* Min Chen <chenm001.163.com> +;* +;* This program is free software; you can redistribute it and/or modify +;* it under the terms of the GNU General Public License as published by +;* the Free Software Foundation; either version 2 of the License, or +;* (at your option) any later version. +;* +;* This program is distributed in the hope that it will be useful, +;* but WITHOUT ANY WARRANTY; without even the implied warranty of +;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;* GNU General Public License for more details. +;* +;* You should have received a copy of the GNU General Public License +;* along with this program; if not, write to the Free Software +;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. 
+;***************************************************************************** + +%include "x86inc.asm" +%include "x86util.asm" + +SECTION_RODATA +pw_32: times 8 dw 32 + +SECTION .text + +%macro IDCT4_1D 6 + SUMSUB_BA m%3, m%1 + SUMSUBD2_AB m%2, m%4, m%6, m%5 + SUMSUB_BADC m%2, m%3, m%5, m%1 + SWAP %1, %2, %5, %4, %3 +%endmacro + +INIT_XMM +cglobal x264_add8x4_idct_sse2, 3,3 + movq m0, [r1+ 0] + movq m1, [r1+ 8] + movq m2, [r1+16] + movq m3, [r1+24] + movhps m0, [r1+32] + movhps m1, [r1+40] + movhps m2, [r1+48] + movhps m3, [r1+56] + IDCT4_1D 0,1,2,3,4,5 + TRANSPOSE2x4x4W 0,1,2,3,4 + paddw m0, [pw_32 GLOBAL] + IDCT4_1D 0,1,2,3,4,5 + pxor m7, m7 + STORE_DIFF m0, m4, m7, [r0] + STORE_DIFF m1, m4, m7, [r0+r2] + lea r0, [r0+r2*2] + STORE_DIFF m2, m4, m7, [r0] + STORE_DIFF m3, m4, m7, [r0+r2] + RET