Mercurial > libavcodec.hg
annotate x86/h264_idct_sse2.asm @ 9005:e5c9a3a813ea libavcodec
Remove (incorrect) filenames from x264 asm files, add descriptions.
author | darkshikari |
---|---|
date | Sun, 22 Feb 2009 11:33:09 +0000 |
parents | cea216e44ee3 |
children | 37ac731fe32c |
rev | line source |
---|---|
8510 | 1 ;***************************************************************************** |
9005
e5c9a3a813ea
Remove (incorrect) filenames from x264 asm files, add descriptions.
darkshikari
parents:
8510
diff
changeset
|
2 ;* SSE2-optimized H.264 iDCT |
8510 | 3 ;***************************************************************************** |
4 ;* Copyright (C) 2003-2008 x264 project | |
5 ;* | |
6 ;* Authors: Laurent Aimar <fenrir@via.ecp.fr> | |
7 ;* Loren Merritt <lorenm@u.washington.edu> | |
8 ;* Holger Lubitz <hal@duncan.ol.sub.de> | |
9 ;* Min Chen <chenm001.163.com> | |
10 ;* | |
11 ;* This program is free software; you can redistribute it and/or modify | |
12 ;* it under the terms of the GNU General Public License as published by | |
13 ;* the Free Software Foundation; either version 2 of the License, or | |
14 ;* (at your option) any later version. | |
15 ;* | |
16 ;* This program is distributed in the hope that it will be useful, | |
17 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of | |
18 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
19 ;* GNU General Public License for more details. | |
20 ;* | |
21 ;* You should have received a copy of the GNU General Public License | |
22 ;* along with this program; if not, write to the Free Software | |
23 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. | |
24 ;***************************************************************************** | |
25 | |
26 %include "x86inc.asm" | |
27 %include "x86util.asm" | |
28 | |
; Read-only constants.
; pw_32: eight 16-bit words, each equal to 32 (16 bytes, one full XMM register).
; It is added to m0 with paddw between the two IDCT4_1D passes of
; x264_add8x4_idct_sse2.
; NOTE(review): presumably the rounding term for a final >>6 performed inside
; STORE_DIFF (defined outside this file) -- confirm.
29 SECTION_RODATA | |
30 pw_32: times 8 dw 32 | |
31 | |
32 SECTION .text | |
33 | |
;-----------------------------------------------------------------------------
; IDCT4_1D  a, b, c, d, t0, t1
; One 1-D pass of the 4-point inverse transform used by the H.264 iDCT in this
; file. All six arguments are register numbers (the macro prefixes them with
; "m" itself).
;   %1-%4 : registers holding the four input vectors
;   %5,%6 : two further registers handed to the helper macros
;           (presumably scratch -- confirm in x86util.asm)
; SUMSUB_BA, SUMSUBD2_AB, SUMSUB_BADC and SWAP are not defined in this file;
; presumably they come from the included x86inc.asm / x86util.asm. The notes
; below on what each helper does are inferred from its name -- TODO confirm.
;-----------------------------------------------------------------------------
34 %macro IDCT4_1D 6 | |
; First butterfly: combines inputs %3 and %1 (presumably sum and difference).
35 SUMSUB_BA m%3, m%1 | |
; Second butterfly on inputs %2 and %4, with %6 and %5 passed as extra
; registers (presumably the half-weighted "d2" terms of the H.264 transform).
36 SUMSUBD2_AB m%2, m%4, m%6, m%5 | |
; Final stage: two butterflies at once, on (%2, %3) and on (%5, %1).
37 SUMSUB_BADC m%2, m%3, m%5, m%1 | |
; Permute the register-number mapping. NOTE(review): presumably this renames
; registers without moving data, so that the outputs end up addressed as
; m%1..m%4 in order -- confirm against x86inc.asm.
38 SWAP %1, %2, %5, %4, %3 | |
39 %endmacro | |
40 | |
;-----------------------------------------------------------------------------
; x264_add8x4_idct_sse2
; Runs two IDCT4_1D passes (with a transpose in between) over 64 bytes of
; 16-bit input and hands the four resulting row registers to STORE_DIFF.
;
; Presumed C prototype (TODO confirm against the C-side declaration):
;   void x264_add8x4_idct_sse2(uint8_t *dst, int16_t *block, int stride)
;
; In:    r0 = base address of the four output rows
;        r1 = base address of the input; bytes [r1+0 .. r1+63] are read
;        r2 = offset from one output row to the next
; Uses:  m0-m5 and m7; r0 is advanced by 2*r2.
; NOTE(review): cglobal is given "3,3" (presumably 3 arguments / 3 GPRs). m7
;   is written here; on Win64 xmm6-xmm15 are callee-saved, so check whether
;   the x86inc.asm in use needs an XMM register count to save/restore it.
; cglobal, TRANSPOSE2x4x4W, STORE_DIFF, GLOBAL and RET are defined outside
; this file; statements about them below are hedged accordingly.
;-----------------------------------------------------------------------------
41 INIT_XMM | |
42 cglobal x264_add8x4_idct_sse2, 3,3 | |
; Load the low 8 bytes of m0..m3 from [r1+0], [r1+8], [r1+16], [r1+24].
43 movq m0, [r1+ 0] | |
44 movq m1, [r1+ 8] | |
45 movq m2, [r1+16] | |
46 movq m3, [r1+24] | |
; Load the high 8 bytes of m0..m3 from the second 32-byte half of the input.
; Presumably the input is two 4x4 blocks of 16-bit coefficients stored back
; to back, so each register holds one row of each block -- confirm with caller.
47 movhps m0, [r1+32] | |
48 movhps m1, [r1+40] | |
49 movhps m2, [r1+48] | |
50 movhps m3, [r1+56] | |
; First 1-D pass over m0..m3, with m4/m5 as the extra registers.
51 IDCT4_1D 0,1,2,3,4,5 | |
; Transpose between the passes (presumably each 4x4 word block separately,
; using m4 as a temporary -- confirm in x86util.asm).
52 TRANSPOSE2x4x4W 0,1,2,3,4 | |
; Add 32 to every 16-bit lane of m0 before the second pass.
53 paddw m0, [pw_32 GLOBAL] | |
; Second 1-D pass.
54 IDCT4_1D 0,1,2,3,4,5 | |
; m7 = 0; it is passed as the third operand of every STORE_DIFF below.
55 pxor m7, m7 | |
; Rows 0 and 1 go to [r0] and [r0+r2]. NOTE(review): STORE_DIFF presumably
; scales the result, adds it to the existing pixels and stores them with
; saturation, using m4 as a temporary -- confirm in x86util.asm.
56 STORE_DIFF m0, m4, m7, [r0] | |
57 STORE_DIFF m1, m4, m7, [r0+r2] | |
; Advance r0 by two rows (r0 += 2*r2).
58 lea r0, [r0+r2*2] | |
; Rows 2 and 3.
59 STORE_DIFF m2, m4, m7, [r0] | |
60 STORE_DIFF m3, m4, m7, [r0+r2] | |
61 RET |