annotate x86/h264_idct_sse2.asm @ 8643:1fa3e8a72ca5 libavcodec

Do not allocate RoqTempData on the stack
author vitor
date Sat, 24 Jan 2009 08:15:43 +0000
parents cea216e44ee3
children e5c9a3a813ea
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
8510
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
1 ;*****************************************************************************
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
2 ;* dct-a.asm: h264 encoder library
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
3 ;*****************************************************************************
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
4 ;* Copyright (C) 2003-2008 x264 project
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
5 ;*
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
6 ;* Authors: Laurent Aimar <fenrir@via.ecp.fr>
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
7 ;* Loren Merritt <lorenm@u.washington.edu>
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
8 ;* Holger Lubitz <hal@duncan.ol.sub.de>
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
9 ;* Min Chen <chenm001.163.com>
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
10 ;*
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
11 ;* This program is free software; you can redistribute it and/or modify
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
12 ;* it under the terms of the GNU General Public License as published by
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
13 ;* the Free Software Foundation; either version 2 of the License, or
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
14 ;* (at your option) any later version.
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
15 ;*
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
16 ;* This program is distributed in the hope that it will be useful,
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
17 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
18 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
19 ;* GNU General Public License for more details.
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
20 ;*
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
21 ;* You should have received a copy of the GNU General Public License
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
22 ;* along with this program; if not, write to the Free Software
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
23 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
24 ;*****************************************************************************
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
25
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
26 %include "x86inc.asm"
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
27 %include "x86util.asm"
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
28
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
29 SECTION_RODATA
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
30 pw_32: times 8 dw 32 ; eight 16-bit words, each equal to 32; added lane-wise to m0 (paddw) between the two IDCT4_1D passes of x264_add8x4_idct_sse2
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
31
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
32 SECTION .text
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
33
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
34 %macro IDCT4_1D 6 ; one 1-D transform pass over registers m%1..m%4; %5 and %6 are two further register indices handed to the helper macros (presumably scratch -- confirm). The caller in this file passes 0,1,2,3,4,5.
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
35 SUMSUB_BA m%3, m%1 ; helper not defined in this file (presumably from x86util.asm); by its name, a sum/difference of m%1 and m%3 -- confirm against its definition
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
36 SUMSUBD2_AB m%2, m%4, m%6, m%5 ; handles the m%2/m%4 pair with m%6 and m%5 as extra operands; the "D2" presumably denotes halved terms -- TODO confirm against the helper's definition
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
37 SUMSUB_BADC m%2, m%3, m%5, m%1 ; combines the two intermediate pairs; exact operand roles depend on the helper's definition, which is not visible here
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
38 SWAP %1, %2, %5, %4, %3 ; permutes the register indices (presumably an assembly-time rename that emits no instructions -- confirm), presumably so the results are addressed as m%1..m%4 again
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
39 %endmacro
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
40
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
41 INIT_XMM ; x86inc mode switch; presumably maps the m# names used below onto xmm registers (m0..m7 = xmm0..xmm7) -- confirm
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
42 cglobal x264_add8x4_idct_sse2, 3,3 ; reads 64 bytes at r1, runs two IDCT4_1D passes with a transpose in between, then issues four STORE_DIFF rows at r0 with step r2. Presumably r0 = destination pixels, r1 = 16-bit coefficients, r2 = destination stride -- confirm against the C prototype
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
43 movq m0, [r1+ 0] ; low 8 bytes of m0..m3 are loaded from r1+0, +8, +16, +24
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
44 movq m1, [r1+ 8]
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
45 movq m2, [r1+16]
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
46 movq m3, [r1+24]
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
47 movhps m0, [r1+32] ; high 8 bytes of m0..m3 are loaded from r1+32, +40, +48, +56, so each register pairs data from offset k with data from offset k+32
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
48 movhps m1, [r1+40]
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
49 movhps m2, [r1+48]
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
50 movhps m3, [r1+56]
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
51 IDCT4_1D 0,1,2,3,4,5 ; first 1-D pass on m0..m3, with 4 and 5 as the extra register indices
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
52 TRANSPOSE2x4x4W 0,1,2,3,4 ; helper defined outside this file; by its name, transposes two 4x4 blocks of words held in m0..m3 using m4 as an extra register -- confirm
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
53 paddw m0, [pw_32 GLOBAL] ; add 32 to every 16-bit lane of m0 only; presumably the rounding bias for a later >>6 inside STORE_DIFF -- confirm
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
54 IDCT4_1D 0,1,2,3,4,5 ; second 1-D pass, on the transposed data
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
55 pxor m7, m7 ; m7 = 0, passed to every STORE_DIFF below. NOTE(review): if m7 is xmm7 it is callee-saved under the Win64 ABI -- confirm the cglobal/RET macros in use account for that
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
56 STORE_DIFF m0, m4, m7, [r0] ; helper defined outside this file; by its name, adds the residual in m0 to the pixels at [r0] and stores them back, with m4 as a temporary -- confirm
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
57 STORE_DIFF m1, m4, m7, [r0+r2] ; same for m1 at r0 + r2
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
58 lea r0, [r0+r2*2] ; advance r0 by 2*r2 for the next pair of stores
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
59 STORE_DIFF m2, m4, m7, [r0] ; same for m2 at the advanced r0
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
60 STORE_DIFF m3, m4, m7, [r0+r2] ; same for m3 at the advanced r0 + r2
cea216e44ee3 Add x264 SSE2 iDCT functions to H.264 decoder.
darkshikari
parents:
diff changeset
61 RET ; x86inc return macro (presumably undoes whatever cglobal set up -- confirm)