annotate x86/h264_idct.asm @ 12530:63edd10ad4bc libavcodec tip

Try to fix crashes introduced by r25218. r25218 made assumptions about the existence of past reference frames that weren't necessarily true.
author darkshikari
date Tue, 28 Sep 2010 09:06:22 +0000
parents 0b6bd91bbe57
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
12492
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
1 ;*****************************************************************************
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
2 ;* MMX/SSE2-optimized H.264 iDCT
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
3 ;*****************************************************************************
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
4 ;* Copyright (C) 2004-2005 Michael Niedermayer, Loren Merritt
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
5 ;* Copyright (C) 2003-2008 x264 project
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
6 ;*
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
7 ;* Authors: Laurent Aimar <fenrir@via.ecp.fr>
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
8 ;* Loren Merritt <lorenm@u.washington.edu>
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
9 ;* Holger Lubitz <hal@duncan.ol.sub.de>
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
10 ;* Min Chen <chenm001.163.com>
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
11 ;*
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
12 ;* This file is part of FFmpeg.
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
13 ;*
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
14 ;* FFmpeg is free software; you can redistribute it and/or
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
15 ;* modify it under the terms of the GNU Lesser General Public
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
16 ;* License as published by the Free Software Foundation; either
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
17 ;* version 2.1 of the License, or (at your option) any later version.
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
18 ;*
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
19 ;* FFmpeg is distributed in the hope that it will be useful,
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
20 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
21 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
22 ;* Lesser General Public License for more details.
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
23 ;*
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
24 ;* You should have received a copy of the GNU Lesser General Public
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
25 ;* License along with FFmpeg; if not, write to the Free Software
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
26 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
27 ;*****************************************************************************
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
28
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
29 %include "x86inc.asm"
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
30 %include "x86util.asm"
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
31
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
32 SECTION_RODATA
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
33
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
34 ; FIXME this table is a duplicate of the one in h264data.h, and will be removed once the tables from h264 have been split
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
35 scan8_mem: db 4+1*8, 5+1*8, 4+2*8, 5+2*8 ; 24 byte entries in total, each written as x+y*8; the first 16 entries use x in 4..7 and y in 1..4
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
36 db 6+1*8, 7+1*8, 6+2*8, 7+2*8
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
37 db 4+3*8, 5+3*8, 4+4*8, 5+4*8
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
38 db 6+3*8, 7+3*8, 6+4*8, 7+4*8
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
39 db 1+1*8, 2+1*8 ; the last 8 entries use x in 1..2 and y in 1, 2, 4, 5
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
40 db 1+2*8, 2+2*8
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
41 db 1+4*8, 2+4*8
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
42 db 1+5*8, 2+5*8
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
43 %ifdef PIC ; PIC builds: "scan8" names register r11 - presumably the code using the table loads the address of scan8_mem into r11 first (not visible here; TODO confirm)
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
44 %define scan8 r11
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
45 %else ; non-PIC builds: "scan8" is the table symbol itself
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
46 %define scan8 scan8_mem
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
47 %endif
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
48
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
49 cextern pw_32
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
50
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
51 SECTION .text
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
52
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
53 ; %1=uint8_t *dst, %2=int16_t *block, %3=int stride. Two-pass 4x4 transform of block, result added into dst. Uses m0-m7; %1 is left advanced by 2*stride.
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
54 %macro IDCT4_ADD 3
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
55 ; Load dct coeffs: four 8-byte rows at %2, %2+8, %2+16, %2+24
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
56 movq m0, [%2]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
57 movq m1, [%2+8]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
58 movq m2, [%2+16]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
59 movq m3, [%2+24]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
60
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
61 IDCT4_1D 0, 1, 2, 3, 4, 5 ; first 1-D pass over m0-m3 (IDCT4_1D is defined outside this file; m4/m5 are presumably its temporaries)
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
62 mova m6, [pw_32] ; m6 = the external pw_32 constant
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
63 TRANSPOSE4x4W 0, 1, 2, 3, 4
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
64 paddw m0, m6 ; add pw_32 to m0 only, before the second pass - presumably the rounding bias for the final shift by 6
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
65 IDCT4_1D 0, 1, 2, 3, 4, 5 ; second 1-D pass
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
66 pxor m7, m7 ; m7 = 0
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
67
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
68 STORE_DIFFx2 m0, m1, m4, m5, m7, 6, %1, %3 ; macro defined elsewhere; presumably shifts m0/m1 by 6 and adds them into the dst rows at %1 and %1+%3
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
69 lea %1, [%1+%3*2] ; dst += 2*stride
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
70 STORE_DIFFx2 m2, m3, m4, m5, m7, 6, %1, %3 ; same for m2/m3 and the next two dst rows
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
71 %endmacro
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
72
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
73 INIT_MMX
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
74 ; ff_h264_idct_add_mmx(uint8_t *dst, int16_t *block, int stride)
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
75 cglobal h264_idct_add_mmx, 3, 3, 0 ; cglobal comes from x86inc.asm; the numbers are presumably #args, #GPRs, #XMM registers
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
76 IDCT4_ADD r0, r1, r2 ; r0=dst, r1=block, r2=stride (argument order of the prototype above)
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
77 RET
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
78
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
79 %macro IDCT8_1D 2 ; one 1-D pass of the 8-point transform. Inputs 1,2,3,5,6,7 are expected in m1,m2,m3,m5,m6,m7; %1 and %2 are operands holding the remaining two inputs (IDCT8_1D_FULL passes rows 0 and 4). m0 and m4 are scratch on entry. Results end up in m0-m7.
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
80 mova m4, m5 ; step 1 (xN = value of mN on entry): m4 = x5 + (x5>>1) + x7 - x1, m1 = x1 + (x1>>1) + x5 + x3
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
81 mova m0, m1 ; m0 keeps the unmodified x1
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
82 psraw m4, 1
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
83 psraw m1, 1
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
84 paddw m4, m5
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
85 paddw m1, m0
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
86 paddw m4, m7
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
87 paddw m1, m5
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
88 psubw m4, m0
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
89 paddw m1, m3
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
90
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
91 psubw m0, m3 ; step 2: m0 = x1 - x3 + x7 - (x3>>1), m5 = x5 - x3 - x7 - (x7>>1)
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
92 psubw m5, m3
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
93 paddw m0, m7
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
94 psubw m5, m7
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
95 psraw m3, 1
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
96 psraw m7, 1
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
97 psubw m0, m3
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
98 psubw m5, m7
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
99
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
100 mova m3, m4 ; step 3 (a,b,c,d = m4,m1,m0,m5 as computed above): m3 = c + (a>>2), m1 = d + (b>>2), m0 = (c>>2) - a, m7 = b - (d>>2)
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
101 mova m7, m1
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
102 psraw m1, 2
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
103 psraw m3, 2
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
104 paddw m3, m0
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
105 psraw m0, 2
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
106 paddw m1, m5
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
107 psraw m5, 2
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
108 psubw m0, m4
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
109 psubw m7, m5
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
110
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
111 mova m4, m2 ; step 4, inputs 2 and 6: m4 = (x2>>1) - x6, m6 = x2 + (x6>>1)
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
112 mova m5, m6
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
113 psraw m4, 1
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
114 psraw m6, 1
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
115 psubw m4, m5
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
116 paddw m6, m2
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
117
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
118 mova m2, %1 ; fetch the two inputs that were passed as operands instead of being preloaded
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
119 mova m5, %2
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
120 SUMSUB_BA m5, m2 ; butterfly stage; SUMSUB_BA is defined elsewhere (presumably leaves the sum in its first operand and the difference in its second - TODO confirm in x86util.asm)
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
121 SUMSUB_BA m6, m5
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
122 SUMSUB_BA m4, m2
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
123 SUMSUB_BA m7, m6
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
124 SUMSUB_BA m0, m4
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
125 SUMSUB_BA m3, m2
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
126 SUMSUB_BA m1, m5
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
127 SWAP 7, 6, 4, 5, 2, 3, 1, 0 ; 70315246 -> 01234567
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
128 %endmacro
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
129
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
130 %macro IDCT8_1D_FULL 1 ; %1 = address of the coefficient rows (16 bytes apart). Loads rows 1,2,3,5,6,7 into m1,m2,m3,m5,m6,m7, then runs IDCT8_1D with rows 0 and 4 passed as memory operands.
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
131 mova m7, [%1+112]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
132 mova m6, [%1+ 96]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
133 mova m5, [%1+ 80]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
134 mova m3, [%1+ 48]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
135 mova m2, [%1+ 32]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
136 mova m1, [%1+ 16]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
137 IDCT8_1D [%1], [%1+ 64] ; rows 0 and 4 are not preloaded; IDCT8_1D reads them from memory itself
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
138 %endmacro
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
139
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
140 ; %1=int16_t *block, %2=int16_t *dstblock. First pass: 1-D transform of the rows read at %1, then the result is written to %2 as two transposed 4x4 groups. Also overwrites the 8 bytes at [%1].
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
141 %macro IDCT8_ADD_MMX_START 2
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
142 IDCT8_1D_FULL %1
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
143 mova [%1], m7 ; park m7 in the input block: m7 is passed as the 5th (presumably scratch) argument of the transpose below
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
144 TRANSPOSE4x4W 0, 1, 2, 3, 7
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
145 mova m7, [%1] ; restore m7
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
146 mova [%2 ], m0 ; transposed m0-m3 go to %2+0, +16, +32, +48
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
147 mova [%2+16], m1
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
148 mova [%2+32], m2
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
149 mova [%2+48], m3
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
150 TRANSPOSE4x4W 4, 5, 6, 7, 3 ; m3 has already been stored, so it is passed as the 5th argument here
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
151 mova [%2+ 8], m4 ; transposed m4-m7 go to %2+8, +24, +40, +56
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
152 mova [%2+24], m5
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
153 mova [%2+40], m6
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
154 mova [%2+56], m7
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
155 %endmacro
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
156
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
157 ; %1=uint8_t *dst, %2=int16_t *block, %3=int stride. Second pass: 1-D transform of the rows read at %2, then the eight result registers are added into eight dst rows. Overwrites [%2], [%2+16], [%2+32]; %1 is left advanced by 6*stride.
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
158 %macro IDCT8_ADD_MMX_END 3
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
159 IDCT8_1D_FULL %2
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
160 mova [%2 ], m5 ; park results m5-m7 in memory: m5/m6 are handed to STORE_DIFFx2 below (presumably as temporaries) and m7 is zeroed
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
161 mova [%2+16], m6
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
162 mova [%2+32], m7
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
163
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
164 pxor m7, m7 ; m7 = 0
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
165 STORE_DIFFx2 m0, m1, m5, m6, m7, 6, %1, %3 ; results 0 and 1 -> dst rows 0 and 1
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
166 lea %1, [%1+%3*2] ; dst += 2*stride
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
167 STORE_DIFFx2 m2, m3, m5, m6, m7, 6, %1, %3 ; results 2 and 3
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
168 mova m0, [%2 ] ; reload the parked results: m0 = old m5, m1 = old m6, m2 = old m7
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
169 mova m1, [%2+16]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
170 mova m2, [%2+32]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
171 lea %1, [%1+%3*2] ; dst += 2*stride
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
172 STORE_DIFFx2 m4, m0, m5, m6, m7, 6, %1, %3 ; results 4 and 5
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
173 lea %1, [%1+%3*2] ; dst += 2*stride
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
174 STORE_DIFFx2 m1, m2, m5, m6, m7, 6, %1, %3 ; results 6 and 7
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
175 %endmacro
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
176
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
177 INIT_MMX
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
178 ; ff_h264_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride)
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
179 cglobal h264_idct8_add_mmx, 3, 4, 0 ; r3 is used below as a second dst pointer
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
180 %assign pad 128+4-(stack_offset&7) ; 128 bytes of stack scratch for the intermediate block; the remaining term presumably keeps rsp 8-byte aligned for mova - TODO confirm against stack_offset in x86inc.asm
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
181 SUB rsp, pad
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
182
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
183 add word [r1], 32 ; adds 32 to the first coefficient of block, in place - presumably the rounding bias for the final shift by 6
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
184 IDCT8_ADD_MMX_START r1 , rsp ; first pass on bytes 0-7 of each 16-byte input row -> scratch bytes 0-63
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
185 IDCT8_ADD_MMX_START r1+8, rsp+64 ; first pass on bytes 8-15 of each input row -> scratch bytes 64-127
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
186 lea r3, [r0+4] ; r3 = dst + 4, the destination for the second END call
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
187 IDCT8_ADD_MMX_END r0 , rsp, r2 ; second pass on scratch bytes 0-7 of each row, added into dst
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
188 IDCT8_ADD_MMX_END r3 , rsp+8, r2 ; second pass on scratch bytes 8-15 of each row, added into dst+4
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
189
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
190 ADD rsp, pad ; release the scratch area
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
191 RET
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
192
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
193 ; %1=uint8_t *dst, %2=int16_t *block, %3=int stride, %4=scratch register (overwritten with 3*stride). Full 8x8 transform of block added into dst. On x86-32 the first 32 bytes of block double as spill space; on x86-64 m8/m9 are used instead. %1 is left advanced by 4*stride.
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
194 %macro IDCT8_ADD_SSE 4
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
195 IDCT8_1D_FULL %2 ; first 1-D pass
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
196 %ifdef ARCH_X86_64
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
197 TRANSPOSE8x8W 0, 1, 2, 3, 4, 5, 6, 7, 8 ; x86-64: m8 is passed as the extra (presumably scratch) register
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
198 %else
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
199 TRANSPOSE8x8W 0, 1, 2, 3, 4, 5, 6, 7, [%2], [%2+16] ; x86-32: two memory slots inside block are passed instead of a spare register
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
200 %endif
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
201 paddw m0, [pw_32] ; add pw_32 to m0 only, before the second pass - presumably the rounding bias for the final shift by 6
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
202
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
203 %ifndef ARCH_X86_64
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
204 mova [%2 ], m0 ; x86-32: IDCT8_1D takes inputs 0 and 4 as operands and clobbers m0/m4, so park them in memory first
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
205 mova [%2+16], m4
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
206 IDCT8_1D [%2], [%2+ 16] ; second 1-D pass
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
207 mova [%2 ], m6 ; park results m6/m7: both registers are reused by the STORE_DIFF calls below
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
208 mova [%2+16], m7
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
209 %else
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
210 SWAP 0, 8 ; x86-64: hand inputs 0 and 4 over as m8/m9 (SWAP comes from x86inc.asm; presumably an assembly-time register rename)
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
211 SWAP 4, 9
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
212 IDCT8_1D m8, m9 ; second 1-D pass
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
213 SWAP 6, 8 ; keep results 6/7 as m8/m9 while m6/m7 are reused below
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
214 SWAP 7, 9
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
215 %endif
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
216
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
217 pxor m7, m7 ; m7 = 0
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
218 lea %4, [%3*3] ; %4 = 3*stride
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
219 STORE_DIFF m0, m6, m7, [%1 ] ; results 0-3 -> dst rows 0-3 (STORE_DIFF is defined elsewhere; m6 is presumably its temporary)
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
220 STORE_DIFF m1, m6, m7, [%1+%3 ]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
221 STORE_DIFF m2, m6, m7, [%1+%3*2]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
222 STORE_DIFF m3, m6, m7, [%1+%4 ]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
223 %ifndef ARCH_X86_64
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
224 mova m0, [%2 ] ; x86-32: reload the parked results 6/7 into m0/m1
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
225 mova m1, [%2+16]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
226 %else
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
227 SWAP 0, 8 ; x86-64: results 6/7 become m0/m1 again
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
228 SWAP 1, 9
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
229 %endif
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
230 lea %1, [%1+%3*4] ; dst += 4*stride
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
231 STORE_DIFF m4, m6, m7, [%1 ] ; results 4-7 -> dst rows 4-7
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
232 STORE_DIFF m5, m6, m7, [%1+%3 ]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
233 STORE_DIFF m0, m6, m7, [%1+%3*2]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
234 STORE_DIFF m1, m6, m7, [%1+%4 ]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
235 %endmacro
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
236
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
237 INIT_XMM
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
238 ; ff_h264_idct8_add_sse2(uint8_t *dst, int16_t *block, int stride)
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
; Entry point that simply expands IDCT8_ADD_SSE on its arguments.
; r0/r1/r2 are handed to the macro as destination, coefficient block and
; stride (the order of the prototype above); r3 is a scratch register
; that the macro loads with 3*stride.
; NOTE(review): the "3, 4, 10" operands are presumably x86inc's argument,
; general-purpose and XMM register counts -- confirm against x86inc.asm.
239 cglobal h264_idct8_add_sse2, 3, 4, 10
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
; The macro advances its first operand by 4*stride while storing, so r0
; no longer points at the first row afterwards.
240 IDCT8_ADD_SSE r0, r1, r2, r3
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
241 RET
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
242
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
; DC_ADD_MMX2_INIT: build the two constants consumed by DC_ADD_MMX2_OP.
;   Two-operand form:   operand 1 is a register pointing at the
;     coefficients; the first 16-bit word is loaded sign-extended into
;     that same register, destroying the pointer.
;   Three-operand form: operand 3 is a register that already holds the
;     sign-extended DC value; operand 1 is not referenced.
;   Operand 2 is only used to compute 3*operand2 (callers in this file
;   pass the stride register).
; In both forms the DC value is rounded as (dc + 32) >> 6 and the register
; that held it ends up containing 3*operand2.
; Results: m0 = rounded dc  saturated to unsigned bytes (0 if dc < 0),
;          m1 = -rounded dc saturated to unsigned bytes (0 if dc > 0).
; pshufw only exists for MMX registers, so this must run with m0 mapped
; to an MMX register.
243 %macro DC_ADD_MMX2_INIT 2-3
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
244 %if %0 == 2
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
; replace the pointer by the 16-bit value it points at (sign-extended)
245 movsx %1, word [%1]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
; round to nearest: (dc + 32) >> 6, arithmetic shift keeps the sign
246 add %1, 32
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
247 sar %1, 6
12519
0b6bd91bbe57 Add d suffix to movd target register to make it work with nasm.
reimar
parents: 12511
diff changeset
; copy the low 32 bits of the rounded value into m0
248 movd m0, %1d
12492
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
; the register is free again: reuse it for 3*operand2
249 lea %1, [%2*3]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
250 %else
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
; same sequence, but the DC value is already in operand 3
251 add %3, 32
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
252 sar %3, 6
12519
0b6bd91bbe57 Add d suffix to movd target register to make it work with nasm.
reimar
parents: 12511
diff changeset
253 movd m0, %3d
12492
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
254 lea %3, [%2*3]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
255 %endif
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
; broadcast the low word of m0 to all four words
256 pshufw m0, m0, 0
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
; m1 = 0 - m0, i.e. the negated DC value in every word
257 pxor m1, m1
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
258 psubw m1, m0
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
; unsigned saturation zeroes whichever of m0/m1 is negative, leaving the
; positive part in m0 and the magnitude of the negative part in m1
259 packuswb m0, m0
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
260 packuswb m1, m1
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
261 %endmacro
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
262
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
; DC_ADD_MMX2_OP: add a signed constant to four rows of unsigned bytes,
; with saturation, in place.
;   operand 1 = mnemonic used for both the loads and the stores (callers
;               in this file pass movh or mova), which sets the row width
;   operand 2 = address of the first row
;   operand 3 = distance between rows
;   operand 4 = three times that distance (address of the fourth row)
; Expects m0 / m1 as left by DC_ADD_MMX2_INIT (positive part / magnitude
; of the negative part); uses m2-m5 as row temporaries.
; NOTE(review): the macro is declared with 3-4 parameters but operand 4 is
; used unconditionally; every invocation visible here passes four.
263 %macro DC_ADD_MMX2_OP 3-4
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
; load rows 0..3
264 %1 m2, [%2 ]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
265 %1 m3, [%2+%3 ]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
266 %1 m4, [%2+%3*2]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
267 %1 m5, [%2+%4 ]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
; add the positive part, saturating at 255
268 paddusb m2, m0
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
269 paddusb m3, m0
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
270 paddusb m4, m0
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
271 paddusb m5, m0
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
; subtract the negative part's magnitude, saturating at 0
272 psubusb m2, m1
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
273 psubusb m3, m1
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
274 psubusb m4, m1
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
275 psubusb m5, m1
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
; write rows 0..3 back to the same addresses
276 %1 [%2 ], m2
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
277 %1 [%2+%3 ], m3
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
278 %1 [%2+%3*2], m4
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
279 %1 [%2+%4 ], m5
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
280 %endmacro
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
281
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
282 INIT_MMX
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
283 ; ff_h264_idct_dc_add_mmx2(uint8_t *dst, int16_t *block, int stride)
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
; DC-only add: the first coefficient of block is rounded ((dc+32)>>6) and
; added, with unsigned saturation, to four rows starting at dst.
; r0 = dst, r1 = block, r2 = stride (prototype order).
284 cglobal h264_idct_dc_add_mmx2, 3, 3, 0
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
; two-operand form: reads the DC word through r1, then leaves r1 = 3*r2
285 DC_ADD_MMX2_INIT r1, r2
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
; NOTE(review): movh under INIT_MMX presumably moves 4 bytes per row,
; which would make this a 4x4 update -- confirm against x86inc.asm.
286 DC_ADD_MMX2_OP movh, r0, r2, r1
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
287 RET
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
288
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
289 ; ff_h264_idct8_dc_add_mmx2(uint8_t *dst, int16_t *block, int stride)
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
; DC-only add over eight rows: the first coefficient of block is rounded
; ((dc+32)>>6) and added, with unsigned saturation, to two groups of four
; rows starting at dst. r0 = dst, r1 = block, r2 = stride.
290 cglobal h264_idct8_dc_add_mmx2, 3, 3, 0
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
; two-operand form: reads the DC word through r1, then leaves r1 = 3*r2
291 DC_ADD_MMX2_INIT r1, r2
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
; rows 0..3 (mova moves a whole register per row)
292 DC_ADD_MMX2_OP mova, r0, r2, r1
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
; step dst down four rows, then handle rows 4..7 with the same constants
293 lea r0, [r0+r2*4]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
294 DC_ADD_MMX2_OP mova, r0, r2, r1
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
295 RET
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
296
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
297 ; ff_h264_idct_add16_mmx(uint8_t *dst, const int *block_offset,
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
298 ; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
; Loops over 16 blocks (r5 = 0..15). For each one the byte
; nnzc[scan8[r5]] is fetched; when it is non-zero, IDCT4_ADD is expanded
; with destination dst + block_offset[r5].
; Registers: r0 = dst, r1 = block_offset, r2 = current coefficient block
; (advanced by 32 bytes per iteration), r3 = stride, r4 = nnzc,
; r5 = block index, r6 = scratch.
299 cglobal h264_idct_add16_mmx, 5, 7, 0
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
300 xor r5, r5
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
301 %ifdef PIC
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
; NOTE(review): scan8 is defined outside this excerpt; it presumably
; resolves through r11 in PIC builds -- confirm.
302 lea r11, [scan8_mem]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
303 %endif
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
304 .nextblock
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
; r6 = scan8[r5]
305 movzx r6, byte [scan8+r5]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
; r6 = nnzc[r6]
306 movzx r6, byte [r4+r6]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
307 test r6, r6
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
; a zero entry means nothing is added for this block
308 jz .skipblock
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
; r6 = dst + block_offset[r5] (offset read as a 32-bit value)
309 mov r6d, dword [r1+r5*4]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
310 lea r6, [r0+r6]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
311 IDCT4_ADD r6, r2, r3
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
312 .skipblock
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
313 inc r5
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
; advance to the next block's coefficients (32 bytes per block)
314 add r2, 32
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
315 cmp r5, 16
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
316 jl .nextblock
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
317 REP_RET
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
318
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
319 ; ff_h264_idct8_add4_mmx(uint8_t *dst, const int *block_offset,
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
320 ; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
; Loops over r5 = 0, 4, 8, 12; each step covers 128 bytes of
; coefficients. A step is skipped when nnzc[scan8[r5]] is zero; otherwise
; the IDCT8_ADD_MMX_START / IDCT8_ADD_MMX_END macros are expanded, using
; a stack scratch area as their intermediate buffer.
; Registers: r0 = dst, r1 = block_offset, r2 = current coefficient block,
; r3 = stride, r4 = nnzc, r5 = block index, r6 = scratch / destination.
321 cglobal h264_idct8_add4_mmx, 5, 7, 0
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
; Size of the stack scratch area; offsets up to rsp+64 and rsp+8 are
; handed to the macros below.
; NOTE(review): the 4-(stack_offset&7) term looks like an alignment
; correction for rsp -- confirm.
322 %assign pad 128+4-(stack_offset&7)
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
323 SUB rsp, pad
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
324
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
325 xor r5, r5
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
326 %ifdef PIC
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
; NOTE(review): scan8 is defined outside this excerpt; it presumably
; resolves through r11 in PIC builds -- confirm.
327 lea r11, [scan8_mem]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
328 %endif
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
329 .nextblock
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
; r6 = nnzc[scan8[r5]]
330 movzx r6, byte [scan8+r5]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
331 movzx r6, byte [r4+r6]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
332 test r6, r6
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
333 jz .skipblock
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
; r6 = dst + block_offset[r5]
334 mov r6d, dword [r1+r5*4]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
335 lea r6, [r0+r6]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
; Adds 32 to the first coefficient in memory (the caller's block is
; modified). NOTE(review): presumably the rounding bias for a later >>6
; inside the macros -- confirm.
336 add word [r2], 32
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
; two START passes: coefficients at r2 / r2+8, scratch at rsp / rsp+64
337 IDCT8_ADD_MMX_START r2 , rsp
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
338 IDCT8_ADD_MMX_START r2+8, rsp+64
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
; first END pass: destination r6, scratch at rsp
339 IDCT8_ADD_MMX_END r6 , rsp, r3
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
; second END pass: destination recomputed 4 bytes further right,
; scratch at rsp+8.
; NOTE(review): the reload suggests IDCT8_ADD_MMX_END modifies its
; destination operand -- confirm in the macro definition.
340 mov r6d, dword [r1+r5*4]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
341 lea r6, [r0+r6+4]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
342 IDCT8_ADD_MMX_END r6 , rsp+8, r3
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
343 .skipblock
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
; next group: index advances by 4, coefficients by 128 bytes
344 add r5, 4
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
345 add r2, 128
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
346 cmp r5, 16
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
347 jl .nextblock
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
; release the stack scratch area before returning
348 ADD rsp, pad
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
349 RET
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
350
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
351 ; ff_h264_idct_add16_mmx2(uint8_t *dst, const int *block_offset,
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
352 ; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
; Loops over 16 blocks (r5 = 0..15) with a DC-only fast path. Per block,
; with n = nnzc[scan8[r5]]:
;   n == 0                         -> block is skipped
;   n == 1 and first coeff != 0    -> DC_ADD_MMX2_INIT / DC_ADD_MMX2_OP
;   anything else                  -> IDCT4_ADD
; Registers: r0 = dst, r1 = block_offset, r2 = current coefficient block
; (advanced by 32 bytes per iteration), r3 = stride, r4 = nnzc,
; r5 = block index, r6 = scratch.
353 cglobal h264_idct_add16_mmx2, 5, 7, 0
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
354 xor r5, r5
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
355 %ifdef PIC
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
; NOTE(review): scan8 is defined outside this excerpt; it presumably
; resolves through r11 in PIC builds -- confirm.
356 lea r11, [scan8_mem]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
357 %endif
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
358 .nextblock
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
; r6 = nnzc[scan8[r5]]
359 movzx r6, byte [scan8+r5]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
360 movzx r6, byte [r4+r6]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
361 test r6, r6
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
362 jz .skipblock
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
; the fast path is only tried when the count is exactly 1 ...
363 cmp r6, 1
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
364 jnz .no_dc
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
; ... and the first coefficient is non-zero; r6 = that coefficient,
; sign-extended
365 movsx r6, word [r2]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
366 test r6, r6
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
367 jz .no_dc
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
; three-operand form: consumes the value in r6 and leaves r6 = 3*r3;
; r2 is not touched
368 DC_ADD_MMX2_INIT r2, r3, r6
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
; r6 is busy holding 3*stride, so the destination needs another
; register: r10 on x86-64, r1 (the block_offset pointer) otherwise
369 %ifdef ARCH_X86_64
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
370 %define dst_reg r10
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
371 %define dst_regd r10d
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
372 %else
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
373 %define dst_reg r1
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
374 %define dst_regd r1d
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
375 %endif
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
; dst_reg = dst + block_offset[r5]
376 mov dst_regd, dword [r1+r5*4]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
377 lea dst_reg, [r0+dst_reg]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
378 DC_ADD_MMX2_OP movh, dst_reg, r3, r6
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
379 %ifndef ARCH_X86_64
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
; r1 was overwritten above; reload the block_offset pointer.
; NOTE(review): r1m is presumably x86inc's stack copy of argument 1 --
; confirm.
380 mov r1, r1m
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
381 %endif
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
; loop tail duplicated for the DC path (same steps as at .skipblock)
382 inc r5
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
383 add r2, 32
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
384 cmp r5, 16
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
385 jl .nextblock
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
386 REP_RET
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
387 .no_dc
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
; general path: r6 = dst + block_offset[r5], then the 4x4 macro
388 mov r6d, dword [r1+r5*4]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
389 lea r6, [r0+r6]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
390 IDCT4_ADD r6, r2, r3
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
391 .skipblock
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
392 inc r5
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
; advance to the next block's coefficients (32 bytes per block)
393 add r2, 32
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
394 cmp r5, 16
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
395 jl .nextblock
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
396 REP_RET
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
397
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
398 ; ff_h264_idct_add16intra_mmx(uint8_t *dst, const int *block_offset,
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
399 ; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
; Loops over 16 blocks (r5 = 0..15). IDCT4_ADD is expanded for a block
; when either nnzc[scan8[r5]] or the block's first coefficient is
; non-zero; otherwise the block is skipped.
; Registers: r0 = dst, r1 = block_offset, r2 = current coefficient block
; (advanced by 32 bytes per iteration), r3 = stride, r4 = nnzc,
; r5 = block index, r6 = scratch.
400 cglobal h264_idct_add16intra_mmx, 5, 7, 0
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
401 xor r5, r5
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
402 %ifdef PIC
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
; NOTE(review): scan8 is defined outside this excerpt; it presumably
; resolves through r11 in PIC builds -- confirm.
403 lea r11, [scan8_mem]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
404 %endif
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
405 .nextblock
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
; r6 = nnzc[scan8[r5]]
406 movzx r6, byte [scan8+r5]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
407 movzx r6, byte [r4+r6]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
; merge in the first coefficient so that one test covers both conditions
408 or r6w, word [r2]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
409 test r6, r6
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
410 jz .skipblock
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
; r6 = dst + block_offset[r5]
411 mov r6d, dword [r1+r5*4]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
412 lea r6, [r0+r6]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
413 IDCT4_ADD r6, r2, r3
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
414 .skipblock
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
415 inc r5
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
; advance to the next block's coefficients (32 bytes per block)
416 add r2, 32
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
417 cmp r5, 16
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
418 jl .nextblock
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
419 REP_RET
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
420
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
421 ; ff_h264_idct_add16intra_mmx2(uint8_t *dst, const int *block_offset,
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
422 ; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
; Loops over 16 blocks (r5 = 0..15). Per block, with
; n = nnzc[scan8[r5]]:
;   n != 0                        -> IDCT4_ADD
;   n == 0 and first coeff != 0   -> DC_ADD_MMX2_INIT / DC_ADD_MMX2_OP
;   n == 0 and first coeff == 0   -> block is skipped
; Registers: r0 = dst, r1 = block_offset, r2 = current coefficient block
; (advanced by 32 bytes per iteration), r3 = stride, r4 = nnzc,
; r5 = block index, r6 = scratch.
423 cglobal h264_idct_add16intra_mmx2, 5, 7, 0
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
424 xor r5, r5
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
425 %ifdef PIC
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
; NOTE(review): scan8 is defined outside this excerpt; it presumably
; resolves through r11 in PIC builds -- confirm.
426 lea r11, [scan8_mem]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
427 %endif
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
428 .nextblock
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
; r6 = nnzc[scan8[r5]]
429 movzx r6, byte [scan8+r5]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
430 movzx r6, byte [r4+r6]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
431 test r6, r6
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
; a zero count does not skip outright: the first coefficient is checked
432 jz .try_dc
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
; general path: r6 = dst + block_offset[r5], then the 4x4 macro
433 mov r6d, dword [r1+r5*4]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
434 lea r6, [r0+r6]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
435 IDCT4_ADD r6, r2, r3
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
; loop tail duplicated for this path (same steps as at .skipblock)
436 inc r5
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
437 add r2, 32
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
438 cmp r5, 16
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
439 jl .nextblock
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
440 REP_RET
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
441 .try_dc
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
; r6 = first coefficient, sign-extended; skip when it is zero as well
442 movsx r6, word [r2]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
443 test r6, r6
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
444 jz .skipblock
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
; three-operand form: consumes the value in r6 and leaves r6 = 3*r3;
; r2 is not touched
445 DC_ADD_MMX2_INIT r2, r3, r6
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
; r6 is busy holding 3*stride, so the destination needs another
; register: r10 on x86-64, r1 (the block_offset pointer) otherwise
446 %ifdef ARCH_X86_64
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
447 %define dst_reg r10
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
448 %define dst_regd r10d
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
449 %else
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
450 %define dst_reg r1
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
451 %define dst_regd r1d
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
452 %endif
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
; dst_reg = dst + block_offset[r5]
453 mov dst_regd, dword [r1+r5*4]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
454 lea dst_reg, [r0+dst_reg]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
455 DC_ADD_MMX2_OP movh, dst_reg, r3, r6
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
456 %ifndef ARCH_X86_64
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
; r1 was overwritten above; reload the block_offset pointer.
; NOTE(review): r1m is presumably x86inc's stack copy of argument 1 --
; confirm.
457 mov r1, r1m
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
458 %endif
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
459 .skipblock
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
460 inc r5
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
; advance to the next block's coefficients (32 bytes per block)
461 add r2, 32
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
462 cmp r5, 16
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
463 jl .nextblock
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
464 REP_RET
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
465
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
; ff_h264_idct8_add4_mmx2(uint8_t *dst, const int *block_offset,
;                         DCTELEM *block, int stride, const uint8_t nnzc[6*8])
;
; Register roles as used in the body:
;   r0 = dst               r1 = block_offset      r2 = block (+128 bytes/iter)
;   r3 = stride            r4 = nnzc              r5 = block index 0,4,8,12
;   r6 = scratch           dst_reg = per-block destination (DC path only)
;
; For every index the routine reads nnzc[scan8[r5]]:
;   == 0                      -> block is skipped
;   == 1 and block[0] != 0    -> DC_ADD_MMX2_* path
;   anything else             -> IDCT8_ADD_MMX_* path through the stack scratch
cglobal h264_idct8_add4_mmx2, 5, 7, 0
%assign pad 128+4-(stack_offset&7)
    SUB         rsp, pad                    ; scratch buffer for the .no_dc path

    xor         r5, r5                      ; start with block index 0
%ifdef PIC
    lea         r11, [scan8_mem]
%endif

.nextblock:
    movzx       r6, byte [scan8+r5]
    movzx       r6, byte [r4+r6]            ; r6 = nnzc[scan8[r5]]
    test        r6, r6
    jz .skipblock
    cmp         r6, 1
    jnz .no_dc
    movsx       r6, word [r2]               ; r6 = block[0]
    test        r6, r6
    jz .no_dc

    ; ---- nnzc == 1 and block[0] != 0 ----
    DC_ADD_MMX2_INIT r2, r3, r6
%ifdef ARCH_X86_64
%define dst_reg  r10
%define dst_regd r10d
%else
%define dst_reg  r1                         ; reuses r1, reloaded from r1m below
%define dst_regd r1d
%endif
    mov         dst_regd, dword [r1+r5*4]   ; block_offset[r5]
    lea         dst_reg, [r0+dst_reg]       ; dst + block_offset[r5]
    DC_ADD_MMX2_OP mova, dst_reg, r3, r6
    lea         dst_reg, [dst_reg+r3*4]     ; move down four rows
    DC_ADD_MMX2_OP mova, dst_reg, r3, r6
%ifndef ARCH_X86_64
    mov         r1, r1m                     ; restore block_offset pointer
%endif
    add         r5, 4
    add         r2, 128
    cmp         r5, 16
    jl .nextblock

    ADD         rsp, pad
    RET

    ; ---- transform path via the IDCT8_ADD_MMX_* macros ----
.no_dc:
    mov         r6d, dword [r1+r5*4]
    lea         r6, [r0+r6]                 ; r6 = dst + block_offset[r5]
    add         word [r2], 32               ; block[0] += 32 (presumably the rounding bias - confirm)
    IDCT8_ADD_MMX_START r2  , rsp
    IDCT8_ADD_MMX_START r2+8, rsp+64
    IDCT8_ADD_MMX_END   r6  , rsp,   r3
    mov         r6d, dword [r1+r5*4]
    lea         r6, [r0+r6+4]               ; same block, 4 bytes further right
    IDCT8_ADD_MMX_END   r6  , rsp+8, r3

.skipblock:
    add         r5, 4
    add         r2, 128
    cmp         r5, 16
    jl .nextblock

    ADD         rsp, pad
    RET
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
527
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
INIT_XMM
; ff_h264_idct8_add4_sse2(uint8_t *dst, const int *block_offset,
;                         DCTELEM *block, int stride, const uint8_t nnzc[6*8])
;
; Register roles as used in the body:
;   r0 = dst               r1 = block_offset      r2 = block (+128 bytes/iter)
;   r3 = stride            r4 = nnzc              r5 = block index 0,4,8,12
;   r6 = scratch           dst_reg = per-block destination
;
; nnzc[scan8[r5]] == 0 skips the block; == 1 with block[0] != 0 takes the
; DC_ADD_MMX2_* path (assembled in MMX mode); everything else goes through
; IDCT8_ADD_SSE (assembled in XMM mode).
cglobal h264_idct8_add4_sse2, 5, 7, 10
    xor         r5, r5                      ; start with block index 0
%ifdef PIC
    lea         r11, [scan8_mem]
%endif

.nextblock:
    movzx       r6, byte [scan8+r5]
    movzx       r6, byte [r4+r6]            ; r6 = nnzc[scan8[r5]]
    test        r6, r6
    jz .skipblock
    cmp         r6, 1
    jnz .no_dc
    movsx       r6, word [r2]               ; r6 = block[0]
    test        r6, r6
    jz .no_dc

    ; ---- nnzc == 1 and block[0] != 0 ----
INIT_MMX                                    ; the DC macros are expanded in MMX mode
    DC_ADD_MMX2_INIT r2, r3, r6
%ifdef ARCH_X86_64
%define dst_reg  r10
%define dst_regd r10d
%else
%define dst_reg  r1                         ; reuses r1, reloaded from r1m below
%define dst_regd r1d
%endif
    mov         dst_regd, dword [r1+r5*4]   ; block_offset[r5]
    lea         dst_reg, [r0+dst_reg]       ; dst + block_offset[r5]
    DC_ADD_MMX2_OP mova, dst_reg, r3, r6
    lea         dst_reg, [dst_reg+r3*4]     ; move down four rows
    DC_ADD_MMX2_OP mova, dst_reg, r3, r6
%ifndef ARCH_X86_64
    mov         r1, r1m                     ; restore block_offset pointer
%endif
    add         r5, 4
    add         r2, 128
    cmp         r5, 16
    jl .nextblock
    REP_RET

    ; ---- transform path via IDCT8_ADD_SSE ----
.no_dc:
INIT_XMM                                    ; back to XMM mode for the SSE2 macro
    mov         dst_regd, dword [r1+r5*4]   ; dst_reg/dst_regd were %define'd above
    lea         dst_reg, [r0+dst_reg]
    IDCT8_ADD_SSE dst_reg, r2, r3, r6
%ifndef ARCH_X86_64
    mov         r1, r1m                     ; restore block_offset pointer
%endif

.skipblock:
    add         r5, 4
    add         r2, 128
    cmp         r5, 16
    jl .nextblock
    REP_RET
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
582
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
INIT_MMX
; Internal helper reached with `call` from h264_idct_add8_mmx; not a cglobal
; entry point, so it ends with a plain `rep ret`.
;
; Inputs as read by the body:
;   r1 = block_offset      r2 = block (+32 bytes/iter)   r3 = stride
;   r4 = nnzc              r5 = first block index
;   x86-64: r10 = address of the destination plane pointer
;   x86-32: the same address is fetched from the stack through r1m
; Clobbers r0 and r6. Loops until the incremented r5 is a multiple of 4.
h264_idct_add8_mmx_plane:
.nextblock:
    movzx       r6, byte [scan8+r5]
    movzx       r6, byte [r4+r6]            ; r6 = nnzc[scan8[r5]]
    or          r6w, word [r2]              ; merge in block[0]
    test        r6, r6
    jz .skipblock                           ; nnzc and block[0] are both zero
%ifdef ARCH_X86_64
    mov         r0d, dword [r1+r5*4]        ; block_offset[r5]
    add         r0, [r10]                   ; + plane base pointer
%else
    mov         r0, r1m ; XXX r1m here is actually r0m of the calling func
    mov         r0, [r0]                    ; plane base pointer
    add         r0, dword [r1+r5*4]         ; + block_offset[r5]
%endif
    IDCT4_ADD   r0, r2, r3

.skipblock:
    inc         r5
    add         r2, 32
    test        r5, 3
    jnz .nextblock
    rep ret
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
606
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
; ff_h264_idct_add8_mmx(uint8_t **dest, const int *block_offset,
;                       DCTELEM *block, int stride, const uint8_t nnzc[6*8])
;
; Calls h264_idct_add8_mmx_plane twice: once for dest[0] and once for dest[1].
; r5 starts at block index 16 and r2 is advanced 512 bytes into `block`.
; The helper leaves r5 and r2 advanced, so the second call continues with the
; following four blocks.
cglobal h264_idct_add8_mmx, 5, 7, 0
    mov         r5, 16                      ; first block index handled here
    add         r2, 512                     ; skip the first 512 bytes of coefficients
%ifdef PIC
    lea         r11, [scan8_mem]
%endif
%ifdef ARCH_X86_64
    mov         r10, r0                     ; helper reads the plane pointer via [r10]
%endif
    call        h264_idct_add8_mmx_plane

    ; step from dest[0] to dest[1]
%ifdef ARCH_X86_64
    add         r10, gprsize
%else
    add         r0mp, gprsize               ; helper reads dest from this stack slot
%endif
    call        h264_idct_add8_mmx_plane
    RET
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
626
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
; Internal helper reached with `call` from h264_idct_add8_mmx2; not a cglobal
; entry point, so it ends with a plain `rep ret`.
;
; Inputs as read by the body:
;   r1 = block_offset      r2 = block (+32 bytes/iter)   r3 = stride
;   r4 = nnzc              r5 = first block index
;   x86-64: r10 = address of the destination plane pointer
;   x86-32: the same address is fetched from the stack through r1m
; Clobbers r0 and r6. Loops until the incremented r5 is a multiple of 4.
;
; Per block: nnzc[scan8[r5]] != 0 runs IDCT4_ADD; otherwise a nonzero block[0]
; runs the DC_ADD_MMX2_* path; otherwise the block is skipped.
;
; The entry label and the local labels carry an explicit trailing colon, the
; same as the sibling h264_idct_add8_mmx_plane, so the assembler does not
; treat them as orphan labels.
h264_idct_add8_mmx2_plane:
.nextblock:
    movzx       r6, byte [scan8+r5]
    movzx       r6, byte [r4+r6]            ; r6 = nnzc[scan8[r5]]
    test        r6, r6
    jz .try_dc
%ifdef ARCH_X86_64
    mov         r0d, dword [r1+r5*4]        ; block_offset[r5]
    add         r0, [r10]                   ; + plane base pointer
%else
    mov         r0, r1m ; XXX r1m here is actually r0m of the calling func
    mov         r0, [r0]                    ; plane base pointer
    add         r0, dword [r1+r5*4]         ; + block_offset[r5]
%endif
    IDCT4_ADD   r0, r2, r3
    inc         r5
    add         r2, 32
    test        r5, 3
    jnz .nextblock
    rep ret

.try_dc:
    movsx       r6, word [r2]               ; r6 = block[0]
    test        r6, r6
    jz .skipblock
    DC_ADD_MMX2_INIT r2, r3, r6
%ifdef ARCH_X86_64
    mov         r0d, dword [r1+r5*4]        ; block_offset[r5]
    add         r0, [r10]                   ; + plane base pointer
%else
    mov         r0, r1m ; XXX r1m here is actually r0m of the calling func
    mov         r0, [r0]                    ; plane base pointer
    add         r0, dword [r1+r5*4]         ; + block_offset[r5]
%endif
    DC_ADD_MMX2_OP movh, r0, r3, r6

.skipblock:
    inc         r5
    add         r2, 32
    test        r5, 3
    jnz .nextblock
    rep ret
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
667
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
; ff_h264_idct_add8_mmx2(uint8_t **dest, const int *block_offset,
;                        DCTELEM *block, int stride, const uint8_t nnzc[6*8])
;
; Calls h264_idct_add8_mmx2_plane twice: once for dest[0] and once for dest[1].
; r5 starts at block index 16 and r2 is advanced 512 bytes into `block`.
; The helper leaves r5 and r2 advanced, so the second call continues with the
; following four blocks.
cglobal h264_idct_add8_mmx2, 5, 7, 0
    mov         r5, 16                      ; first block index handled here
    add         r2, 512                     ; skip the first 512 bytes of coefficients
%ifdef ARCH_X86_64
    mov         r10, r0                     ; helper reads the plane pointer via [r10]
%endif
%ifdef PIC
    lea         r11, [scan8_mem]
%endif
    call        h264_idct_add8_mmx2_plane

    ; step from dest[0] to dest[1]
%ifdef ARCH_X86_64
    add         r10, gprsize
%else
    add         r0mp, gprsize               ; helper reads dest from this stack slot
%endif
    call        h264_idct_add8_mmx2_plane
    RET
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
687
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
INIT_MMX
; Internal helper (plain `ret`, no cglobal prologue/epilogue).
;   in:       r0 = uint8_t *dst, r2 = int16_t *block, r3 = int stride
;   clobbers: r6 (set to 3*stride), m0, m1
; Takes the first word at [r2] and at [r2+32], adds 32 and shifts right by 6,
; then builds the packed positive/negative byte patterns that
; DC_ADD_MMX2_OP consumes. Lane diagrams on the right show register contents.
h264_idct_dc_add8_mmx2:
    movd        m0, [r2   ]                 ;  0 0 X D
    punpcklwd   m0, [r2+32]                 ;  x X d D
    paddsw      m0, [pw_32]                 ; +32 on every word
    psraw       m0, 6                       ; >> 6 on every word
    punpcklwd   m0, m0                      ;  d d D D
    pxor        m1, m1                      ;  0 0 0 0
    psubw       m1, m0                      ; -d-d-D-D
    packuswb    m0, m1                      ; -d-d-D-D d d D D
    pshufw      m1, m0, 0xFA                ; -d-d-d-d-D-D-D-D
    punpcklwd   m0, m0                      ;  d d d d D D D D
    lea         r6, [r3*3]                  ; r6 = 3 * stride
    DC_ADD_MMX2_OP movq, r0, r3, r6
    ret
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
704
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
705 ALIGN 16
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
706 INIT_XMM
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
707 ; r0 = uint8_t *dst (clobbered), r2 = int16_t *block, r3 = int stride
; Helper reached via `call`. Fills the low 8 bytes of m0-m3 from
; [r2+0], [r2+8], [r2+16], [r2+24] and the high 8 bytes from the same offsets
; plus 32, then runs IDCT4_1D, TRANSPOSE2x4x4W, IDCT4_1D and writes the result
; to dst through two STORE_DIFFx2 invocations, two rows of stride r3 apart.
; Uses m0-m5 and m7; r0 is advanced by 2*stride and not restored.
; NOTE(review): IDCT4_1D, TRANSPOSE2x4x4W and STORE_DIFFx2 are macros defined
; outside this excerpt; the name suggests two 4x4 blocks are transformed and
; added to an 8x4 pixel area -- confirm against the macro definitions.
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
708 x264_add8x4_idct_sse2:
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
709 movq m0, [r2+ 0] ; low halves: 8 bytes each from the first 32 bytes at r2
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
710 movq m1, [r2+ 8]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
711 movq m2, [r2+16]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
712 movq m3, [r2+24]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
713 movhps m0, [r2+32] ; high halves: 8 bytes each from the next 32 bytes
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
714 movhps m1, [r2+40]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
715 movhps m2, [r2+48]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
716 movhps m3, [r2+56]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
717 IDCT4_1D 0,1,2,3,4,5
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
718 TRANSPOSE2x4x4W 0,1,2,3,4
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
719 paddw m0, [pw_32] ; bias added to m0 only, before the second pass (presumably rounding for the shift by 6 below)
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
720 IDCT4_1D 0,1,2,3,4,5
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
721 pxor m7, m7 ; m7 = 0, handed to STORE_DIFFx2
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
722 STORE_DIFFx2 m0, m1, m4, m5, m7, 6, r0, r3
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
723 lea r0, [r0+r3*2] ; dst += 2 * stride
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
724 STORE_DIFFx2 m2, m3, m4, m5, m7, 6, r0, r3
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
725 ret
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
726
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
727 %macro add16_sse2_cycle 2
; %1 = cycle index (0-7), %2 = byte offset into the nnzc table at r4.
; Reads the two nnzc bytes at [r4+%2] as one word; when that word is non-zero
; it calls x264_add8x4_idct_sse2 with r0 = dst base + dword [r1+%1*8]
; (block_offset[2*%1] for 4-byte ints). r2 is then advanced by 64 bytes,
; except after the last cycle (%1 == 7).
; r0 is scratch; the dst base is read from r10 (x86-64) or from r0m (x86-32,
; presumably the stack slot of argument 0 in x86inc naming -- confirm).
; The local label ends with ':' so NASM does not emit its orphan-label warning.
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
728 movzx r0, word [r4+%2] ; two adjacent nnzc bytes, zero-extended
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
729 test r0, r0
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
730 jz .cycle%1end ; both bytes zero: nothing to add for this pair
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
731 mov r0d, dword [r1+%1*8] ; 32-bit offset; the dst base is added next
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
732 %ifdef ARCH_X86_64
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
733 add r0, r10
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
734 %else
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
735 add r0, r0m
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
736 %endif
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
737 call x264_add8x4_idct_sse2
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
738 .cycle%1end:
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
739 %if %1 < 7
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
740 add r2, 64 ; step past the 64 coefficient bytes handled by this cycle
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
741 %endif
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
742 %endmacro
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
743
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
744 ; ff_h264_idct_add16_sse2(uint8_t *dst, const int *block_offset,
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
745 ; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
; Register mapping used below: r0 = dst, r1 = block_offset, r2 = block,
; r3 = stride, r4 = nnzc. Eight add16_sse2_cycle expansions each handle one
; pair of blocks (64 bytes of coefficients) and skip pairs whose nnzc word is 0.
; NOTE(review): "5, 5, 8" is presumably x86inc's cglobal convention for
; argument count, GPR count and XMM register count -- confirm.
; NOTE(review): the second macro argument (0xc, 0x14, ...) looks like inlined
; scan8[] positions of the first block in each pair -- confirm against scan8[].
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
746 cglobal h264_idct_add16_sse2, 5, 5, 8
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
747 %ifdef ARCH_X86_64
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
748 mov r10, r0 ; keep the dst base: every cycle overwrites r0 as scratch
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
749 %endif
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
750 ; unrolling of the loop leads to an average performance gain of
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
751 ; 20-25%
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
752 add16_sse2_cycle 0, 0xc
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
753 add16_sse2_cycle 1, 0x14
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
754 add16_sse2_cycle 2, 0xe
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
755 add16_sse2_cycle 3, 0x16
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
756 add16_sse2_cycle 4, 0x1c
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
757 add16_sse2_cycle 5, 0x24
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
758 add16_sse2_cycle 6, 0x1e
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
759 add16_sse2_cycle 7, 0x26
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
760 RET
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
761
12511
41ebcc0afb40 Unroll loop in h264_idct_add16intra_sse2(). Basically identical to r25171, this
rbultje
parents: 12510
diff changeset
762 %macro add16intra_sse2_cycle 2
; %1 = cycle index (0-7), %2 = byte offset into the nnzc table at r4.
; Per cycle, one of three things happens:
;   - nnzc word at [r4+%2] non-zero: call x264_add8x4_idct_sse2;
;   - otherwise, 16-bit value at [r2] or at [r2+32] non-zero:
;     call the DC-only helper h264_idct_dc_add8_mmx2;
;   - otherwise nothing is added.
; Both calls get r0 = dst base + dword [r1+%1*8]. r2 is advanced by 64 bytes
; afterwards, except after the last cycle (%1 == 7).
; r0 is scratch; the dst base is read from r10 (x86-64) or r0m (x86-32).
; Local labels end with ':' so NASM does not emit its orphan-label warning.
41ebcc0afb40 Unroll loop in h264_idct_add16intra_sse2(). Basically identical to r25171, this
rbultje
parents: 12510
diff changeset
763 movzx r0, word [r4+%2] ; two adjacent nnzc bytes, zero-extended
12492
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
764 test r0, r0
12511
41ebcc0afb40 Unroll loop in h264_idct_add16intra_sse2(). Basically identical to r25171, this
rbultje
parents: 12510
diff changeset
765 jz .try%1dc ; no flagged coefficients: fall back to the DC check
41ebcc0afb40 Unroll loop in h264_idct_add16intra_sse2(). Basically identical to r25171, this
rbultje
parents: 12510
diff changeset
766 mov r0d, dword [r1+%1*8]
12492
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
767 %ifdef ARCH_X86_64
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
768 add r0, r10
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
769 %else
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
770 add r0, r0m
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
771 %endif
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
772 call x264_add8x4_idct_sse2
12511
41ebcc0afb40 Unroll loop in h264_idct_add16intra_sse2(). Basically identical to r25171, this
rbultje
parents: 12510
diff changeset
773 jmp .cycle%1end
41ebcc0afb40 Unroll loop in h264_idct_add16intra_sse2(). Basically identical to r25171, this
rbultje
parents: 12510
diff changeset
774 .try%1dc:
12492
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
775 movsx r0, word [r2 ]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
776 or r0w, word [r2+32] ; ZF is set only when both 16-bit values are zero
12511
41ebcc0afb40 Unroll loop in h264_idct_add16intra_sse2(). Basically identical to r25171, this
rbultje
parents: 12510
diff changeset
777 jz .cycle%1end
41ebcc0afb40 Unroll loop in h264_idct_add16intra_sse2(). Basically identical to r25171, this
rbultje
parents: 12510
diff changeset
778 mov r0d, dword [r1+%1*8]
12492
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
779 %ifdef ARCH_X86_64
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
780 add r0, r10
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
781 %else
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
782 add r0, r0m
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
783 %endif
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
784 call h264_idct_dc_add8_mmx2
12511
41ebcc0afb40 Unroll loop in h264_idct_add16intra_sse2(). Basically identical to r25171, this
rbultje
parents: 12510
diff changeset
785 .cycle%1end:
41ebcc0afb40 Unroll loop in h264_idct_add16intra_sse2(). Basically identical to r25171, this
rbultje
parents: 12510
diff changeset
786 %if %1 < 7
12492
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
787 add r2, 64 ; step past the 64 coefficient bytes handled by this cycle
12511
41ebcc0afb40 Unroll loop in h264_idct_add16intra_sse2(). Basically identical to r25171, this
rbultje
parents: 12510
diff changeset
788 %endif
41ebcc0afb40 Unroll loop in h264_idct_add16intra_sse2(). Basically identical to r25171, this
rbultje
parents: 12510
diff changeset
789 %endmacro
41ebcc0afb40 Unroll loop in h264_idct_add16intra_sse2(). Basically identical to r25171, this
rbultje
parents: 12510
diff changeset
790
41ebcc0afb40 Unroll loop in h264_idct_add16intra_sse2(). Basically identical to r25171, this
rbultje
parents: 12510
diff changeset
791 ; ff_h264_idct_add16intra_sse2(uint8_t *dst, const int *block_offset,
41ebcc0afb40 Unroll loop in h264_idct_add16intra_sse2(). Basically identical to r25171, this
rbultje
parents: 12510
diff changeset
792 ; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
; Register mapping used below: r0 = dst, r1 = block_offset, r2 = block,
; r3 = stride, r4 = nnzc. Eight add16intra_sse2_cycle expansions each handle
; one pair of blocks, choosing between the full IDCT, the DC-only helper, or
; nothing. The DC-only helper h264_idct_dc_add8_mmx2 clobbers r6, which is why
; more GPRs are requested here than in h264_idct_add16_sse2.
; NOTE(review): "5, 7, 8" is presumably x86inc's cglobal convention for
; argument count, GPR count and XMM register count -- confirm.
41ebcc0afb40 Unroll loop in h264_idct_add16intra_sse2(). Basically identical to r25171, this
rbultje
parents: 12510
diff changeset
793 cglobal h264_idct_add16intra_sse2, 5, 7, 8
41ebcc0afb40 Unroll loop in h264_idct_add16intra_sse2(). Basically identical to r25171, this
rbultje
parents: 12510
diff changeset
794 %ifdef ARCH_X86_64
41ebcc0afb40 Unroll loop in h264_idct_add16intra_sse2(). Basically identical to r25171, this
rbultje
parents: 12510
diff changeset
795 mov r10, r0 ; keep the dst base: every cycle overwrites r0 as scratch
41ebcc0afb40 Unroll loop in h264_idct_add16intra_sse2(). Basically identical to r25171, this
rbultje
parents: 12510
diff changeset
796 %endif
41ebcc0afb40 Unroll loop in h264_idct_add16intra_sse2(). Basically identical to r25171, this
rbultje
parents: 12510
diff changeset
797 add16intra_sse2_cycle 0, 0xc
41ebcc0afb40 Unroll loop in h264_idct_add16intra_sse2(). Basically identical to r25171, this
rbultje
parents: 12510
diff changeset
798 add16intra_sse2_cycle 1, 0x14
41ebcc0afb40 Unroll loop in h264_idct_add16intra_sse2(). Basically identical to r25171, this
rbultje
parents: 12510
diff changeset
799 add16intra_sse2_cycle 2, 0xe
41ebcc0afb40 Unroll loop in h264_idct_add16intra_sse2(). Basically identical to r25171, this
rbultje
parents: 12510
diff changeset
800 add16intra_sse2_cycle 3, 0x16
41ebcc0afb40 Unroll loop in h264_idct_add16intra_sse2(). Basically identical to r25171, this
rbultje
parents: 12510
diff changeset
801 add16intra_sse2_cycle 4, 0x1c
41ebcc0afb40 Unroll loop in h264_idct_add16intra_sse2(). Basically identical to r25171, this
rbultje
parents: 12510
diff changeset
802 add16intra_sse2_cycle 5, 0x24
41ebcc0afb40 Unroll loop in h264_idct_add16intra_sse2(). Basically identical to r25171, this
rbultje
parents: 12510
diff changeset
803 add16intra_sse2_cycle 6, 0x1e
41ebcc0afb40 Unroll loop in h264_idct_add16intra_sse2(). Basically identical to r25171, this
rbultje
parents: 12510
diff changeset
804 add16intra_sse2_cycle 7, 0x26
41ebcc0afb40 Unroll loop in h264_idct_add16intra_sse2(). Basically identical to r25171, this
rbultje
parents: 12510
diff changeset
805 RET
12492
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
806
12510
ef2f2db5b7be Unroll loop in h264_idct_add8_sse2(). This means we can inline scan8[] in the
rbultje
parents: 12492
diff changeset
807 %macro add8_sse2_cycle 2
; %1 = cycle index (0-3), %2 = byte offset into the nnzc table at r4.
; Same three-way choice as the intra luma cycle: full IDCT when the nnzc word
; at [r4+%2] is non-zero, DC-only helper when the 16-bit value at [r2] or
; [r2+32] is non-zero, otherwise nothing.
; The destination is reached through one more indirection than in the add16
; macros: r10 (x86-64) or r0m (x86-32) holds the address of a plane pointer,
; which is loaded and added to dword [r1+%1*8+64]
; (block_offset[16 + 2*%1] for 4-byte ints).
; r2 is advanced by 64 bytes afterwards, except after the last cycle (%1 == 3).
; Local labels end with ':' so NASM does not emit its orphan-label warning.
ef2f2db5b7be Unroll loop in h264_idct_add8_sse2(). This means we can inline scan8[] in the
rbultje
parents: 12492
diff changeset
808 movzx r0, word [r4+%2] ; two adjacent nnzc bytes, zero-extended
12492
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
809 test r0, r0
12510
ef2f2db5b7be Unroll loop in h264_idct_add8_sse2(). This means we can inline scan8[] in the
rbultje
parents: 12492
diff changeset
810 jz .try%1dc ; no flagged coefficients: fall back to the DC check
12492
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
811 %ifdef ARCH_X86_64
12510
ef2f2db5b7be Unroll loop in h264_idct_add8_sse2(). This means we can inline scan8[] in the
rbultje
parents: 12492
diff changeset
812 mov r0d, dword [r1+%1*8+64]
12492
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
813 add r0, [r10] ; + plane pointer stored at the address held in r10
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
814 %else
12510
ef2f2db5b7be Unroll loop in h264_idct_add8_sse2(). This means we can inline scan8[] in the
rbultje
parents: 12492
diff changeset
815 mov r0, r0m
12492
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
816 mov r0, [r0] ; dereference to get the plane pointer
12510
ef2f2db5b7be Unroll loop in h264_idct_add8_sse2(). This means we can inline scan8[] in the
rbultje
parents: 12492
diff changeset
817 add r0, dword [r1+%1*8+64]
12492
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
818 %endif
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
819 call x264_add8x4_idct_sse2
12510
ef2f2db5b7be Unroll loop in h264_idct_add8_sse2(). This means we can inline scan8[] in the
rbultje
parents: 12492
diff changeset
820 jmp .cycle%1end
ef2f2db5b7be Unroll loop in h264_idct_add8_sse2(). This means we can inline scan8[] in the
rbultje
parents: 12492
diff changeset
821 .try%1dc:
12492
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
822 movsx r0, word [r2 ]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
823 or r0w, word [r2+32] ; ZF is set only when both 16-bit values are zero
12510
ef2f2db5b7be Unroll loop in h264_idct_add8_sse2(). This means we can inline scan8[] in the
rbultje
parents: 12492
diff changeset
824 jz .cycle%1end
12492
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
825 %ifdef ARCH_X86_64
12510
ef2f2db5b7be Unroll loop in h264_idct_add8_sse2(). This means we can inline scan8[] in the
rbultje
parents: 12492
diff changeset
826 mov r0d, dword [r1+%1*8+64]
12492
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
827 add r0, [r10]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
828 %else
12510
ef2f2db5b7be Unroll loop in h264_idct_add8_sse2(). This means we can inline scan8[] in the
rbultje
parents: 12492
diff changeset
829 mov r0, r0m
12492
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
830 mov r0, [r0]
12510
ef2f2db5b7be Unroll loop in h264_idct_add8_sse2(). This means we can inline scan8[] in the
rbultje
parents: 12492
diff changeset
831 add r0, dword [r1+%1*8+64]
12492
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
832 %endif
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
833 call h264_idct_dc_add8_mmx2
12510
ef2f2db5b7be Unroll loop in h264_idct_add8_sse2(). This means we can inline scan8[] in the
rbultje
parents: 12492
diff changeset
834 .cycle%1end:
ef2f2db5b7be Unroll loop in h264_idct_add8_sse2(). This means we can inline scan8[] in the
rbultje
parents: 12492
diff changeset
835 %if %1 < 3
12492
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
836 add r2, 64 ; step past the 64 coefficient bytes handled by this cycle
12510
ef2f2db5b7be Unroll loop in h264_idct_add8_sse2(). This means we can inline scan8[] in the
rbultje
parents: 12492
diff changeset
837 %endif
ef2f2db5b7be Unroll loop in h264_idct_add8_sse2(). This means we can inline scan8[] in the
rbultje
parents: 12492
diff changeset
838 %endmacro
12492
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
839
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
840 ; ff_h264_idct_add8_sse2(uint8_t **dest, const int *block_offset,
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
841 ; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
; Register mapping used below: r0 = dest (array of plane pointers),
; r1 = block_offset, r2 = block, r3 = stride, r4 = nnzc.
; Cycles 0-1 add into the plane at dest[0]; the dest pointer is then advanced
; by gprsize and cycles 2-3 add into the next plane.
; NOTE(review): the 512-byte skip on r2 presumably steps over the 16 luma
; blocks (16 coefficients of 2 bytes each) so that r2 points at the chroma
; coefficients -- confirm against the caller's block layout.
; NOTE(review): "5, 7, 8" is presumably x86inc's cglobal convention for
; argument count, GPR count and XMM register count -- confirm.
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
842 cglobal h264_idct_add8_sse2, 5, 7, 8
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
843 add r2, 512 ; start 512 bytes into the coefficient buffer
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
844 %ifdef ARCH_X86_64
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
845 mov r10, r0 ; r10 = address of the current plane pointer; r0 becomes scratch
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
846 %endif
12510
ef2f2db5b7be Unroll loop in h264_idct_add8_sse2(). This means we can inline scan8[] in the
rbultje
parents: 12492
diff changeset
847 add8_sse2_cycle 0, 0x09
ef2f2db5b7be Unroll loop in h264_idct_add8_sse2(). This means we can inline scan8[] in the
rbultje
parents: 12492
diff changeset
848 add8_sse2_cycle 1, 0x11
12492
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
849 %ifdef ARCH_X86_64
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
850 add r10, gprsize ; move on to the next entry of dest[]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
851 %else
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
852 add r0mp, gprsize ; same step, applied to the argument's memory slot
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
853 %endif
12510
ef2f2db5b7be Unroll loop in h264_idct_add8_sse2(). This means we can inline scan8[] in the
rbultje
parents: 12492
diff changeset
854 add8_sse2_cycle 2, 0x21
ef2f2db5b7be Unroll loop in h264_idct_add8_sse2(). This means we can inline scan8[] in the
rbultje
parents: 12492
diff changeset
855 add8_sse2_cycle 3, 0x29
12492
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
856 RET