annotate x86/h264_idct.asm @ 12510:ef2f2db5b7be libavcodec

Unroll loop in h264_idct_add8_sse2(). This means we can inline scan8[] in the code directly also and remove loop setup. 20% faster in function, 0.8% overall. See "[PATCH] unroll loop in h264_idct_add8_sse2()" thread on ML.
author rbultje
date Fri, 24 Sep 2010 14:05:45 +0000
parents 58a960d6e34c
children 41ebcc0afb40
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
12492
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
1 ;*****************************************************************************
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
2 ;* MMX/SSE2-optimized H.264 iDCT
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
3 ;*****************************************************************************
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
4 ;* Copyright (C) 2004-2005 Michael Niedermayer, Loren Merritt
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
5 ;* Copyright (C) 2003-2008 x264 project
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
6 ;*
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
7 ;* Authors: Laurent Aimar <fenrir@via.ecp.fr>
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
8 ;* Loren Merritt <lorenm@u.washington.edu>
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
9 ;* Holger Lubitz <hal@duncan.ol.sub.de>
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
10 ;* Min Chen <chenm001.163.com>
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
11 ;*
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
12 ;* This file is part of FFmpeg.
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
13 ;*
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
14 ;* FFmpeg is free software; you can redistribute it and/or
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
15 ;* modify it under the terms of the GNU Lesser General Public
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
16 ;* License as published by the Free Software Foundation; either
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
17 ;* version 2.1 of the License, or (at your option) any later version.
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
18 ;*
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
19 ;* FFmpeg is distributed in the hope that it will be useful,
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
20 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
21 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
22 ;* Lesser General Public License for more details.
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
23 ;*
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
24 ;* You should have received a copy of the GNU Lesser General Public
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
25 ;* License along with FFmpeg; if not, write to the Free Software
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
26 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
27 ;*****************************************************************************
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
28
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
29 %include "x86inc.asm"
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
30 %include "x86util.asm"
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
31
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
32 SECTION_RODATA
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
33
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
34 ; FIXME this table is a duplicate of the one in h264data.h, and will be removed once the tables from h264 have been split
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
35 scan8_mem: db 4+1*8, 5+1*8, 4+2*8, 5+2*8
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
36 db 6+1*8, 7+1*8, 6+2*8, 7+2*8
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
37 db 4+3*8, 5+3*8, 4+4*8, 5+4*8
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
38 db 6+3*8, 7+3*8, 6+4*8, 7+4*8
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
39 db 1+1*8, 2+1*8
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
40 db 1+2*8, 2+2*8
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
41 db 1+4*8, 2+4*8
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
42 db 1+5*8, 2+5*8
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
43 %ifdef PIC
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
44 %define scan8 r11
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
45 %else
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
46 %define scan8 scan8_mem
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
47 %endif
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
48
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
49 cextern pw_32
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
50
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
51 SECTION .text
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
52
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
53 ; %1=uint8_t *dst, %2=int16_t *block, %3=int stride
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
54 %macro IDCT4_ADD 3
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
55 ; Load dct coeffs
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
56 movq m0, [%2]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
57 movq m1, [%2+8]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
58 movq m2, [%2+16]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
59 movq m3, [%2+24]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
60
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
61 IDCT4_1D 0, 1, 2, 3, 4, 5
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
62 mova m6, [pw_32]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
63 TRANSPOSE4x4W 0, 1, 2, 3, 4
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
64 paddw m0, m6
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
65 IDCT4_1D 0, 1, 2, 3, 4, 5
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
66 pxor m7, m7
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
67
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
68 STORE_DIFFx2 m0, m1, m4, m5, m7, 6, %1, %3
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
69 lea %1, [%1+%3*2]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
70 STORE_DIFFx2 m2, m3, m4, m5, m7, 6, %1, %3
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
71 %endmacro
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
72
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
73 INIT_MMX
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
74 ; ff_h264_idct_add_mmx(uint8_t *dst, int16_t *block, int stride)
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
75 cglobal h264_idct_add_mmx, 3, 3, 0
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
76 IDCT4_ADD r0, r1, r2
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
77 RET
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
78
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
79 %macro IDCT8_1D 2
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
80 mova m4, m5
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
81 mova m0, m1
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
82 psraw m4, 1
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
83 psraw m1, 1
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
84 paddw m4, m5
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
85 paddw m1, m0
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
86 paddw m4, m7
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
87 paddw m1, m5
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
88 psubw m4, m0
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
89 paddw m1, m3
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
90
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
91 psubw m0, m3
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
92 psubw m5, m3
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
93 paddw m0, m7
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
94 psubw m5, m7
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
95 psraw m3, 1
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
96 psraw m7, 1
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
97 psubw m0, m3
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
98 psubw m5, m7
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
99
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
100 mova m3, m4
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
101 mova m7, m1
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
102 psraw m1, 2
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
103 psraw m3, 2
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
104 paddw m3, m0
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
105 psraw m0, 2
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
106 paddw m1, m5
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
107 psraw m5, 2
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
108 psubw m0, m4
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
109 psubw m7, m5
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
110
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
111 mova m4, m2
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
112 mova m5, m6
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
113 psraw m4, 1
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
114 psraw m6, 1
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
115 psubw m4, m5
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
116 paddw m6, m2
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
117
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
118 mova m2, %1
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
119 mova m5, %2
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
120 SUMSUB_BA m5, m2
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
121 SUMSUB_BA m6, m5
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
122 SUMSUB_BA m4, m2
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
123 SUMSUB_BA m7, m6
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
124 SUMSUB_BA m0, m4
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
125 SUMSUB_BA m3, m2
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
126 SUMSUB_BA m1, m5
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
127 SWAP 7, 6, 4, 5, 2, 3, 1, 0 ; 70315246 -> 01234567
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
128 %endmacro
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
129
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
130 %macro IDCT8_1D_FULL 1
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
131 mova m7, [%1+112]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
132 mova m6, [%1+ 96]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
133 mova m5, [%1+ 80]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
134 mova m3, [%1+ 48]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
135 mova m2, [%1+ 32]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
136 mova m1, [%1+ 16]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
137 IDCT8_1D [%1], [%1+ 64]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
138 %endmacro
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
139
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
140 ; %1=int16_t *block, %2=int16_t *dstblock
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
141 %macro IDCT8_ADD_MMX_START 2
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
142 IDCT8_1D_FULL %1
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
143 mova [%1], m7
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
144 TRANSPOSE4x4W 0, 1, 2, 3, 7
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
145 mova m7, [%1]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
146 mova [%2 ], m0
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
147 mova [%2+16], m1
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
148 mova [%2+32], m2
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
149 mova [%2+48], m3
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
150 TRANSPOSE4x4W 4, 5, 6, 7, 3
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
151 mova [%2+ 8], m4
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
152 mova [%2+24], m5
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
153 mova [%2+40], m6
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
154 mova [%2+56], m7
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
155 %endmacro
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
156
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
157 ; %1=uint8_t *dst, %2=int16_t *block, %3=int stride
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
158 %macro IDCT8_ADD_MMX_END 3
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
159 IDCT8_1D_FULL %2
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
160 mova [%2 ], m5
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
161 mova [%2+16], m6
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
162 mova [%2+32], m7
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
163
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
164 pxor m7, m7
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
165 STORE_DIFFx2 m0, m1, m5, m6, m7, 6, %1, %3
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
166 lea %1, [%1+%3*2]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
167 STORE_DIFFx2 m2, m3, m5, m6, m7, 6, %1, %3
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
168 mova m0, [%2 ]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
169 mova m1, [%2+16]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
170 mova m2, [%2+32]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
171 lea %1, [%1+%3*2]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
172 STORE_DIFFx2 m4, m0, m5, m6, m7, 6, %1, %3
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
173 lea %1, [%1+%3*2]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
174 STORE_DIFFx2 m1, m2, m5, m6, m7, 6, %1, %3
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
175 %endmacro
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
176
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
177 INIT_MMX
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
178 ; ff_h264_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride)
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
179 cglobal h264_idct8_add_mmx, 3, 4, 0
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
180 %assign pad 128+4-(stack_offset&7)
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
181 SUB rsp, pad
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
182
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
183 add word [r1], 32
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
184 IDCT8_ADD_MMX_START r1 , rsp
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
185 IDCT8_ADD_MMX_START r1+8, rsp+64
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
186 lea r3, [r0+4]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
187 IDCT8_ADD_MMX_END r0 , rsp, r2
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
188 IDCT8_ADD_MMX_END r3 , rsp+8, r2
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
189
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
190 ADD rsp, pad
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
191 RET
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
192
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
193 ; %1=uint8_t *dst, %2=int16_t *block, %3=int stride, %4=scratch register (overwritten with stride*3)
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
194 %macro IDCT8_ADD_SSE 4
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
195 IDCT8_1D_FULL %2
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
196 %ifdef ARCH_X86_64
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
197 TRANSPOSE8x8W 0, 1, 2, 3, 4, 5, 6, 7, 8
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
198 %else
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
199 TRANSPOSE8x8W 0, 1, 2, 3, 4, 5, 6, 7, [%2], [%2+16]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
200 %endif
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
201 paddw m0, [pw_32]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
202
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
203 %ifndef ARCH_X86_64
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
204 mova [%2 ], m0
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
205 mova [%2+16], m4
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
206 IDCT8_1D [%2], [%2+ 16]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
207 mova [%2 ], m6
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
208 mova [%2+16], m7
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
209 %else
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
210 SWAP 0, 8
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
211 SWAP 4, 9
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
212 IDCT8_1D m8, m9
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
213 SWAP 6, 8
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
214 SWAP 7, 9
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
215 %endif
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
216
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
217 pxor m7, m7
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
218 lea %4, [%3*3]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
219 STORE_DIFF m0, m6, m7, [%1 ]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
220 STORE_DIFF m1, m6, m7, [%1+%3 ]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
221 STORE_DIFF m2, m6, m7, [%1+%3*2]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
222 STORE_DIFF m3, m6, m7, [%1+%4 ]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
223 %ifndef ARCH_X86_64
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
224 mova m0, [%2 ]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
225 mova m1, [%2+16]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
226 %else
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
227 SWAP 0, 8
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
228 SWAP 1, 9
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
229 %endif
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
230 lea %1, [%1+%3*4]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
231 STORE_DIFF m4, m6, m7, [%1 ]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
232 STORE_DIFF m5, m6, m7, [%1+%3 ]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
233 STORE_DIFF m0, m6, m7, [%1+%3*2]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
234 STORE_DIFF m1, m6, m7, [%1+%4 ]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
235 %endmacro
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
236
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
237 INIT_XMM
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
238 ; ff_h264_idct8_add_sse2(uint8_t *dst, int16_t *block, int stride)
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
239 cglobal h264_idct8_add_sse2, 3, 4, 10
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
240 IDCT8_ADD_SSE r0, r1, r2, r3
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
241 RET
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
242
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
243 %macro DC_ADD_MMX2_INIT 2-3
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
244 %if %0 == 2
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
245 movsx %1, word [%1]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
246 add %1, 32
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
247 sar %1, 6
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
248 movd m0, %1
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
249 lea %1, [%2*3]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
250 %else
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
251 add %3, 32
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
252 sar %3, 6
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
253 movd m0, %3
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
254 lea %3, [%2*3]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
255 %endif
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
256 pshufw m0, m0, 0
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
257 pxor m1, m1
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
258 psubw m1, m0
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
259 packuswb m0, m0
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
260 packuswb m1, m1
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
261 %endmacro
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
262
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
263 %macro DC_ADD_MMX2_OP 3-4
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
264 %1 m2, [%2 ]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
265 %1 m3, [%2+%3 ]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
266 %1 m4, [%2+%3*2]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
267 %1 m5, [%2+%4 ]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
268 paddusb m2, m0
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
269 paddusb m3, m0
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
270 paddusb m4, m0
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
271 paddusb m5, m0
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
272 psubusb m2, m1
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
273 psubusb m3, m1
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
274 psubusb m4, m1
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
275 psubusb m5, m1
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
276 %1 [%2 ], m2
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
277 %1 [%2+%3 ], m3
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
278 %1 [%2+%3*2], m4
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
279 %1 [%2+%4 ], m5
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
280 %endmacro
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
281
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
282 INIT_MMX
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
283 ; ff_h264_idct_dc_add_mmx2(uint8_t *dst, int16_t *block, int stride)
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
284 cglobal h264_idct_dc_add_mmx2, 3, 3, 0
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
285 DC_ADD_MMX2_INIT r1, r2
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
286 DC_ADD_MMX2_OP movh, r0, r2, r1
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
287 RET
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
288
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
289 ; ff_h264_idct8_dc_add_mmx2(uint8_t *dst, int16_t *block, int stride)
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
290 cglobal h264_idct8_dc_add_mmx2, 3, 3, 0
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
291 DC_ADD_MMX2_INIT r1, r2
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
292 DC_ADD_MMX2_OP mova, r0, r2, r1
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
293 lea r0, [r0+r2*4]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
294 DC_ADD_MMX2_OP mova, r0, r2, r1
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
295 RET
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
296
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
297 ; ff_h264_idct_add16_mmx(uint8_t *dst, const int *block_offset,
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
298 ; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
299 cglobal h264_idct_add16_mmx, 5, 7, 0
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
300 xor r5, r5
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
301 %ifdef PIC
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
302 lea r11, [scan8_mem]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
303 %endif
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
304 .nextblock
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
305 movzx r6, byte [scan8+r5]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
306 movzx r6, byte [r4+r6]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
307 test r6, r6
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
308 jz .skipblock
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
309 mov r6d, dword [r1+r5*4]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
310 lea r6, [r0+r6]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
311 IDCT4_ADD r6, r2, r3
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
312 .skipblock
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
313 inc r5
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
314 add r2, 32
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
315 cmp r5, 16
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
316 jl .nextblock
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
317 REP_RET
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
318
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
319 ; ff_h264_idct8_add4_mmx(uint8_t *dst, const int *block_offset,
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
320 ; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
321 cglobal h264_idct8_add4_mmx, 5, 7, 0
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
322 %assign pad 128+4-(stack_offset&7)
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
323 SUB rsp, pad
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
324
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
325 xor r5, r5
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
326 %ifdef PIC
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
327 lea r11, [scan8_mem]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
328 %endif
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
329 .nextblock
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
330 movzx r6, byte [scan8+r5]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
331 movzx r6, byte [r4+r6]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
332 test r6, r6
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
333 jz .skipblock
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
334 mov r6d, dword [r1+r5*4]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
335 lea r6, [r0+r6]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
336 add word [r2], 32
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
337 IDCT8_ADD_MMX_START r2 , rsp
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
338 IDCT8_ADD_MMX_START r2+8, rsp+64
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
339 IDCT8_ADD_MMX_END r6 , rsp, r3
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
340 mov r6d, dword [r1+r5*4]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
341 lea r6, [r0+r6+4]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
342 IDCT8_ADD_MMX_END r6 , rsp+8, r3
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
343 .skipblock
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
344 add r5, 4
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
345 add r2, 128
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
346 cmp r5, 16
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
347 jl .nextblock
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
348 ADD rsp, pad
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
349 RET
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
350
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
351 ; ff_h264_idct_add16_mmx2(uint8_t *dst, const int *block_offset,
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
352 ; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
353 cglobal h264_idct_add16_mmx2, 5, 7, 0
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
354 xor r5, r5
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
355 %ifdef PIC
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
356 lea r11, [scan8_mem]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
357 %endif
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
358 .nextblock
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
359 movzx r6, byte [scan8+r5]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
360 movzx r6, byte [r4+r6]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
361 test r6, r6
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
362 jz .skipblock
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
363 cmp r6, 1
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
364 jnz .no_dc
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
365 movsx r6, word [r2]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
366 test r6, r6
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
367 jz .no_dc
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
368 DC_ADD_MMX2_INIT r2, r3, r6
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
369 %ifdef ARCH_X86_64
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
370 %define dst_reg r10
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
371 %define dst_regd r10d
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
372 %else
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
373 %define dst_reg r1
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
374 %define dst_regd r1d
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
375 %endif
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
376 mov dst_regd, dword [r1+r5*4]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
377 lea dst_reg, [r0+dst_reg]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
378 DC_ADD_MMX2_OP movh, dst_reg, r3, r6
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
379 %ifndef ARCH_X86_64
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
380 mov r1, r1m
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
381 %endif
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
382 inc r5
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
383 add r2, 32
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
384 cmp r5, 16
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
385 jl .nextblock
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
386 REP_RET
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
387 .no_dc
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
388 mov r6d, dword [r1+r5*4]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
389 lea r6, [r0+r6]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
390 IDCT4_ADD r6, r2, r3
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
391 .skipblock
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
392 inc r5
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
393 add r2, 32
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
394 cmp r5, 16
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
395 jl .nextblock
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
396 REP_RET
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
397
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
398 ; ff_h264_idct_add16intra_mmx(uint8_t *dst, const int *block_offset,
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
399 ; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
400 cglobal h264_idct_add16intra_mmx, 5, 7, 0
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
401 xor r5, r5
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
402 %ifdef PIC
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
403 lea r11, [scan8_mem]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
404 %endif
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
405 .nextblock
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
406 movzx r6, byte [scan8+r5]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
407 movzx r6, byte [r4+r6]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
408 or r6w, word [r2]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
409 test r6, r6
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
410 jz .skipblock
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
411 mov r6d, dword [r1+r5*4]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
412 lea r6, [r0+r6]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
413 IDCT4_ADD r6, r2, r3
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
414 .skipblock
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
415 inc r5
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
416 add r2, 32
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
417 cmp r5, 16
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
418 jl .nextblock
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
419 REP_RET
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
420
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
421 ; ff_h264_idct_add16intra_mmx2(uint8_t *dst, const int *block_offset,
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
422 ; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
423 cglobal h264_idct_add16intra_mmx2, 5, 7, 0
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
424 xor r5, r5
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
425 %ifdef PIC
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
426 lea r11, [scan8_mem]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
427 %endif
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
428 .nextblock
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
429 movzx r6, byte [scan8+r5]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
430 movzx r6, byte [r4+r6]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
431 test r6, r6
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
432 jz .try_dc
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
433 mov r6d, dword [r1+r5*4]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
434 lea r6, [r0+r6]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
435 IDCT4_ADD r6, r2, r3
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
436 inc r5
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
437 add r2, 32
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
438 cmp r5, 16
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
439 jl .nextblock
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
440 REP_RET
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
441 .try_dc
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
442 movsx r6, word [r2]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
443 test r6, r6
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
444 jz .skipblock
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
445 DC_ADD_MMX2_INIT r2, r3, r6
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
446 %ifdef ARCH_X86_64
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
447 %define dst_reg r10
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
448 %define dst_regd r10d
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
449 %else
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
450 %define dst_reg r1
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
451 %define dst_regd r1d
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
452 %endif
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
453 mov dst_regd, dword [r1+r5*4]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
454 lea dst_reg, [r0+dst_reg]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
455 DC_ADD_MMX2_OP movh, dst_reg, r3, r6
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
456 %ifndef ARCH_X86_64
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
457 mov r1, r1m
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
458 %endif
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
459 .skipblock
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
460 inc r5
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
461 add r2, 32
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
462 cmp r5, 16
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
463 jl .nextblock
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
464 REP_RET
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
465
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
466 ; ff_h264_idct8_add4_mmx2(uint8_t *dst, const int *block_offset,
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
467 ; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
468 cglobal h264_idct8_add4_mmx2, 5, 7, 0
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
469 %assign pad 128+4-(stack_offset&7)
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
470 SUB rsp, pad
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
471
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
472 xor r5, r5
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
473 %ifdef PIC
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
474 lea r11, [scan8_mem]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
475 %endif
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
476 .nextblock
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
477 movzx r6, byte [scan8+r5]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
478 movzx r6, byte [r4+r6]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
479 test r6, r6
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
480 jz .skipblock
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
481 cmp r6, 1
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
482 jnz .no_dc
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
483 movsx r6, word [r2]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
484 test r6, r6
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
485 jz .no_dc
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
486 DC_ADD_MMX2_INIT r2, r3, r6
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
487 %ifdef ARCH_X86_64
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
488 %define dst_reg r10
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
489 %define dst_regd r10d
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
490 %else
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
491 %define dst_reg r1
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
492 %define dst_regd r1d
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
493 %endif
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
494 mov dst_regd, dword [r1+r5*4]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
495 lea dst_reg, [r0+dst_reg]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
496 DC_ADD_MMX2_OP mova, dst_reg, r3, r6
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
497 lea dst_reg, [dst_reg+r3*4]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
498 DC_ADD_MMX2_OP mova, dst_reg, r3, r6
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
499 %ifndef ARCH_X86_64
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
500 mov r1, r1m
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
501 %endif
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
502 add r5, 4
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
503 add r2, 128
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
504 cmp r5, 16
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
505 jl .nextblock
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
506
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
507 ADD rsp, pad
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
508 RET
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
509 .no_dc
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
510 mov r6d, dword [r1+r5*4]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
511 lea r6, [r0+r6]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
512 add word [r2], 32
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
513 IDCT8_ADD_MMX_START r2 , rsp
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
514 IDCT8_ADD_MMX_START r2+8, rsp+64
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
515 IDCT8_ADD_MMX_END r6 , rsp, r3
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
516 mov r6d, dword [r1+r5*4]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
517 lea r6, [r0+r6+4]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
518 IDCT8_ADD_MMX_END r6 , rsp+8, r3
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
519 .skipblock
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
520 add r5, 4
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
521 add r2, 128
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
522 cmp r5, 16
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
523 jl .nextblock
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
524
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
525 ADD rsp, pad
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
526 RET
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
527
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
528 INIT_XMM
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
529 ; ff_h264_idct8_add4_sse2(uint8_t *dst, const int *block_offset,
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
530 ; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
531 cglobal h264_idct8_add4_sse2, 5, 7, 10
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
532 xor r5, r5
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
533 %ifdef PIC
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
534 lea r11, [scan8_mem]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
535 %endif
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
536 .nextblock
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
537 movzx r6, byte [scan8+r5]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
538 movzx r6, byte [r4+r6]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
539 test r6, r6
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
540 jz .skipblock
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
541 cmp r6, 1
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
542 jnz .no_dc
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
543 movsx r6, word [r2]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
544 test r6, r6
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
545 jz .no_dc
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
546 INIT_MMX
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
547 DC_ADD_MMX2_INIT r2, r3, r6
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
548 %ifdef ARCH_X86_64
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
549 %define dst_reg r10
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
550 %define dst_regd r10d
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
551 %else
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
552 %define dst_reg r1
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
553 %define dst_regd r1d
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
554 %endif
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
555 mov dst_regd, dword [r1+r5*4]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
556 lea dst_reg, [r0+dst_reg]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
557 DC_ADD_MMX2_OP mova, dst_reg, r3, r6
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
558 lea dst_reg, [dst_reg+r3*4]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
559 DC_ADD_MMX2_OP mova, dst_reg, r3, r6
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
560 %ifndef ARCH_X86_64
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
561 mov r1, r1m
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
562 %endif
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
563 add r5, 4
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
564 add r2, 128
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
565 cmp r5, 16
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
566 jl .nextblock
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
567 REP_RET
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
568 .no_dc
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
569 INIT_XMM
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
570 mov dst_regd, dword [r1+r5*4]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
571 lea dst_reg, [r0+dst_reg]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
572 IDCT8_ADD_SSE dst_reg, r2, r3, r6
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
573 %ifndef ARCH_X86_64
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
574 mov r1, r1m
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
575 %endif
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
576 .skipblock
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
577 add r5, 4
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
578 add r2, 128
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
579 cmp r5, 16
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
580 jl .nextblock
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
581 REP_RET
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
582
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
583 INIT_MMX
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
584 h264_idct_add8_mmx_plane:
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
585 .nextblock
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
586 movzx r6, byte [scan8+r5]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
587 movzx r6, byte [r4+r6]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
588 or r6w, word [r2]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
589 test r6, r6
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
590 jz .skipblock
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
591 %ifdef ARCH_X86_64
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
592 mov r0d, dword [r1+r5*4]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
593 add r0, [r10]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
594 %else
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
595 mov r0, r1m ; XXX r1m here is actually r0m of the calling func
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
596 mov r0, [r0]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
597 add r0, dword [r1+r5*4]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
598 %endif
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
599 IDCT4_ADD r0, r2, r3
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
600 .skipblock
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
601 inc r5
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
602 add r2, 32
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
603 test r5, 3
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
604 jnz .nextblock
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
605 rep ret
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
606
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
607 ; ff_h264_idct_add8_mmx(uint8_t **dest, const int *block_offset,
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
608 ; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
609 cglobal h264_idct_add8_mmx, 5, 7, 0
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
610 mov r5, 16
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
611 add r2, 512
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
612 %ifdef PIC
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
613 lea r11, [scan8_mem]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
614 %endif
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
615 %ifdef ARCH_X86_64
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
616 mov r10, r0
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
617 %endif
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
618 call h264_idct_add8_mmx_plane
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
619 %ifdef ARCH_X86_64
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
620 add r10, gprsize
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
621 %else
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
622 add r0mp, gprsize
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
623 %endif
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
624 call h264_idct_add8_mmx_plane
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
625 RET
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
626
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
627 h264_idct_add8_mmx2_plane
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
628 .nextblock
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
629 movzx r6, byte [scan8+r5]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
630 movzx r6, byte [r4+r6]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
631 test r6, r6
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
632 jz .try_dc
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
633 %ifdef ARCH_X86_64
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
634 mov r0d, dword [r1+r5*4]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
635 add r0, [r10]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
636 %else
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
637 mov r0, r1m ; XXX r1m here is actually r0m of the calling func
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
638 mov r0, [r0]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
639 add r0, dword [r1+r5*4]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
640 %endif
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
641 IDCT4_ADD r0, r2, r3
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
642 inc r5
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
643 add r2, 32
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
644 test r5, 3
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
645 jnz .nextblock
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
646 rep ret
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
647 .try_dc
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
648 movsx r6, word [r2]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
649 test r6, r6
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
650 jz .skipblock
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
651 DC_ADD_MMX2_INIT r2, r3, r6
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
652 %ifdef ARCH_X86_64
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
653 mov r0d, dword [r1+r5*4]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
654 add r0, [r10]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
655 %else
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
656 mov r0, r1m ; XXX r1m here is actually r0m of the calling func
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
657 mov r0, [r0]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
658 add r0, dword [r1+r5*4]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
659 %endif
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
660 DC_ADD_MMX2_OP movh, r0, r3, r6
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
661 .skipblock
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
662 inc r5
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
663 add r2, 32
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
664 test r5, 3
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
665 jnz .nextblock
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
666 rep ret
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
667
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
668 ; ff_h264_idct_add8_mmx2(uint8_t **dest, const int *block_offset,
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
669 ; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
670 cglobal h264_idct_add8_mmx2, 5, 7, 0
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
671 mov r5, 16
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
672 add r2, 512
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
673 %ifdef ARCH_X86_64
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
674 mov r10, r0
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
675 %endif
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
676 %ifdef PIC
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
677 lea r11, [scan8_mem]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
678 %endif
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
679 call h264_idct_add8_mmx2_plane
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
680 %ifdef ARCH_X86_64
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
681 add r10, gprsize
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
682 %else
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
683 add r0mp, gprsize
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
684 %endif
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
685 call h264_idct_add8_mmx2_plane
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
686 RET
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
687
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
; h264_idct_dc_add8_mmx2: internal helper (reached via "call", ends in a plain "ret").
; Takes the 16-bit words at [r2] and [r2+32], adds [pw_32], shifts right by 6, and
; splats each result over four byte lanes: the value in m0, its negation in m1
; (packuswb clamps both to 0..255). m0/m1 are then handed to DC_ADD_MMX2_OP.
; NOTE(review): DC_ADD_MMX2_OP and pw_32 are defined outside this view; presumably
; pw_32 is packed words of 32 and the macro adds m0 / subtracts m1 on rows of dst - confirm.
688 INIT_MMX
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
689 ; r0 = uint8_t *dst, r2 = int16_t *block, r3 = int stride, r6=clobbered
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
690 h264_idct_dc_add8_mmx2:
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
691 movd m0, [r2 ] ; 0 0 X D (D = word at [r2], X = the word after it, unused)
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
692 punpcklwd m0, [r2+32] ; x X d D (d = word at [r2+32], interleaved next to D)
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
693 paddsw m0, [pw_32] ; rounding term, signed saturating add
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
694 psraw m0, 6 ; arithmetic shift: each word is now (value + [pw_32]) >> 6
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
695 punpcklwd m0, m0 ; d d D D
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
696 pxor m1, m1 ; 0 0 0 0
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
697 psubw m1, m0 ; -d-d-D-D
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
698 packuswb m0, m1 ; -d-d-D-D d d D D (unsigned saturation: negative words become 0)
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
699 pshufw m1, m0, 0xFA ; -d-d-d-d-D-D-D-D (0xFA selects words 2,2,3,3 = the negated half)
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
700 punpcklwd m0, m0 ; d d d d D D D D
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
701 lea r6, [r3*3] ; r6 = 3 * stride, passed to the macro below (this is the r6 clobber)
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
702 DC_ADD_MMX2_OP movq, r0, r3, r6 ; macro defined outside this view; consumes m0/m1
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
703 ret
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
704
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
; x264_add8x4_idct_sse2: internal helper (reached via "call", ends in a plain "ret").
; Loads 64 bytes of coefficients from r2: the low qword of m0..m3 comes from
; [r2+0..24] and the high qword from [r2+32..56], so each register pairs one 8-byte
; row from each 32-byte half. It then runs IDCT4_1D, a transpose, a rounding add and
; a second IDCT4_1D, and writes the result through STORE_DIFFx2 in two steps.
; NOTE(review): IDCT4_1D, TRANSPOSE2x4x4W and STORE_DIFFx2 are defined outside this
; view; presumably this transforms two adjacent 4x4 blocks side by side - confirm.
705 ALIGN 16
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
706 INIT_XMM
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
707 ; r0 = uint8_t *dst (clobbered), r2 = int16_t *block, r3 = int stride
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
708 x264_add8x4_idct_sse2:
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
709 movq m0, [r2+ 0] ; low qwords: four 8-byte rows of the first 32-byte half
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
710 movq m1, [r2+ 8]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
711 movq m2, [r2+16]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
712 movq m3, [r2+24]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
713 movhps m0, [r2+32] ; high qwords: matching rows of the second 32-byte half
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
714 movhps m1, [r2+40]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
715 movhps m2, [r2+48]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
716 movhps m3, [r2+56]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
717 IDCT4_1D 0,1,2,3,4,5 ; first 1-D pass on m0..m3 (m4, m5 passed as extra registers)
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
718 TRANSPOSE2x4x4W 0,1,2,3,4 ; presumably transposes both 4x4 word matrices at once - confirm
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
719 paddw m0, [pw_32] ; rounding term added to m0 only, before the second pass
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
720 IDCT4_1D 0,1,2,3,4,5 ; second 1-D pass
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
721 pxor m7, m7 ; m7 = 0, passed to STORE_DIFFx2 below
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
722 STORE_DIFFx2 m0, m1, m4, m5, m7, 6, r0, r3 ; presumably >>6, add to two dst rows, store - confirm
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
723 lea r0, [r0+r3*2] ; advance dst by two strides (this is why r0 is clobbered)
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
724 STORE_DIFFx2 m2, m3, m4, m5, m7, 6, r0, r3 ; same for m2/m3 at the advanced dst
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
725 ret
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
726
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
; add16_sse2_cycle <cycle index 0..7>, <byte offset into the table at r4>
; One unrolled iteration: if the 16-bit word at [r4+%2] is zero the IDCT call is
; skipped; otherwise r0 = dst base + the dword at [r1+%1*8] and
; x264_add8x4_idct_sse2 is called. r2 then advances by 64 bytes, except after
; cycle 7. Expects the dst base in r10 (x86-64) or in the r0m argument slot
; (x86-32). Overwrites r0 and whatever the callee clobbers.
727 %macro add16_sse2_cycle 2
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
728 movzx r0, word [r4+%2] ; two adjacent bytes of the r4 table, tested together
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
729 test r0, r0
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
730 jz .cycle%1end ; both bytes zero: nothing to add for this pair
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
731 mov r0d, dword [r1+%1*8] ; every second 4-byte entry of the r1 table (entry 2 * %1)
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
732 %ifdef ARCH_X86_64
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
733 add r0, r10 ; r10 = dst base saved by the invoking function
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
734 %else
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
735 add r0, r0m ; r0m: presumably the in-memory copy of argument 0 (x86inc) - confirm
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
736 %endif
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
737 call x264_add8x4_idct_sse2
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
738 .cycle%1end
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
739 %if %1 < 7
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
740 add r2, 64 ; next 64 bytes of coefficients; omitted after the last cycle
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
741 %endif
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
742 %endmacro
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
743
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
; Fully unrolled: eight add16_sse2_cycle expansions, one per 64 bytes of "block".
; Each cycle calls x264_add8x4_idct_sse2 only when its 16-bit word in the fifth
; argument's table is non-zero. Arguments arrive in r0..r4 in signature order.
744 ; ff_h264_idct_add16_sse2(uint8_t *dst, const int *block_offset,
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
745 ; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
746 cglobal h264_idct_add16_sse2, 5, 5, 8
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
747 %ifdef ARCH_X86_64
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
748 mov r10, r0 ; keep the dst base: every cycle overwrites r0
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
749 %endif
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
750 ; unrolling of the loop leads to an average performance gain of
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
751 ; 20-25%
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
752 add16_sse2_cycle 0, 0xc ; 2nd arg = byte offset into nnzc; presumably inlined scan8[] values - confirm
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
753 add16_sse2_cycle 1, 0x14
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
754 add16_sse2_cycle 2, 0xe
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
755 add16_sse2_cycle 3, 0x16
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
756 add16_sse2_cycle 4, 0x1c
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
757 add16_sse2_cycle 5, 0x24
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
758 add16_sse2_cycle 6, 0x1e
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
759 add16_sse2_cycle 7, 0x26
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
760 RET
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
761
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
; Loop over r5 = 0, 2, ..., 14. Per iteration: look up scan8[r5], test the 16-bit
; word at nnzc + that offset; if non-zero call x264_add8x4_idct_sse2, otherwise
; fall back to h264_idct_dc_add8_mmx2 when either word at [r2] / [r2+32] is
; non-zero, else do nothing. r2 advances 64 bytes per iteration. The loop tail is
; duplicated after each path, so there are two exits (both REP_RET).
762 ; ff_h264_idct_add16intra_sse2(uint8_t *dst, const int *block_offset,
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
763 ; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
764 cglobal h264_idct_add16intra_sse2, 5, 7, 8
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
765 xor r5, r5 ; r5 = loop index, stepped by 2 up to 16
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
766 %ifdef ARCH_X86_64
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
767 mov r10, r0 ; keep the dst base: r0 is reused as scratch below
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
768 %endif
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
769 %ifdef PIC
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
770 lea r11, [scan8_mem] ; NOTE(review): r11 is not named again here; presumably "scan8" expands to an r11-based address under PIC - confirm
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
771 %endif
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
772 .next2blocks
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
773 movzx r0, byte [scan8+r5] ; r0 = scan8[r5], used as a byte offset into nnzc
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
774 movzx r0, word [r4+r0] ; two adjacent nnzc bytes, tested together
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
775 test r0, r0
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
776 jz .try_dc ; both zero: check the DC-only fallback instead
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
777 mov r0d, dword [r1+r5*4] ; block_offset[r5]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
778 %ifdef ARCH_X86_64
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
779 add r0, r10 ; r0 = dst base + offset
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
780 %else
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
781 add r0, r0m ; x86-32: dst base read back from the argument slot
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
782 %endif
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
783 call x264_add8x4_idct_sse2
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
784 add r5, 2 ; loop tail, copy 1 (full-IDCT path)
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
785 add r2, 64
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
786 cmp r5, 16
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
787 jl .next2blocks
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
788 REP_RET
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
789 .try_dc
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
790 movsx r0, word [r2 ] ; first word of the first 32-byte half
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
791 or r0w, word [r2+32] ; OR with first word of the second half; ZF reflects the 16-bit result
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
792 jz .skip2blocks ; both zero: nothing to add
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
793 mov r0d, dword [r1+r5*4] ; block_offset[r5]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
794 %ifdef ARCH_X86_64
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
795 add r0, r10
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
796 %else
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
797 add r0, r0m
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
798 %endif
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
799 call h264_idct_dc_add8_mmx2 ; clobbers r6, hence 7 GPRs in the cglobal line
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
800 .skip2blocks
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
801 add r5, 2 ; loop tail, copy 2 (DC / skip path)
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
802 add r2, 64
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
803 cmp r5, 16
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
804 jl .next2blocks
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
805 REP_RET
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
806
12510
ef2f2db5b7be Unroll loop in h264_idct_add8_sse2(). This means we can inline scan8[] in the
rbultje
parents: 12492
diff changeset
; add8_sse2_cycle <cycle index 0..3>, <byte offset into the table at r4>
; One unrolled iteration for h264_idct_add8_sse2. If the 16-bit word at [r4+%2]
; is non-zero, call x264_add8x4_idct_sse2; otherwise, if either word at [r2] /
; [r2+32] is non-zero, call h264_idct_dc_add8_mmx2; otherwise do nothing.
; The destination is (pointer loaded from the dest array) + dword [r1+%1*8+64].
; The dest array pointer is expected in r10 (x86-64) or in the r0m argument slot
; (x86-32). r2 advances by 64 bytes except after cycle 3.
807 %macro add8_sse2_cycle 2
ef2f2db5b7be Unroll loop in h264_idct_add8_sse2(). This means we can inline scan8[] in the
rbultje
parents: 12492
diff changeset
808 movzx r0, word [r4+%2] ; two adjacent bytes of the r4 table, tested together
12492
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
809 test r0, r0
12510
ef2f2db5b7be Unroll loop in h264_idct_add8_sse2(). This means we can inline scan8[] in the
rbultje
parents: 12492
diff changeset
810 jz .try%1dc ; both zero: check the DC-only fallback
12492
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
811 %ifdef ARCH_X86_64
12510
ef2f2db5b7be Unroll loop in h264_idct_add8_sse2(). This means we can inline scan8[] in the
rbultje
parents: 12492
diff changeset
812 mov r0d, dword [r1+%1*8+64] ; 4-byte entry 16 + 2 * %1 of the r1 table (64 = 16 * 4 bytes)
12492
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
813 add r0, [r10] ; r10 points into the dest pointer array; add the plane pointer it holds
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
814 %else
12510
ef2f2db5b7be Unroll loop in h264_idct_add8_sse2(). This means we can inline scan8[] in the
rbultje
parents: 12492
diff changeset
815 mov r0, r0m ; x86-32: reload the dest array pointer from the argument slot
12492
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
816 mov r0, [r0] ; dereference it to get the plane pointer
12510
ef2f2db5b7be Unroll loop in h264_idct_add8_sse2(). This means we can inline scan8[] in the
rbultje
parents: 12492
diff changeset
817 add r0, dword [r1+%1*8+64]
12492
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
818 %endif
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
819 call x264_add8x4_idct_sse2
12510
ef2f2db5b7be Unroll loop in h264_idct_add8_sse2(). This means we can inline scan8[] in the
rbultje
parents: 12492
diff changeset
820 jmp .cycle%1end ; skip over the DC-only path
ef2f2db5b7be Unroll loop in h264_idct_add8_sse2(). This means we can inline scan8[] in the
rbultje
parents: 12492
diff changeset
821 .try%1dc
12492
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
822 movsx r0, word [r2 ] ; first word of the first 32-byte half
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
823 or r0w, word [r2+32] ; OR with first word of the second half; ZF reflects the 16-bit result
12510
ef2f2db5b7be Unroll loop in h264_idct_add8_sse2(). This means we can inline scan8[] in the
rbultje
parents: 12492
diff changeset
824 jz .cycle%1end ; both zero: nothing to add
12492
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
825 %ifdef ARCH_X86_64
12510
ef2f2db5b7be Unroll loop in h264_idct_add8_sse2(). This means we can inline scan8[] in the
rbultje
parents: 12492
diff changeset
826 mov r0d, dword [r1+%1*8+64] ; same destination computation as the full-IDCT path
12492
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
827 add r0, [r10]
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
828 %else
12510
ef2f2db5b7be Unroll loop in h264_idct_add8_sse2(). This means we can inline scan8[] in the
rbultje
parents: 12492
diff changeset
829 mov r0, r0m
12492
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
830 mov r0, [r0]
12510
ef2f2db5b7be Unroll loop in h264_idct_add8_sse2(). This means we can inline scan8[] in the
rbultje
parents: 12492
diff changeset
831 add r0, dword [r1+%1*8+64]
12492
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
832 %endif
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
833 call h264_idct_dc_add8_mmx2 ; clobbers r6
12510
ef2f2db5b7be Unroll loop in h264_idct_add8_sse2(). This means we can inline scan8[] in the
rbultje
parents: 12492
diff changeset
834 .cycle%1end
ef2f2db5b7be Unroll loop in h264_idct_add8_sse2(). This means we can inline scan8[] in the
rbultje
parents: 12492
diff changeset
835 %if %1 < 3
12492
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
836 add r2, 64 ; next 64 bytes of coefficients; omitted after the last cycle
12510
ef2f2db5b7be Unroll loop in h264_idct_add8_sse2(). This means we can inline scan8[] in the
rbultje
parents: 12492
diff changeset
837 %endif
ef2f2db5b7be Unroll loop in h264_idct_add8_sse2(). This means we can inline scan8[] in the
rbultje
parents: 12492
diff changeset
838 %endmacro
12492
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
839
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
; Fully unrolled: four add8_sse2_cycle expansions. Cycles 0 and 1 use the first
; pointer in the dest array; the array pointer is then advanced by one entry and
; cycles 2 and 3 use the second pointer. Note that dest is uint8_t ** here,
; unlike the add16 variants. Arguments arrive in r0..r4 in signature order.
840 ; ff_h264_idct_add8_sse2(uint8_t **dest, const int *block_offset,
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
841 ; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
842 cglobal h264_idct_add8_sse2, 5, 7, 8
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
843 add r2, 512 ; start 512 bytes into block; presumably skips 16 blocks of 16 int16 coefficients - confirm
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
844 %ifdef ARCH_X86_64
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
845 mov r10, r0 ; r10 = dest array pointer; r0 is scratch inside every cycle
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
846 %endif
12510
ef2f2db5b7be Unroll loop in h264_idct_add8_sse2(). This means we can inline scan8[] in the
rbultje
parents: 12492
diff changeset
847 add8_sse2_cycle 0, 0x09 ; 2nd arg = byte offset into nnzc (scan8[] value inlined, per the changeset message)
ef2f2db5b7be Unroll loop in h264_idct_add8_sse2(). This means we can inline scan8[] in the
rbultje
parents: 12492
diff changeset
848 add8_sse2_cycle 1, 0x11
12492
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
849 %ifdef ARCH_X86_64
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
850 add r10, gprsize ; step to the next pointer in the dest array (gprsize: presumably pointer size - confirm)
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
851 %else
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
852 add r0mp, gprsize ; x86-32: same step, applied to the argument slot in memory
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
853 %endif
12510
ef2f2db5b7be Unroll loop in h264_idct_add8_sse2(). This means we can inline scan8[] in the
rbultje
parents: 12492
diff changeset
854 add8_sse2_cycle 2, 0x21
ef2f2db5b7be Unroll loop in h264_idct_add8_sse2(). This means we can inline scan8[] in the
rbultje
parents: 12492
diff changeset
855 add8_sse2_cycle 3, 0x29
12492
58a960d6e34c Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff changeset
856 RET