Mercurial > libavcodec.hg
annotate x86/h264_idct.asm @ 12495:fac8063ed1e7 libavcodec
Allow float values for libmp3lame quality.
Patch by James Darnley, james D darnley A gmail
author | cehoyos |
---|---|
date | Wed, 15 Sep 2010 22:10:13 +0000 |
parents | 58a960d6e34c |
children | ef2f2db5b7be |
rev | line source |
---|---|
12492
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
1 ;***************************************************************************** |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
2 ;* MMX/SSE2-optimized H.264 iDCT |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
3 ;***************************************************************************** |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
4 ;* Copyright (C) 2004-2005 Michael Niedermayer, Loren Merritt |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
5 ;* Copyright (C) 2003-2008 x264 project |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
6 ;* |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
7 ;* Authors: Laurent Aimar <fenrir@via.ecp.fr> |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
8 ;* Loren Merritt <lorenm@u.washington.edu> |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
9 ;* Holger Lubitz <hal@duncan.ol.sub.de> |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
10 ;* Min Chen <chenm001.163.com> |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
11 ;* |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
12 ;* This file is part of FFmpeg. |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
13 ;* |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
14 ;* FFmpeg is free software; you can redistribute it and/or |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
15 ;* modify it under the terms of the GNU Lesser General Public |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
16 ;* License as published by the Free Software Foundation; either |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
17 ;* version 2.1 of the License, or (at your option) any later version. |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
18 ;* |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
19 ;* FFmpeg is distributed in the hope that it will be useful, |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
20 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
21 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
22 ;* Lesser General Public License for more details. |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
23 ;* |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
24 ;* You should have received a copy of the GNU Lesser General Public |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
25 ;* License along with FFmpeg; if not, write to the Free Software |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
26 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
27 ;***************************************************************************** |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
28 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
29 %include "x86inc.asm" |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
30 %include "x86util.asm" |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
31 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
32 SECTION_RODATA |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
33 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
34 ; FIXME this table is a duplicate of the one in h264data.h, and will be removed once the tables from h264 have been split |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
35 scan8_mem: db 4+1*8, 5+1*8, 4+2*8, 5+2*8 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
36 db 6+1*8, 7+1*8, 6+2*8, 7+2*8 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
37 db 4+3*8, 5+3*8, 4+4*8, 5+4*8 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
38 db 6+3*8, 7+3*8, 6+4*8, 7+4*8 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
39 db 1+1*8, 2+1*8 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
40 db 1+2*8, 2+2*8 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
41 db 1+4*8, 2+4*8 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
42 db 1+5*8, 2+5*8 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
43 %ifdef PIC |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
44 %define scan8 r11 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
45 %else |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
46 %define scan8 scan8_mem |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
47 %endif |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
48 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
49 cextern pw_32 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
50 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
51 SECTION .text |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
52 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
53 ; %1=uint8_t *dst, %2=int16_t *block, %3=int stride |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
54 %macro IDCT4_ADD 3 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
55 ; Load dct coeffs |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
56 movq m0, [%2] |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
57 movq m1, [%2+8] |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
58 movq m2, [%2+16] |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
59 movq m3, [%2+24] |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
60 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
61 IDCT4_1D 0, 1, 2, 3, 4, 5 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
62 mova m6, [pw_32] |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
63 TRANSPOSE4x4W 0, 1, 2, 3, 4 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
64 paddw m0, m6 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
65 IDCT4_1D 0, 1, 2, 3, 4, 5 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
66 pxor m7, m7 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
67 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
68 STORE_DIFFx2 m0, m1, m4, m5, m7, 6, %1, %3 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
69 lea %1, [%1+%3*2] |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
70 STORE_DIFFx2 m2, m3, m4, m5, m7, 6, %1, %3 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
71 %endmacro |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
72 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
73 INIT_MMX |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
74 ; ff_h264_idct_add_mmx(uint8_t *dst, int16_t *block, int stride) |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
75 cglobal h264_idct_add_mmx, 3, 3, 0 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
76 IDCT4_ADD r0, r1, r2 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
77 RET |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
78 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
79 %macro IDCT8_1D 2 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
80 mova m4, m5 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
81 mova m0, m1 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
82 psraw m4, 1 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
83 psraw m1, 1 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
84 paddw m4, m5 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
85 paddw m1, m0 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
86 paddw m4, m7 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
87 paddw m1, m5 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
88 psubw m4, m0 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
89 paddw m1, m3 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
90 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
91 psubw m0, m3 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
92 psubw m5, m3 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
93 paddw m0, m7 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
94 psubw m5, m7 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
95 psraw m3, 1 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
96 psraw m7, 1 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
97 psubw m0, m3 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
98 psubw m5, m7 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
99 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
100 mova m3, m4 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
101 mova m7, m1 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
102 psraw m1, 2 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
103 psraw m3, 2 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
104 paddw m3, m0 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
105 psraw m0, 2 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
106 paddw m1, m5 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
107 psraw m5, 2 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
108 psubw m0, m4 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
109 psubw m7, m5 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
110 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
111 mova m4, m2 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
112 mova m5, m6 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
113 psraw m4, 1 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
114 psraw m6, 1 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
115 psubw m4, m5 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
116 paddw m6, m2 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
117 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
118 mova m2, %1 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
119 mova m5, %2 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
120 SUMSUB_BA m5, m2 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
121 SUMSUB_BA m6, m5 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
122 SUMSUB_BA m4, m2 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
123 SUMSUB_BA m7, m6 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
124 SUMSUB_BA m0, m4 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
125 SUMSUB_BA m3, m2 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
126 SUMSUB_BA m1, m5 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
127 SWAP 7, 6, 4, 5, 2, 3, 1, 0 ; 70315246 -> 01234567 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
128 %endmacro |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
129 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
130 %macro IDCT8_1D_FULL 1 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
131 mova m7, [%1+112] |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
132 mova m6, [%1+ 96] |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
133 mova m5, [%1+ 80] |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
134 mova m3, [%1+ 48] |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
135 mova m2, [%1+ 32] |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
136 mova m1, [%1+ 16] |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
137 IDCT8_1D [%1], [%1+ 64] |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
138 %endmacro |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
139 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
140 ; %1=int16_t *block, %2=int16_t *dstblock |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
141 %macro IDCT8_ADD_MMX_START 2 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
142 IDCT8_1D_FULL %1 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
143 mova [%1], m7 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
144 TRANSPOSE4x4W 0, 1, 2, 3, 7 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
145 mova m7, [%1] |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
146 mova [%2 ], m0 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
147 mova [%2+16], m1 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
148 mova [%2+32], m2 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
149 mova [%2+48], m3 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
150 TRANSPOSE4x4W 4, 5, 6, 7, 3 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
151 mova [%2+ 8], m4 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
152 mova [%2+24], m5 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
153 mova [%2+40], m6 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
154 mova [%2+56], m7 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
155 %endmacro |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
156 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
157 ; %1=uint8_t *dst, %2=int16_t *block, %3=int stride |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
158 %macro IDCT8_ADD_MMX_END 3 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
159 IDCT8_1D_FULL %2 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
160 mova [%2 ], m5 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
161 mova [%2+16], m6 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
162 mova [%2+32], m7 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
163 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
164 pxor m7, m7 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
165 STORE_DIFFx2 m0, m1, m5, m6, m7, 6, %1, %3 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
166 lea %1, [%1+%3*2] |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
167 STORE_DIFFx2 m2, m3, m5, m6, m7, 6, %1, %3 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
168 mova m0, [%2 ] |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
169 mova m1, [%2+16] |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
170 mova m2, [%2+32] |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
171 lea %1, [%1+%3*2] |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
172 STORE_DIFFx2 m4, m0, m5, m6, m7, 6, %1, %3 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
173 lea %1, [%1+%3*2] |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
174 STORE_DIFFx2 m1, m2, m5, m6, m7, 6, %1, %3 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
175 %endmacro |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
176 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
177 INIT_MMX |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
178 ; ff_h264_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride) |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
179 cglobal h264_idct8_add_mmx, 3, 4, 0 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
180 %assign pad 128+4-(stack_offset&7) |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
181 SUB rsp, pad |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
182 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
183 add word [r1], 32 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
184 IDCT8_ADD_MMX_START r1 , rsp |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
185 IDCT8_ADD_MMX_START r1+8, rsp+64 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
186 lea r3, [r0+4] |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
187 IDCT8_ADD_MMX_END r0 , rsp, r2 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
188 IDCT8_ADD_MMX_END r3 , rsp+8, r2 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
189 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
190 ADD rsp, pad |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
191 RET |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
192 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
193 ; %1=uint8_t *dst, %2=int16_t *block, %3=int stride, %4=scratch register (overwritten with 3*stride) |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
194 %macro IDCT8_ADD_SSE 4 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
195 IDCT8_1D_FULL %2 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
196 %ifdef ARCH_X86_64 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
197 TRANSPOSE8x8W 0, 1, 2, 3, 4, 5, 6, 7, 8 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
198 %else |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
199 TRANSPOSE8x8W 0, 1, 2, 3, 4, 5, 6, 7, [%2], [%2+16] |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
200 %endif |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
201 paddw m0, [pw_32] |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
202 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
203 %ifndef ARCH_X86_64 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
204 mova [%2 ], m0 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
205 mova [%2+16], m4 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
206 IDCT8_1D [%2], [%2+ 16] |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
207 mova [%2 ], m6 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
208 mova [%2+16], m7 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
209 %else |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
210 SWAP 0, 8 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
211 SWAP 4, 9 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
212 IDCT8_1D m8, m9 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
213 SWAP 6, 8 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
214 SWAP 7, 9 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
215 %endif |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
216 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
217 pxor m7, m7 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
218 lea %4, [%3*3] |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
219 STORE_DIFF m0, m6, m7, [%1 ] |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
220 STORE_DIFF m1, m6, m7, [%1+%3 ] |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
221 STORE_DIFF m2, m6, m7, [%1+%3*2] |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
222 STORE_DIFF m3, m6, m7, [%1+%4 ] |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
223 %ifndef ARCH_X86_64 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
224 mova m0, [%2 ] |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
225 mova m1, [%2+16] |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
226 %else |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
227 SWAP 0, 8 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
228 SWAP 1, 9 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
229 %endif |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
230 lea %1, [%1+%3*4] |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
231 STORE_DIFF m4, m6, m7, [%1 ] |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
232 STORE_DIFF m5, m6, m7, [%1+%3 ] |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
233 STORE_DIFF m0, m6, m7, [%1+%3*2] |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
234 STORE_DIFF m1, m6, m7, [%1+%4 ] |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
235 %endmacro |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
236 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
INIT_XMM
;-----------------------------------------------------------------------------
; ff_h264_idct8_add_sse2(uint8_t *dst, int16_t *block, int stride)
;
; Runs the IDCT8_ADD_SSE macro on one block and adds the result to dst.
;   r0 = dst     (advanced by four rows inside the macro)
;   r1 = block
;   r2 = stride
;   r3 = scratch (the macro loads stride*3 into it)
;-----------------------------------------------------------------------------
cglobal h264_idct8_add_sse2, 3, 4, 10
%ifdef ARCH_X86_64
    ; stride is a C int, so the upper half of its 64-bit argument register is
    ; not guaranteed to be defined; widen it before it is used in addressing.
    movsxd        r2, r2d
%endif
    IDCT8_ADD_SSE r0, r1, r2, r3
    RET
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
242 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
;-----------------------------------------------------------------------------
; DC_ADD_MMX2_INIT block_or_unused, stride [, dc]
;
; Prepares the constants consumed by DC_ADD_MMX2_OP:
;   m0 = max( dc', 0) saturated to a byte and replicated in every byte
;   m1 = max(-dc', 0) saturated to a byte and replicated in every byte
; where dc' = (dc + 32) >> 6.
;
; Two-argument form:   %1 points at the coefficient block; its first word is
;                      loaded as dc and %1 is then overwritten with stride*3.
; Three-argument form: %3 already holds the sign-extended dc and is
;                      overwritten with stride*3; %1 is left untouched.
; Clobbers m0, m1 and the general-purpose register described above.
;-----------------------------------------------------------------------------
%macro DC_ADD_MMX2_INIT 2-3
%if %0 > 2
    ; dc was loaded by the caller
    add          %3, 32
    sar          %3, 6
    movd         m0, %3
    lea          %3, [%2*3]
%else
    ; fetch dc from the block, then recycle the pointer register
    movsx        %1, word [%1]
    add          %1, 32
    sar          %1, 6
    movd         m0, %1
    lea          %1, [%2*3]
%endif
    pshufw       m0, m0, 0          ; broadcast dc' to all four words
    pxor         m1, m1
    psubw        m1, m0             ; m1 = -dc'
    packuswb     m0, m0             ; negative words saturate to 0
    packuswb     m1, m1
%endmacro
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
262 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
;-----------------------------------------------------------------------------
; DC_ADD_MMX2_OP mov_insn, dst, stride, stride3
;
; Applies the DC offset prepared by DC_ADD_MMX2_INIT to four rows of dst.
;   %1 = load/store instruction (the callers in this file pass movh or mova)
;   %2 = pointer to the first row
;   %3 = stride
;   %4 = register holding stride*3
; Every byte gets m0 added and then m1 subtracted, both with unsigned
; saturation, so the result stays inside 0..255.
; Clobbers m2-m5; m0/m1 and the address registers are preserved.
;-----------------------------------------------------------------------------
%macro DC_ADD_MMX2_OP 3-4
    ; load rows 0..3
    %1           m2, [%2     ]
    %1           m3, [%2+%3  ]
    %1           m4, [%2+%3*2]
    %1           m5, [%2+%4  ]
    ; + positive part of the DC value
    paddusb      m2, m0
    paddusb      m3, m0
    paddusb      m4, m0
    paddusb      m5, m0
    ; - magnitude of a negative DC value
    psubusb      m2, m1
    psubusb      m3, m1
    psubusb      m4, m1
    psubusb      m5, m1
    ; write rows 0..3 back
    %1           [%2     ], m2
    %1           [%2+%3  ], m3
    %1           [%2+%3*2], m4
    %1           [%2+%4  ], m5
%endmacro
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
281 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
INIT_MMX
;-----------------------------------------------------------------------------
; ff_h264_idct_dc_add_mmx2(uint8_t *dst, int16_t *block, int stride)
;
; DC-only path: adds (block[0] + 32) >> 6 to four rows of dst using movh
; loads/stores, with unsigned saturation.
;   r0 = dst
;   r1 = block  (overwritten with stride*3 by DC_ADD_MMX2_INIT)
;   r2 = stride
;-----------------------------------------------------------------------------
cglobal h264_idct_dc_add_mmx2, 3, 3, 0
%ifdef ARCH_X86_64
    ; stride is a C int, so the upper half of its 64-bit argument register is
    ; not guaranteed to be defined; widen it before it is used in addressing.
    movsxd       r2, r2d
%endif
    DC_ADD_MMX2_INIT r1, r2
    DC_ADD_MMX2_OP movh, r0, r2, r1
    RET
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
288 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
;-----------------------------------------------------------------------------
; ff_h264_idct8_dc_add_mmx2(uint8_t *dst, int16_t *block, int stride)
;
; DC-only path for eight rows: adds (block[0] + 32) >> 6 to each row using
; mova loads/stores, with unsigned saturation. The rows are handled as two
; groups of four.
;   r0 = dst    (advanced by four rows between the two groups)
;   r1 = block  (overwritten with stride*3 by DC_ADD_MMX2_INIT)
;   r2 = stride
;-----------------------------------------------------------------------------
cglobal h264_idct8_dc_add_mmx2, 3, 3, 0
%ifdef ARCH_X86_64
    ; stride is a C int, so the upper half of its 64-bit argument register is
    ; not guaranteed to be defined; widen it before it is used in addressing.
    movsxd       r2, r2d
%endif
    DC_ADD_MMX2_INIT r1, r2
    DC_ADD_MMX2_OP mova, r0, r2, r1     ; rows 0-3
    lea          r0, [r0+r2*4]
    DC_ADD_MMX2_OP mova, r0, r2, r1     ; rows 4-7
    RET
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
296 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
;-----------------------------------------------------------------------------
; ff_h264_idct_add16_mmx(uint8_t *dst, const int *block_offset,
;                        DCTELEM *block, int stride, const uint8_t nnzc[6*8])
;
; Walks 16 consecutive coefficient blocks of 32 bytes each. Block i is passed
; to IDCT4_ADD with destination dst + block_offset[i] when nnzc[scan8[i]] is
; non-zero, and skipped otherwise.
;   r0 = dst, r1 = block_offset, r2 = block (advanced by 32 per iteration),
;   r3 = stride, r4 = nnzc, r5 = block index, r6 = scratch
; NOTE(review): scan8, scan8_mem and IDCT4_ADD are defined outside this
; excerpt; under PIC r11 is loaded with the table address for scan8 to use.
;-----------------------------------------------------------------------------
cglobal h264_idct_add16_mmx, 5, 7, 0
%ifdef ARCH_X86_64
    ; stride is a C int, so the upper half of its 64-bit argument register is
    ; not guaranteed to be defined; widen it once before the loop.
    movsxd       r3, r3d
%endif
    xor          r5, r5
%ifdef PIC
    lea          r11, [scan8_mem]
%endif
.nextblock:
    movzx        r6, byte [scan8+r5]
    movzx        r6, byte [r4+r6]       ; r6 = nnzc[scan8[i]]
    test         r6, r6
    jz .skipblock
    mov          r6d, dword [r1+r5*4]   ; r6 = block_offset[i]
    lea          r6, [r0+r6]
    IDCT4_ADD    r6, r2, r3
.skipblock:
    inc          r5
    add          r2, 32
    cmp          r5, 16
    jl .nextblock
    REP_RET
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
318 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
;-----------------------------------------------------------------------------
; ff_h264_idct8_add4_mmx(uint8_t *dst, const int *block_offset,
;                        DCTELEM *block, int stride, const uint8_t nnzc[6*8])
;
; Visits block indices 0, 4, 8 and 12; the coefficient pointer advances by
; 128 bytes per visit. A block is processed when nnzc[scan8[i]] is non-zero:
; 32 is added in place to its first coefficient, two IDCT8_ADD_MMX_START
; passes write into the stack scratch area, and two IDCT8_ADD_MMX_END passes
; add the result at dst + block_offset[i] and dst + block_offset[i] + 4.
;   r0 = dst, r1 = block_offset, r2 = block, r3 = stride, r4 = nnzc,
;   r5 = block index, r6 = scratch / destination pointer
; NOTE(review): scan8, scan8_mem and the IDCT8_ADD_MMX_* macros are defined
; outside this excerpt.
;-----------------------------------------------------------------------------
cglobal h264_idct8_add4_mmx, 5, 7, 0
%ifdef ARCH_X86_64
    ; stride is a C int, so the upper half of its 64-bit argument register is
    ; not guaranteed to be defined; widen it once before the loop.
    movsxd       r3, r3d
%endif
    ; stack scratch area addressed through rsp by the IDCT8_ADD_MMX_* macros
    %assign pad 128+4-(stack_offset&7)
    SUB          rsp, pad

    xor          r5, r5
%ifdef PIC
    lea          r11, [scan8_mem]
%endif
.nextblock:
    movzx        r6, byte [scan8+r5]
    movzx        r6, byte [r4+r6]       ; r6 = nnzc[scan8[i]]
    test         r6, r6
    jz .skipblock
    mov          r6d, dword [r1+r5*4]   ; r6 = block_offset[i]
    lea          r6, [r0+r6]
    add          word [r2], 32          ; bias the first coefficient in place
    IDCT8_ADD_MMX_START r2  , rsp
    IDCT8_ADD_MMX_START r2+8, rsp+64
    IDCT8_ADD_MMX_END   r6  , rsp,   r3
    ; recompute the destination for the second half, 4 bytes further right
    mov          r6d, dword [r1+r5*4]
    lea          r6, [r0+r6+4]
    IDCT8_ADD_MMX_END   r6  , rsp+8, r3
.skipblock:
    add          r5, 4
    add          r2, 128
    cmp          r5, 16
    jl .nextblock
    ADD          rsp, pad
    RET
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
350 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
;-----------------------------------------------------------------------------
; ff_h264_idct_add16_mmx2(uint8_t *dst, const int *block_offset,
;                         DCTELEM *block, int stride, const uint8_t nnzc[6*8])
;
; Walks 16 consecutive coefficient blocks of 32 bytes each. For block i:
;   nnzc[scan8[i]] == 0                     -> skipped
;   nnzc[scan8[i]] == 1 and block[0] != 0   -> DC-only add (DC_ADD_MMX2_*)
;   anything else                           -> IDCT4_ADD
; The destination is always dst + block_offset[i].
;   r0 = dst, r1 = block_offset, r2 = block (advanced by 32 per iteration),
;   r3 = stride, r4 = nnzc, r5 = block index, r6 = scratch
; The DC path needs one more pointer register: r10 on x86-64; on x86-32 r1 is
; borrowed and then reloaded from its stack argument slot (r1m).
; NOTE(review): scan8, scan8_mem and IDCT4_ADD are defined outside this
; excerpt.
;-----------------------------------------------------------------------------
cglobal h264_idct_add16_mmx2, 5, 7, 0
%ifdef ARCH_X86_64
    ; stride is a C int, so the upper half of its 64-bit argument register is
    ; not guaranteed to be defined; widen it once before the loop.
    movsxd       r3, r3d
%endif
    xor          r5, r5
%ifdef PIC
    lea          r11, [scan8_mem]
%endif
.nextblock:
    movzx        r6, byte [scan8+r5]
    movzx        r6, byte [r4+r6]       ; r6 = nnzc[scan8[i]]
    test         r6, r6
    jz .skipblock
    cmp          r6, 1
    jnz .no_dc
    movsx        r6, word [r2]          ; r6 = first coefficient
    test         r6, r6
    jz .no_dc
    DC_ADD_MMX2_INIT r2, r3, r6         ; r6 becomes stride*3
%ifdef ARCH_X86_64
%define dst_reg  r10
%define dst_regd r10d
%else
%define dst_reg  r1
%define dst_regd r1d
%endif
    mov          dst_regd, dword [r1+r5*4]
    lea          dst_reg, [r0+dst_reg]
    DC_ADD_MMX2_OP movh, dst_reg, r3, r6
%ifndef ARCH_X86_64
    mov          r1, r1m                ; restore block_offset
%endif
    ; loop tail duplicated here so the DC path does not jump to .skipblock
    inc          r5
    add          r2, 32
    cmp          r5, 16
    jl .nextblock
    REP_RET
.no_dc:
    mov          r6d, dword [r1+r5*4]   ; r6 = block_offset[i]
    lea          r6, [r0+r6]
    IDCT4_ADD    r6, r2, r3
.skipblock:
    inc          r5
    add          r2, 32
    cmp          r5, 16
    jl .nextblock
    REP_RET
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
397 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
;-----------------------------------------------------------------------------
; ff_h264_idct_add16intra_mmx(uint8_t *dst, const int *block_offset,
;                             DCTELEM *block, int stride,
;                             const uint8_t nnzc[6*8])
;
; Walks 16 consecutive coefficient blocks of 32 bytes each. Block i is passed
; to IDCT4_ADD with destination dst + block_offset[i] when either
; nnzc[scan8[i]] or the block's first coefficient is non-zero.
;   r0 = dst, r1 = block_offset, r2 = block (advanced by 32 per iteration),
;   r3 = stride, r4 = nnzc, r5 = block index, r6 = scratch
; NOTE(review): scan8, scan8_mem and IDCT4_ADD are defined outside this
; excerpt.
;-----------------------------------------------------------------------------
cglobal h264_idct_add16intra_mmx, 5, 7, 0
%ifdef ARCH_X86_64
    ; stride is a C int, so the upper half of its 64-bit argument register is
    ; not guaranteed to be defined; widen it once before the loop.
    movsxd       r3, r3d
%endif
    xor          r5, r5
%ifdef PIC
    lea          r11, [scan8_mem]
%endif
.nextblock:
    movzx        r6, byte [scan8+r5]
    movzx        r6, byte [r4+r6]       ; r6 = nnzc[scan8[i]], upper bits zero
    or           r6w, word [r2]         ; fold in the first coefficient
    test         r6, r6
    jz .skipblock
    mov          r6d, dword [r1+r5*4]   ; r6 = block_offset[i]
    lea          r6, [r0+r6]
    IDCT4_ADD    r6, r2, r3
.skipblock:
    inc          r5
    add          r2, 32
    cmp          r5, 16
    jl .nextblock
    REP_RET
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
420 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
421 ; ff_h264_idct_add16intra_mmx2(uint8_t *dst, const int *block_offset, |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
422 ; DCTELEM *block, int stride, const uint8_t nnzc[6*8]) |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
423 cglobal h264_idct_add16intra_mmx2, 5, 7, 0 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
424 xor r5, r5 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
425 %ifdef PIC |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
426 lea r11, [scan8_mem] |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
427 %endif |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
428 .nextblock |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
429 movzx r6, byte [scan8+r5] |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
430 movzx r6, byte [r4+r6] |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
431 test r6, r6 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
432 jz .try_dc |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
433 mov r6d, dword [r1+r5*4] |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
434 lea r6, [r0+r6] |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
435 IDCT4_ADD r6, r2, r3 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
436 inc r5 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
437 add r2, 32 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
438 cmp r5, 16 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
439 jl .nextblock |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
440 REP_RET |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
441 .try_dc |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
442 movsx r6, word [r2] |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
443 test r6, r6 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
444 jz .skipblock |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
445 DC_ADD_MMX2_INIT r2, r3, r6 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
446 %ifdef ARCH_X86_64 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
447 %define dst_reg r10 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
448 %define dst_regd r10d |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
449 %else |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
450 %define dst_reg r1 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
451 %define dst_regd r1d |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
452 %endif |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
453 mov dst_regd, dword [r1+r5*4] |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
454 lea dst_reg, [r0+dst_reg] |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
455 DC_ADD_MMX2_OP movh, dst_reg, r3, r6 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
456 %ifndef ARCH_X86_64 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
457 mov r1, r1m |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
458 %endif |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
459 .skipblock |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
460 inc r5 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
461 add r2, 32 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
462 cmp r5, 16 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
463 jl .nextblock |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
464 REP_RET |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
465 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
466 ; ff_h264_idct8_add4_mmx2(uint8_t *dst, const int *block_offset, |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
467 ; DCTELEM *block, int stride, const uint8_t nnzc[6*8]) |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
468 cglobal h264_idct8_add4_mmx2, 5, 7, 0 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
469 %assign pad 128+4-(stack_offset&7) |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
470 SUB rsp, pad |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
471 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
472 xor r5, r5 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
473 %ifdef PIC |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
474 lea r11, [scan8_mem] |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
475 %endif |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
476 .nextblock |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
477 movzx r6, byte [scan8+r5] |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
478 movzx r6, byte [r4+r6] |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
479 test r6, r6 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
480 jz .skipblock |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
481 cmp r6, 1 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
482 jnz .no_dc |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
483 movsx r6, word [r2] |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
484 test r6, r6 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
485 jz .no_dc |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
486 DC_ADD_MMX2_INIT r2, r3, r6 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
487 %ifdef ARCH_X86_64 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
488 %define dst_reg r10 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
489 %define dst_regd r10d |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
490 %else |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
491 %define dst_reg r1 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
492 %define dst_regd r1d |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
493 %endif |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
494 mov dst_regd, dword [r1+r5*4] |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
495 lea dst_reg, [r0+dst_reg] |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
496 DC_ADD_MMX2_OP mova, dst_reg, r3, r6 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
497 lea dst_reg, [dst_reg+r3*4] |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
498 DC_ADD_MMX2_OP mova, dst_reg, r3, r6 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
499 %ifndef ARCH_X86_64 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
500 mov r1, r1m |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
501 %endif |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
502 add r5, 4 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
503 add r2, 128 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
504 cmp r5, 16 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
505 jl .nextblock |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
506 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
507 ADD rsp, pad |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
508 RET |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
509 .no_dc |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
510 mov r6d, dword [r1+r5*4] |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
511 lea r6, [r0+r6] |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
512 add word [r2], 32 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
513 IDCT8_ADD_MMX_START r2 , rsp |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
514 IDCT8_ADD_MMX_START r2+8, rsp+64 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
515 IDCT8_ADD_MMX_END r6 , rsp, r3 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
516 mov r6d, dword [r1+r5*4] |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
517 lea r6, [r0+r6+4] |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
518 IDCT8_ADD_MMX_END r6 , rsp+8, r3 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
519 .skipblock |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
520 add r5, 4 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
521 add r2, 128 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
522 cmp r5, 16 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
523 jl .nextblock |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
524 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
525 ADD rsp, pad |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
526 RET |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
527 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
528 INIT_XMM |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
529 ; ff_h264_idct8_add4_sse2(uint8_t *dst, const int *block_offset, |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
530 ; DCTELEM *block, int stride, const uint8_t nnzc[6*8]) |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
531 cglobal h264_idct8_add4_sse2, 5, 7, 10 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
532 xor r5, r5 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
533 %ifdef PIC |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
534 lea r11, [scan8_mem] |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
535 %endif |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
536 .nextblock |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
537 movzx r6, byte [scan8+r5] |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
538 movzx r6, byte [r4+r6] |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
539 test r6, r6 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
540 jz .skipblock |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
541 cmp r6, 1 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
542 jnz .no_dc |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
543 movsx r6, word [r2] |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
544 test r6, r6 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
545 jz .no_dc |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
546 INIT_MMX |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
547 DC_ADD_MMX2_INIT r2, r3, r6 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
548 %ifdef ARCH_X86_64 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
549 %define dst_reg r10 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
550 %define dst_regd r10d |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
551 %else |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
552 %define dst_reg r1 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
553 %define dst_regd r1d |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
554 %endif |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
555 mov dst_regd, dword [r1+r5*4] |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
556 lea dst_reg, [r0+dst_reg] |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
557 DC_ADD_MMX2_OP mova, dst_reg, r3, r6 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
558 lea dst_reg, [dst_reg+r3*4] |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
559 DC_ADD_MMX2_OP mova, dst_reg, r3, r6 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
560 %ifndef ARCH_X86_64 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
561 mov r1, r1m |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
562 %endif |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
563 add r5, 4 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
564 add r2, 128 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
565 cmp r5, 16 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
566 jl .nextblock |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
567 REP_RET |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
568 .no_dc |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
569 INIT_XMM |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
570 mov dst_regd, dword [r1+r5*4] |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
571 lea dst_reg, [r0+dst_reg] |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
572 IDCT8_ADD_SSE dst_reg, r2, r3, r6 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
573 %ifndef ARCH_X86_64 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
574 mov r1, r1m |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
575 %endif |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
576 .skipblock |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
577 add r5, 4 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
578 add r2, 128 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
579 cmp r5, 16 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
580 jl .nextblock |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
581 REP_RET |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
582 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
583 INIT_MMX |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
584 h264_idct_add8_mmx_plane: |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
585 .nextblock |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
586 movzx r6, byte [scan8+r5] |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
587 movzx r6, byte [r4+r6] |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
588 or r6w, word [r2] |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
589 test r6, r6 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
590 jz .skipblock |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
591 %ifdef ARCH_X86_64 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
592 mov r0d, dword [r1+r5*4] |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
593 add r0, [r10] |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
594 %else |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
595 mov r0, r1m ; XXX r1m here is actually r0m of the calling func |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
596 mov r0, [r0] |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
597 add r0, dword [r1+r5*4] |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
598 %endif |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
599 IDCT4_ADD r0, r2, r3 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
600 .skipblock |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
601 inc r5 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
602 add r2, 32 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
603 test r5, 3 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
604 jnz .nextblock |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
605 rep ret |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
606 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
607 ; ff_h264_idct_add8_mmx(uint8_t **dest, const int *block_offset, |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
608 ; DCTELEM *block, int stride, const uint8_t nnzc[6*8]) |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
609 cglobal h264_idct_add8_mmx, 5, 7, 0 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
610 mov r5, 16 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
611 add r2, 512 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
612 %ifdef PIC |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
613 lea r11, [scan8_mem] |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
614 %endif |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
615 %ifdef ARCH_X86_64 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
616 mov r10, r0 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
617 %endif |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
618 call h264_idct_add8_mmx_plane |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
619 %ifdef ARCH_X86_64 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
620 add r10, gprsize |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
621 %else |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
622 add r0mp, gprsize |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
623 %endif |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
624 call h264_idct_add8_mmx_plane |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
625 RET |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
626 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
627 h264_idct_add8_mmx2_plane |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
628 .nextblock |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
629 movzx r6, byte [scan8+r5] |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
630 movzx r6, byte [r4+r6] |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
631 test r6, r6 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
632 jz .try_dc |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
633 %ifdef ARCH_X86_64 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
634 mov r0d, dword [r1+r5*4] |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
635 add r0, [r10] |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
636 %else |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
637 mov r0, r1m ; XXX r1m here is actually r0m of the calling func |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
638 mov r0, [r0] |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
639 add r0, dword [r1+r5*4] |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
640 %endif |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
641 IDCT4_ADD r0, r2, r3 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
642 inc r5 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
643 add r2, 32 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
644 test r5, 3 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
645 jnz .nextblock |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
646 rep ret |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
647 .try_dc |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
648 movsx r6, word [r2] |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
649 test r6, r6 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
650 jz .skipblock |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
651 DC_ADD_MMX2_INIT r2, r3, r6 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
652 %ifdef ARCH_X86_64 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
653 mov r0d, dword [r1+r5*4] |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
654 add r0, [r10] |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
655 %else |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
656 mov r0, r1m ; XXX r1m here is actually r0m of the calling func |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
657 mov r0, [r0] |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
658 add r0, dword [r1+r5*4] |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
659 %endif |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
660 DC_ADD_MMX2_OP movh, r0, r3, r6 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
661 .skipblock |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
662 inc r5 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
663 add r2, 32 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
664 test r5, 3 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
665 jnz .nextblock |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
666 rep ret |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
667 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
; ff_h264_idct_add8_mmx2(uint8_t **dest, const int *block_offset,
;                        DCTELEM *block, int stride, const uint8_t nnzc[6*8])
;
; Driver: runs h264_idct_add8_mmx2_plane twice, moving on to the next entry
; of dest[] (one gprsize further) between the two calls.
;   r0 = dest, r1 = block_offset, r2 = block, r3 = stride, r4 = nnzc
;   r5 = block index, initialised to 16
; block is advanced by 512 bytes before the first call, i.e. 16 blocks at the
; 32 bytes per block that the plane helper steps r2 by.
; x86-64 keeps the cursor into dest[] in r10; x86-32 bumps the stacked dest
; argument (r0mp) in place instead.
cglobal h264_idct_add8_mmx2, 5, 7, 0
%ifdef ARCH_X86_64
    mov         r10, r0                 ; r10 = &dest[0]
%endif
%ifdef PIC
    lea         r11, [scan8_mem]        ; table address held in a register for PIC builds
%endif
    add         r2, 512
    mov         r5, 16
    call        h264_idct_add8_mmx2_plane
%ifdef ARCH_X86_64
    add         r10, gprsize            ; r10 = &dest[1]
%else
    add         r0mp, gprsize           ; stacked dest argument now addresses dest[1]
%endif
    call        h264_idct_add8_mmx2_plane   ; continues with r2/r5 as the first call left them
    RET
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
687 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
INIT_MMX
; h264_idct_dc_add8_mmx2: DC-only add for two 4x4 blocks handled together.
; In:   r0 = uint8_t *dst
;       r2 = int16_t *block (first DC at [r2], second DC at [r2+32])
;       r3 = int stride
; Clobbers: r6 (set to stride*3), m0, m1
; Internal helper: entered with a plain `call`, leaves with a bare `ret`.
; Lane diagrams below list the highest lane first; D/d are the two DC terms,
; X/x the neighbouring coefficients that get loaded alongside and discarded.
h264_idct_dc_add8_mmx2:
    lea         r6, [r3*3]              ; stride*3, handed to DC_ADD_MMX2_OP
    movd        m0, [r2   ]             ; words:  0  0  X  D
    punpcklwd   m0, [r2+32]             ; words:  x  X  d  D
    paddsw      m0, [pw_32]             ; add the pw_32 constant, then >> 6:
    psraw       m0, 6                   ;   rounded DC (assuming pw_32 holds 32 per word -- confirm)
    punpcklwd   m0, m0                  ; words:  d  d  D  D
    pxor        m1, m1
    psubw       m1, m0                  ; words: -d -d -D -D
    packuswb    m0, m1                  ; bytes: -d -d -D -D  d  d  D  D, each clamped to 0..255
    pshufw      m1, m0, 0xFA            ; bytes: four -d, four -D (clamped negations)
    punpcklwd   m0, m0                  ; bytes: four  d, four  D (clamped values)
    DC_ADD_MMX2_OP movq, r0, r3, r6
    ret
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
704 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
ALIGN 16
INIT_XMM
; x264_add8x4_idct_sse2: inverse transform of two 4x4 coefficient blocks at
; once, added to an 8x4 destination area.
; In:   r0 = uint8_t *dst   (clobbered: advanced by 2*stride below)
;       r2 = int16_t *block (first block at [r2], second at [r2+32])
;       r3 = int stride
; Uses: m0-m5 and m7
; Internal helper: entered with a plain `call`, leaves with a bare `ret`.
x264_add8x4_idct_sse2:
    ; Row i of the first block goes to the low half of m<i>, row i of the
    ; second block to the high half.
    movq        m0, [r2+ 0]
    movhps      m0, [r2+32]
    movq        m1, [r2+ 8]
    movhps      m1, [r2+40]
    movq        m2, [r2+16]
    movhps      m2, [r2+48]
    movq        m3, [r2+24]
    movhps      m3, [r2+56]

    IDCT4_1D    0,1,2,3,4,5             ; first 1-D pass
    TRANSPOSE2x4x4W 0,1,2,3,4
    paddw       m0, [pw_32]             ; bias added ahead of the shift by 6 in the store step
    IDCT4_1D    0,1,2,3,4,5             ; second 1-D pass

    pxor        m7, m7                  ; all-zero register passed to STORE_DIFFx2
    STORE_DIFFx2 m0, m1, m4, m5, m7, 6, r0, r3
    lea         r0, [r0+r3*2]           ; move two rows down
    STORE_DIFFx2 m2, m3, m4, m5, m7, 6, r0, r3
    ret
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
726 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
; add16_sse2_cycle <pair index>, <nnzc offset>
;
; One unrolled step of h264_idct_add16_sse2, covering a pair of 4x4 blocks.
;   arg 1: pair index 0..7. Selects block_offset[2*index] (4-byte entries,
;          hence the *8 scale) and names the per-step local label.
;   arg 2: byte offset into nnzc (r4). A 16-bit load fetches two neighbouring
;          nnzc bytes; the pair is skipped when both are zero.
; Expects: r1 = block_offset, r2 = block, r3 = stride, r4 = nnzc, and the
;          destination base in r10 (x86-64) or in the r0m stack slot (x86-32).
; Effects: r0 is overwritten; r2 is advanced by 64 bytes (two blocks) after
;          every step except the last one (index 7).
%macro add16_sse2_cycle 2
    movzx       r0, word [r4+%2]
    test        r0, r0
    jz .cycle%1end
    mov         r0d, dword [r1+%1*8]
%ifdef ARCH_X86_64
    add         r0, r10
%else
    add         r0, r0m
%endif
    call        x264_add8x4_idct_sse2
.cycle%1end:
%if %1 < 7
    add         r2, 64
%endif
%endmacro
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
743 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
; ff_h264_idct_add16_sse2(uint8_t *dst, const int *block_offset,
;                         DCTELEM *block, int stride, const uint8_t nnzc[6*8])
;
; Eight add16_sse2_cycle steps, each covering a pair of 4x4 blocks (16 blocks
; in total). The second macro argument is the nnzc byte offset tested for
; that pair.
;   r0 = dst, r1 = block_offset, r2 = block, r3 = stride, r4 = nnzc
; On x86-64 dst is copied to r10 because every step overwrites r0; on x86-32
; the steps re-read dst from its stack slot instead.
cglobal h264_idct_add16_sse2, 5, 5, 8
%ifdef ARCH_X86_64
    mov         r10, r0
%endif
    ; Kept fully unrolled on purpose: unrolling the loop gives an average
    ; performance gain of 20-25%.
    add16_sse2_cycle 0, 0xc
    add16_sse2_cycle 1, 0x14
    add16_sse2_cycle 2, 0xe
    add16_sse2_cycle 3, 0x16
    add16_sse2_cycle 4, 0x1c
    add16_sse2_cycle 5, 0x24
    add16_sse2_cycle 6, 0x1e
    add16_sse2_cycle 7, 0x26
    RET
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
761 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
; ff_h264_idct_add16intra_sse2(uint8_t *dst, const int *block_offset,
;                              DCTELEM *block, int stride, const uint8_t nnzc[6*8])
;
; Walks block indices r5 = 0, 2, .., 14 (eight pairs of 4x4 blocks), moving
; r2 forward by 64 bytes per pair. For every pair:
;   * either nnzc byte at nnzc[scan8[r5]] / the byte after it is non-zero
;         -> full transform of both blocks (x264_add8x4_idct_sse2)
;   * otherwise, either DC coefficient ([r2], [r2+32]) is non-zero
;         -> DC-only add (h264_idct_dc_add8_mmx2)
;   * otherwise the pair is skipped.
;   r0 = dst (scratch inside the loop), r1 = block_offset, r2 = block,
;   r3 = stride, r4 = nnzc, r6 = clobbered by the DC helper
; On x86-64 dst is preserved in r10; on x86-32 it is re-read from r0m.
; The loop tail is duplicated so both paths fall straight into their own
; compare-and-branch.
cglobal h264_idct_add16intra_sse2, 5, 7, 8
    xor         r5, r5
%ifdef ARCH_X86_64
    mov         r10, r0
%endif
%ifdef PIC
    lea         r11, [scan8_mem]
%endif
.next2blocks:
    movzx       r0, byte [scan8+r5]     ; r0 = scan8[r5]
    movzx       r0, word [r4+r0]        ; two neighbouring nnzc bytes in one load
    test        r0, r0
    jz .try_dc
    mov         r0d, dword [r1+r5*4]    ; r0 = block_offset[r5]
%ifdef ARCH_X86_64
    add         r0, r10
%else
    add         r0, r0m
%endif
    call        x264_add8x4_idct_sse2
    add         r5, 2
    add         r2, 64
    cmp         r5, 16
    jl .next2blocks
    REP_RET
.try_dc:
    movsx       r0, word [r2   ]
    or          r0w, word [r2+32]       ; ZF set only when both DC terms are zero
    jz .skip2blocks
    mov         r0d, dword [r1+r5*4]    ; r0 = block_offset[r5]
%ifdef ARCH_X86_64
    add         r0, r10
%else
    add         r0, r0m
%endif
    call        h264_idct_dc_add8_mmx2
.skip2blocks:
    add         r5, 2
    add         r2, 64
    cmp         r5, 16
    jl .next2blocks
    REP_RET
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
806 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
; h264_idct_add8_sse2_plane: per-plane worker for h264_idct_add8_sse2.
; In:   r1 = block_offset, r2 = block, r3 = stride, r4 = nnzc
;       r5 = block index of the first pair to process
;       x86-64: r10 = address of the plane's destination pointer
;       x86-32: the address of the destination pointer is read from the
;               caller's first stacked argument (see the XXX notes)
; Out:  r5 += 2 and r2 += 64 for every pair processed
; Clobbers: r0, plus whatever the called helper clobbers
;
; Pairs are processed until r5 becomes a multiple of 4, so entering with r5
; already a multiple of 4 handles exactly two pairs (four 4x4 blocks).
; Per pair: full transform if either nnzc byte is set, else a DC-only add if
; either DC coefficient is non-zero, else nothing.
; This label is not declared through cglobal, so it returns with a literal
; `rep ret`.
h264_idct_add8_sse2_plane:
.next2blocks:
    movzx       r0, byte [scan8+r5]     ; r0 = scan8[r5]
    movzx       r0, word [r4+r0]        ; two neighbouring nnzc bytes in one load
    test        r0, r0
    jz .try_dc
%ifdef ARCH_X86_64
    mov         r0d, dword [r1+r5*4]    ; r0 = block_offset[r5]
    add         r0, [r10]               ; + destination pointer for this plane
%else
    mov         r0, r1m ; XXX r1m here is actually r0m of the calling func
    mov         r0, [r0]
    add         r0, dword [r1+r5*4]
%endif
    call        x264_add8x4_idct_sse2
    add         r5, 2
    add         r2, 64
    test        r5, 3
    jnz .next2blocks
    rep ret
.try_dc:
    movsx       r0, word [r2   ]
    or          r0w, word [r2+32]       ; ZF set only when both DC terms are zero
    jz .skip2blocks
%ifdef ARCH_X86_64
    mov         r0d, dword [r1+r5*4]    ; r0 = block_offset[r5]
    add         r0, [r10]               ; + destination pointer for this plane
%else
    mov         r0, r1m ; XXX r1m here is actually r0m of the calling func
    mov         r0, [r0]
    add         r0, dword [r1+r5*4]
%endif
    call        h264_idct_dc_add8_mmx2
.skip2blocks:
    add         r5, 2
    add         r2, 64
    test        r5, 3
    jnz .next2blocks
    rep ret
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
846 |
58a960d6e34c
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
rbultje
parents:
diff
changeset
|
; ff_h264_idct_add8_sse2(uint8_t **dest, const int *block_offset,
;                        DCTELEM *block, int stride, const uint8_t nnzc[6*8])
;
; Driver: runs h264_idct_add8_sse2_plane twice, moving on to the next entry
; of dest[] (one gprsize further) between the two calls.
;   r0 = dest, r1 = block_offset, r2 = block, r3 = stride, r4 = nnzc
;   r5 = block index, initialised to 16
; block is advanced by 512 bytes before the first call, i.e. 16 blocks at the
; 64 bytes per block pair that the plane helper steps r2 by.
; x86-64 keeps the cursor into dest[] in r10; x86-32 bumps the stacked dest
; argument (r0mp) in place, which the helper then reads back from the stack.
cglobal h264_idct_add8_sse2, 5, 7, 8
%ifdef ARCH_X86_64
    mov         r10, r0                 ; r10 = &dest[0]
%endif
%ifdef PIC
    lea         r11, [scan8_mem]        ; table address held in a register for PIC builds
%endif
    add         r2, 512
    mov         r5, 16
    call        h264_idct_add8_sse2_plane
%ifdef ARCH_X86_64
    add         r10, gprsize            ; r10 = &dest[1]
%else
    add         r0mp, gprsize           ; stacked dest argument now addresses dest[1]
%endif
    call        h264_idct_add8_sse2_plane   ; continues with r2/r5 as the first call left them
    RET