annotate x86/dsputil_yasm.asm @ 10300:4d1b9ca628fc libavcodec

Drop unused args from vector_fmul_add_add, simplify code, and rename. The src3 and step arguments to vector_fmul_add_add() are always zero and one, respectively. This removes these arguments from the function, simplifies the code accordingly, and renames the function to better match the new operation.
author mru
date Sun, 27 Sep 2009 16:51:54 +0000
parents 31138c296ac6
children 12c8175d6db5
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
8430
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
1 ;******************************************************************************
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
2 ;* MMX optimized DSP utils
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
3 ;* Copyright (c) 2008 Loren Merritt
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
4 ;*
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
5 ;* This file is part of FFmpeg.
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
6 ;*
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
7 ;* FFmpeg is free software; you can redistribute it and/or
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
8 ;* modify it under the terms of the GNU Lesser General Public
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
9 ;* License as published by the Free Software Foundation; either
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
10 ;* version 2.1 of the License, or (at your option) any later version.
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
11 ;*
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
12 ;* FFmpeg is distributed in the hope that it will be useful,
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
13 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
14 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
15 ;* Lesser General Public License for more details.
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
16 ;*
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
17 ;* You should have received a copy of the GNU Lesser General Public
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
18 ;* License along with FFmpeg; if not, write to the Free Software
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
19 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
20 ;******************************************************************************
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
21
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
22 %include "x86inc.asm"
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
23
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
24 section .text align=16
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
25
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
26 %macro PSWAPD_SSE 2
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
27 pshufw %1, %2, 0x4e
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
28 %endmacro
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
29 %macro PSWAPD_3DN1 2
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
30 movq %1, %2
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
31 psrlq %1, 32
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
32 punpckldq %1, %2
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
33 %endmacro
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
34
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
35 %macro FLOAT_TO_INT16_INTERLEAVE6 1
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
36 ; void ff_float_to_int16_interleave6_sse(int16_t *dst, const float **src, int len)
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
37 cglobal float_to_int16_interleave6_%1, 2,7,0, dst, src, src1, src2, src3, src4, src5
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
38 %ifdef ARCH_X86_64
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
39 %define lend r10d
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
40 mov lend, r2d
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
41 %else
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
42 %define lend dword r2m
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
43 %endif
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
44 mov src1q, [srcq+1*gprsize]
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
45 mov src2q, [srcq+2*gprsize]
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
46 mov src3q, [srcq+3*gprsize]
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
47 mov src4q, [srcq+4*gprsize]
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
48 mov src5q, [srcq+5*gprsize]
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
49 mov srcq, [srcq]
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
50 sub src1q, srcq
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
51 sub src2q, srcq
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
52 sub src3q, srcq
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
53 sub src4q, srcq
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
54 sub src5q, srcq
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
55 .loop:
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
56 cvtps2pi mm0, [srcq]
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
57 cvtps2pi mm1, [srcq+src1q]
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
58 cvtps2pi mm2, [srcq+src2q]
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
59 cvtps2pi mm3, [srcq+src3q]
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
60 cvtps2pi mm4, [srcq+src4q]
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
61 cvtps2pi mm5, [srcq+src5q]
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
62 packssdw mm0, mm3
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
63 packssdw mm1, mm4
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
64 packssdw mm2, mm5
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
65 pswapd mm3, mm0
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
66 punpcklwd mm0, mm1
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
67 punpckhwd mm1, mm2
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
68 punpcklwd mm2, mm3
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
69 pswapd mm3, mm0
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
70 punpckldq mm0, mm2
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
71 punpckhdq mm2, mm1
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
72 punpckldq mm1, mm3
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
73 movq [dstq ], mm0
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
74 movq [dstq+16], mm2
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
75 movq [dstq+ 8], mm1
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
76 add srcq, 8
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
77 add dstq, 24
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
78 sub lend, 2
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
79 jg .loop
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
80 emms
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
81 RET
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
82 %endmacro ; FLOAT_TO_INT16_INTERLEAVE6
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
83
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
84 %define pswapd PSWAPD_SSE
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
85 FLOAT_TO_INT16_INTERLEAVE6 sse
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
86 %define cvtps2pi pf2id
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
87 %define pswapd PSWAPD_3DN1
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
88 FLOAT_TO_INT16_INTERLEAVE6 3dnow
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
89 %undef pswapd
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
90 FLOAT_TO_INT16_INTERLEAVE6 3dn2
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
91 %undef cvtps2pi
7768bdfd4f7b Rename libavcodec/i386/ --> libavcodec/x86/.
diego
parents:
diff changeset
92
8760
31138c296ac6 ff_add_hfyu_median_prediction_mmx2
lorenm
parents: 8430
diff changeset
93
31138c296ac6 ff_add_hfyu_median_prediction_mmx2
lorenm
parents: 8430
diff changeset
94
31138c296ac6 ff_add_hfyu_median_prediction_mmx2
lorenm
parents: 8430
diff changeset
95 ; void ff_add_hfyu_median_prediction_mmx2(uint8_t *dst, uint8_t *top, uint8_t *diff, int w, int *left, int *left_top)
31138c296ac6 ff_add_hfyu_median_prediction_mmx2
lorenm
parents: 8430
diff changeset
96 cglobal add_hfyu_median_prediction_mmx2, 6,6,0, dst, top, diff, w, left, left_top
31138c296ac6 ff_add_hfyu_median_prediction_mmx2
lorenm
parents: 8430
diff changeset
97 movq mm0, [topq]
31138c296ac6 ff_add_hfyu_median_prediction_mmx2
lorenm
parents: 8430
diff changeset
98 movq mm2, mm0
31138c296ac6 ff_add_hfyu_median_prediction_mmx2
lorenm
parents: 8430
diff changeset
99 movd mm4, [left_topq]
31138c296ac6 ff_add_hfyu_median_prediction_mmx2
lorenm
parents: 8430
diff changeset
100 psllq mm2, 8
31138c296ac6 ff_add_hfyu_median_prediction_mmx2
lorenm
parents: 8430
diff changeset
101 movq mm1, mm0
31138c296ac6 ff_add_hfyu_median_prediction_mmx2
lorenm
parents: 8430
diff changeset
102 por mm4, mm2
31138c296ac6 ff_add_hfyu_median_prediction_mmx2
lorenm
parents: 8430
diff changeset
103 movd mm3, [leftq]
31138c296ac6 ff_add_hfyu_median_prediction_mmx2
lorenm
parents: 8430
diff changeset
104 psubb mm0, mm4 ; t-tl
31138c296ac6 ff_add_hfyu_median_prediction_mmx2
lorenm
parents: 8430
diff changeset
105 add dstq, wq
31138c296ac6 ff_add_hfyu_median_prediction_mmx2
lorenm
parents: 8430
diff changeset
106 add topq, wq
31138c296ac6 ff_add_hfyu_median_prediction_mmx2
lorenm
parents: 8430
diff changeset
107 add diffq, wq
31138c296ac6 ff_add_hfyu_median_prediction_mmx2
lorenm
parents: 8430
diff changeset
108 neg wq
31138c296ac6 ff_add_hfyu_median_prediction_mmx2
lorenm
parents: 8430
diff changeset
109 jmp .skip
31138c296ac6 ff_add_hfyu_median_prediction_mmx2
lorenm
parents: 8430
diff changeset
110 .loop:
31138c296ac6 ff_add_hfyu_median_prediction_mmx2
lorenm
parents: 8430
diff changeset
111 movq mm4, [topq+wq]
31138c296ac6 ff_add_hfyu_median_prediction_mmx2
lorenm
parents: 8430
diff changeset
112 movq mm0, mm4
31138c296ac6 ff_add_hfyu_median_prediction_mmx2
lorenm
parents: 8430
diff changeset
113 psllq mm4, 8
31138c296ac6 ff_add_hfyu_median_prediction_mmx2
lorenm
parents: 8430
diff changeset
114 por mm4, mm1
31138c296ac6 ff_add_hfyu_median_prediction_mmx2
lorenm
parents: 8430
diff changeset
115 movq mm1, mm0 ; t
31138c296ac6 ff_add_hfyu_median_prediction_mmx2
lorenm
parents: 8430
diff changeset
116 psubb mm0, mm4 ; t-tl
31138c296ac6 ff_add_hfyu_median_prediction_mmx2
lorenm
parents: 8430
diff changeset
117 .skip:
31138c296ac6 ff_add_hfyu_median_prediction_mmx2
lorenm
parents: 8430
diff changeset
118 movq mm2, [diffq+wq]
31138c296ac6 ff_add_hfyu_median_prediction_mmx2
lorenm
parents: 8430
diff changeset
119 %assign i 0
31138c296ac6 ff_add_hfyu_median_prediction_mmx2
lorenm
parents: 8430
diff changeset
120 %rep 8
31138c296ac6 ff_add_hfyu_median_prediction_mmx2
lorenm
parents: 8430
diff changeset
121 movq mm4, mm0
31138c296ac6 ff_add_hfyu_median_prediction_mmx2
lorenm
parents: 8430
diff changeset
122 paddb mm4, mm3 ; t-tl+l
31138c296ac6 ff_add_hfyu_median_prediction_mmx2
lorenm
parents: 8430
diff changeset
123 movq mm5, mm3
31138c296ac6 ff_add_hfyu_median_prediction_mmx2
lorenm
parents: 8430
diff changeset
124 pmaxub mm3, mm1
31138c296ac6 ff_add_hfyu_median_prediction_mmx2
lorenm
parents: 8430
diff changeset
125 pminub mm5, mm1
31138c296ac6 ff_add_hfyu_median_prediction_mmx2
lorenm
parents: 8430
diff changeset
126 pminub mm3, mm4
31138c296ac6 ff_add_hfyu_median_prediction_mmx2
lorenm
parents: 8430
diff changeset
127 pmaxub mm3, mm5 ; median
31138c296ac6 ff_add_hfyu_median_prediction_mmx2
lorenm
parents: 8430
diff changeset
128 paddb mm3, mm2 ; +residual
31138c296ac6 ff_add_hfyu_median_prediction_mmx2
lorenm
parents: 8430
diff changeset
129 %if i==0
31138c296ac6 ff_add_hfyu_median_prediction_mmx2
lorenm
parents: 8430
diff changeset
130 movq mm7, mm3
31138c296ac6 ff_add_hfyu_median_prediction_mmx2
lorenm
parents: 8430
diff changeset
131 psllq mm7, 56
31138c296ac6 ff_add_hfyu_median_prediction_mmx2
lorenm
parents: 8430
diff changeset
132 %else
31138c296ac6 ff_add_hfyu_median_prediction_mmx2
lorenm
parents: 8430
diff changeset
133 movq mm6, mm3
31138c296ac6 ff_add_hfyu_median_prediction_mmx2
lorenm
parents: 8430
diff changeset
134 psrlq mm7, 8
31138c296ac6 ff_add_hfyu_median_prediction_mmx2
lorenm
parents: 8430
diff changeset
135 psllq mm6, 56
31138c296ac6 ff_add_hfyu_median_prediction_mmx2
lorenm
parents: 8430
diff changeset
136 por mm7, mm6
31138c296ac6 ff_add_hfyu_median_prediction_mmx2
lorenm
parents: 8430
diff changeset
137 %endif
31138c296ac6 ff_add_hfyu_median_prediction_mmx2
lorenm
parents: 8430
diff changeset
138 %if i<7
31138c296ac6 ff_add_hfyu_median_prediction_mmx2
lorenm
parents: 8430
diff changeset
139 psrlq mm0, 8
31138c296ac6 ff_add_hfyu_median_prediction_mmx2
lorenm
parents: 8430
diff changeset
140 psrlq mm1, 8
31138c296ac6 ff_add_hfyu_median_prediction_mmx2
lorenm
parents: 8430
diff changeset
141 psrlq mm2, 8
31138c296ac6 ff_add_hfyu_median_prediction_mmx2
lorenm
parents: 8430
diff changeset
142 %endif
31138c296ac6 ff_add_hfyu_median_prediction_mmx2
lorenm
parents: 8430
diff changeset
143 %assign i i+1
31138c296ac6 ff_add_hfyu_median_prediction_mmx2
lorenm
parents: 8430
diff changeset
144 %endrep
31138c296ac6 ff_add_hfyu_median_prediction_mmx2
lorenm
parents: 8430
diff changeset
145 movq [dstq+wq], mm7
31138c296ac6 ff_add_hfyu_median_prediction_mmx2
lorenm
parents: 8430
diff changeset
146 add wq, 8
31138c296ac6 ff_add_hfyu_median_prediction_mmx2
lorenm
parents: 8430
diff changeset
147 jl .loop
31138c296ac6 ff_add_hfyu_median_prediction_mmx2
lorenm
parents: 8430
diff changeset
148 movzx r2d, byte [dstq-1]
31138c296ac6 ff_add_hfyu_median_prediction_mmx2
lorenm
parents: 8430
diff changeset
149 mov [leftq], r2d
31138c296ac6 ff_add_hfyu_median_prediction_mmx2
lorenm
parents: 8430
diff changeset
150 movzx r2d, byte [topq-1]
31138c296ac6 ff_add_hfyu_median_prediction_mmx2
lorenm
parents: 8430
diff changeset
151 mov [left_topq], r2d
31138c296ac6 ff_add_hfyu_median_prediction_mmx2
lorenm
parents: 8430
diff changeset
152 RET