annotate x86/h264_intrapred.asm @ 12530:63edd10ad4bc libavcodec tip

Try to fix crashes introduced by r25218. r25218 made assumptions about the existence of past reference frames that weren't necessarily true.
author darkshikari
date Tue, 28 Sep 2010 09:06:22 +0000
parents 09705b027344
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
11951
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
1 ;******************************************************************************
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
2 ;* H.264 intra prediction asm optimizations
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
3 ;* Copyright (c) 2010 Jason Garrett-Glaser
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
4 ;*
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
5 ;* This file is part of FFmpeg.
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
6 ;*
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
7 ;* FFmpeg is free software; you can redistribute it and/or
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
8 ;* modify it under the terms of the GNU Lesser General Public
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
9 ;* License as published by the Free Software Foundation; either
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
10 ;* version 2.1 of the License, or (at your option) any later version.
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
11 ;*
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
12 ;* FFmpeg is distributed in the hope that it will be useful,
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
13 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
14 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
15 ;* Lesser General Public License for more details.
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
16 ;*
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
17 ;* You should have received a copy of the GNU Lesser General Public
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
18 ;* License along with FFmpeg; if not, write to the Free Software
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
19 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
20 ;******************************************************************************
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
21
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
22 %include "x86inc.asm"
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
23
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
24 SECTION_RODATA
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
25
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
26 tm_shuf: times 8 db 0x03, 0x80 ; 16 bytes: the pair 0x03,0x80 repeated 8 times; presumably a pshufb control mask -- its user is not visible in this excerpt
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
27
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
28 SECTION .text
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
29
12004
2002ea7c06f6 MMXEXT version of vp8 4x4 vertical pred
darkshikari
parents: 12003
diff changeset
30 cextern pb_1
11951
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
31 cextern pb_3
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
32
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
33 ;-----------------------------------------------------------------------------
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
34 ; void pred16x16_vertical(uint8_t *src, int stride)
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
35 ;-----------------------------------------------------------------------------
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
36
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
37 cglobal pred16x16_vertical_mmx, 2,3 ; MMX vertical prediction; r0/r1 presumably src/stride per the prototype comment above (cglobal comes from x86inc.asm, not shown)
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
38 sub r0, r1 ; r0 = src - stride: the row directly above the block
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
39 mov r2, 8 ; loop counter: 8 iterations, two rows written per iteration
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
40 movq mm0, [r0+0] ; bytes 0..7 of the row above
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
41 movq mm1, [r0+8] ; bytes 8..15 of the row above
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
42 .loop:
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
43 movq [r0+r1*1+0], mm0 ; copy the saved row into the next row (left half)
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
44 movq [r0+r1*1+8], mm1 ; ... right half
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
45 movq [r0+r1*2+0], mm0 ; and into the row after that (left half)
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
46 movq [r0+r1*2+8], mm1 ; ... right half
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
47 lea r0, [r0+r1*2] ; advance two rows
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
48 dec r2
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
49 jg .loop ; repeat while the counter is still positive
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
50 REP_RET ; return macro (defined outside this excerpt, presumably in x86inc.asm)
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
51
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
52 cglobal pred16x16_vertical_sse, 2,3 ; SSE vertical prediction; r0/r1 presumably src/stride per the prototype comment above (cglobal comes from x86inc.asm, not shown)
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
53 sub r0, r1 ; r0 = src - stride: the row directly above the block
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
54 mov r2, 4 ; loop counter: 4 iterations, four rows written per iteration
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
55 movaps xmm0, [r0] ; all 16 bytes of the row above; movaps needs a 16-byte-aligned address
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
56 .loop:
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
57 movaps [r0+r1*1], xmm0 ; copy the saved row into the next row
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
58 movaps [r0+r1*2], xmm0 ; and into the row after that
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
59 lea r0, [r0+r1*2] ; advance two rows
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
60 movaps [r0+r1*1], xmm0 ; second pair of rows for this iteration
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
61 movaps [r0+r1*2], xmm0
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
62 lea r0, [r0+r1*2] ; advance two more rows
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
63 dec r2
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
64 jg .loop ; repeat while the counter is still positive
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
65 REP_RET ; return macro (defined outside this excerpt, presumably in x86inc.asm)
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
66
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
67 ;-----------------------------------------------------------------------------
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
68 ; void pred16x16_horizontal(uint8_t *src, int stride)
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
69 ;-----------------------------------------------------------------------------
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
70
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
71 %macro PRED16x16_H 1 ; %1 = instruction-set suffix: names the function and selects the byte-broadcast sequence below
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
72 cglobal pred16x16_horizontal_%1, 2,3 ; r0/r1 presumably src/stride per the prototype comment above (cglobal comes from x86inc.asm, not shown)
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
73 mov r2, 8 ; loop counter: 8 iterations, two rows per iteration
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
74 %ifidn %1, ssse3
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
75 mova m2, [pb_3] ; pshufb control from the external constant pb_3 (presumably every byte = 3 -- confirm against its definition)
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
76 %endif
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
77 .loop:
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
78 movd m0, [r0+r1*0-4] ; 4 bytes ending just left of row 0: the byte at r0-1 lands in byte 3 of m0
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
79 movd m1, [r0+r1*1-4] ; same for row 1
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
80
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
81 %ifidn %1, ssse3
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
82 pshufb m0, m2 ; byte shuffle driven by the pb_3 mask loaded above
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
83 pshufb m1, m2
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
84 %else
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
85 punpcklbw m0, m0 ; double every byte: byte 3 now fills word 3
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
86 punpcklbw m1, m1
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
87 %ifidn %1, mmxext
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
88 pshufw m0, m0, 0xff ; copy word 3 into all four words
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
89 pshufw m1, m1, 0xff
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
90 %else
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
91 punpckhwd m0, m0 ; without pshufw: duplicate the two high words ...
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
92 punpckhwd m1, m1
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
93 punpckhdq m0, m0 ; ... then the high dword, so the original word 3 fills the register
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
94 punpckhdq m1, m1
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
95 %endif
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
96 mova [r0+r1*0+8], m0 ; non-ssse3 variants also write at byte offset 8 of each row
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
97 mova [r0+r1*1+8], m1
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
98 %endif
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
99
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
100 mova [r0+r1*0], m0 ; store at the start of row 0 (all variants)
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
101 mova [r0+r1*1], m1 ; store at the start of row 1
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
102 lea r0, [r0+r1*2] ; advance two rows
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
103 dec r2
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
104 jg .loop ; repeat while the counter is still positive
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
105 REP_RET
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
106 %endmacro
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
107
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
108 INIT_MMX
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
109 PRED16x16_H mmx ; emits pred16x16_horizontal_mmx (unpack-only broadcast path)
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
110 PRED16x16_H mmxext ; emits pred16x16_horizontal_mmxext (pshufw broadcast path)
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
111 INIT_XMM
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
112 PRED16x16_H ssse3 ; emits pred16x16_horizontal_ssse3 (pshufb broadcast path)
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
113
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
114 ;-----------------------------------------------------------------------------
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
115 ; void pred16x16_dc(uint8_t *src, int stride)
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
116 ;-----------------------------------------------------------------------------
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
117
12035
09705b027344 Fix h264/vp8 intra pred on Athlon XP
darkshikari
parents: 12004
diff changeset
118 %macro PRED16x16_DC 1 ; %1 = instruction-set suffix: names the function and selects the broadcast sequence below
11951
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
119 cglobal pred16x16_dc_%1, 2,7 ; r0/r1 presumably src/stride per the prototype comment above (cglobal comes from x86inc.asm, not shown)
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
120 mov r4, r0 ; keep the original r0 for the store loop at the end
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
121 sub r0, r1 ; r0 -> row directly above the block
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
122 pxor mm0, mm0
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
123 pxor mm1, mm1
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
124 psadbw mm0, [r0+0] ; SAD against zero = sum of bytes 0..7 of the row above
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
125 psadbw mm1, [r0+8] ; sum of bytes 8..15 of the row above
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
126 dec r0 ; r0 -> one byte left of the row above (column -1)
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
127 movzx r5d, byte [r0+r1*1] ; left neighbour of row 0 starts accumulator r5d
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
128 paddw mm0, mm1 ; combine the two partial sums of the row above
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
129 movd r6d, mm0 ; accumulator r6d starts with the sum of the 16 bytes above
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
130 lea r0, [r0+r1*2] ; r0 -> left neighbour of row 1
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
131 %rep 7 ; unrolled: adds the left neighbours of rows 1..14, two per repetition
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
132 movzx r2d, byte [r0+r1*0]
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
133 movzx r3d, byte [r0+r1*1]
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
134 add r5d, r2d ; two independent accumulators (r5d, r6d)
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
135 add r6d, r3d
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
136 lea r0, [r0+r1*2] ; advance two rows
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
137 %endrep
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
138 movzx r2d, byte [r0+r1*0] ; left neighbour of row 15
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
139 add r5d, r6d ; merge the two accumulators
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
140 lea r2d, [r2+r5+16] ; total of all 32 neighbour bytes, plus 16 for rounding
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
141 shr r2d, 5 ; divide by 32: r2d = rounded average (the DC value)
12001
953a0949c789 Fix some intra pred MMX functions that used MMXEXT instructions
darkshikari
parents: 11951
diff changeset
142 %ifidn %1, mmxext
11951
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
143 movd m0, r2d
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
144 punpcklbw m0, m0 ; DC byte duplicated into word 0
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
145 pshufw m0, m0, 0 ; word 0 copied to all four words
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
146 %elifidn %1, sse2
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
147 movd m0, r2d
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
148 punpcklbw m0, m0 ; DC byte duplicated into word 0
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
149 pshuflw m0, m0, 0 ; word 0 copied across the low four words
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
150 punpcklqdq m0, m0 ; low qword copied to the high qword
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
151 %elifidn %1, ssse3
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
152 pxor m1, m1 ; all-zero shuffle control: every lane selects byte 0
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
153 movd m0, r2d
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
154 pshufb m0, m1 ; DC byte copied to every byte of m0
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
155 %endif
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
156
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
157 %if mmsize==8
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
158 mov r3d, 8 ; 8-byte registers: 8 iterations, two rows (two stores each) per iteration
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
159 .loop:
12035
09705b027344 Fix h264/vp8 intra pred on Athlon XP
darkshikari
parents: 12004
diff changeset
160 mova [r4+r1*0+0], m0
09705b027344 Fix h264/vp8 intra pred on Athlon XP
darkshikari
parents: 12004
diff changeset
161 mova [r4+r1*0+8], m0
09705b027344 Fix h264/vp8 intra pred on Athlon XP
darkshikari
parents: 12004
diff changeset
162 mova [r4+r1*1+0], m0
09705b027344 Fix h264/vp8 intra pred on Athlon XP
darkshikari
parents: 12004
diff changeset
163 mova [r4+r1*1+8], m0
11951
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
164 %else
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
165 mov r3d, 4 ; wider registers: 4 iterations, four rows per iteration
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
166 .loop:
12035
09705b027344 Fix h264/vp8 intra pred on Athlon XP
darkshikari
parents: 12004
diff changeset
167 mova [r4+r1*0], m0
09705b027344 Fix h264/vp8 intra pred on Athlon XP
darkshikari
parents: 12004
diff changeset
168 mova [r4+r1*1], m0
11951
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
169 lea r4, [r4+r1*2] ; advance two rows inside the iteration
12035
09705b027344 Fix h264/vp8 intra pred on Athlon XP
darkshikari
parents: 12004
diff changeset
170 mova [r4+r1*0], m0
09705b027344 Fix h264/vp8 intra pred on Athlon XP
darkshikari
parents: 12004
diff changeset
171 mova [r4+r1*1], m0
11951
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
172 %endif
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
173 lea r4, [r4+r1*2] ; shared tail of both variants: advance two rows
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
174 dec r3d
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
175 jg .loop ; repeat while the counter is still positive
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
176 REP_RET
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
177 %endmacro
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
178
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
179 INIT_MMX
12035
09705b027344 Fix h264/vp8 intra pred on Athlon XP
darkshikari
parents: 12004
diff changeset
180 PRED16x16_DC mmxext ; emits pred16x16_dc_mmxext
11951
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
181 INIT_XMM
12035
09705b027344 Fix h264/vp8 intra pred on Athlon XP
darkshikari
parents: 12004
diff changeset
182 PRED16x16_DC sse2 ; emits pred16x16_dc_sse2
09705b027344 Fix h264/vp8 intra pred on Athlon XP
darkshikari
parents: 12004
diff changeset
183 PRED16x16_DC ssse3 ; emits pred16x16_dc_ssse3
11951
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
184
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
185 ;-----------------------------------------------------------------------------
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
186 ; void pred16x16_tm_vp8(uint8_t *src, int stride)
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
187 ;-----------------------------------------------------------------------------
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
188
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
189 %macro PRED16x16_TM_MMX 1 ; %1 = mmx or mmxext: names the function and selects the word-broadcast sequence below
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
190 cglobal pred16x16_tm_vp8_%1, 2,5 ; r0/r1 presumably src/stride per the prototype comment above (cglobal comes from x86inc.asm, not shown)
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
191 sub r0, r1 ; r0 -> row directly above the block
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
192 pxor mm7, mm7 ; zero register for the byte->word unpacks below
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
193 movq mm0, [r0+0] ; bytes 0..7 of the row above
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
194 movq mm2, [r0+8] ; bytes 8..15 of the row above
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
195 movq mm1, mm0
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
196 movq mm3, mm2
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
197 punpcklbw mm0, mm7 ; zero-extend the 16 bytes above to 16-bit words: mm0..mm3 hold 4 words each
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
198 punpckhbw mm1, mm7
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
199 punpcklbw mm2, mm7
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
200 punpckhbw mm3, mm7
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
201 movzx r3d, byte [r0-1] ; r3d = byte above-left of the block (top-left neighbour)
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
202 mov r4d, 16 ; loop counter: one row per iteration
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
203 .loop:
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
204 movzx r2d, byte [r0+r1-1] ; left neighbour of the row being written
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
205 sub r2d, r3d ; r2d = left - topleft (may be negative)
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
206 movd mm4, r2d
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
207 %ifidn %1, mmx
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
208 punpcklwd mm4, mm4 ; plain MMX: broadcast the low word with two unpacks
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
209 punpckldq mm4, mm4
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
210 %else
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
211 pshufw mm4, mm4, 0 ; mmxext: broadcast the low word in one instruction
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
212 %endif
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
213 movq mm5, mm4
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
214 movq mm6, mm4
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
215 movq mm7, mm4 ; mm7 is reused as scratch here; its zero value is only needed before the loop
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
216 paddw mm4, mm0 ; per pixel: above[x] + (left - topleft), as 16-bit words
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
217 paddw mm5, mm1
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
218 paddw mm6, mm2
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
219 paddw mm7, mm3
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
220 packuswb mm4, mm5 ; saturate to 0..255 and pack back to bytes (left 8 pixels)
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
221 packuswb mm6, mm7 ; right 8 pixels
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
222 movq [r0+r1+0], mm4 ; write the finished row
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
223 movq [r0+r1+8], mm6
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
224 add r0, r1 ; advance one row
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
225 dec r4d
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
226 jg .loop ; repeat while the counter is still positive
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
227 REP_RET
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
228 %endmacro
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
229
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
230 PRED16x16_TM_MMX mmx ; emits pred16x16_tm_vp8_mmx
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
231 PRED16x16_TM_MMX mmxext ; emits pred16x16_tm_vp8_mmxext
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
232
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
; pred16x16_tm_vp8_sse2: fills a 16x16 block where every output byte is
;   saturate_u8(top[x] + left[y] - topleft)
; top     = the 16 bytes in the row directly above the block
; left    = the byte immediately left of each output row
; topleft = the byte left of the top row
; NOTE(review): presumably r0 = src and r1 = stride, and "2,6,6" is the x86inc
; cglobal triple (args, GPRs, XMM regs) -- confirm against x86inc.asm.
; Uses aligned 16-byte loads/stores (movdqa) on [r0] and on every output row.
233 cglobal pred16x16_tm_vp8_sse2, 2,6,6
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
; r0 now points at the row above the block
234 sub r0, r1
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
; xmm2 = 0, used as the zero source for byte->word unpacking
235 pxor xmm2, xmm2
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
; load the 16 top bytes
236 movdqa xmm0, [r0]
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
237 movdqa xmm1, xmm0
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
; xmm0 = top[0..7] zero-extended to words, xmm1 = top[8..15] as words
238 punpcklbw xmm0, xmm2
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
239 punpckhbw xmm1, xmm2
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
; r4d = topleft byte (loop invariant)
240 movzx r4d, byte [r0-1]
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
; r5d = iteration count; each iteration writes two rows, so 8 * 2 = 16 rows
241 mov r5d, 8
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
242 .loop:
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
; r2d / r3d = left-neighbour bytes of the next two rows
243 movzx r2d, byte [r0+r1*1-1]
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
244 movzx r3d, byte [r0+r1*2-1]
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
; per-row delta = left - topleft (may be negative; kept as a signed word below)
245 sub r2d, r4d
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
246 sub r3d, r4d
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
247 movd xmm2, r2d
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
248 movd xmm4, r3d
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
; broadcast word 0 (the delta) to the low four words ...
249 pshuflw xmm2, xmm2, 0
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
250 pshuflw xmm4, xmm4, 0
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
; ... then copy the low qword to the high qword: all eight words = delta
251 punpcklqdq xmm2, xmm2
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
252 punpcklqdq xmm4, xmm4
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
; second copy of each delta vector for the high half of the row
253 movdqa xmm3, xmm2
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
254 movdqa xmm5, xmm4
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
; add the top row (as words) to the broadcast deltas
255 paddw xmm2, xmm0
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
256 paddw xmm3, xmm1
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
257 paddw xmm4, xmm0
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
258 paddw xmm5, xmm1
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
; pack words back to bytes with unsigned saturation (clamps to 0..255)
259 packuswb xmm2, xmm3
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
260 packuswb xmm4, xmm5
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
; store the two finished rows
261 movdqa [r0+r1*1], xmm2
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
262 movdqa [r0+r1*2], xmm4
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
; advance two rows; lea leaves the flags untouched
263 lea r0, [r0+r1*2]
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
264 dec r5d
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
; repeat while the counter is still positive
265 jg .loop
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
266 REP_RET
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
267
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
268 ;-----------------------------------------------------------------------------
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
269 ; void pred8x8_vertical(uint8_t *src, int stride)
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
270 ;-----------------------------------------------------------------------------
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
271
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
; pred8x8_vertical_mmx: copies the 8 bytes of the row above the block into
; each of the block's 8 rows.
; NOTE(review): presumably r0 = src and r1 = stride per the prototype comment,
; with "2,2" being the x86inc cglobal (args, GPRs) pair -- confirm.
272 cglobal pred8x8_vertical_mmx, 2,2
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
; r0 now points at the row above the block
273 sub r0, r1
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
; mm0 = the 8 top bytes
274 movq mm0, [r0]
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
; three unrolled copies, each writing two rows and stepping down two rows (rows 0..5)
275 %rep 3
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
276 movq [r0+r1*1], mm0
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
277 movq [r0+r1*2], mm0
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
278 lea r0, [r0+r1*2]
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
279 %endrep
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
; final two rows (6 and 7); no pointer advance needed afterwards
280 movq [r0+r1*1], mm0
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
281 movq [r0+r1*2], mm0
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
282 RET
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
283
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
284 ;-----------------------------------------------------------------------------
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
285 ; void pred8x8_horizontal(uint8_t *src, int stride)
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
286 ;-----------------------------------------------------------------------------
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
287
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
; PRED8x8_H <suffix>: emits pred8x8_horizontal_<suffix>.
; For each of 8 rows, the byte immediately left of the row is replicated
; across the row. Two rows are handled per loop iteration.
; %1 selects the byte-broadcast sequence: ssse3 (pshufb), mmxext (pshufw),
; anything else (unpack-only sequence).
; m0/m1/m2 and mova are x86inc abstractions; their width depends on the INIT_*
; mode active where the macro is expanded.
288 %macro PRED8x8_H 1
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
289 cglobal pred8x8_horizontal_%1, 2,3
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
; r2 = iteration count: 4 iterations * 2 rows = 8 rows
290 mov r2, 4
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
291 %ifidn %1, ssse3
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
; m2 = pshufb control mask loaded from pb_3 (defined elsewhere in the file)
; NOTE(review): presumably every byte is 3, so pshufb broadcasts byte 3 -- confirm.
292 mova m2, [pb_3]
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
293 %endif
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
294 .loop:
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
; load the 4 bytes ending just left of each row; byte 3 of the dword is the
; left-neighbour pixel at [row-1]
295 movd m0, [r0+r1*0-4]
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
296 movd m1, [r0+r1*1-4]
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
297 %ifidn %1, ssse3
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
; ssse3: single byte shuffle driven by the mask in m2
298 pshufb m0, m2
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
299 pshufb m1, m2
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
300 %else
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
; duplicate each byte into a word; word 3 now holds the left pixel twice
301 punpcklbw m0, m0
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
302 punpcklbw m1, m1
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
303 %ifidn %1, mmxext
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
; mmxext: 0xff selects word 3 for all four destination words
304 pshufw m0, m0, 0xff
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
305 pshufw m1, m1, 0xff
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
306 %else
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
; plain mmx: replicate the top word via two high-half unpacks
; (w2 w2 w3 w3, then w3 w3 w3 w3)
307 punpckhwd m0, m0
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
308 punpckhwd m1, m1
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
309 punpckhdq m0, m0
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
310 punpckhdq m1, m1
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
311 %endif
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
312 %endif
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
; write the two filled rows
313 mova [r0+r1*0], m0
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
314 mova [r0+r1*1], m1
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
; advance two rows; lea leaves the flags untouched
315 lea r0, [r0+r1*2]
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
316 dec r2
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
; repeat while the counter is still positive
317 jg .loop
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
318 REP_RET
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
319 %endmacro
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
320
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
; Instantiate PRED8x8_H for the mmx, mmxext and ssse3 suffixes.
; All three expansions happen after INIT_MMX.
; NOTE(review): presumably INIT_MMX maps m0..m7/mova to 64-bit MMX registers,
; so the ssse3 variant uses the MMX-register form of pshufb -- confirm against x86inc.asm.
321 INIT_MMX
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
322 PRED8x8_H mmx
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
323 PRED8x8_H mmxext
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
324 PRED8x8_H ssse3
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
325
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
326 ;-----------------------------------------------------------------------------
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
327 ; void pred8x8_dc_rv40(uint8_t *src, int stride)
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
328 ;-----------------------------------------------------------------------------
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
329
12001
953a0949c789 Fix some intra pred MMX functions that used MMXEXT instructions
darkshikari
parents: 11951
diff changeset
; pred8x8_dc_rv40_mmxext: fills the 8x8 block with a single value,
;   dc = (sum of 8 top bytes + sum of 8 left bytes + 8) >> 4
; Uses all seven GPRs r0..r6.
; NOTE(review): presumably r0 = src and r1 = stride per the prototype comment -- confirm.
330 cglobal pred8x8_dc_rv40_mmxext, 2,7
11951
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
; r4 keeps the original block pointer for the store loop at the end
331 mov r4, r0
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
; r0 now points at the row above the block
332 sub r0, r1
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
333 pxor mm0, mm0
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
; sum of absolute differences against zero = plain sum of the 8 top bytes
334 psadbw mm0, [r0]
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
; step one byte left so [r0 + k*r1] addresses the left-neighbour column
335 dec r0
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
; r5d = left byte of row 0
336 movzx r5d, byte [r0+r1*1]
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
; r6d = top-row sum; r5d and r6d act as two independent accumulators
337 movd r6d, mm0
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
; r0 -> left byte of row 1
338 lea r0, [r0+r1*2]
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
; accumulate left bytes of rows 1..6, two rows per unrolled copy
339 %rep 3
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
340 movzx r2d, byte [r0+r1*0]
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
341 movzx r3d, byte [r0+r1*1]
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
342 add r5d, r2d
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
343 add r6d, r3d
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
344 lea r0, [r0+r1*2]
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
345 %endrep
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
; r2d = left byte of row 7
346 movzx r2d, byte [r0+r1*0]
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
; merge the two accumulators
347 add r5d, r6d
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
; r2d = total of all 16 neighbours + 8 (rounding term), computed via lea
348 lea r2d, [r2+r5+8]
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
; divide by 16
349 shr r2d, 4
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
; broadcast the dc byte to all 8 bytes of mm0:
; byte -> word with both bytes equal, then word 0 -> all four words
350 movd mm0, r2d
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
351 punpcklbw mm0, mm0
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
352 pshufw mm0, mm0, 0
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
; r3d = iteration count: 4 iterations * 2 rows = 8 rows
353 mov r3d, 4
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
354 .loop:
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
355 movq [r4+r1*0], mm0
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
356 movq [r4+r1*1], mm0
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
; advance two rows; lea leaves the flags untouched
357 lea r4, [r4+r1*2]
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
358 dec r3d
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
; repeat while the counter is still positive
359 jg .loop
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
360 REP_RET
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
361
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
362 ;-----------------------------------------------------------------------------
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
363 ; void pred8x8_tm_vp8(uint8_t *src, int stride)
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
364 ;-----------------------------------------------------------------------------
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
365
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
; PRED8x8_TM_MMX <suffix>: emits pred8x8_tm_vp8_<suffix>.
; Fills an 8x8 block where every output byte is
;   saturate_u8(top[x] + left[y] - topleft)
; using 64-bit MMX registers, two rows per loop iteration.
; %1 = mmx selects an unpack-based word broadcast; any other suffix uses pshufw.
366 %macro PRED8x8_TM_MMX 1
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
367 cglobal pred8x8_tm_vp8_%1, 2,6
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
; r0 now points at the row above the block
368 sub r0, r1
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
; mm7 = 0, used as the zero source for byte->word unpacking
369 pxor mm7, mm7
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
; load the 8 top bytes
370 movq mm0, [r0]
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
371 movq mm1, mm0
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
; mm0 = top[0..3] zero-extended to words, mm1 = top[4..7] as words
372 punpcklbw mm0, mm7
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
373 punpckhbw mm1, mm7
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
; r4d = topleft byte (loop invariant)
374 movzx r4d, byte [r0-1]
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
; r5d = iteration count: 4 iterations * 2 rows = 8 rows
375 mov r5d, 4
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
376 .loop:
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
; r2d / r3d = left-neighbour bytes of the next two rows
377 movzx r2d, byte [r0+r1*1-1]
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
378 movzx r3d, byte [r0+r1*2-1]
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
; per-row delta = left - topleft (may be negative)
379 sub r2d, r4d
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
380 sub r3d, r4d
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
381 movd mm2, r2d
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
382 movd mm4, r3d
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
; broadcast word 0 (the delta) to all four words of mm2 / mm4
383 %ifidn %1, mmx
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
; plain mmx: duplicate the low word, then the low dword
384 punpcklwd mm2, mm2
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
385 punpcklwd mm4, mm4
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
386 punpckldq mm2, mm2
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
387 punpckldq mm4, mm4
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
388 %else
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
; other suffixes: one pshufw with selector 0
389 pshufw mm2, mm2, 0
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
390 pshufw mm4, mm4, 0
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
391 %endif
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
; second copy of each delta vector for the high half of the row
392 movq mm3, mm2
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
393 movq mm5, mm4
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
; add the top row (as words) to the broadcast deltas
394 paddw mm2, mm0
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
395 paddw mm3, mm1
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
396 paddw mm4, mm0
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
397 paddw mm5, mm1
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
; pack words back to bytes with unsigned saturation (clamps to 0..255)
398 packuswb mm2, mm3
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
399 packuswb mm4, mm5
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
; store the two finished rows
400 movq [r0+r1*1], mm2
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
401 movq [r0+r1*2], mm4
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
; advance two rows; lea leaves the flags untouched
402 lea r0, [r0+r1*2]
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
403 dec r5d
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
; repeat while the counter is still positive
404 jg .loop
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
405 REP_RET
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
406 %endmacro
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
407
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
; Emit pred8x8_tm_vp8_mmx (unpack-based word broadcast) and
; pred8x8_tm_vp8_mmxext (pshufw-based word broadcast).
408 PRED8x8_TM_MMX mmx
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
409 PRED8x8_TM_MMX mmxext
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
410
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
; pred8x8_tm_vp8_sse2: fills an 8x8 block where every output byte is
;   saturate_u8(top[x] + left[y] - topleft)
; Each loop iteration computes two rows as 8 words apiece and packs both into
; one XMM register.
; NOTE(review): presumably r0 = src and r1 = stride per the prototype comment,
; with "2,6,4" being the x86inc cglobal triple (args, GPRs, XMM regs) -- confirm.
411 cglobal pred8x8_tm_vp8_sse2, 2,6,4
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
; r0 now points at the row above the block
412 sub r0, r1
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
; xmm1 = 0, used as the zero source for byte->word unpacking
413 pxor xmm1, xmm1
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
; xmm0 = the 8 top bytes, zero-extended to 8 words
414 movq xmm0, [r0]
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
415 punpcklbw xmm0, xmm1
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
; r4d = topleft byte (loop invariant)
416 movzx r4d, byte [r0-1]
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
; r5d = iteration count: 4 iterations * 2 rows = 8 rows
417 mov r5d, 4
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
418 .loop:
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
; r2d / r3d = left-neighbour bytes of the next two rows
419 movzx r2d, byte [r0+r1*1-1]
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
420 movzx r3d, byte [r0+r1*2-1]
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
; per-row delta = left - topleft (may be negative)
421 sub r2d, r4d
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
422 sub r3d, r4d
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
423 movd xmm2, r2d
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
424 movd xmm3, r3d
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
; broadcast word 0 (the delta) to the low four words ...
425 pshuflw xmm2, xmm2, 0
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
426 pshuflw xmm3, xmm3, 0
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
; ... then copy the low qword to the high qword: all eight words = delta
427 punpcklqdq xmm2, xmm2
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
428 punpcklqdq xmm3, xmm3
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
; add the top row (as words) to each broadcast delta
429 paddw xmm2, xmm0
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
430 paddw xmm3, xmm0
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
; saturating pack: low 8 bytes = first row, high 8 bytes = second row
431 packuswb xmm2, xmm3
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
; movq stores the low half (first row); movhps stores the high half (second row)
432 movq [r0+r1*1], xmm2
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
433 movhps [r0+r1*2], xmm2
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
; advance two rows; lea leaves the flags untouched
434 lea r0, [r0+r1*2]
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
435 dec r5d
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
; repeat while the counter is still positive
436 jg .loop
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
437 REP_RET
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
438
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
; pred8x8_tm_vp8_ssse3: SSSE3 variant of the 8x8 TM predictor.
; Unlike the SSE2 version, the left and topleft neighbours are loaded straight
; into XMM registers and expanded with pshufb, so only three GPRs are needed.
; The shuffle mask tm_shuf is defined elsewhere in the file.
; NOTE(review): presumably tm_shuf turns byte 3 of the loaded dword into eight
; zero-extended words; the psubw/paddw arithmetic below relies on that -- confirm.
439 cglobal pred8x8_tm_vp8_ssse3, 2,3,6
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
; r0 now points at the row above the block
440 sub r0, r1
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
; xmm4 = pshufb control mask (aligned load), kept for the whole function
441 movdqa xmm4, [tm_shuf]
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
; xmm1 = 0, used as the zero source for byte->word unpacking
442 pxor xmm1, xmm1
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
; xmm0 = the 8 top bytes, zero-extended to 8 words
443 movq xmm0, [r0]
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
444 punpcklbw xmm0, xmm1
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
; load the 4 bytes ending at the topleft pixel (byte 3 = [r0-1]) and expand via the mask
445 movd xmm5, [r0-4]
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
446 pshufb xmm5, xmm4
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
; r2d = iteration count: 4 iterations * 2 rows = 8 rows
447 mov r2d, 4
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
448 .loop:
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
; load the 4 bytes ending at the left neighbour of each of the next two rows
449 movd xmm2, [r0+r1*1-4]
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
450 movd xmm3, [r0+r1*2-4]
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
; expand with the same mask used for the topleft value
451 pshufb xmm2, xmm4
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
452 pshufb xmm3, xmm4
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
; word-wise: left - topleft
453 psubw xmm2, xmm5
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
454 psubw xmm3, xmm5
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
; word-wise: + top
455 paddw xmm2, xmm0
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
456 paddw xmm3, xmm0
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
; saturating pack: low 8 bytes = first row, high 8 bytes = second row
457 packuswb xmm2, xmm3
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
; movq stores the low half (first row); movhps stores the high half (second row)
458 movq [r0+r1*1], xmm2
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
459 movhps [r0+r1*2], xmm2
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
; advance two rows; lea leaves the flags untouched
460 lea r0, [r0+r1*2]
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
461 dec r2d
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
; repeat while the counter is still positive
462 jg .loop
afee30fe8c26 16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff changeset
463 REP_RET
12001
953a0949c789 Fix some intra pred MMX functions that used MMXEXT instructions
darkshikari
parents: 11951
diff changeset
464
12002
9c8584f68754 Add missing comment header for predict_4x4_dc_mmxext
darkshikari
parents: 12001
diff changeset
465 ;-----------------------------------------------------------------------------
9c8584f68754 Add missing comment header for predict_4x4_dc_mmxext
darkshikari
parents: 12001
diff changeset
466 ; void pred4x4_dc_mmxext(uint8_t *src, const uint8_t *topright, int stride)
9c8584f68754 Add missing comment header for predict_4x4_dc_mmxext
darkshikari
parents: 12001
diff changeset
467 ;-----------------------------------------------------------------------------
9c8584f68754 Add missing comment header for predict_4x4_dc_mmxext
darkshikari
parents: 12001
diff changeset
468
12001
953a0949c789 Fix some intra pred MMX functions that used MMXEXT instructions
darkshikari
parents: 11951
diff changeset
; pred4x4_dc_mmxext: fill a 4x4 byte block with one DC value,
;   dc = (sum of the 4 bytes in the row above
;         + sum of the 4 bytes in the column to the left + 4) >> 3
; In:    r0 = src (top-left byte of the block), r2 = stride.
;        r1 (topright in the prototype comment) is never read as an
;        argument; it is reused as a scratch register.
; Uses:  r3d = running sum / final splatted value, r4 = saved src, mm0, mm7.
469 cglobal pred4x4_dc_mmxext, 3,5
953a0949c789 Fix some intra pred MMX functions that used MMXEXT instructions
darkshikari
parents: 11951
diff changeset
; mm7 = 0, used as the second operand of psadbw below.
470 pxor mm7, mm7
953a0949c789 Fix some intra pred MMX functions that used MMXEXT instructions
darkshikari
parents: 11951
diff changeset
; Keep the original src pointer for the row-0 store; r0 is modified below.
471 mov r4, r0
953a0949c789 Fix some intra pred MMX functions that used MMXEXT instructions
darkshikari
parents: 11951
diff changeset
; r0 = src - stride (the row above the block).
472 sub r0, r2
953a0949c789 Fix some intra pred MMX functions that used MMXEXT instructions
darkshikari
parents: 11951
diff changeset
473 movd mm0, [r0]
953a0949c789 Fix some intra pred MMX functions that used MMXEXT instructions
darkshikari
parents: 11951
diff changeset
; Sum of absolute differences against zero = plain sum of the 4 top bytes
; (movd cleared the upper half of mm0).
474 psadbw mm0, mm7
953a0949c789 Fix some intra pred MMX functions that used MMXEXT instructions
darkshikari
parents: 11951
diff changeset
; Left neighbour of row 0: (src - stride) + stride - 1.
475 movzx r1d, byte [r0+r2*1-1]
953a0949c789 Fix some intra pred MMX functions that used MMXEXT instructions
darkshikari
parents: 11951
diff changeset
; r3d = sum of the top row.
476 movd r3d, mm0
953a0949c789 Fix some intra pred MMX functions that used MMXEXT instructions
darkshikari
parents: 11951
diff changeset
477 add r3d, r1d
953a0949c789 Fix some intra pred MMX functions that used MMXEXT instructions
darkshikari
parents: 11951
diff changeset
; Left neighbour of row 1.
478 movzx r1d, byte [r0+r2*2-1]
953a0949c789 Fix some intra pred MMX functions that used MMXEXT instructions
darkshikari
parents: 11951
diff changeset
; r0 = src + stride (row 1); rows 2 and 3 are addressed relative to it.
479 lea r0, [r0+r2*2]
953a0949c789 Fix some intra pred MMX functions that used MMXEXT instructions
darkshikari
parents: 11951
diff changeset
480 add r3d, r1d
953a0949c789 Fix some intra pred MMX functions that used MMXEXT instructions
darkshikari
parents: 11951
diff changeset
; Left neighbour of row 2.
481 movzx r1d, byte [r0+r2*1-1]
953a0949c789 Fix some intra pred MMX functions that used MMXEXT instructions
darkshikari
parents: 11951
diff changeset
482 add r3d, r1d
953a0949c789 Fix some intra pred MMX functions that used MMXEXT instructions
darkshikari
parents: 11951
diff changeset
; Left neighbour of row 3.
483 movzx r1d, byte [r0+r2*2-1]
953a0949c789 Fix some intra pred MMX functions that used MMXEXT instructions
darkshikari
parents: 11951
diff changeset
484 add r3d, r1d
953a0949c789 Fix some intra pred MMX functions that used MMXEXT instructions
darkshikari
parents: 11951
diff changeset
; Round to nearest: (sum of 8 samples + 4) >> 3.
485 add r3d, 4
953a0949c789 Fix some intra pred MMX functions that used MMXEXT instructions
darkshikari
parents: 11951
diff changeset
486 shr r3d, 3
953a0949c789 Fix some intra pred MMX functions that used MMXEXT instructions
darkshikari
parents: 11951
diff changeset
; Replicate the DC byte into all four bytes of r3d (dc fits in 8 bits).
487 imul r3d, 0x01010101
953a0949c789 Fix some intra pred MMX functions that used MMXEXT instructions
darkshikari
parents: 11951
diff changeset
; Row 0 through the saved src pointer; rows 1-3 relative to r0 = src + stride.
488 mov [r4+r2*0], r3d
953a0949c789 Fix some intra pred MMX functions that used MMXEXT instructions
darkshikari
parents: 11951
diff changeset
489 mov [r0+r2*0], r3d
953a0949c789 Fix some intra pred MMX functions that used MMXEXT instructions
darkshikari
parents: 11951
diff changeset
490 mov [r0+r2*1], r3d
953a0949c789 Fix some intra pred MMX functions that used MMXEXT instructions
darkshikari
parents: 11951
diff changeset
491 mov [r0+r2*2], r3d
953a0949c789 Fix some intra pred MMX functions that used MMXEXT instructions
darkshikari
parents: 11951
diff changeset
492 RET
12003
3b761226ea35 Add mmx/mmxext/ssse3 4x4 TM intra pred functions for vp8
darkshikari
parents: 12002
diff changeset
493
3b761226ea35 Add mmx/mmxext/ssse3 4x4 TM intra pred functions for vp8
darkshikari
parents: 12002
diff changeset
494 ;-----------------------------------------------------------------------------
3b761226ea35 Add mmx/mmxext/ssse3 4x4 TM intra pred functions for vp8
darkshikari
parents: 12002
diff changeset
495 ; void pred4x4_tm_vp8_mmxext(uint8_t *src, const uint8_t *topright, int stride)
3b761226ea35 Add mmx/mmxext/ssse3 4x4 TM intra pred functions for vp8
darkshikari
parents: 12002
diff changeset
496 ;-----------------------------------------------------------------------------
3b761226ea35 Add mmx/mmxext/ssse3 4x4 TM intra pred functions for vp8
darkshikari
parents: 12002
diff changeset
497
3b761226ea35 Add mmx/mmxext/ssse3 4x4 TM intra pred functions for vp8
darkshikari
parents: 12002
diff changeset
; PRED4x4_TM_MMX <suffix>: emits pred4x4_tm_vp8_<suffix>.
; For every pixel of the 4x4 block the function writes
;   sat_u8(top[x] + left[y] - topleft)
; where top is the row above the block, left is the column to its left and
; topleft is the byte at [src - stride - 1].
; %1 selects how a 16-bit value is broadcast to 4 words:
;   "mmx"        -> punpcklwd + punpckldq
;   anything else -> pshufw
; In:   r0 = src, r2 = stride; r1 is reused as scratch.
; Uses: r3, r4 (topleft), r5 (loop counter), mm0, mm2, mm4, mm7.
498 %macro PRED4x4_TM_MMX 1
3b761226ea35 Add mmx/mmxext/ssse3 4x4 TM intra pred functions for vp8
darkshikari
parents: 12002
diff changeset
499 cglobal pred4x4_tm_vp8_%1, 3,6
3b761226ea35 Add mmx/mmxext/ssse3 4x4 TM intra pred functions for vp8
darkshikari
parents: 12002
diff changeset
; r0 = src - stride (row above the block).
500 sub r0, r2
3b761226ea35 Add mmx/mmxext/ssse3 4x4 TM intra pred functions for vp8
darkshikari
parents: 12002
diff changeset
501 pxor mm7, mm7
3b761226ea35 Add mmx/mmxext/ssse3 4x4 TM intra pred functions for vp8
darkshikari
parents: 12002
diff changeset
; mm0 = the 4 top bytes, zero-extended to 4 words; loaded once for all rows.
502 movd mm0, [r0]
3b761226ea35 Add mmx/mmxext/ssse3 4x4 TM intra pred functions for vp8
darkshikari
parents: 12002
diff changeset
503 punpcklbw mm0, mm7
3b761226ea35 Add mmx/mmxext/ssse3 4x4 TM intra pred functions for vp8
darkshikari
parents: 12002
diff changeset
; r4d = topleft byte.
504 movzx r4d, byte [r0-1]
3b761226ea35 Add mmx/mmxext/ssse3 4x4 TM intra pred functions for vp8
darkshikari
parents: 12002
diff changeset
; Two iterations, two rows each.
505 mov r5d, 2
3b761226ea35 Add mmx/mmxext/ssse3 4x4 TM intra pred functions for vp8
darkshikari
parents: 12002
diff changeset
506 .loop:
3b761226ea35 Add mmx/mmxext/ssse3 4x4 TM intra pred functions for vp8
darkshikari
parents: 12002
diff changeset
; Left neighbours of the two rows handled in this iteration.
507 movzx r1d, byte [r0+r2*1-1]
3b761226ea35 Add mmx/mmxext/ssse3 4x4 TM intra pred functions for vp8
darkshikari
parents: 12002
diff changeset
508 movzx r3d, byte [r0+r2*2-1]
3b761226ea35 Add mmx/mmxext/ssse3 4x4 TM intra pred functions for vp8
darkshikari
parents: 12002
diff changeset
; left - topleft; may be negative, only the low 16 bits are used below.
509 sub r1d, r4d
3b761226ea35 Add mmx/mmxext/ssse3 4x4 TM intra pred functions for vp8
darkshikari
parents: 12002
diff changeset
510 sub r3d, r4d
3b761226ea35 Add mmx/mmxext/ssse3 4x4 TM intra pred functions for vp8
darkshikari
parents: 12002
diff changeset
511 movd mm2, r1d
3b761226ea35 Add mmx/mmxext/ssse3 4x4 TM intra pred functions for vp8
darkshikari
parents: 12002
diff changeset
512 movd mm4, r3d
3b761226ea35 Add mmx/mmxext/ssse3 4x4 TM intra pred functions for vp8
darkshikari
parents: 12002
diff changeset
; Broadcast the low word of mm2/mm4 to all four word lanes.
513 %ifidn %1, mmx
3b761226ea35 Add mmx/mmxext/ssse3 4x4 TM intra pred functions for vp8
darkshikari
parents: 12002
diff changeset
514 punpcklwd mm2, mm2
3b761226ea35 Add mmx/mmxext/ssse3 4x4 TM intra pred functions for vp8
darkshikari
parents: 12002
diff changeset
515 punpcklwd mm4, mm4
3b761226ea35 Add mmx/mmxext/ssse3 4x4 TM intra pred functions for vp8
darkshikari
parents: 12002
diff changeset
516 punpckldq mm2, mm2
3b761226ea35 Add mmx/mmxext/ssse3 4x4 TM intra pred functions for vp8
darkshikari
parents: 12002
diff changeset
517 punpckldq mm4, mm4
3b761226ea35 Add mmx/mmxext/ssse3 4x4 TM intra pred functions for vp8
darkshikari
parents: 12002
diff changeset
518 %else
3b761226ea35 Add mmx/mmxext/ssse3 4x4 TM intra pred functions for vp8
darkshikari
parents: 12002
diff changeset
519 pshufw mm2, mm2, 0
3b761226ea35 Add mmx/mmxext/ssse3 4x4 TM intra pred functions for vp8
darkshikari
parents: 12002
diff changeset
520 pshufw mm4, mm4, 0
3b761226ea35 Add mmx/mmxext/ssse3 4x4 TM intra pred functions for vp8
darkshikari
parents: 12002
diff changeset
521 %endif
3b761226ea35 Add mmx/mmxext/ssse3 4x4 TM intra pred functions for vp8
darkshikari
parents: 12002
diff changeset
; Add the top row to the broadcast (left - topleft) as 16-bit words.
522 paddw mm2, mm0
3b761226ea35 Add mmx/mmxext/ssse3 4x4 TM intra pred functions for vp8
darkshikari
parents: 12002
diff changeset
523 paddw mm4, mm0
3b761226ea35 Add mmx/mmxext/ssse3 4x4 TM intra pred functions for vp8
darkshikari
parents: 12002
diff changeset
; Pack the signed words to bytes with unsigned saturation (clamps to 0..255).
524 packuswb mm2, mm2
3b761226ea35 Add mmx/mmxext/ssse3 4x4 TM intra pred functions for vp8
darkshikari
parents: 12002
diff changeset
525 packuswb mm4, mm4
3b761226ea35 Add mmx/mmxext/ssse3 4x4 TM intra pred functions for vp8
darkshikari
parents: 12002
diff changeset
; Store the two predicted rows (4 bytes each).
526 movd [r0+r2*1], mm2
3b761226ea35 Add mmx/mmxext/ssse3 4x4 TM intra pred functions for vp8
darkshikari
parents: 12002
diff changeset
527 movd [r0+r2*2], mm4
3b761226ea35 Add mmx/mmxext/ssse3 4x4 TM intra pred functions for vp8
darkshikari
parents: 12002
diff changeset
; Advance two rows.
528 lea r0, [r0+r2*2]
3b761226ea35 Add mmx/mmxext/ssse3 4x4 TM intra pred functions for vp8
darkshikari
parents: 12002
diff changeset
529 dec r5d
3b761226ea35 Add mmx/mmxext/ssse3 4x4 TM intra pred functions for vp8
darkshikari
parents: 12002
diff changeset
530 jg .loop
3b761226ea35 Add mmx/mmxext/ssse3 4x4 TM intra pred functions for vp8
darkshikari
parents: 12002
diff changeset
531 REP_RET
3b761226ea35 Add mmx/mmxext/ssse3 4x4 TM intra pred functions for vp8
darkshikari
parents: 12002
diff changeset
532 %endmacro
3b761226ea35 Add mmx/mmxext/ssse3 4x4 TM intra pred functions for vp8
darkshikari
parents: 12002
diff changeset
533
3b761226ea35 Add mmx/mmxext/ssse3 4x4 TM intra pred functions for vp8
darkshikari
parents: 12002
diff changeset
; Instantiate the TM predictor twice:
;   pred4x4_tm_vp8_mmx    - word broadcast via punpcklwd/punpckldq
;   pred4x4_tm_vp8_mmxext - word broadcast via pshufw
534 PRED4x4_TM_MMX mmx
3b761226ea35 Add mmx/mmxext/ssse3 4x4 TM intra pred functions for vp8
darkshikari
parents: 12002
diff changeset
535 PRED4x4_TM_MMX mmxext
3b761226ea35 Add mmx/mmxext/ssse3 4x4 TM intra pred functions for vp8
darkshikari
parents: 12002
diff changeset
536
3b761226ea35 Add mmx/mmxext/ssse3 4x4 TM intra pred functions for vp8
darkshikari
parents: 12002
diff changeset
; pred4x4_tm_vp8_ssse3: unrolled 4x4 TM prediction using pshufb on MMX
; registers. All four rows are computed without a loop as
;   sat_u8(top[x] + left[y] - topleft)
; In:   r0 = src, r2 = stride; r1 is reused as a second row pointer.
; Uses: mm0 (top row as words), mm1 (zero), mm2-mm5 (one row each),
;       mm6 (shuffle mask), mm7 (topleft).
; NOTE(review): tm_shuf is defined outside this section; the code below only
; makes sense if it moves byte 3 of the loaded dword into every word lane,
; zero-extended - confirm against its definition.
537 cglobal pred4x4_tm_vp8_ssse3, 3,3
3b761226ea35 Add mmx/mmxext/ssse3 4x4 TM intra pred functions for vp8
darkshikari
parents: 12002
diff changeset
; r0 = src - stride (row above the block).
538 sub r0, r2
3b761226ea35 Add mmx/mmxext/ssse3 4x4 TM intra pred functions for vp8
darkshikari
parents: 12002
diff changeset
539 movq mm6, [tm_shuf]
3b761226ea35 Add mmx/mmxext/ssse3 4x4 TM intra pred functions for vp8
darkshikari
parents: 12002
diff changeset
540 pxor mm1, mm1
3b761226ea35 Add mmx/mmxext/ssse3 4x4 TM intra pred functions for vp8
darkshikari
parents: 12002
diff changeset
; mm0 = the 4 top bytes zero-extended to words.
541 movd mm0, [r0]
3b761226ea35 Add mmx/mmxext/ssse3 4x4 TM intra pred functions for vp8
darkshikari
parents: 12002
diff changeset
542 punpcklbw mm0, mm1
3b761226ea35 Add mmx/mmxext/ssse3 4x4 TM intra pred functions for vp8
darkshikari
parents: 12002
diff changeset
; Load the 4 bytes ending at the topleft pixel ([r0-1] is the last of them),
; then shuffle with tm_shuf.
543 movd mm7, [r0-4]
3b761226ea35 Add mmx/mmxext/ssse3 4x4 TM intra pred functions for vp8
darkshikari
parents: 12002
diff changeset
544 pshufb mm7, mm6
3b761226ea35 Add mmx/mmxext/ssse3 4x4 TM intra pred functions for vp8
darkshikari
parents: 12002
diff changeset
; r1 = src + stride; base pointer for rows 2 and 3.
545 lea r1, [r0+r2*2]
3b761226ea35 Add mmx/mmxext/ssse3 4x4 TM intra pred functions for vp8
darkshikari
parents: 12002
diff changeset
; For rows 0..3: load the 4 bytes ending at that row's left neighbour.
546 movd mm2, [r0+r2*1-4]
3b761226ea35 Add mmx/mmxext/ssse3 4x4 TM intra pred functions for vp8
darkshikari
parents: 12002
diff changeset
547 movd mm3, [r0+r2*2-4]
3b761226ea35 Add mmx/mmxext/ssse3 4x4 TM intra pred functions for vp8
darkshikari
parents: 12002
diff changeset
548 movd mm4, [r1+r2*1-4]
3b761226ea35 Add mmx/mmxext/ssse3 4x4 TM intra pred functions for vp8
darkshikari
parents: 12002
diff changeset
549 movd mm5, [r1+r2*2-4]
3b761226ea35 Add mmx/mmxext/ssse3 4x4 TM intra pred functions for vp8
darkshikari
parents: 12002
diff changeset
; Same shuffle as applied to mm7, once per row.
550 pshufb mm2, mm6
3b761226ea35 Add mmx/mmxext/ssse3 4x4 TM intra pred functions for vp8
darkshikari
parents: 12002
diff changeset
551 pshufb mm3, mm6
3b761226ea35 Add mmx/mmxext/ssse3 4x4 TM intra pred functions for vp8
darkshikari
parents: 12002
diff changeset
552 pshufb mm4, mm6
3b761226ea35 Add mmx/mmxext/ssse3 4x4 TM intra pred functions for vp8
darkshikari
parents: 12002
diff changeset
553 pshufb mm5, mm6
3b761226ea35 Add mmx/mmxext/ssse3 4x4 TM intra pred functions for vp8
darkshikari
parents: 12002
diff changeset
; row -= topleft (16-bit lanes).
554 psubw mm2, mm7
3b761226ea35 Add mmx/mmxext/ssse3 4x4 TM intra pred functions for vp8
darkshikari
parents: 12002
diff changeset
555 psubw mm3, mm7
3b761226ea35 Add mmx/mmxext/ssse3 4x4 TM intra pred functions for vp8
darkshikari
parents: 12002
diff changeset
556 psubw mm4, mm7
3b761226ea35 Add mmx/mmxext/ssse3 4x4 TM intra pred functions for vp8
darkshikari
parents: 12002
diff changeset
557 psubw mm5, mm7
3b761226ea35 Add mmx/mmxext/ssse3 4x4 TM intra pred functions for vp8
darkshikari
parents: 12002
diff changeset
; row += top.
558 paddw mm2, mm0
3b761226ea35 Add mmx/mmxext/ssse3 4x4 TM intra pred functions for vp8
darkshikari
parents: 12002
diff changeset
559 paddw mm3, mm0
3b761226ea35 Add mmx/mmxext/ssse3 4x4 TM intra pred functions for vp8
darkshikari
parents: 12002
diff changeset
560 paddw mm4, mm0
3b761226ea35 Add mmx/mmxext/ssse3 4x4 TM intra pred functions for vp8
darkshikari
parents: 12002
diff changeset
561 paddw mm5, mm0
3b761226ea35 Add mmx/mmxext/ssse3 4x4 TM intra pred functions for vp8
darkshikari
parents: 12002
diff changeset
; Pack words to bytes with unsigned saturation (clamps to 0..255).
562 packuswb mm2, mm2
3b761226ea35 Add mmx/mmxext/ssse3 4x4 TM intra pred functions for vp8
darkshikari
parents: 12002
diff changeset
563 packuswb mm3, mm3
3b761226ea35 Add mmx/mmxext/ssse3 4x4 TM intra pred functions for vp8
darkshikari
parents: 12002
diff changeset
564 packuswb mm4, mm4
3b761226ea35 Add mmx/mmxext/ssse3 4x4 TM intra pred functions for vp8
darkshikari
parents: 12002
diff changeset
565 packuswb mm5, mm5
3b761226ea35 Add mmx/mmxext/ssse3 4x4 TM intra pred functions for vp8
darkshikari
parents: 12002
diff changeset
; Store rows 0 and 1 relative to r0, rows 2 and 3 relative to r1.
566 movd [r0+r2*1], mm2
3b761226ea35 Add mmx/mmxext/ssse3 4x4 TM intra pred functions for vp8
darkshikari
parents: 12002
diff changeset
567 movd [r0+r2*2], mm3
3b761226ea35 Add mmx/mmxext/ssse3 4x4 TM intra pred functions for vp8
darkshikari
parents: 12002
diff changeset
568 movd [r1+r2*1], mm4
3b761226ea35 Add mmx/mmxext/ssse3 4x4 TM intra pred functions for vp8
darkshikari
parents: 12002
diff changeset
569 movd [r1+r2*2], mm5
3b761226ea35 Add mmx/mmxext/ssse3 4x4 TM intra pred functions for vp8
darkshikari
parents: 12002
diff changeset
570 RET
12004
2002ea7c06f6 MMXEXT version of vp8 4x4 vertical pred
darkshikari
parents: 12003
diff changeset
571
2002ea7c06f6 MMXEXT version of vp8 4x4 vertical pred
darkshikari
parents: 12003
diff changeset
572 ; dest, left, right, src, tmp
2002ea7c06f6 MMXEXT version of vp8 4x4 vertical pred
darkshikari
parents: 12003
diff changeset
573 ; output: %1 = (t[n-1] + t[n]*2 + t[n+1] + 2) >> 2
2002ea7c06f6 MMXEXT version of vp8 4x4 vertical pred
darkshikari
parents: 12003
diff changeset
; PRED4x4_LOWPASS dest, left, right, src, tmp
; Per-byte 3-tap filter: dest = (left + 2*src + right + 2) >> 2, built from
; two pavgb operations without widening to 16 bits.
; Clobbers %2 (left), %3 (right) and %5 (tmp); %4 (src) is only read.
; NOTE(review): pb_1 is defined outside this section - presumably a constant
; with every byte equal to 1; confirm against its definition.
574 %macro PRED4x4_LOWPASS 5
2002ea7c06f6 MMXEXT version of vp8 4x4 vertical pred
darkshikari
parents: 12003
diff changeset
; tmp = left (saved because pavgb overwrites %2).
575 mova %5, %2
2002ea7c06f6 MMXEXT version of vp8 4x4 vertical pred
darkshikari
parents: 12003
diff changeset
; %2 = (left + right + 1) >> 1, i.e. the average rounded up.
576 pavgb %2, %3
2002ea7c06f6 MMXEXT version of vp8 4x4 vertical pred
darkshikari
parents: 12003
diff changeset
; %3 = left ^ right; its low bit tells whether pavgb rounded up.
577 pxor %3, %5
2002ea7c06f6 MMXEXT version of vp8 4x4 vertical pred
darkshikari
parents: 12003
diff changeset
578 mova %1, %4
2002ea7c06f6 MMXEXT version of vp8 4x4 vertical pred
darkshikari
parents: 12003
diff changeset
579 pand %3, [pb_1]
2002ea7c06f6 MMXEXT version of vp8 4x4 vertical pred
darkshikari
parents: 12003
diff changeset
; Remove the rounding bit again: %2 = (left + right) >> 1 (rounded down).
580 psubusb %2, %3
2002ea7c06f6 MMXEXT version of vp8 4x4 vertical pred
darkshikari
parents: 12003
diff changeset
; dest = (src + %2 + 1) >> 1, which equals (left + 2*src + right + 2) >> 2.
581 pavgb %1, %2
2002ea7c06f6 MMXEXT version of vp8 4x4 vertical pred
darkshikari
parents: 12003
diff changeset
582 %endmacro
2002ea7c06f6 MMXEXT version of vp8 4x4 vertical pred
darkshikari
parents: 12003
diff changeset
583
2002ea7c06f6 MMXEXT version of vp8 4x4 vertical pred
darkshikari
parents: 12003
diff changeset
584 ;-----------------------------------------------------------------------------
2002ea7c06f6 MMXEXT version of vp8 4x4 vertical pred
darkshikari
parents: 12003
diff changeset
585 ; void pred4x4_vertical_vp8_mmxext(uint8_t *src, const uint8_t *topright, int stride)
2002ea7c06f6 MMXEXT version of vp8 4x4 vertical pred
darkshikari
parents: 12003
diff changeset
586 ;-----------------------------------------------------------------------------
2002ea7c06f6 MMXEXT version of vp8 4x4 vertical pred
darkshikari
parents: 12003
diff changeset
587
2002ea7c06f6 MMXEXT version of vp8 4x4 vertical pred
darkshikari
parents: 12003
diff changeset
; pred4x4_vertical_vp8_mmxext: low-pass filter the row above the block with
; PRED4x4_LOWPASS and copy the filtered 4 bytes into all four rows.
; In:   r0 = src, r1 = topright (4 bytes are read from it), r2 = stride.
;       r1 is overwritten with a row pointer after the topright load.
; Uses: m0-m4.
; NOTE(review): INIT_MMX is defined outside this section; presumably it makes
; the m0..m7 / mova names refer to MMX registers and moves - confirm in
; x86inc.asm.
588 INIT_MMX
2002ea7c06f6 MMXEXT version of vp8 4x4 vertical pred
darkshikari
parents: 12003
diff changeset
589 cglobal pred4x4_vertical_vp8_mmxext, 3,3
2002ea7c06f6 MMXEXT version of vp8 4x4 vertical pred
darkshikari
parents: 12003
diff changeset
; r0 = src - stride (row above the block).
590 sub r0, r2
2002ea7c06f6 MMXEXT version of vp8 4x4 vertical pred
darkshikari
parents: 12003
diff changeset
; m1 = top row shifted one pixel left: topleft, t0, t1, t2 ("left" taps).
591 movd m1, [r0-1]
2002ea7c06f6 MMXEXT version of vp8 4x4 vertical pred
darkshikari
parents: 12003
diff changeset
592 movd m0, [r0]
2002ea7c06f6 MMXEXT version of vp8 4x4 vertical pred
darkshikari
parents: 12003
diff changeset
; m2 keeps the unshifted top row as the centre tap.
593 mova m2, m0 ;t0 t1 t2 t3
2002ea7c06f6 MMXEXT version of vp8 4x4 vertical pred
darkshikari
parents: 12003
diff changeset
; Append the 4 bytes at [r1] so that t4 is available for the right tap.
594 punpckldq m0, [r1] ;t0 t1 t2 t3 t4 t5 t6 t7
2002ea7c06f6 MMXEXT version of vp8 4x4 vertical pred
darkshikari
parents: 12003
diff changeset
; r1 = src + stride; base pointer for rows 2 and 3.
595 lea r1, [r0+r2*2]
2002ea7c06f6 MMXEXT version of vp8 4x4 vertical pred
darkshikari
parents: 12003
diff changeset
; Shift out one byte: m0 becomes the "right" taps.
596 psrlq m0, 8 ;t1 t2 t3 t4
2002ea7c06f6 MMXEXT version of vp8 4x4 vertical pred
darkshikari
parents: 12003
diff changeset
; m3 = filtered top row; m1, m0 and m4 are clobbered by the macro.
597 PRED4x4_LOWPASS m3, m1, m0, m2, m4
2002ea7c06f6 MMXEXT version of vp8 4x4 vertical pred
darkshikari
parents: 12003
diff changeset
; Write the same 4 filtered bytes to rows 0..3.
598 movd [r0+r2*1], m3
2002ea7c06f6 MMXEXT version of vp8 4x4 vertical pred
darkshikari
parents: 12003
diff changeset
599 movd [r0+r2*2], m3
2002ea7c06f6 MMXEXT version of vp8 4x4 vertical pred
darkshikari
parents: 12003
diff changeset
600 movd [r1+r2*1], m3
2002ea7c06f6 MMXEXT version of vp8 4x4 vertical pred
darkshikari
parents: 12003
diff changeset
601 movd [r1+r2*2], m3
2002ea7c06f6 MMXEXT version of vp8 4x4 vertical pred
darkshikari
parents: 12003
diff changeset
602 RET