Mercurial > libavcodec.hg
annotate x86/h264_intrapred.asm @ 12483:0159a19bfff7 libavcodec
aacdec: Rework channel mapping compatibility hacks.
For a PCE based configuration map the channels solely based on tags.
For an indexed configuration map the channels solely based on position.
This works with all known exotic samples including al17, elem_id0, bad_concat,
and lfe_is_sce.
author | alexc |
---|---|
date | Fri, 10 Sep 2010 18:01:48 +0000 |
parents | 09705b027344 |
children |
rev | line source |
---|---|
11951
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
1 ;****************************************************************************** |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
2 ;* H.264 intra prediction asm optimizations |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
3 ;* Copyright (c) 2010 Jason Garrett-Glaser |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
4 ;* |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
5 ;* This file is part of FFmpeg. |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
6 ;* |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
7 ;* FFmpeg is free software; you can redistribute it and/or |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
8 ;* modify it under the terms of the GNU Lesser General Public |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
9 ;* License as published by the Free Software Foundation; either |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
10 ;* version 2.1 of the License, or (at your option) any later version. |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
11 ;* |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
12 ;* FFmpeg is distributed in the hope that it will be useful, |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
13 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
14 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
15 ;* Lesser General Public License for more details. |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
16 ;* |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
17 ;* You should have received a copy of the GNU Lesser General Public |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
18 ;* License along with FFmpeg; if not, write to the Free Software |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
19 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
20 ;****************************************************************************** |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
21 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
22 %include "x86inc.asm" |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
23 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
24 SECTION_RODATA |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
25 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
26 tm_shuf: times 8 db 0x03, 0x80 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
27 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
28 SECTION .text |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
29 |
12004 | 30 cextern pb_1 |
11951
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
31 cextern pb_3 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
32 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
33 ;----------------------------------------------------------------------------- |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
34 ; void pred16x16_vertical(uint8_t *src, int stride) |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
35 ;----------------------------------------------------------------------------- |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
36 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
37 cglobal pred16x16_vertical_mmx, 2,3 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
38 sub r0, r1 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
39 mov r2, 8 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
40 movq mm0, [r0+0] |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
41 movq mm1, [r0+8] |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
42 .loop: |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
43 movq [r0+r1*1+0], mm0 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
44 movq [r0+r1*1+8], mm1 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
45 movq [r0+r1*2+0], mm0 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
46 movq [r0+r1*2+8], mm1 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
47 lea r0, [r0+r1*2] |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
48 dec r2 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
49 jg .loop |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
50 REP_RET |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
51 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
52 cglobal pred16x16_vertical_sse, 2,3 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
53 sub r0, r1 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
54 mov r2, 4 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
55 movaps xmm0, [r0] |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
56 .loop: |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
57 movaps [r0+r1*1], xmm0 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
58 movaps [r0+r1*2], xmm0 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
59 lea r0, [r0+r1*2] |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
60 movaps [r0+r1*1], xmm0 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
61 movaps [r0+r1*2], xmm0 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
62 lea r0, [r0+r1*2] |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
63 dec r2 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
64 jg .loop |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
65 REP_RET |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
66 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
67 ;----------------------------------------------------------------------------- |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
68 ; void pred16x16_horizontal(uint8_t *src, int stride) |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
69 ;----------------------------------------------------------------------------- |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
70 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
71 %macro PRED16x16_H 1 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
72 cglobal pred16x16_horizontal_%1, 2,3 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
73 mov r2, 8 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
74 %ifidn %1, ssse3 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
75 mova m2, [pb_3] |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
76 %endif |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
77 .loop: |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
78 movd m0, [r0+r1*0-4] |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
79 movd m1, [r0+r1*1-4] |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
80 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
81 %ifidn %1, ssse3 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
82 pshufb m0, m2 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
83 pshufb m1, m2 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
84 %else |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
85 punpcklbw m0, m0 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
86 punpcklbw m1, m1 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
87 %ifidn %1, mmxext |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
88 pshufw m0, m0, 0xff |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
89 pshufw m1, m1, 0xff |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
90 %else |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
91 punpckhwd m0, m0 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
92 punpckhwd m1, m1 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
93 punpckhdq m0, m0 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
94 punpckhdq m1, m1 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
95 %endif |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
96 mova [r0+r1*0+8], m0 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
97 mova [r0+r1*1+8], m1 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
98 %endif |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
99 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
100 mova [r0+r1*0], m0 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
101 mova [r0+r1*1], m1 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
102 lea r0, [r0+r1*2] |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
103 dec r2 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
104 jg .loop |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
105 REP_RET |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
106 %endmacro |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
107 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
108 INIT_MMX |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
109 PRED16x16_H mmx |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
110 PRED16x16_H mmxext |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
111 INIT_XMM |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
112 PRED16x16_H ssse3 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
113 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
114 ;----------------------------------------------------------------------------- |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
115 ; void pred16x16_dc(uint8_t *src, int stride) |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
116 ;----------------------------------------------------------------------------- |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
117 |
12035 | 118 %macro PRED16x16_DC 1 |
11951
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
119 cglobal pred16x16_dc_%1, 2,7 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
120 mov r4, r0 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
121 sub r0, r1 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
122 pxor mm0, mm0 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
123 pxor mm1, mm1 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
124 psadbw mm0, [r0+0] |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
125 psadbw mm1, [r0+8] |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
126 dec r0 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
127 movzx r5d, byte [r0+r1*1] |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
128 paddw mm0, mm1 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
129 movd r6d, mm0 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
130 lea r0, [r0+r1*2] |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
131 %rep 7 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
132 movzx r2d, byte [r0+r1*0] |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
133 movzx r3d, byte [r0+r1*1] |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
134 add r5d, r2d |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
135 add r6d, r3d |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
136 lea r0, [r0+r1*2] |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
137 %endrep |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
138 movzx r2d, byte [r0+r1*0] |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
139 add r5d, r6d |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
140 lea r2d, [r2+r5+16] |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
141 shr r2d, 5 |
12001
953a0949c789
Fix some intra pred MMX functions that used MMXEXT instructions
darkshikari
parents:
11951
diff
changeset
|
142 %ifidn %1, mmxext |
11951
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
143 movd m0, r2d |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
144 punpcklbw m0, m0 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
145 pshufw m0, m0, 0 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
146 %elifidn %1, sse2 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
147 movd m0, r2d |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
148 punpcklbw m0, m0 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
149 pshuflw m0, m0, 0 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
150 punpcklqdq m0, m0 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
151 %elifidn %1, ssse3 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
152 pxor m1, m1 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
153 movd m0, r2d |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
154 pshufb m0, m1 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
155 %endif |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
156 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
157 %if mmsize==8 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
158 mov r3d, 8 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
159 .loop: |
12035 | 160 mova [r4+r1*0+0], m0 |
161 mova [r4+r1*0+8], m0 | |
162 mova [r4+r1*1+0], m0 | |
163 mova [r4+r1*1+8], m0 | |
11951
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
164 %else |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
165 mov r3d, 4 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
166 .loop: |
12035 | 167 mova [r4+r1*0], m0 |
168 mova [r4+r1*1], m0 | |
11951
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
169 lea r4, [r4+r1*2] |
12035 | 170 mova [r4+r1*0], m0 |
171 mova [r4+r1*1], m0 | |
11951
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
172 %endif |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
173 lea r4, [r4+r1*2] |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
174 dec r3d |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
175 jg .loop |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
176 REP_RET |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
177 %endmacro |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
178 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
179 INIT_MMX |
12035 | 180 PRED16x16_DC mmxext |
11951
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
181 INIT_XMM |
12035 | 182 PRED16x16_DC sse2 |
183 PRED16x16_DC ssse3 | |
11951
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
184 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
185 ;----------------------------------------------------------------------------- |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
186 ; void pred16x16_tm_vp8(uint8_t *src, int stride) |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
187 ;----------------------------------------------------------------------------- |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
188 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
189 %macro PRED16x16_TM_MMX 1 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
190 cglobal pred16x16_tm_vp8_%1, 2,5 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
191 sub r0, r1 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
192 pxor mm7, mm7 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
193 movq mm0, [r0+0] |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
194 movq mm2, [r0+8] |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
195 movq mm1, mm0 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
196 movq mm3, mm2 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
197 punpcklbw mm0, mm7 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
198 punpckhbw mm1, mm7 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
199 punpcklbw mm2, mm7 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
200 punpckhbw mm3, mm7 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
201 movzx r3d, byte [r0-1] |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
202 mov r4d, 16 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
203 .loop: |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
204 movzx r2d, byte [r0+r1-1] |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
205 sub r2d, r3d |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
206 movd mm4, r2d |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
207 %ifidn %1, mmx |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
208 punpcklwd mm4, mm4 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
209 punpckldq mm4, mm4 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
210 %else |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
211 pshufw mm4, mm4, 0 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
212 %endif |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
213 movq mm5, mm4 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
214 movq mm6, mm4 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
215 movq mm7, mm4 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
216 paddw mm4, mm0 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
217 paddw mm5, mm1 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
218 paddw mm6, mm2 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
219 paddw mm7, mm3 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
220 packuswb mm4, mm5 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
221 packuswb mm6, mm7 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
222 movq [r0+r1+0], mm4 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
223 movq [r0+r1+8], mm6 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
224 add r0, r1 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
225 dec r4d |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
226 jg .loop |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
227 REP_RET |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
228 %endmacro |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
229 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
230 PRED16x16_TM_MMX mmx |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
231 PRED16x16_TM_MMX mmxext |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
232 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
233 cglobal pred16x16_tm_vp8_sse2, 2,6,6 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
234 sub r0, r1 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
235 pxor xmm2, xmm2 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
236 movdqa xmm0, [r0] |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
237 movdqa xmm1, xmm0 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
238 punpcklbw xmm0, xmm2 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
239 punpckhbw xmm1, xmm2 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
240 movzx r4d, byte [r0-1] |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
241 mov r5d, 8 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
242 .loop: |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
243 movzx r2d, byte [r0+r1*1-1] |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
244 movzx r3d, byte [r0+r1*2-1] |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
245 sub r2d, r4d |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
246 sub r3d, r4d |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
247 movd xmm2, r2d |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
248 movd xmm4, r3d |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
249 pshuflw xmm2, xmm2, 0 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
250 pshuflw xmm4, xmm4, 0 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
251 punpcklqdq xmm2, xmm2 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
252 punpcklqdq xmm4, xmm4 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
253 movdqa xmm3, xmm2 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
254 movdqa xmm5, xmm4 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
255 paddw xmm2, xmm0 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
256 paddw xmm3, xmm1 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
257 paddw xmm4, xmm0 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
258 paddw xmm5, xmm1 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
259 packuswb xmm2, xmm3 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
260 packuswb xmm4, xmm5 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
261 movdqa [r0+r1*1], xmm2 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
262 movdqa [r0+r1*2], xmm4 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
263 lea r0, [r0+r1*2] |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
264 dec r5d |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
265 jg .loop |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
266 REP_RET |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
267 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
268 ;----------------------------------------------------------------------------- |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
269 ; void pred8x8_vertical(uint8_t *src, int stride) |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
270 ;----------------------------------------------------------------------------- |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
271 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
272 cglobal pred8x8_vertical_mmx, 2,2 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
273 sub r0, r1 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
274 movq mm0, [r0] |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
275 %rep 3 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
276 movq [r0+r1*1], mm0 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
277 movq [r0+r1*2], mm0 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
278 lea r0, [r0+r1*2] |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
279 %endrep |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
280 movq [r0+r1*1], mm0 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
281 movq [r0+r1*2], mm0 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
282 RET |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
283 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
284 ;----------------------------------------------------------------------------- |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
285 ; void pred8x8_horizontal(uint8_t *src, int stride) |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
286 ;----------------------------------------------------------------------------- |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
287 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
288 %macro PRED8x8_H 1 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
289 cglobal pred8x8_horizontal_%1, 2,3 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
290 mov r2, 4 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
291 %ifidn %1, ssse3 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
292 mova m2, [pb_3] |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
293 %endif |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
294 .loop: |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
295 movd m0, [r0+r1*0-4] |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
296 movd m1, [r0+r1*1-4] |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
297 %ifidn %1, ssse3 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
298 pshufb m0, m2 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
299 pshufb m1, m2 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
300 %else |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
301 punpcklbw m0, m0 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
302 punpcklbw m1, m1 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
303 %ifidn %1, mmxext |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
304 pshufw m0, m0, 0xff |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
305 pshufw m1, m1, 0xff |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
306 %else |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
307 punpckhwd m0, m0 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
308 punpckhwd m1, m1 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
309 punpckhdq m0, m0 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
310 punpckhdq m1, m1 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
311 %endif |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
312 %endif |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
313 mova [r0+r1*0], m0 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
314 mova [r0+r1*1], m1 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
315 lea r0, [r0+r1*2] |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
316 dec r2 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
317 jg .loop |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
318 REP_RET |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
319 %endmacro |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
320 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
321 INIT_MMX |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
322 PRED8x8_H mmx |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
323 PRED8x8_H mmxext |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
324 PRED8x8_H ssse3 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
325 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
326 ;----------------------------------------------------------------------------- |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
327 ; void pred8x8_dc_rv40(uint8_t *src, int stride) |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
328 ;----------------------------------------------------------------------------- |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
329 |
12001
953a0949c789
Fix some intra pred MMX functions that used MMXEXT instructions
darkshikari
parents:
11951
diff
changeset
|
330 cglobal pred8x8_dc_rv40_mmxext, 2,7 |
11951
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
331 mov r4, r0 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
332 sub r0, r1 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
333 pxor mm0, mm0 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
334 psadbw mm0, [r0] |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
335 dec r0 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
336 movzx r5d, byte [r0+r1*1] |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
337 movd r6d, mm0 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
338 lea r0, [r0+r1*2] |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
339 %rep 3 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
340 movzx r2d, byte [r0+r1*0] |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
341 movzx r3d, byte [r0+r1*1] |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
342 add r5d, r2d |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
343 add r6d, r3d |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
344 lea r0, [r0+r1*2] |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
345 %endrep |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
346 movzx r2d, byte [r0+r1*0] |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
347 add r5d, r6d |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
348 lea r2d, [r2+r5+8] |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
349 shr r2d, 4 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
350 movd mm0, r2d |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
351 punpcklbw mm0, mm0 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
352 pshufw mm0, mm0, 0 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
353 mov r3d, 4 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
354 .loop: |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
355 movq [r4+r1*0], mm0 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
356 movq [r4+r1*1], mm0 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
357 lea r4, [r4+r1*2] |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
358 dec r3d |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
359 jg .loop |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
360 REP_RET |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
361 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
362 ;----------------------------------------------------------------------------- |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
363 ; void pred8x8_tm_vp8(uint8_t *src, int stride) |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
364 ;----------------------------------------------------------------------------- |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
365 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
366 %macro PRED8x8_TM_MMX 1 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
367 cglobal pred8x8_tm_vp8_%1, 2,6 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
368 sub r0, r1 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
369 pxor mm7, mm7 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
370 movq mm0, [r0] |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
371 movq mm1, mm0 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
372 punpcklbw mm0, mm7 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
373 punpckhbw mm1, mm7 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
374 movzx r4d, byte [r0-1] |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
375 mov r5d, 4 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
376 .loop: |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
377 movzx r2d, byte [r0+r1*1-1] |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
378 movzx r3d, byte [r0+r1*2-1] |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
379 sub r2d, r4d |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
380 sub r3d, r4d |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
381 movd mm2, r2d |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
382 movd mm4, r3d |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
383 %ifidn %1, mmx |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
384 punpcklwd mm2, mm2 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
385 punpcklwd mm4, mm4 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
386 punpckldq mm2, mm2 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
387 punpckldq mm4, mm4 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
388 %else |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
389 pshufw mm2, mm2, 0 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
390 pshufw mm4, mm4, 0 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
391 %endif |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
392 movq mm3, mm2 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
393 movq mm5, mm4 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
394 paddw mm2, mm0 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
395 paddw mm3, mm1 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
396 paddw mm4, mm0 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
397 paddw mm5, mm1 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
398 packuswb mm2, mm3 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
399 packuswb mm4, mm5 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
400 movq [r0+r1*1], mm2 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
401 movq [r0+r1*2], mm4 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
402 lea r0, [r0+r1*2] |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
403 dec r5d |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
404 jg .loop |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
405 REP_RET |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
406 %endmacro |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
407 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
408 PRED8x8_TM_MMX mmx |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
409 PRED8x8_TM_MMX mmxext |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
410 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
411 cglobal pred8x8_tm_vp8_sse2, 2,6,4 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
412 sub r0, r1 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
413 pxor xmm1, xmm1 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
414 movq xmm0, [r0] |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
415 punpcklbw xmm0, xmm1 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
416 movzx r4d, byte [r0-1] |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
417 mov r5d, 4 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
418 .loop: |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
419 movzx r2d, byte [r0+r1*1-1] |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
420 movzx r3d, byte [r0+r1*2-1] |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
421 sub r2d, r4d |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
422 sub r3d, r4d |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
423 movd xmm2, r2d |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
424 movd xmm3, r3d |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
425 pshuflw xmm2, xmm2, 0 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
426 pshuflw xmm3, xmm3, 0 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
427 punpcklqdq xmm2, xmm2 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
428 punpcklqdq xmm3, xmm3 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
429 paddw xmm2, xmm0 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
430 paddw xmm3, xmm0 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
431 packuswb xmm2, xmm3 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
432 movq [r0+r1*1], xmm2 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
433 movhps [r0+r1*2], xmm2 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
434 lea r0, [r0+r1*2] |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
435 dec r5d |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
436 jg .loop |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
437 REP_RET |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
438 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
439 cglobal pred8x8_tm_vp8_ssse3, 2,3,6 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
440 sub r0, r1 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
441 movdqa xmm4, [tm_shuf] |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
442 pxor xmm1, xmm1 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
443 movq xmm0, [r0] |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
444 punpcklbw xmm0, xmm1 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
445 movd xmm5, [r0-4] |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
446 pshufb xmm5, xmm4 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
447 mov r2d, 4 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
448 .loop: |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
449 movd xmm2, [r0+r1*1-4] |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
450 movd xmm3, [r0+r1*2-4] |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
451 pshufb xmm2, xmm4 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
452 pshufb xmm3, xmm4 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
453 psubw xmm2, xmm5 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
454 psubw xmm3, xmm5 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
455 paddw xmm2, xmm0 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
456 paddw xmm3, xmm0 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
457 packuswb xmm2, xmm3 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
458 movq [r0+r1*1], xmm2 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
459 movhps [r0+r1*2], xmm2 |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
460 lea r0, [r0+r1*2] |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
461 dec r2d |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
462 jg .loop |
afee30fe8c26
16x16 and 8x8c x86 SIMD intra pred functions for VP8 and H.264
darkshikari
parents:
diff
changeset
|
463 REP_RET |
12001
953a0949c789
Fix some intra pred MMX functions that used MMXEXT instructions
darkshikari
parents:
11951
diff
changeset
|
464 |
12002
9c8584f68754
Add missing comment header for predict_4x4_dc_mmxext
darkshikari
parents:
12001
diff
changeset
|
;-----------------------------------------------------------------------------
; void pred4x4_dc_mmxext(uint8_t *src, const uint8_t *topright, int stride)
;
; 4x4 DC prediction.  Every pixel of the block is set to
;     (t0+t1+t2+t3 + l0+l1+l2+l3 + 4) >> 3
; where t0..t3 are the four bytes directly above the block and l0..l3 the
; four bytes directly to its left.
;
; r0 = src, r2 = stride.  topright (r1) is never read; r1 is only used as a
; scratch register.  Clobbers r0, r1, r3, r4, mm0, mm7.
;-----------------------------------------------------------------------------

cglobal pred4x4_dc_mmxext, 3,5
    mov     r4, r0                      ; r4 = row 0 of the block
    sub     r0, r2                      ; r0 = row above the block
    pxor   mm7, mm7
    movd   mm0, [r0]                    ; t0 t1 t2 t3 (upper half of mm0 is zero)
    psadbw mm0, mm7                     ; SAD against zero = t0+t1+t2+t3
    lea     r0, [r4+r2*1]               ; r0 = row 1 of the block
    movzx  r3d, byte [r4-1]             ; l0
    movzx  r1d, byte [r0-1]             ; l1
    add    r3d, r1d
    movzx  r1d, byte [r0+r2*1-1]        ; l2
    add    r3d, r1d
    movzx  r1d, byte [r0+r2*2-1]        ; l3
    add    r3d, r1d
    movd   r1d, mm0                     ; sum of the top row
    add    r3d, r1d
    add    r3d, 4                       ; rounding term for the divide by 8
    shr    r3d, 3
    imul   r3d, 0x01010101              ; replicate the dc byte into all 4 bytes
    mov   [r4], r3d                     ; row 0
    mov   [r0], r3d                     ; row 1
    mov   [r0+r2*1], r3d                ; row 2
    mov   [r0+r2*2], r3d                ; row 3
    RET
12003
3b761226ea35
Add mmx/mmxext/ssse3 4x4 TM intra pred functions for vp8
darkshikari
parents:
12002
diff
changeset
|
493 |
3b761226ea35
Add mmx/mmxext/ssse3 4x4 TM intra pred functions for vp8
darkshikari
parents:
12002
diff
changeset
|
;-----------------------------------------------------------------------------
; void pred4x4_tm_vp8_mmx   (uint8_t *src, const uint8_t *topright, int stride)
; void pred4x4_tm_vp8_mmxext(uint8_t *src, const uint8_t *topright, int stride)
;
; 4x4 "TM" prediction: pixel(x, y) = clip_uint8(left[y] + top[x] - topleft).
; The macro argument selects the instruction set used to broadcast a word
; across an MMX register (mmx: two unpacks, anything else: pshufw).
;
; r0 = src, r2 = stride.  topright (r1) is never read; r1 is only used as a
; scratch register.  Clobbers r0, r1, r3, r4, r5, mm0, mm1, mm3, mm7.
;-----------------------------------------------------------------------------

%macro PRED4x4_TM_MMX 1
cglobal pred4x4_tm_vp8_%1, 3,6
    sub        r0, r2                   ; r0 = row above the block
    movzx     r4d, byte [r0-1]          ; r4d = topleft
    movd      mm0, [r0]
    pxor      mm7, mm7
    punpcklbw mm0, mm7                  ; mm0 = top row widened to 4 words
    mov       r5d, 2                    ; 2 iterations x 2 rows = 4 rows
.loop:
    ; fetch both left neighbours before anything is stored
    movzx     r3d, byte [r0+r2*1-1]     ; left pixel of the first row
    movzx     r1d, byte [r0+r2*2-1]     ; left pixel of the second row
    sub       r3d, r4d                  ; left - topleft (may be negative)
    sub       r1d, r4d
    movd      mm1, r3d
    movd      mm3, r1d
    ; broadcast the low word (the signed difference) to all four lanes
%ifidn %1, mmx
    punpcklwd mm1, mm1
    punpckldq mm1, mm1
    punpcklwd mm3, mm3
    punpckldq mm3, mm3
%else
    pshufw    mm1, mm1, 0
    pshufw    mm3, mm3, 0
%endif
    paddw     mm1, mm0                  ; + top row
    packuswb  mm1, mm1                  ; saturate to 0..255
    paddw     mm3, mm0
    packuswb  mm3, mm3
    movd      [r0+r2*1], mm1
    movd      [r0+r2*2], mm3
    lea        r0, [r0+r2*2]            ; advance two rows
    dec       r5d
    jg .loop
    REP_RET
%endmacro

PRED4x4_TM_MMX mmx
PRED4x4_TM_MMX mmxext
3b761226ea35
Add mmx/mmxext/ssse3 4x4 TM intra pred functions for vp8
darkshikari
parents:
12002
diff
changeset
|
536 |
3b761226ea35
Add mmx/mmxext/ssse3 4x4 TM intra pred functions for vp8
darkshikari
parents:
12002
diff
changeset
|
;-----------------------------------------------------------------------------
; void pred4x4_tm_vp8_ssse3(uint8_t *src, const uint8_t *topright, int stride)
;
; 4x4 "TM" prediction without a loop: each row is
;     packuswb(shuffle(4 bytes ending at the row's left pixel)
;              - shuffle(4 bytes ending at topleft) + top row)
; NOTE(review): tm_shuf is defined outside this excerpt; it presumably
; zero-extends byte 3 of the loaded dword into every word lane - confirm.
;
; r0 = src, r2 = stride.  topright (r1) is never read; r1 is reused as a
; second row pointer.  Clobbers r0, r1, mm0-mm7.
;-----------------------------------------------------------------------------
cglobal pred4x4_tm_vp8_ssse3, 3,3
    sub         r0, r2                  ; r0 = row above the block
    lea         r1, [r0+r2*2]           ; r1 = row 1 of the block
    movq       mm6, [tm_shuf]

    ; all loads are issued before any store
    movd       mm7, [r0-4]              ; dword whose last byte is topleft
    movd       mm2, [r0+r2*1-4]         ; dword ending at the left pixel, row 0
    movd       mm3, [r0+r2*2-4]         ; row 1
    movd       mm4, [r1+r2*1-4]         ; row 2
    movd       mm5, [r1+r2*2-4]         ; row 3
    movd       mm0, [r0]
    pxor       mm1, mm1
    punpcklbw  mm0, mm1                 ; mm0 = top row widened to 4 words
    pshufb     mm7, mm6

    ; row 0
    pshufb     mm2, mm6
    psubw      mm2, mm7
    paddw      mm2, mm0
    packuswb   mm2, mm2                 ; saturate to 0..255
    ; row 1
    pshufb     mm3, mm6
    psubw      mm3, mm7
    paddw      mm3, mm0
    packuswb   mm3, mm3
    ; row 2
    pshufb     mm4, mm6
    psubw      mm4, mm7
    paddw      mm4, mm0
    packuswb   mm4, mm4
    ; row 3
    pshufb     mm5, mm6
    psubw      mm5, mm7
    paddw      mm5, mm0
    packuswb   mm5, mm5

    movd       [r0+r2*1], mm2
    movd       [r0+r2*2], mm3
    movd       [r1+r2*1], mm4
    movd       [r1+r2*2], mm5
    RET
12004 | 571 |
; PRED4x4_LOWPASS dest, left, right, src, tmp
; output: %1 = (t[n-1] + t[n]*2 + t[n+1] + 2) >> 2   (per byte)
;   %2 = t[n-1] (left), %3 = t[n+1] (right), %4 = t[n] (src)
; %2, %3 and %5 are overwritten; %4 is only read.  The five operands are
; expected to be distinct registers.
; NOTE(review): pb_1 is defined outside this excerpt; presumably a vector of
; bytes all equal to 1 - confirm.
%macro PRED4x4_LOWPASS 5
    ; step 1: %2 = floor((left + right) / 2)
    mova    %5, %2                      ; keep the unmodified left operand
    pavgb   %2, %3                      ; rounds up: (left + right + 1) >> 1
    pxor    %3, %5
    pand    %3, [pb_1]                  ; isolate bit 0 of left ^ right
    psubusb %2, %3                      ; remove the round-up added by pavgb
    ; step 2: dest = (src + %2 + 1) >> 1
    mova    %1, %4
    pavgb   %1, %2
%endmacro
583 | |
;-----------------------------------------------------------------------------
; void pred4x4_vertical_vp8_mmxext(uint8_t *src, const uint8_t *topright, int stride)
;
; Vertical prediction with a smoothed top edge: the row above the block is
; run through PRED4x4_LOWPASS (using the byte before it and the first
; topright byte as outer neighbours) and the result is copied into all four
; rows of the block.
;
; r0 = src, r1 = topright, r2 = stride.  r1 is overwritten with a row pointer
; once topright has been read.  Clobbers r0, r1, m0-m4.
;-----------------------------------------------------------------------------

INIT_MMX
cglobal pred4x4_vertical_vp8_mmxext, 3,3
    sub       r0, r2                    ; r0 = row above the block
    movd      m2, [r0]                  ; t0 t1 t2 t3
    movd      m1, [r0-1]                ; tl t0 t1 t2
    mova      m0, m2
    punpckldq m0, [r1]                  ; t0 t1 t2 t3 t4 t5 t6 t7
    psrlq     m0, 8                     ; t1 t2 t3 t4
    lea       r1, [r0+r2*2]             ; r1 = row 1 (topright no longer needed)
    PRED4x4_LOWPASS m3, m1, m0, m2, m4  ; m3 = filtered top row
    movd    [r0+r2*1], m3               ; row 0
    movd    [r0+r2*2], m3               ; row 1
    movd    [r1+r2*1], m3               ; row 2
    movd    [r1+r2*2], m3               ; row 3
    RET