Mercurial > libavcodec.hg
annotate armv4l/dsputil_iwmmxt_rnd.h @ 5788:a3a2968e16ad libavcodec
export DCT_common_init and add ff_prefix
author | bcoudurier |
---|---|
date | Mon, 08 Oct 2007 09:09:19 +0000 |
parents | c8c591fe26f8 |
children | 1876bc447aa4 |
rev | line source |
---|---|
3699
c537a97eec66
Add official LGPL license headers to the files that were missing them.
diego
parents:
2734
diff
changeset
|
1 /* |
c537a97eec66
Add official LGPL license headers to the files that were missing them.
diego
parents:
2734
diff
changeset
|
2 * iWMMXt optimized DSP utils |
c537a97eec66
Add official LGPL license headers to the files that were missing them.
diego
parents:
2734
diff
changeset
|
3 * copyright (c) 2004 AGAWA Koji |
c537a97eec66
Add official LGPL license headers to the files that were missing them.
diego
parents:
2734
diff
changeset
|
4 * |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3699
diff
changeset
|
5 * This file is part of FFmpeg. |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3699
diff
changeset
|
6 * |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3699
diff
changeset
|
7 * FFmpeg is free software; you can redistribute it and/or |
3699
c537a97eec66
Add official LGPL license headers to the files that were missing them.
diego
parents:
2734
diff
changeset
|
8 * modify it under the terms of the GNU Lesser General Public |
c537a97eec66
Add official LGPL license headers to the files that were missing them.
diego
parents:
2734
diff
changeset
|
9 * License as published by the Free Software Foundation; either |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3699
diff
changeset
|
10 * version 2.1 of the License, or (at your option) any later version. |
3699
c537a97eec66
Add official LGPL license headers to the files that were missing them.
diego
parents:
2734
diff
changeset
|
11 * |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3699
diff
changeset
|
12 * FFmpeg is distributed in the hope that it will be useful, |
3699
c537a97eec66
Add official LGPL license headers to the files that were missing them.
diego
parents:
2734
diff
changeset
|
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
c537a97eec66
Add official LGPL license headers to the files that were missing them.
diego
parents:
2734
diff
changeset
|
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
c537a97eec66
Add official LGPL license headers to the files that were missing them.
diego
parents:
2734
diff
changeset
|
15 * Lesser General Public License for more details. |
c537a97eec66
Add official LGPL license headers to the files that were missing them.
diego
parents:
2734
diff
changeset
|
16 * |
c537a97eec66
Add official LGPL license headers to the files that were missing them.
diego
parents:
2734
diff
changeset
|
17 * You should have received a copy of the GNU Lesser General Public |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3699
diff
changeset
|
18 * License along with FFmpeg; if not, write to the Free Software |
3699
c537a97eec66
Add official LGPL license headers to the files that were missing them.
diego
parents:
2734
diff
changeset
|
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
c537a97eec66
Add official LGPL license headers to the files that were missing them.
diego
parents:
2734
diff
changeset
|
20 */ |
c537a97eec66
Add official LGPL license headers to the files that were missing them.
diego
parents:
2734
diff
changeset
|
21 |
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
22 void DEF(put, pixels8)(uint8_t *block, const uint8_t *pixels, const int line_size, int h) |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
23 { |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
24 int stride = line_size; |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
25 __asm__ __volatile__ ( |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
26 "and r12, %[pixels], #7 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
27 "bic %[pixels], %[pixels], #7 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
28 "tmcr wcgr1, r12 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
29 "add r4, %[pixels], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
30 "add r5, %[block], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
31 "mov %[line_size], %[line_size], lsl #1 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
32 "1: \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
33 "wldrd wr0, [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
34 "subs %[h], %[h], #2 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
35 "wldrd wr1, [%[pixels], #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
36 "add %[pixels], %[pixels], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
37 "wldrd wr3, [r4] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
38 "pld [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
39 "pld [%[pixels], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
40 "wldrd wr4, [r4, #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
41 "add r4, r4, %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
42 "walignr1 wr8, wr0, wr1 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
43 "pld [r4] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
44 "pld [r4, #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
45 "walignr1 wr10, wr3, wr4 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
46 "wstrd wr8, [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
47 "add %[block], %[block], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
48 "wstrd wr10, [r5] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
49 "add r5, r5, %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
50 "bne 1b \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
51 : [block]"+r"(block), [pixels]"+r"(pixels), [line_size]"+r"(stride), [h]"+r"(h) |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
52 : |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
53 : "memory", "r4", "r5", "r12"); |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
54 } |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
55 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
56 void DEF(avg, pixels8)(uint8_t *block, const uint8_t *pixels, const int line_size, int h) |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
57 { |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
58 int stride = line_size; |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
59 __asm__ __volatile__ ( |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
60 "and r12, %[pixels], #7 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
61 "bic %[pixels], %[pixels], #7 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
62 "tmcr wcgr1, r12 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
63 "add r4, %[pixels], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
64 "add r5, %[block], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
65 "mov %[line_size], %[line_size], lsl #1 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
66 "1: \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
67 "wldrd wr0, [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
68 "subs %[h], %[h], #2 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
69 "wldrd wr1, [%[pixels], #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
70 "add %[pixels], %[pixels], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
71 "wldrd wr3, [r4] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
72 "pld [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
73 "pld [%[pixels], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
74 "wldrd wr4, [r4, #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
75 "add r4, r4, %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
76 "walignr1 wr8, wr0, wr1 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
77 "wldrd wr0, [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
78 "wldrd wr2, [r5] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
79 "pld [r4] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
80 "pld [r4, #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
81 "walignr1 wr10, wr3, wr4 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
82 WAVG2B" wr8, wr8, wr0 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
83 WAVG2B" wr10, wr10, wr2 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
84 "wstrd wr8, [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
85 "add %[block], %[block], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
86 "wstrd wr10, [r5] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
87 "pld [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
88 "pld [%[block], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
89 "add r5, r5, %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
90 "pld [r5] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
91 "pld [r5, #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
92 "bne 1b \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
93 : [block]"+r"(block), [pixels]"+r"(pixels), [line_size]"+r"(stride), [h]"+r"(h) |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
94 : |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
95 : "memory", "r4", "r5", "r12"); |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
96 } |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
97 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
98 void DEF(put, pixels16)(uint8_t *block, const uint8_t *pixels, const int line_size, int h) |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
99 { |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
100 int stride = line_size; |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
101 __asm__ __volatile__ ( |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
102 "and r12, %[pixels], #7 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
103 "bic %[pixels], %[pixels], #7 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
104 "tmcr wcgr1, r12 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
105 "add r4, %[pixels], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
106 "add r5, %[block], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
107 "mov %[line_size], %[line_size], lsl #1 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
108 "1: \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
109 "wldrd wr0, [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
110 "wldrd wr1, [%[pixels], #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
111 "subs %[h], %[h], #2 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
112 "wldrd wr2, [%[pixels], #16] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
113 "add %[pixels], %[pixels], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
114 "wldrd wr3, [r4] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
115 "pld [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
116 "pld [%[pixels], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
117 "walignr1 wr8, wr0, wr1 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
118 "wldrd wr4, [r4, #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
119 "walignr1 wr9, wr1, wr2 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
120 "wldrd wr5, [r4, #16] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
121 "add r4, r4, %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
122 "pld [r4] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
123 "pld [r4, #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
124 "walignr1 wr10, wr3, wr4 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
125 "wstrd wr8, [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
126 "walignr1 wr11, wr4, wr5 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
127 "wstrd wr9, [%[block], #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
128 "add %[block], %[block], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
129 "wstrd wr10, [r5] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
130 "wstrd wr11, [r5, #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
131 "add r5, r5, %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
132 "bne 1b \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
133 : [block]"+r"(block), [pixels]"+r"(pixels), [line_size]"+r"(stride), [h]"+r"(h) |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
134 : |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
135 : "memory", "r4", "r5", "r12"); |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
136 } |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
137 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
138 void DEF(avg, pixels16)(uint8_t *block, const uint8_t *pixels, const int line_size, int h) |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
139 { |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
140 int stride = line_size; |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
141 __asm__ __volatile__ ( |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
142 "pld [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
143 "pld [%[pixels], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
144 "pld [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
145 "pld [%[block], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
146 "and r12, %[pixels], #7 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
147 "bic %[pixels], %[pixels], #7 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
148 "tmcr wcgr1, r12 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
149 "add r4, %[pixels], %[line_size]\n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
150 "add r5, %[block], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
151 "mov %[line_size], %[line_size], lsl #1 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
152 "1: \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
153 "wldrd wr0, [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
154 "wldrd wr1, [%[pixels], #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
155 "subs %[h], %[h], #2 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
156 "wldrd wr2, [%[pixels], #16] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
157 "add %[pixels], %[pixels], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
158 "wldrd wr3, [r4] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
159 "pld [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
160 "pld [%[pixels], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
161 "walignr1 wr8, wr0, wr1 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
162 "wldrd wr4, [r4, #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
163 "walignr1 wr9, wr1, wr2 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
164 "wldrd wr5, [r4, #16] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
165 "add r4, r4, %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
166 "wldrd wr0, [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
167 "pld [r4] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
168 "wldrd wr1, [%[block], #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
169 "pld [r4, #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
170 "wldrd wr2, [r5] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
171 "walignr1 wr10, wr3, wr4 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
172 "wldrd wr3, [r5, #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
173 WAVG2B" wr8, wr8, wr0 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
174 WAVG2B" wr9, wr9, wr1 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
175 WAVG2B" wr10, wr10, wr2 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
176 "wstrd wr8, [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
177 "walignr1 wr11, wr4, wr5 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
178 WAVG2B" wr11, wr11, wr3 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
179 "wstrd wr9, [%[block], #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
180 "add %[block], %[block], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
181 "wstrd wr10, [r5] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
182 "pld [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
183 "pld [%[block], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
184 "wstrd wr11, [r5, #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
185 "add r5, r5, %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
186 "pld [r5] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
187 "pld [r5, #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
188 "bne 1b \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
189 : [block]"+r"(block), [pixels]"+r"(pixels), [line_size]"+r"(stride), [h]"+r"(h) |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
190 : |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
191 : "memory", "r4", "r5", "r12"); |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
192 } |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
193 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
194 void DEF(put, pixels8_x2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h) |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
195 { |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
196 int stride = line_size; |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
197 // [wr0 wr1 wr2 wr3] for previous line |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
198 // [wr4 wr5 wr6 wr7] for current line |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
199 SET_RND(wr15); // =2 for rnd and =1 for no_rnd version |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
200 __asm__ __volatile__( |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
201 "pld [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
202 "pld [%[pixels], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
203 "and r12, %[pixels], #7 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
204 "bic %[pixels], %[pixels], #7 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
205 "tmcr wcgr1, r12 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
206 "add r12, r12, #1 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
207 "add r4, %[pixels], %[line_size]\n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
208 "tmcr wcgr2, r12 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
209 "add r5, %[block], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
210 "mov %[line_size], %[line_size], lsl #1 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
211 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
212 "1: \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
213 "wldrd wr10, [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
214 "cmp r12, #8 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
215 "wldrd wr11, [%[pixels], #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
216 "add %[pixels], %[pixels], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
217 "wldrd wr13, [r4] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
218 "pld [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
219 "wldrd wr14, [r4, #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
220 "pld [%[pixels], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
221 "add r4, r4, %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
222 "walignr1 wr0, wr10, wr11 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
223 "pld [r4] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
224 "pld [r4, #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
225 "walignr1 wr2, wr13, wr14 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
226 "wmoveq wr4, wr11 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
227 "wmoveq wr6, wr14 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
228 "walignr2ne wr4, wr10, wr11 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
229 "walignr2ne wr6, wr13, wr14 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
230 WAVG2B" wr0, wr0, wr4 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
231 WAVG2B" wr2, wr2, wr6 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
232 "wstrd wr0, [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
233 "subs %[h], %[h], #2 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
234 "wstrd wr2, [r5] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
235 "add %[block], %[block], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
236 "add r5, r5, %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
237 "bne 1b \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
238 : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride) |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
239 : |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
240 : "r4", "r5", "r12", "memory"); |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
241 } |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
242 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
243 void DEF(put, pixels16_x2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h) |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
244 { |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
245 int stride = line_size; |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
246 // [wr0 wr1 wr2 wr3] for previous line |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
247 // [wr4 wr5 wr6 wr7] for current line |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
248 SET_RND(wr15); // =2 for rnd and =1 for no_rnd version |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
249 __asm__ __volatile__( |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
250 "pld [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
251 "pld [%[pixels], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
252 "and r12, %[pixels], #7 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
253 "bic %[pixels], %[pixels], #7 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
254 "tmcr wcgr1, r12 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
255 "add r12, r12, #1 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
256 "add r4, %[pixels], %[line_size]\n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
257 "tmcr wcgr2, r12 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
258 "add r5, %[block], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
259 "mov %[line_size], %[line_size], lsl #1 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
260 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
261 "1: \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
262 "wldrd wr10, [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
263 "cmp r12, #8 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
264 "wldrd wr11, [%[pixels], #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
265 "wldrd wr12, [%[pixels], #16] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
266 "add %[pixels], %[pixels], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
267 "wldrd wr13, [r4] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
268 "pld [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
269 "wldrd wr14, [r4, #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
270 "pld [%[pixels], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
271 "wldrd wr15, [r4, #16] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
272 "add r4, r4, %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
273 "walignr1 wr0, wr10, wr11 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
274 "pld [r4] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
275 "pld [r4, #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
276 "walignr1 wr1, wr11, wr12 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
277 "walignr1 wr2, wr13, wr14 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
278 "walignr1 wr3, wr14, wr15 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
279 "wmoveq wr4, wr11 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
280 "wmoveq wr5, wr12 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
281 "wmoveq wr6, wr14 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
282 "wmoveq wr7, wr15 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
283 "walignr2ne wr4, wr10, wr11 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
284 "walignr2ne wr5, wr11, wr12 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
285 "walignr2ne wr6, wr13, wr14 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
286 "walignr2ne wr7, wr14, wr15 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
287 WAVG2B" wr0, wr0, wr4 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
288 WAVG2B" wr1, wr1, wr5 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
289 "wstrd wr0, [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
290 WAVG2B" wr2, wr2, wr6 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
291 "wstrd wr1, [%[block], #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
292 WAVG2B" wr3, wr3, wr7 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
293 "add %[block], %[block], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
294 "wstrd wr2, [r5] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
295 "subs %[h], %[h], #2 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
296 "wstrd wr3, [r5, #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
297 "add r5, r5, %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
298 "bne 1b \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
299 : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride) |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
300 : |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
301 : "r4", "r5", "r12", "memory"); |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
302 } |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
303 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
304 void DEF(avg, pixels8_x2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h) |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
305 { |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
306 int stride = line_size; |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
307 // [wr0 wr1 wr2 wr3] for previous line |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
308 // [wr4 wr5 wr6 wr7] for current line |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
309 SET_RND(wr15); // =2 for rnd and =1 for no_rnd version |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
310 __asm__ __volatile__( |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
311 "pld [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
312 "pld [%[pixels], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
313 "pld [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
314 "pld [%[block], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
315 "and r12, %[pixels], #7 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
316 "bic %[pixels], %[pixels], #7 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
317 "tmcr wcgr1, r12 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
318 "add r12, r12, #1 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
319 "add r4, %[pixels], %[line_size]\n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
320 "tmcr wcgr2, r12 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
321 "add r5, %[block], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
322 "mov %[line_size], %[line_size], lsl #1 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
323 "pld [r5] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
324 "pld [r5, #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
325 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
326 "1: \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
327 "wldrd wr10, [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
328 "cmp r12, #8 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
329 "wldrd wr11, [%[pixels], #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
330 "add %[pixels], %[pixels], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
331 "wldrd wr13, [r4] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
332 "pld [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
333 "wldrd wr14, [r4, #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
334 "pld [%[pixels], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
335 "add r4, r4, %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
336 "walignr1 wr0, wr10, wr11 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
337 "pld [r4] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
338 "pld [r4, #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
339 "walignr1 wr2, wr13, wr14 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
340 "wmoveq wr4, wr11 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
341 "wmoveq wr6, wr14 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
342 "walignr2ne wr4, wr10, wr11 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
343 "wldrd wr10, [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
344 "walignr2ne wr6, wr13, wr14 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
345 "wldrd wr12, [r5] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
346 WAVG2B" wr0, wr0, wr4 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
347 WAVG2B" wr2, wr2, wr6 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
348 WAVG2B" wr0, wr0, wr10 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
349 WAVG2B" wr2, wr2, wr12 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
350 "wstrd wr0, [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
351 "subs %[h], %[h], #2 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
352 "wstrd wr2, [r5] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
353 "add %[block], %[block], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
354 "add r5, r5, %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
355 "pld [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
356 "pld [%[block], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
357 "pld [r5] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
358 "pld [r5, #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
359 "bne 1b \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
360 : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride) |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
361 : |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
362 : "r4", "r5", "r12", "memory"); |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
363 } |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
364 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
365 void DEF(avg, pixels16_x2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h) |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
366 { |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
367 int stride = line_size; |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
368 // [wr0 wr1 wr2 wr3] for previous line |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
369 // [wr4 wr5 wr6 wr7] for current line |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
370 SET_RND(wr15); // =2 for rnd and =1 for no_rnd version |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
371 __asm__ __volatile__( |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
372 "pld [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
373 "pld [%[pixels], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
374 "pld [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
375 "pld [%[block], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
376 "and r12, %[pixels], #7 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
377 "bic %[pixels], %[pixels], #7 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
378 "tmcr wcgr1, r12 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
379 "add r12, r12, #1 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
380 "add r4, %[pixels], %[line_size]\n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
381 "tmcr wcgr2, r12 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
382 "add r5, %[block], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
383 "mov %[line_size], %[line_size], lsl #1 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
384 "pld [r5] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
385 "pld [r5, #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
386 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
387 "1: \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
388 "wldrd wr10, [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
389 "cmp r12, #8 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
390 "wldrd wr11, [%[pixels], #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
391 "wldrd wr12, [%[pixels], #16] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
392 "add %[pixels], %[pixels], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
393 "wldrd wr13, [r4] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
394 "pld [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
395 "wldrd wr14, [r4, #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
396 "pld [%[pixels], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
397 "wldrd wr15, [r4, #16] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
398 "add r4, r4, %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
399 "walignr1 wr0, wr10, wr11 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
400 "pld [r4] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
401 "pld [r4, #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
402 "walignr1 wr1, wr11, wr12 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
403 "walignr1 wr2, wr13, wr14 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
404 "walignr1 wr3, wr14, wr15 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
405 "wmoveq wr4, wr11 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
406 "wmoveq wr5, wr12 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
407 "wmoveq wr6, wr14 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
408 "wmoveq wr7, wr15 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
409 "walignr2ne wr4, wr10, wr11 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
410 "walignr2ne wr5, wr11, wr12 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
411 "walignr2ne wr6, wr13, wr14 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
412 "walignr2ne wr7, wr14, wr15 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
413 "wldrd wr10, [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
414 WAVG2B" wr0, wr0, wr4 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
415 "wldrd wr11, [%[block], #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
416 WAVG2B" wr1, wr1, wr5 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
417 "wldrd wr12, [r5] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
418 WAVG2B" wr2, wr2, wr6 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
419 "wldrd wr13, [r5, #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
420 WAVG2B" wr3, wr3, wr7 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
421 WAVG2B" wr0, wr0, wr10 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
422 WAVG2B" wr1, wr1, wr11 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
423 WAVG2B" wr2, wr2, wr12 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
424 WAVG2B" wr3, wr3, wr13 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
425 "wstrd wr0, [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
426 "subs %[h], %[h], #2 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
427 "wstrd wr1, [%[block], #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
428 "add %[block], %[block], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
429 "wstrd wr2, [r5] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
430 "pld [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
431 "wstrd wr3, [r5, #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
432 "add r5, r5, %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
433 "pld [%[block], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
434 "pld [r5] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
435 "pld [r5, #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
436 "bne 1b \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
437 : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride) |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
438 : |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
439 :"r4", "r5", "r12", "memory"); |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
440 } |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
441 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
442 void DEF(avg, pixels8_y2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h) |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
443 { |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
444 int stride = line_size; |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
445 // [wr0 wr1 wr2 wr3] for previous line |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
446 // [wr4 wr5 wr6 wr7] for current line |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
447 __asm__ __volatile__( |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
448 "pld [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
449 "pld [%[pixels], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
450 "and r12, %[pixels], #7 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
451 "tmcr wcgr1, r12 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
452 "bic %[pixels], %[pixels], #7 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
453 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
454 "wldrd wr10, [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
455 "wldrd wr11, [%[pixels], #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
456 "pld [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
457 "add %[pixels], %[pixels], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
458 "walignr1 wr0, wr10, wr11 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
459 "pld [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
460 "pld [%[pixels], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
461 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
462 "1: \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
463 "wldrd wr10, [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
464 "wldrd wr11, [%[pixels], #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
465 "add %[pixels], %[pixels], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
466 "pld [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
467 "pld [%[pixels], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
468 "walignr1 wr4, wr10, wr11 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
469 "wldrd wr10, [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
470 WAVG2B" wr8, wr0, wr4 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
471 WAVG2B" wr8, wr8, wr10 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
472 "wstrd wr8, [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
473 "add %[block], %[block], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
474 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
475 "wldrd wr10, [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
476 "wldrd wr11, [%[pixels], #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
477 "pld [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
478 "add %[pixels], %[pixels], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
479 "pld [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
480 "pld [%[pixels], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
481 "walignr1 wr0, wr10, wr11 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
482 "wldrd wr10, [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
483 WAVG2B" wr8, wr0, wr4 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
484 WAVG2B" wr8, wr8, wr10 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
485 "wstrd wr8, [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
486 "add %[block], %[block], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
487 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
488 "subs %[h], %[h], #2 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
489 "pld [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
490 "bne 1b \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
491 : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride) |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
492 : |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
493 : "cc", "memory", "r12"); |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
494 } |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
495 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
496 void DEF(put, pixels16_y2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h) |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
497 { |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
498 int stride = line_size; |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
499 // [wr0 wr1 wr2 wr3] for previous line |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
500 // [wr4 wr5 wr6 wr7] for current line |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
501 __asm__ __volatile__( |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
502 "pld [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
503 "pld [%[pixels], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
504 "and r12, %[pixels], #7 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
505 "tmcr wcgr1, r12 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
506 "bic %[pixels], %[pixels], #7 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
507 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
508 "wldrd wr10, [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
509 "wldrd wr11, [%[pixels], #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
510 "wldrd wr12, [%[pixels], #16] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
511 "add %[pixels], %[pixels], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
512 "pld [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
513 "pld [%[pixels], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
514 "walignr1 wr0, wr10, wr11 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
515 "walignr1 wr1, wr11, wr12 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
516 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
517 "1: \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
518 "wldrd wr10, [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
519 "wldrd wr11, [%[pixels], #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
520 "wldrd wr12, [%[pixels], #16] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
521 "add %[pixels], %[pixels], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
522 "pld [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
523 "pld [%[pixels], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
524 "walignr1 wr4, wr10, wr11 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
525 "walignr1 wr5, wr11, wr12 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
526 WAVG2B" wr8, wr0, wr4 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
527 WAVG2B" wr9, wr1, wr5 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
528 "wstrd wr8, [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
529 "wstrd wr9, [%[block], #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
530 "add %[block], %[block], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
531 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
532 "wldrd wr10, [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
533 "wldrd wr11, [%[pixels], #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
534 "wldrd wr12, [%[pixels], #16] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
535 "add %[pixels], %[pixels], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
536 "pld [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
537 "pld [%[pixels], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
538 "walignr1 wr0, wr10, wr11 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
539 "walignr1 wr1, wr11, wr12 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
540 WAVG2B" wr8, wr0, wr4 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
541 WAVG2B" wr9, wr1, wr5 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
542 "wstrd wr8, [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
543 "wstrd wr9, [%[block], #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
544 "add %[block], %[block], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
545 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
546 "subs %[h], %[h], #2 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
547 "bne 1b \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
548 : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride) |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
549 : |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
550 : "r4", "r5", "r12", "memory"); |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
551 } |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
552 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
553 void DEF(avg, pixels16_y2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h) |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
554 { |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
555 int stride = line_size; |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
556 // [wr0 wr1 wr2 wr3] for previous line |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
557 // [wr4 wr5 wr6 wr7] for current line |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
558 __asm__ __volatile__( |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
559 "pld [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
560 "pld [%[pixels], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
561 "and r12, %[pixels], #7 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
562 "tmcr wcgr1, r12 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
563 "bic %[pixels], %[pixels], #7 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
564 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
565 "wldrd wr10, [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
566 "wldrd wr11, [%[pixels], #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
567 "pld [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
568 "wldrd wr12, [%[pixels], #16] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
569 "add %[pixels], %[pixels], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
570 "pld [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
571 "pld [%[pixels], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
572 "walignr1 wr0, wr10, wr11 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
573 "walignr1 wr1, wr11, wr12 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
574 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
575 "1: \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
576 "wldrd wr10, [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
577 "wldrd wr11, [%[pixels], #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
578 "wldrd wr12, [%[pixels], #16] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
579 "add %[pixels], %[pixels], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
580 "pld [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
581 "pld [%[pixels], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
582 "walignr1 wr4, wr10, wr11 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
583 "walignr1 wr5, wr11, wr12 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
584 "wldrd wr10, [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
585 "wldrd wr11, [%[block], #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
586 WAVG2B" wr8, wr0, wr4 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
587 WAVG2B" wr9, wr1, wr5 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
588 WAVG2B" wr8, wr8, wr10 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
589 WAVG2B" wr9, wr9, wr11 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
590 "wstrd wr8, [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
591 "wstrd wr9, [%[block], #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
592 "add %[block], %[block], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
593 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
594 "wldrd wr10, [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
595 "wldrd wr11, [%[pixels], #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
596 "pld [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
597 "wldrd wr12, [%[pixels], #16] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
598 "add %[pixels], %[pixels], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
599 "pld [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
600 "pld [%[pixels], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
601 "walignr1 wr0, wr10, wr11 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
602 "walignr1 wr1, wr11, wr12 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
603 "wldrd wr10, [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
604 "wldrd wr11, [%[block], #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
605 WAVG2B" wr8, wr0, wr4 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
606 WAVG2B" wr9, wr1, wr5 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
607 WAVG2B" wr8, wr8, wr10 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
608 WAVG2B" wr9, wr9, wr11 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
609 "wstrd wr8, [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
610 "wstrd wr9, [%[block], #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
611 "add %[block], %[block], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
612 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
613 "subs %[h], %[h], #2 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
614 "pld [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
615 "bne 1b \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
616 : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride) |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
617 : |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
618 : "r4", "r5", "r12", "memory"); |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
619 } |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
620 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
621 void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h) |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
622 { |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
623 // [wr0 wr1 wr2 wr3] for previous line |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
624 // [wr4 wr5 wr6 wr7] for current line |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
625 SET_RND(wr15); // =2 for rnd and =1 for no_rnd version |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
626 __asm__ __volatile__( |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
627 "pld [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
628 "mov r12, #2 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
629 "pld [%[pixels], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
630 "tmcr wcgr0, r12 \n\t" /* for shift value */ |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
631 "and r12, %[pixels], #7 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
632 "bic %[pixels], %[pixels], #7 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
633 "tmcr wcgr1, r12 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
634 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
635 // [wr0 wr1 wr2 wr3] <= * |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
636 // [wr4 wr5 wr6 wr7] |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
637 "wldrd wr12, [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
638 "add r12, r12, #1 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
639 "wldrd wr13, [%[pixels], #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
640 "tmcr wcgr2, r12 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
641 "add %[pixels], %[pixels], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
642 "cmp r12, #8 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
643 "pld [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
644 "pld [%[pixels], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
645 "walignr1 wr2, wr12, wr13 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
646 "wmoveq wr10, wr13 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
647 "walignr2ne wr10, wr12, wr13 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
648 "wunpckelub wr0, wr2 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
649 "wunpckehub wr1, wr2 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
650 "wunpckelub wr8, wr10 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
651 "wunpckehub wr9, wr10 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
652 "waddhus wr0, wr0, wr8 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
653 "waddhus wr1, wr1, wr9 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
654 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
655 "1: \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
656 // [wr0 wr1 wr2 wr3] |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
657 // [wr4 wr5 wr6 wr7] <= * |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
658 "wldrd wr12, [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
659 "cmp r12, #8 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
660 "wldrd wr13, [%[pixels], #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
661 "add %[pixels], %[pixels], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
662 "walignr1 wr6, wr12, wr13 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
663 "pld [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
664 "pld [%[pixels], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
665 "wmoveq wr10, wr13 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
666 "walignr2ne wr10, wr12, wr13 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
667 "wunpckelub wr4, wr6 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
668 "wunpckehub wr5, wr6 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
669 "wunpckelub wr8, wr10 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
670 "wunpckehub wr9, wr10 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
671 "waddhus wr4, wr4, wr8 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
672 "waddhus wr5, wr5, wr9 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
673 "waddhus wr8, wr0, wr4 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
674 "waddhus wr9, wr1, wr5 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
675 "waddhus wr8, wr8, wr15 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
676 "waddhus wr9, wr9, wr15 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
677 "wsrlhg wr8, wr8, wcgr0 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
678 "wsrlhg wr9, wr9, wcgr0 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
679 "wpackhus wr8, wr8, wr9 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
680 "wstrd wr8, [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
681 "add %[block], %[block], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
682 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
683 // [wr0 wr1 wr2 wr3] <= * |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
684 // [wr4 wr5 wr6 wr7] |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
685 "wldrd wr12, [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
686 "wldrd wr13, [%[pixels], #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
687 "add %[pixels], %[pixels], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
688 "walignr1 wr2, wr12, wr13 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
689 "pld [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
690 "pld [%[pixels], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
691 "wmoveq wr10, wr13 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
692 "walignr2ne wr10, wr12, wr13 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
693 "wunpckelub wr0, wr2 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
694 "wunpckehub wr1, wr2 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
695 "wunpckelub wr8, wr10 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
696 "wunpckehub wr9, wr10 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
697 "waddhus wr0, wr0, wr8 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
698 "waddhus wr1, wr1, wr9 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
699 "waddhus wr8, wr0, wr4 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
700 "waddhus wr9, wr1, wr5 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
701 "waddhus wr8, wr8, wr15 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
702 "waddhus wr9, wr9, wr15 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
703 "wsrlhg wr8, wr8, wcgr0 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
704 "wsrlhg wr9, wr9, wcgr0 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
705 "wpackhus wr8, wr8, wr9 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
706 "subs %[h], %[h], #2 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
707 "wstrd wr8, [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
708 "add %[block], %[block], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
709 "bne 1b \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
710 : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block) |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
711 : [line_size]"r"(line_size) |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
712 : "r12", "memory"); |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
713 } |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
714 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
715 void DEF(put, pixels16_xy2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h) |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
716 { |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
717 // [wr0 wr1 wr2 wr3] for previous line |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
718 // [wr4 wr5 wr6 wr7] for current line |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
719 SET_RND(wr15); // =2 for rnd and =1 for no_rnd version |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
720 __asm__ __volatile__( |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
721 "pld [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
722 "mov r12, #2 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
723 "pld [%[pixels], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
724 "tmcr wcgr0, r12 \n\t" /* for shift value */ |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
725 /* alignment */ |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
726 "and r12, %[pixels], #7 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
727 "bic %[pixels], %[pixels], #7 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
728 "tmcr wcgr1, r12 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
729 "add r12, r12, #1 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
730 "tmcr wcgr2, r12 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
731 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
732 // [wr0 wr1 wr2 wr3] <= * |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
733 // [wr4 wr5 wr6 wr7] |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
734 "wldrd wr12, [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
735 "cmp r12, #8 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
736 "wldrd wr13, [%[pixels], #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
737 "wldrd wr14, [%[pixels], #16] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
738 "add %[pixels], %[pixels], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
739 "pld [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
740 "walignr1 wr2, wr12, wr13 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
741 "pld [%[pixels], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
742 "walignr1 wr3, wr13, wr14 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
743 "wmoveq wr10, wr13 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
744 "wmoveq wr11, wr14 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
745 "walignr2ne wr10, wr12, wr13 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
746 "walignr2ne wr11, wr13, wr14 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
747 "wunpckelub wr0, wr2 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
748 "wunpckehub wr1, wr2 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
749 "wunpckelub wr2, wr3 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
750 "wunpckehub wr3, wr3 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
751 "wunpckelub wr8, wr10 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
752 "wunpckehub wr9, wr10 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
753 "wunpckelub wr10, wr11 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
754 "wunpckehub wr11, wr11 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
755 "waddhus wr0, wr0, wr8 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
756 "waddhus wr1, wr1, wr9 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
757 "waddhus wr2, wr2, wr10 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
758 "waddhus wr3, wr3, wr11 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
759 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
760 "1: \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
761 // [wr0 wr1 wr2 wr3] |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
762 // [wr4 wr5 wr6 wr7] <= * |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
763 "wldrd wr12, [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
764 "cmp r12, #8 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
765 "wldrd wr13, [%[pixels], #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
766 "wldrd wr14, [%[pixels], #16] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
767 "add %[pixels], %[pixels], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
768 "walignr1 wr6, wr12, wr13 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
769 "pld [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
770 "pld [%[pixels], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
771 "walignr1 wr7, wr13, wr14 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
772 "wmoveq wr10, wr13 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
773 "wmoveq wr11, wr14 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
774 "walignr2ne wr10, wr12, wr13 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
775 "walignr2ne wr11, wr13, wr14 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
776 "wunpckelub wr4, wr6 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
777 "wunpckehub wr5, wr6 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
778 "wunpckelub wr6, wr7 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
779 "wunpckehub wr7, wr7 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
780 "wunpckelub wr8, wr10 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
781 "wunpckehub wr9, wr10 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
782 "wunpckelub wr10, wr11 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
783 "wunpckehub wr11, wr11 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
784 "waddhus wr4, wr4, wr8 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
785 "waddhus wr5, wr5, wr9 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
786 "waddhus wr6, wr6, wr10 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
787 "waddhus wr7, wr7, wr11 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
788 "waddhus wr8, wr0, wr4 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
789 "waddhus wr9, wr1, wr5 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
790 "waddhus wr10, wr2, wr6 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
791 "waddhus wr11, wr3, wr7 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
792 "waddhus wr8, wr8, wr15 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
793 "waddhus wr9, wr9, wr15 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
794 "waddhus wr10, wr10, wr15 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
795 "waddhus wr11, wr11, wr15 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
796 "wsrlhg wr8, wr8, wcgr0 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
797 "wsrlhg wr9, wr9, wcgr0 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
798 "wsrlhg wr10, wr10, wcgr0 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
799 "wsrlhg wr11, wr11, wcgr0 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
800 "wpackhus wr8, wr8, wr9 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
801 "wpackhus wr9, wr10, wr11 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
802 "wstrd wr8, [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
803 "wstrd wr9, [%[block], #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
804 "add %[block], %[block], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
805 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
806 // [wr0 wr1 wr2 wr3] <= * |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
807 // [wr4 wr5 wr6 wr7] |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
808 "wldrd wr12, [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
809 "wldrd wr13, [%[pixels], #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
810 "wldrd wr14, [%[pixels], #16] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
811 "add %[pixels], %[pixels], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
812 "walignr1 wr2, wr12, wr13 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
813 "pld [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
814 "pld [%[pixels], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
815 "walignr1 wr3, wr13, wr14 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
816 "wmoveq wr10, wr13 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
817 "wmoveq wr11, wr14 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
818 "walignr2ne wr10, wr12, wr13 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
819 "walignr2ne wr11, wr13, wr14 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
820 "wunpckelub wr0, wr2 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
821 "wunpckehub wr1, wr2 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
822 "wunpckelub wr2, wr3 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
823 "wunpckehub wr3, wr3 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
824 "wunpckelub wr8, wr10 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
825 "wunpckehub wr9, wr10 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
826 "wunpckelub wr10, wr11 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
827 "wunpckehub wr11, wr11 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
828 "waddhus wr0, wr0, wr8 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
829 "waddhus wr1, wr1, wr9 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
830 "waddhus wr2, wr2, wr10 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
831 "waddhus wr3, wr3, wr11 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
832 "waddhus wr8, wr0, wr4 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
833 "waddhus wr9, wr1, wr5 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
834 "waddhus wr10, wr2, wr6 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
835 "waddhus wr11, wr3, wr7 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
836 "waddhus wr8, wr8, wr15 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
837 "waddhus wr9, wr9, wr15 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
838 "waddhus wr10, wr10, wr15 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
839 "waddhus wr11, wr11, wr15 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
840 "wsrlhg wr8, wr8, wcgr0 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
841 "wsrlhg wr9, wr9, wcgr0 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
842 "wsrlhg wr10, wr10, wcgr0 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
843 "wsrlhg wr11, wr11, wcgr0 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
844 "wpackhus wr8, wr8, wr9 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
845 "wpackhus wr9, wr10, wr11 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
846 "wstrd wr8, [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
847 "wstrd wr9, [%[block], #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
848 "add %[block], %[block], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
849 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
850 "subs %[h], %[h], #2 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
851 "bne 1b \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
852 : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block) |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
853 : [line_size]"r"(line_size) |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
854 : "r12", "memory"); |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
855 } |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
856 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
857 void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h) |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
858 { |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
859 // [wr0 wr1 wr2 wr3] for previous line |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
860 // [wr4 wr5 wr6 wr7] for current line |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
861 SET_RND(wr15); // =2 for rnd and =1 for no_rnd version |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
862 __asm__ __volatile__( |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
863 "pld [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
864 "pld [%[block], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
865 "pld [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
866 "mov r12, #2 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
867 "pld [%[pixels], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
868 "tmcr wcgr0, r12 \n\t" /* for shift value */ |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
869 "and r12, %[pixels], #7 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
870 "bic %[pixels], %[pixels], #7 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
871 "tmcr wcgr1, r12 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
872 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
873 // [wr0 wr1 wr2 wr3] <= * |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
874 // [wr4 wr5 wr6 wr7] |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
875 "wldrd wr12, [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
876 "add r12, r12, #1 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
877 "wldrd wr13, [%[pixels], #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
878 "tmcr wcgr2, r12 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
879 "add %[pixels], %[pixels], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
880 "cmp r12, #8 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
881 "pld [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
882 "pld [%[pixels], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
883 "walignr1 wr2, wr12, wr13 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
884 "wmoveq wr10, wr13 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
885 "walignr2ne wr10, wr12, wr13 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
886 "wunpckelub wr0, wr2 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
887 "wunpckehub wr1, wr2 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
888 "wunpckelub wr8, wr10 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
889 "wunpckehub wr9, wr10 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
890 "waddhus wr0, wr0, wr8 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
891 "waddhus wr1, wr1, wr9 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
892 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
893 "1: \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
894 // [wr0 wr1 wr2 wr3] |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
895 // [wr4 wr5 wr6 wr7] <= * |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
896 "wldrd wr12, [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
897 "cmp r12, #8 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
898 "wldrd wr13, [%[pixels], #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
899 "add %[pixels], %[pixels], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
900 "walignr1 wr6, wr12, wr13 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
901 "pld [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
902 "pld [%[pixels], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
903 "wmoveq wr10, wr13 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
904 "walignr2ne wr10, wr12, wr13 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
905 "wunpckelub wr4, wr6 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
906 "wunpckehub wr5, wr6 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
907 "wunpckelub wr8, wr10 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
908 "wunpckehub wr9, wr10 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
909 "waddhus wr4, wr4, wr8 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
910 "waddhus wr5, wr5, wr9 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
911 "waddhus wr8, wr0, wr4 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
912 "waddhus wr9, wr1, wr5 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
913 "waddhus wr8, wr8, wr15 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
914 "waddhus wr9, wr9, wr15 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
915 "wldrd wr12, [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
916 "wsrlhg wr8, wr8, wcgr0 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
917 "wsrlhg wr9, wr9, wcgr0 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
918 "wpackhus wr8, wr8, wr9 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
919 WAVG2B" wr8, wr8, wr12 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
920 "wstrd wr8, [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
921 "add %[block], %[block], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
922 "wldrd wr12, [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
923 "pld [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
924 "pld [%[block], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
925 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
926 // [wr0 wr1 wr2 wr3] <= * |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
927 // [wr4 wr5 wr6 wr7] |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
928 "wldrd wr13, [%[pixels], #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
929 "add %[pixels], %[pixels], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
930 "walignr1 wr2, wr12, wr13 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
931 "pld [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
932 "pld [%[pixels], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
933 "wmoveq wr10, wr13 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
934 "walignr2ne wr10, wr12, wr13 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
935 "wunpckelub wr0, wr2 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
936 "wunpckehub wr1, wr2 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
937 "wunpckelub wr8, wr10 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
938 "wunpckehub wr9, wr10 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
939 "waddhus wr0, wr0, wr8 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
940 "waddhus wr1, wr1, wr9 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
941 "waddhus wr8, wr0, wr4 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
942 "waddhus wr9, wr1, wr5 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
943 "waddhus wr8, wr8, wr15 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
944 "waddhus wr9, wr9, wr15 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
945 "wldrd wr12, [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
946 "wsrlhg wr8, wr8, wcgr0 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
947 "wsrlhg wr9, wr9, wcgr0 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
948 "wpackhus wr8, wr8, wr9 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
949 "subs %[h], %[h], #2 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
950 WAVG2B" wr8, wr8, wr12 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
951 "wstrd wr8, [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
952 "add %[block], %[block], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
953 "pld [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
954 "pld [%[block], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
955 "bne 1b \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
956 : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block) |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
957 : [line_size]"r"(line_size) |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
958 : "r12", "memory"); |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
959 } |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
960 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
961 void DEF(avg, pixels16_xy2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h) |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
962 { |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
963 // [wr0 wr1 wr2 wr3] for previous line |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
964 // [wr4 wr5 wr6 wr7] for current line |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
965 SET_RND(wr15); // =2 for rnd and =1 for no_rnd version |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
966 __asm__ __volatile__( |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
967 "pld [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
968 "pld [%[block], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
969 "pld [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
970 "mov r12, #2 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
971 "pld [%[pixels], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
972 "tmcr wcgr0, r12 \n\t" /* for shift value */ |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
973 /* alignment */ |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
974 "and r12, %[pixels], #7 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
975 "bic %[pixels], %[pixels], #7 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
976 "tmcr wcgr1, r12 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
977 "add r12, r12, #1 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
978 "tmcr wcgr2, r12 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
979 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
980 // [wr0 wr1 wr2 wr3] <= * |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
981 // [wr4 wr5 wr6 wr7] |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
982 "wldrd wr12, [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
983 "cmp r12, #8 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
984 "wldrd wr13, [%[pixels], #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
985 "wldrd wr14, [%[pixels], #16] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
986 "add %[pixels], %[pixels], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
987 "pld [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
988 "walignr1 wr2, wr12, wr13 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
989 "pld [%[pixels], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
990 "walignr1 wr3, wr13, wr14 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
991 "wmoveq wr10, wr13 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
992 "wmoveq wr11, wr14 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
993 "walignr2ne wr10, wr12, wr13 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
994 "walignr2ne wr11, wr13, wr14 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
995 "wunpckelub wr0, wr2 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
996 "wunpckehub wr1, wr2 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
997 "wunpckelub wr2, wr3 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
998 "wunpckehub wr3, wr3 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
999 "wunpckelub wr8, wr10 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1000 "wunpckehub wr9, wr10 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1001 "wunpckelub wr10, wr11 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1002 "wunpckehub wr11, wr11 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1003 "waddhus wr0, wr0, wr8 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1004 "waddhus wr1, wr1, wr9 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1005 "waddhus wr2, wr2, wr10 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1006 "waddhus wr3, wr3, wr11 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1007 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1008 "1: \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1009 // [wr0 wr1 wr2 wr3] |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1010 // [wr4 wr5 wr6 wr7] <= * |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1011 "wldrd wr12, [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1012 "cmp r12, #8 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1013 "wldrd wr13, [%[pixels], #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1014 "wldrd wr14, [%[pixels], #16] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1015 "add %[pixels], %[pixels], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1016 "walignr1 wr6, wr12, wr13 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1017 "pld [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1018 "pld [%[pixels], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1019 "walignr1 wr7, wr13, wr14 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1020 "wmoveq wr10, wr13 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1021 "wmoveq wr11, wr14 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1022 "walignr2ne wr10, wr12, wr13 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1023 "walignr2ne wr11, wr13, wr14 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1024 "wunpckelub wr4, wr6 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1025 "wunpckehub wr5, wr6 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1026 "wunpckelub wr6, wr7 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1027 "wunpckehub wr7, wr7 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1028 "wunpckelub wr8, wr10 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1029 "wunpckehub wr9, wr10 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1030 "wunpckelub wr10, wr11 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1031 "wunpckehub wr11, wr11 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1032 "waddhus wr4, wr4, wr8 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1033 "waddhus wr5, wr5, wr9 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1034 "waddhus wr6, wr6, wr10 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1035 "waddhus wr7, wr7, wr11 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1036 "waddhus wr8, wr0, wr4 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1037 "waddhus wr9, wr1, wr5 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1038 "waddhus wr10, wr2, wr6 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1039 "waddhus wr11, wr3, wr7 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1040 "waddhus wr8, wr8, wr15 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1041 "waddhus wr9, wr9, wr15 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1042 "waddhus wr10, wr10, wr15 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1043 "waddhus wr11, wr11, wr15 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1044 "wsrlhg wr8, wr8, wcgr0 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1045 "wsrlhg wr9, wr9, wcgr0 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1046 "wldrd wr12, [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1047 "wldrd wr13, [%[block], #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1048 "wsrlhg wr10, wr10, wcgr0 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1049 "wsrlhg wr11, wr11, wcgr0 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1050 "wpackhus wr8, wr8, wr9 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1051 "wpackhus wr9, wr10, wr11 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1052 WAVG2B" wr8, wr8, wr12 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1053 WAVG2B" wr9, wr9, wr13 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1054 "wstrd wr8, [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1055 "wstrd wr9, [%[block], #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1056 "add %[block], %[block], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1057 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1058 // [wr0 wr1 wr2 wr3] <= * |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1059 // [wr4 wr5 wr6 wr7] |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1060 "wldrd wr12, [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1061 "pld [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1062 "wldrd wr13, [%[pixels], #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1063 "pld [%[block], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1064 "wldrd wr14, [%[pixels], #16] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1065 "add %[pixels], %[pixels], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1066 "walignr1 wr2, wr12, wr13 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1067 "pld [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1068 "pld [%[pixels], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1069 "walignr1 wr3, wr13, wr14 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1070 "wmoveq wr10, wr13 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1071 "wmoveq wr11, wr14 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1072 "walignr2ne wr10, wr12, wr13 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1073 "walignr2ne wr11, wr13, wr14 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1074 "wunpckelub wr0, wr2 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1075 "wunpckehub wr1, wr2 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1076 "wunpckelub wr2, wr3 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1077 "wunpckehub wr3, wr3 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1078 "wunpckelub wr8, wr10 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1079 "wunpckehub wr9, wr10 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1080 "wunpckelub wr10, wr11 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1081 "wunpckehub wr11, wr11 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1082 "waddhus wr0, wr0, wr8 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1083 "waddhus wr1, wr1, wr9 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1084 "waddhus wr2, wr2, wr10 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1085 "waddhus wr3, wr3, wr11 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1086 "waddhus wr8, wr0, wr4 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1087 "waddhus wr9, wr1, wr5 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1088 "waddhus wr10, wr2, wr6 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1089 "waddhus wr11, wr3, wr7 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1090 "waddhus wr8, wr8, wr15 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1091 "waddhus wr9, wr9, wr15 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1092 "waddhus wr10, wr10, wr15 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1093 "waddhus wr11, wr11, wr15 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1094 "wsrlhg wr8, wr8, wcgr0 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1095 "wsrlhg wr9, wr9, wcgr0 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1096 "wldrd wr12, [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1097 "wldrd wr13, [%[block], #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1098 "wsrlhg wr10, wr10, wcgr0 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1099 "wsrlhg wr11, wr11, wcgr0 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1100 "wpackhus wr8, wr8, wr9 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1101 "wpackhus wr9, wr10, wr11 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1102 WAVG2B" wr8, wr8, wr12 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1103 WAVG2B" wr9, wr9, wr13 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1104 "wstrd wr8, [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1105 "wstrd wr9, [%[block], #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1106 "add %[block], %[block], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1107 "subs %[h], %[h], #2 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1108 "pld [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1109 "pld [%[block], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1110 "bne 1b \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1111 : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block) |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1112 : [line_size]"r"(line_size) |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1113 : "r12", "memory"); |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1114 } |