annotate arm/dsputil_iwmmxt_rnd_template.c @ 12483:0159a19bfff7 libavcodec

aacdec: Rework channel mapping compatibility hacks. For a PCE based configuration map the channels solely based on tags. For an indexed configuration map the channels solely based on position. This works with all known exotic samples including al17, elem_id0, bad_concat, and lfe_is_sce.
author alexc
date Fri, 10 Sep 2010 18:01:48 +0000
parents 9281a8a9387a
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3699
c537a97eec66 Add official LGPL license headers to the files that were missing them.
diego
parents: 2734
diff changeset
1 /*
c537a97eec66 Add official LGPL license headers to the files that were missing them.
diego
parents: 2734
diff changeset
2 * iWMMXt optimized DSP utils
c537a97eec66 Add official LGPL license headers to the files that were missing them.
diego
parents: 2734
diff changeset
3 * copyright (c) 2004 AGAWA Koji
c537a97eec66 Add official LGPL license headers to the files that were missing them.
diego
parents: 2734
diff changeset
4 *
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3699
diff changeset
5 * This file is part of FFmpeg.
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3699
diff changeset
6 *
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3699
diff changeset
7 * FFmpeg is free software; you can redistribute it and/or
3699
c537a97eec66 Add official LGPL license headers to the files that were missing them.
diego
parents: 2734
diff changeset
8 * modify it under the terms of the GNU Lesser General Public
c537a97eec66 Add official LGPL license headers to the files that were missing them.
diego
parents: 2734
diff changeset
9 * License as published by the Free Software Foundation; either
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3699
diff changeset
10 * version 2.1 of the License, or (at your option) any later version.
3699
c537a97eec66 Add official LGPL license headers to the files that were missing them.
diego
parents: 2734
diff changeset
11 *
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3699
diff changeset
12 * FFmpeg is distributed in the hope that it will be useful,
3699
c537a97eec66 Add official LGPL license headers to the files that were missing them.
diego
parents: 2734
diff changeset
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
c537a97eec66 Add official LGPL license headers to the files that were missing them.
diego
parents: 2734
diff changeset
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
c537a97eec66 Add official LGPL license headers to the files that were missing them.
diego
parents: 2734
diff changeset
15 * Lesser General Public License for more details.
c537a97eec66 Add official LGPL license headers to the files that were missing them.
diego
parents: 2734
diff changeset
16 *
c537a97eec66 Add official LGPL license headers to the files that were missing them.
diego
parents: 2734
diff changeset
17 * You should have received a copy of the GNU Lesser General Public
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3699
diff changeset
18 * License along with FFmpeg; if not, write to the Free Software
3699
c537a97eec66 Add official LGPL license headers to the files that were missing them.
diego
parents: 2734
diff changeset
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
c537a97eec66 Add official LGPL license headers to the files that were missing them.
diego
parents: 2734
diff changeset
20 */
c537a97eec66 Add official LGPL license headers to the files that were missing them.
diego
parents: 2734
diff changeset
21
2734
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
22 void DEF(put, pixels8)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
23 {
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
24 int stride = line_size;
8031
eebc7209c47f Convert asm keyword into __asm__.
flameeyes
parents: 6636
diff changeset
25 __asm__ volatile (
2734
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
26 "and r12, %[pixels], #7 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
27 "bic %[pixels], %[pixels], #7 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
28 "tmcr wcgr1, r12 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
29 "add r4, %[pixels], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
30 "add r5, %[block], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
31 "mov %[line_size], %[line_size], lsl #1 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
32 "1: \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
33 "wldrd wr0, [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
34 "subs %[h], %[h], #2 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
35 "wldrd wr1, [%[pixels], #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
36 "add %[pixels], %[pixels], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
37 "wldrd wr3, [r4] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
38 "pld [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
39 "pld [%[pixels], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
40 "wldrd wr4, [r4, #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
41 "add r4, r4, %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
42 "walignr1 wr8, wr0, wr1 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
43 "pld [r4] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
44 "pld [r4, #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
45 "walignr1 wr10, wr3, wr4 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
46 "wstrd wr8, [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
47 "add %[block], %[block], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
48 "wstrd wr10, [r5] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
49 "add r5, r5, %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
50 "bne 1b \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
51 : [block]"+r"(block), [pixels]"+r"(pixels), [line_size]"+r"(stride), [h]"+r"(h)
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
52 :
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
53 : "memory", "r4", "r5", "r12");
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
54 }
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
55
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
56 void DEF(avg, pixels8)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
57 {
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
58 int stride = line_size;
8031
eebc7209c47f Convert asm keyword into __asm__.
flameeyes
parents: 6636
diff changeset
59 __asm__ volatile (
2734
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
60 "and r12, %[pixels], #7 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
61 "bic %[pixels], %[pixels], #7 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
62 "tmcr wcgr1, r12 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
63 "add r4, %[pixels], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
64 "add r5, %[block], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
65 "mov %[line_size], %[line_size], lsl #1 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
66 "1: \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
67 "wldrd wr0, [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
68 "subs %[h], %[h], #2 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
69 "wldrd wr1, [%[pixels], #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
70 "add %[pixels], %[pixels], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
71 "wldrd wr3, [r4] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
72 "pld [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
73 "pld [%[pixels], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
74 "wldrd wr4, [r4, #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
75 "add r4, r4, %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
76 "walignr1 wr8, wr0, wr1 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
77 "wldrd wr0, [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
78 "wldrd wr2, [r5] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
79 "pld [r4] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
80 "pld [r4, #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
81 "walignr1 wr10, wr3, wr4 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
82 WAVG2B" wr8, wr8, wr0 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
83 WAVG2B" wr10, wr10, wr2 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
84 "wstrd wr8, [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
85 "add %[block], %[block], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
86 "wstrd wr10, [r5] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
87 "pld [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
88 "pld [%[block], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
89 "add r5, r5, %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
90 "pld [r5] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
91 "pld [r5, #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
92 "bne 1b \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
93 : [block]"+r"(block), [pixels]"+r"(pixels), [line_size]"+r"(stride), [h]"+r"(h)
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
94 :
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
95 : "memory", "r4", "r5", "r12");
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
96 }
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
97
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
98 void DEF(put, pixels16)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
99 {
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
100 int stride = line_size;
8031
eebc7209c47f Convert asm keyword into __asm__.
flameeyes
parents: 6636
diff changeset
101 __asm__ volatile (
2734
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
102 "and r12, %[pixels], #7 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
103 "bic %[pixels], %[pixels], #7 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
104 "tmcr wcgr1, r12 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
105 "add r4, %[pixels], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
106 "add r5, %[block], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
107 "mov %[line_size], %[line_size], lsl #1 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
108 "1: \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
109 "wldrd wr0, [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
110 "wldrd wr1, [%[pixels], #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
111 "subs %[h], %[h], #2 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
112 "wldrd wr2, [%[pixels], #16] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
113 "add %[pixels], %[pixels], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
114 "wldrd wr3, [r4] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
115 "pld [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
116 "pld [%[pixels], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
117 "walignr1 wr8, wr0, wr1 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
118 "wldrd wr4, [r4, #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
119 "walignr1 wr9, wr1, wr2 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
120 "wldrd wr5, [r4, #16] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
121 "add r4, r4, %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
122 "pld [r4] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
123 "pld [r4, #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
124 "walignr1 wr10, wr3, wr4 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
125 "wstrd wr8, [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
126 "walignr1 wr11, wr4, wr5 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
127 "wstrd wr9, [%[block], #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
128 "add %[block], %[block], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
129 "wstrd wr10, [r5] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
130 "wstrd wr11, [r5, #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
131 "add r5, r5, %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
132 "bne 1b \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
133 : [block]"+r"(block), [pixels]"+r"(pixels), [line_size]"+r"(stride), [h]"+r"(h)
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
134 :
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
135 : "memory", "r4", "r5", "r12");
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
136 }
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
137
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
138 void DEF(avg, pixels16)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
139 {
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
140 int stride = line_size;
8031
eebc7209c47f Convert asm keyword into __asm__.
flameeyes
parents: 6636
diff changeset
141 __asm__ volatile (
2734
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
142 "pld [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
143 "pld [%[pixels], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
144 "pld [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
145 "pld [%[block], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
146 "and r12, %[pixels], #7 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
147 "bic %[pixels], %[pixels], #7 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
148 "tmcr wcgr1, r12 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
149 "add r4, %[pixels], %[line_size]\n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
150 "add r5, %[block], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
151 "mov %[line_size], %[line_size], lsl #1 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
152 "1: \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
153 "wldrd wr0, [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
154 "wldrd wr1, [%[pixels], #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
155 "subs %[h], %[h], #2 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
156 "wldrd wr2, [%[pixels], #16] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
157 "add %[pixels], %[pixels], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
158 "wldrd wr3, [r4] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
159 "pld [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
160 "pld [%[pixels], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
161 "walignr1 wr8, wr0, wr1 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
162 "wldrd wr4, [r4, #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
163 "walignr1 wr9, wr1, wr2 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
164 "wldrd wr5, [r4, #16] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
165 "add r4, r4, %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
166 "wldrd wr0, [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
167 "pld [r4] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
168 "wldrd wr1, [%[block], #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
169 "pld [r4, #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
170 "wldrd wr2, [r5] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
171 "walignr1 wr10, wr3, wr4 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
172 "wldrd wr3, [r5, #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
173 WAVG2B" wr8, wr8, wr0 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
174 WAVG2B" wr9, wr9, wr1 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
175 WAVG2B" wr10, wr10, wr2 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
176 "wstrd wr8, [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
177 "walignr1 wr11, wr4, wr5 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
178 WAVG2B" wr11, wr11, wr3 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
179 "wstrd wr9, [%[block], #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
180 "add %[block], %[block], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
181 "wstrd wr10, [r5] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
182 "pld [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
183 "pld [%[block], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
184 "wstrd wr11, [r5, #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
185 "add r5, r5, %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
186 "pld [r5] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
187 "pld [r5, #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
188 "bne 1b \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
189 : [block]"+r"(block), [pixels]"+r"(pixels), [line_size]"+r"(stride), [h]"+r"(h)
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
190 :
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
191 : "memory", "r4", "r5", "r12");
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
192 }
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
193
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
194 void DEF(put, pixels8_x2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
195 {
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
196 int stride = line_size;
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
197 // [wr0 wr1 wr2 wr3] for previous line
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
198 // [wr4 wr5 wr6 wr7] for current line
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
199 SET_RND(wr15); // =2 for rnd and =1 for no_rnd version
8031
eebc7209c47f Convert asm keyword into __asm__.
flameeyes
parents: 6636
diff changeset
200 __asm__ volatile(
2734
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
201 "pld [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
202 "pld [%[pixels], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
203 "and r12, %[pixels], #7 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
204 "bic %[pixels], %[pixels], #7 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
205 "tmcr wcgr1, r12 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
206 "add r12, r12, #1 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
207 "add r4, %[pixels], %[line_size]\n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
208 "tmcr wcgr2, r12 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
209 "add r5, %[block], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
210 "mov %[line_size], %[line_size], lsl #1 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
211
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
212 "1: \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
213 "wldrd wr10, [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
214 "cmp r12, #8 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
215 "wldrd wr11, [%[pixels], #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
216 "add %[pixels], %[pixels], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
217 "wldrd wr13, [r4] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
218 "pld [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
219 "wldrd wr14, [r4, #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
220 "pld [%[pixels], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
221 "add r4, r4, %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
222 "walignr1 wr0, wr10, wr11 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
223 "pld [r4] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
224 "pld [r4, #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
225 "walignr1 wr2, wr13, wr14 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
226 "wmoveq wr4, wr11 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
227 "wmoveq wr6, wr14 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
228 "walignr2ne wr4, wr10, wr11 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
229 "walignr2ne wr6, wr13, wr14 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
230 WAVG2B" wr0, wr0, wr4 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
231 WAVG2B" wr2, wr2, wr6 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
232 "wstrd wr0, [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
233 "subs %[h], %[h], #2 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
234 "wstrd wr2, [r5] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
235 "add %[block], %[block], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
236 "add r5, r5, %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
237 "bne 1b \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
238 : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride)
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
239 :
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
240 : "r4", "r5", "r12", "memory");
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
241 }
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
242
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
243 void DEF(put, pixels16_x2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
244 {
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
245 int stride = line_size;
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
246 // [wr0 wr1 wr2 wr3] for previous line
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
247 // [wr4 wr5 wr6 wr7] for current line
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
248 SET_RND(wr15); // =2 for rnd and =1 for no_rnd version
8031
eebc7209c47f Convert asm keyword into __asm__.
flameeyes
parents: 6636
diff changeset
249 __asm__ volatile(
2734
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
250 "pld [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
251 "pld [%[pixels], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
252 "and r12, %[pixels], #7 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
253 "bic %[pixels], %[pixels], #7 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
254 "tmcr wcgr1, r12 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
255 "add r12, r12, #1 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
256 "add r4, %[pixels], %[line_size]\n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
257 "tmcr wcgr2, r12 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
258 "add r5, %[block], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
259 "mov %[line_size], %[line_size], lsl #1 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
260
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
261 "1: \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
262 "wldrd wr10, [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
263 "cmp r12, #8 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
264 "wldrd wr11, [%[pixels], #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
265 "wldrd wr12, [%[pixels], #16] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
266 "add %[pixels], %[pixels], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
267 "wldrd wr13, [r4] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
268 "pld [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
269 "wldrd wr14, [r4, #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
270 "pld [%[pixels], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
271 "wldrd wr15, [r4, #16] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
272 "add r4, r4, %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
273 "walignr1 wr0, wr10, wr11 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
274 "pld [r4] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
275 "pld [r4, #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
276 "walignr1 wr1, wr11, wr12 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
277 "walignr1 wr2, wr13, wr14 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
278 "walignr1 wr3, wr14, wr15 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
279 "wmoveq wr4, wr11 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
280 "wmoveq wr5, wr12 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
281 "wmoveq wr6, wr14 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
282 "wmoveq wr7, wr15 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
283 "walignr2ne wr4, wr10, wr11 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
284 "walignr2ne wr5, wr11, wr12 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
285 "walignr2ne wr6, wr13, wr14 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
286 "walignr2ne wr7, wr14, wr15 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
287 WAVG2B" wr0, wr0, wr4 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
288 WAVG2B" wr1, wr1, wr5 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
289 "wstrd wr0, [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
290 WAVG2B" wr2, wr2, wr6 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
291 "wstrd wr1, [%[block], #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
292 WAVG2B" wr3, wr3, wr7 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
293 "add %[block], %[block], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
294 "wstrd wr2, [r5] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
295 "subs %[h], %[h], #2 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
296 "wstrd wr3, [r5, #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
297 "add r5, r5, %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
298 "bne 1b \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
299 : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride)
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
300 :
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
301 : "r4", "r5", "r12", "memory");
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
302 }
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
303
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
304 void DEF(avg, pixels8_x2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
305 {
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
306 int stride = line_size;
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
307 // [wr0 wr1 wr2 wr3] for previous line
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
308 // [wr4 wr5 wr6 wr7] for current line
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
309 SET_RND(wr15); // =2 for rnd and =1 for no_rnd version
8031
eebc7209c47f Convert asm keyword into __asm__.
flameeyes
parents: 6636
diff changeset
310 __asm__ volatile(
2734
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
311 "pld [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
312 "pld [%[pixels], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
313 "pld [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
314 "pld [%[block], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
315 "and r12, %[pixels], #7 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
316 "bic %[pixels], %[pixels], #7 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
317 "tmcr wcgr1, r12 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
318 "add r12, r12, #1 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
319 "add r4, %[pixels], %[line_size]\n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
320 "tmcr wcgr2, r12 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
321 "add r5, %[block], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
322 "mov %[line_size], %[line_size], lsl #1 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
323 "pld [r5] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
324 "pld [r5, #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
325
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
326 "1: \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
327 "wldrd wr10, [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
328 "cmp r12, #8 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
329 "wldrd wr11, [%[pixels], #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
330 "add %[pixels], %[pixels], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
331 "wldrd wr13, [r4] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
332 "pld [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
333 "wldrd wr14, [r4, #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
334 "pld [%[pixels], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
335 "add r4, r4, %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
336 "walignr1 wr0, wr10, wr11 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
337 "pld [r4] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
338 "pld [r4, #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
339 "walignr1 wr2, wr13, wr14 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
340 "wmoveq wr4, wr11 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
341 "wmoveq wr6, wr14 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
342 "walignr2ne wr4, wr10, wr11 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
343 "wldrd wr10, [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
344 "walignr2ne wr6, wr13, wr14 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
345 "wldrd wr12, [r5] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
346 WAVG2B" wr0, wr0, wr4 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
347 WAVG2B" wr2, wr2, wr6 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
348 WAVG2B" wr0, wr0, wr10 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
349 WAVG2B" wr2, wr2, wr12 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
350 "wstrd wr0, [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
351 "subs %[h], %[h], #2 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
352 "wstrd wr2, [r5] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
353 "add %[block], %[block], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
354 "add r5, r5, %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
355 "pld [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
356 "pld [%[block], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
357 "pld [r5] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
358 "pld [r5, #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
359 "bne 1b \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
360 : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride)
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
361 :
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
362 : "r4", "r5", "r12", "memory");
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
363 }
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
364
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
365 void DEF(avg, pixels16_x2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
366 {
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
367 int stride = line_size;
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
368 // [wr0 wr1 wr2 wr3] for previous line
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
369 // [wr4 wr5 wr6 wr7] for current line
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
370 SET_RND(wr15); // =2 for rnd and =1 for no_rnd version
8031
eebc7209c47f Convert asm keyword into __asm__.
flameeyes
parents: 6636
diff changeset
371 __asm__ volatile(
2734
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
372 "pld [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
373 "pld [%[pixels], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
374 "pld [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
375 "pld [%[block], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
376 "and r12, %[pixels], #7 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
377 "bic %[pixels], %[pixels], #7 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
378 "tmcr wcgr1, r12 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
379 "add r12, r12, #1 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
380 "add r4, %[pixels], %[line_size]\n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
381 "tmcr wcgr2, r12 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
382 "add r5, %[block], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
383 "mov %[line_size], %[line_size], lsl #1 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
384 "pld [r5] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
385 "pld [r5, #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
386
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
387 "1: \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
388 "wldrd wr10, [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
389 "cmp r12, #8 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
390 "wldrd wr11, [%[pixels], #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
391 "wldrd wr12, [%[pixels], #16] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
392 "add %[pixels], %[pixels], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
393 "wldrd wr13, [r4] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
394 "pld [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
395 "wldrd wr14, [r4, #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
396 "pld [%[pixels], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
397 "wldrd wr15, [r4, #16] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
398 "add r4, r4, %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
399 "walignr1 wr0, wr10, wr11 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
400 "pld [r4] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
401 "pld [r4, #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
402 "walignr1 wr1, wr11, wr12 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
403 "walignr1 wr2, wr13, wr14 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
404 "walignr1 wr3, wr14, wr15 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
405 "wmoveq wr4, wr11 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
406 "wmoveq wr5, wr12 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
407 "wmoveq wr6, wr14 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
408 "wmoveq wr7, wr15 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
409 "walignr2ne wr4, wr10, wr11 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
410 "walignr2ne wr5, wr11, wr12 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
411 "walignr2ne wr6, wr13, wr14 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
412 "walignr2ne wr7, wr14, wr15 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
413 "wldrd wr10, [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
414 WAVG2B" wr0, wr0, wr4 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
415 "wldrd wr11, [%[block], #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
416 WAVG2B" wr1, wr1, wr5 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
417 "wldrd wr12, [r5] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
418 WAVG2B" wr2, wr2, wr6 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
419 "wldrd wr13, [r5, #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
420 WAVG2B" wr3, wr3, wr7 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
421 WAVG2B" wr0, wr0, wr10 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
422 WAVG2B" wr1, wr1, wr11 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
423 WAVG2B" wr2, wr2, wr12 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
424 WAVG2B" wr3, wr3, wr13 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
425 "wstrd wr0, [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
426 "subs %[h], %[h], #2 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
427 "wstrd wr1, [%[block], #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
428 "add %[block], %[block], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
429 "wstrd wr2, [r5] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
430 "pld [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
431 "wstrd wr3, [r5, #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
432 "add r5, r5, %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
433 "pld [%[block], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
434 "pld [r5] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
435 "pld [r5, #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
436 "bne 1b \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
437 : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride)
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
438 :
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
439 :"r4", "r5", "r12", "memory");
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
440 }
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
441
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
442 void DEF(avg, pixels8_y2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
443 {
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
444 int stride = line_size;
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
445 // [wr0 wr1 wr2 wr3] for previous line
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
446 // [wr4 wr5 wr6 wr7] for current line
8031
eebc7209c47f Convert asm keyword into __asm__.
flameeyes
parents: 6636
diff changeset
447 __asm__ volatile(
2734
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
448 "pld [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
449 "pld [%[pixels], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
450 "and r12, %[pixels], #7 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
451 "tmcr wcgr1, r12 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
452 "bic %[pixels], %[pixels], #7 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
453
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
454 "wldrd wr10, [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
455 "wldrd wr11, [%[pixels], #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
456 "pld [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
457 "add %[pixels], %[pixels], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
458 "walignr1 wr0, wr10, wr11 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
459 "pld [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
460 "pld [%[pixels], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
461
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
462 "1: \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
463 "wldrd wr10, [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
464 "wldrd wr11, [%[pixels], #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
465 "add %[pixels], %[pixels], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
466 "pld [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
467 "pld [%[pixels], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
468 "walignr1 wr4, wr10, wr11 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
469 "wldrd wr10, [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
470 WAVG2B" wr8, wr0, wr4 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
471 WAVG2B" wr8, wr8, wr10 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
472 "wstrd wr8, [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
473 "add %[block], %[block], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
474
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
475 "wldrd wr10, [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
476 "wldrd wr11, [%[pixels], #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
477 "pld [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
478 "add %[pixels], %[pixels], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
479 "pld [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
480 "pld [%[pixels], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
481 "walignr1 wr0, wr10, wr11 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
482 "wldrd wr10, [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
483 WAVG2B" wr8, wr0, wr4 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
484 WAVG2B" wr8, wr8, wr10 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
485 "wstrd wr8, [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
486 "add %[block], %[block], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
487
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
488 "subs %[h], %[h], #2 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
489 "pld [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
490 "bne 1b \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
491 : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride)
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
492 :
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
493 : "cc", "memory", "r12");
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
494 }
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
495
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
496 void DEF(put, pixels16_y2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
497 {
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
498 int stride = line_size;
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
499 // [wr0 wr1 wr2 wr3] for previous line
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
500 // [wr4 wr5 wr6 wr7] for current line
8031
eebc7209c47f Convert asm keyword into __asm__.
flameeyes
parents: 6636
diff changeset
501 __asm__ volatile(
2734
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
502 "pld [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
503 "pld [%[pixels], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
504 "and r12, %[pixels], #7 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
505 "tmcr wcgr1, r12 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
506 "bic %[pixels], %[pixels], #7 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
507
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
508 "wldrd wr10, [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
509 "wldrd wr11, [%[pixels], #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
510 "wldrd wr12, [%[pixels], #16] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
511 "add %[pixels], %[pixels], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
512 "pld [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
513 "pld [%[pixels], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
514 "walignr1 wr0, wr10, wr11 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
515 "walignr1 wr1, wr11, wr12 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
516
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
517 "1: \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
518 "wldrd wr10, [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
519 "wldrd wr11, [%[pixels], #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
520 "wldrd wr12, [%[pixels], #16] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
521 "add %[pixels], %[pixels], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
522 "pld [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
523 "pld [%[pixels], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
524 "walignr1 wr4, wr10, wr11 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
525 "walignr1 wr5, wr11, wr12 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
526 WAVG2B" wr8, wr0, wr4 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
527 WAVG2B" wr9, wr1, wr5 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
528 "wstrd wr8, [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
529 "wstrd wr9, [%[block], #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
530 "add %[block], %[block], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
531
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
532 "wldrd wr10, [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
533 "wldrd wr11, [%[pixels], #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
534 "wldrd wr12, [%[pixels], #16] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
535 "add %[pixels], %[pixels], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
536 "pld [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
537 "pld [%[pixels], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
538 "walignr1 wr0, wr10, wr11 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
539 "walignr1 wr1, wr11, wr12 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
540 WAVG2B" wr8, wr0, wr4 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
541 WAVG2B" wr9, wr1, wr5 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
542 "wstrd wr8, [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
543 "wstrd wr9, [%[block], #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
544 "add %[block], %[block], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
545
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
546 "subs %[h], %[h], #2 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
547 "bne 1b \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
548 : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride)
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
549 :
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
550 : "r4", "r5", "r12", "memory");
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
551 }
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
552
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
553 void DEF(avg, pixels16_y2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
554 {
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
555 int stride = line_size;
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
556 // [wr0 wr1 wr2 wr3] for previous line
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
557 // [wr4 wr5 wr6 wr7] for current line
8031
eebc7209c47f Convert asm keyword into __asm__.
flameeyes
parents: 6636
diff changeset
558 __asm__ volatile(
2734
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
559 "pld [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
560 "pld [%[pixels], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
561 "and r12, %[pixels], #7 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
562 "tmcr wcgr1, r12 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
563 "bic %[pixels], %[pixels], #7 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
564
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
565 "wldrd wr10, [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
566 "wldrd wr11, [%[pixels], #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
567 "pld [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
568 "wldrd wr12, [%[pixels], #16] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
569 "add %[pixels], %[pixels], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
570 "pld [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
571 "pld [%[pixels], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
572 "walignr1 wr0, wr10, wr11 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
573 "walignr1 wr1, wr11, wr12 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
574
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
575 "1: \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
576 "wldrd wr10, [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
577 "wldrd wr11, [%[pixels], #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
578 "wldrd wr12, [%[pixels], #16] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
579 "add %[pixels], %[pixels], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
580 "pld [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
581 "pld [%[pixels], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
582 "walignr1 wr4, wr10, wr11 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
583 "walignr1 wr5, wr11, wr12 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
584 "wldrd wr10, [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
585 "wldrd wr11, [%[block], #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
586 WAVG2B" wr8, wr0, wr4 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
587 WAVG2B" wr9, wr1, wr5 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
588 WAVG2B" wr8, wr8, wr10 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
589 WAVG2B" wr9, wr9, wr11 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
590 "wstrd wr8, [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
591 "wstrd wr9, [%[block], #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
592 "add %[block], %[block], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
593
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
594 "wldrd wr10, [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
595 "wldrd wr11, [%[pixels], #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
596 "pld [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
597 "wldrd wr12, [%[pixels], #16] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
598 "add %[pixels], %[pixels], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
599 "pld [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
600 "pld [%[pixels], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
601 "walignr1 wr0, wr10, wr11 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
602 "walignr1 wr1, wr11, wr12 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
603 "wldrd wr10, [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
604 "wldrd wr11, [%[block], #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
605 WAVG2B" wr8, wr0, wr4 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
606 WAVG2B" wr9, wr1, wr5 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
607 WAVG2B" wr8, wr8, wr10 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
608 WAVG2B" wr9, wr9, wr11 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
609 "wstrd wr8, [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
610 "wstrd wr9, [%[block], #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
611 "add %[block], %[block], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
612
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
613 "subs %[h], %[h], #2 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
614 "pld [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
615 "bne 1b \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
616 : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride)
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
617 :
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
618 : "r4", "r5", "r12", "memory");
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
619 }
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
620
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
621 void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
622 {
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
623 // [wr0 wr1 wr2 wr3] for previous line
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
624 // [wr4 wr5 wr6 wr7] for current line
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
625 SET_RND(wr15); // =2 for rnd and =1 for no_rnd version
8031
eebc7209c47f Convert asm keyword into __asm__.
flameeyes
parents: 6636
diff changeset
626 __asm__ volatile(
2734
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
627 "pld [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
628 "mov r12, #2 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
629 "pld [%[pixels], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
630 "tmcr wcgr0, r12 \n\t" /* for shift value */
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
631 "and r12, %[pixels], #7 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
632 "bic %[pixels], %[pixels], #7 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
633 "tmcr wcgr1, r12 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
634
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
635 // [wr0 wr1 wr2 wr3] <= *
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
636 // [wr4 wr5 wr6 wr7]
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
637 "wldrd wr12, [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
638 "add r12, r12, #1 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
639 "wldrd wr13, [%[pixels], #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
640 "tmcr wcgr2, r12 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
641 "add %[pixels], %[pixels], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
642 "cmp r12, #8 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
643 "pld [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
644 "pld [%[pixels], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
645 "walignr1 wr2, wr12, wr13 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
646 "wmoveq wr10, wr13 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
647 "walignr2ne wr10, wr12, wr13 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
648 "wunpckelub wr0, wr2 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
649 "wunpckehub wr1, wr2 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
650 "wunpckelub wr8, wr10 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
651 "wunpckehub wr9, wr10 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
652 "waddhus wr0, wr0, wr8 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
653 "waddhus wr1, wr1, wr9 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
654
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
655 "1: \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
656 // [wr0 wr1 wr2 wr3]
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
657 // [wr4 wr5 wr6 wr7] <= *
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
658 "wldrd wr12, [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
659 "cmp r12, #8 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
660 "wldrd wr13, [%[pixels], #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
661 "add %[pixels], %[pixels], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
662 "walignr1 wr6, wr12, wr13 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
663 "pld [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
664 "pld [%[pixels], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
665 "wmoveq wr10, wr13 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
666 "walignr2ne wr10, wr12, wr13 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
667 "wunpckelub wr4, wr6 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
668 "wunpckehub wr5, wr6 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
669 "wunpckelub wr8, wr10 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
670 "wunpckehub wr9, wr10 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
671 "waddhus wr4, wr4, wr8 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
672 "waddhus wr5, wr5, wr9 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
673 "waddhus wr8, wr0, wr4 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
674 "waddhus wr9, wr1, wr5 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
675 "waddhus wr8, wr8, wr15 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
676 "waddhus wr9, wr9, wr15 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
677 "wsrlhg wr8, wr8, wcgr0 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
678 "wsrlhg wr9, wr9, wcgr0 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
679 "wpackhus wr8, wr8, wr9 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
680 "wstrd wr8, [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
681 "add %[block], %[block], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
682
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
683 // [wr0 wr1 wr2 wr3] <= *
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
684 // [wr4 wr5 wr6 wr7]
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
685 "wldrd wr12, [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
686 "wldrd wr13, [%[pixels], #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
687 "add %[pixels], %[pixels], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
688 "walignr1 wr2, wr12, wr13 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
689 "pld [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
690 "pld [%[pixels], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
691 "wmoveq wr10, wr13 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
692 "walignr2ne wr10, wr12, wr13 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
693 "wunpckelub wr0, wr2 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
694 "wunpckehub wr1, wr2 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
695 "wunpckelub wr8, wr10 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
696 "wunpckehub wr9, wr10 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
697 "waddhus wr0, wr0, wr8 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
698 "waddhus wr1, wr1, wr9 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
699 "waddhus wr8, wr0, wr4 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
700 "waddhus wr9, wr1, wr5 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
701 "waddhus wr8, wr8, wr15 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
702 "waddhus wr9, wr9, wr15 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
703 "wsrlhg wr8, wr8, wcgr0 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
704 "wsrlhg wr9, wr9, wcgr0 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
705 "wpackhus wr8, wr8, wr9 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
706 "subs %[h], %[h], #2 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
707 "wstrd wr8, [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
708 "add %[block], %[block], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
709 "bne 1b \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
710 : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block)
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
711 : [line_size]"r"(line_size)
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
712 : "r12", "memory");
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
713 }
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
714
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
715 void DEF(put, pixels16_xy2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
716 {
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
717 // [wr0 wr1 wr2 wr3] for previous line
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
718 // [wr4 wr5 wr6 wr7] for current line
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
719 SET_RND(wr15); // =2 for rnd and =1 for no_rnd version
8031
eebc7209c47f Convert asm keyword into __asm__.
flameeyes
parents: 6636
diff changeset
720 __asm__ volatile(
2734
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
721 "pld [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
722 "mov r12, #2 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
723 "pld [%[pixels], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
724 "tmcr wcgr0, r12 \n\t" /* for shift value */
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
725 /* alignment */
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
726 "and r12, %[pixels], #7 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
727 "bic %[pixels], %[pixels], #7 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
728 "tmcr wcgr1, r12 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
729 "add r12, r12, #1 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
730 "tmcr wcgr2, r12 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
731
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
732 // [wr0 wr1 wr2 wr3] <= *
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
733 // [wr4 wr5 wr6 wr7]
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
734 "wldrd wr12, [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
735 "cmp r12, #8 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
736 "wldrd wr13, [%[pixels], #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
737 "wldrd wr14, [%[pixels], #16] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
738 "add %[pixels], %[pixels], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
739 "pld [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
740 "walignr1 wr2, wr12, wr13 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
741 "pld [%[pixels], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
742 "walignr1 wr3, wr13, wr14 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
743 "wmoveq wr10, wr13 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
744 "wmoveq wr11, wr14 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
745 "walignr2ne wr10, wr12, wr13 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
746 "walignr2ne wr11, wr13, wr14 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
747 "wunpckelub wr0, wr2 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
748 "wunpckehub wr1, wr2 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
749 "wunpckelub wr2, wr3 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
750 "wunpckehub wr3, wr3 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
751 "wunpckelub wr8, wr10 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
752 "wunpckehub wr9, wr10 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
753 "wunpckelub wr10, wr11 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
754 "wunpckehub wr11, wr11 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
755 "waddhus wr0, wr0, wr8 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
756 "waddhus wr1, wr1, wr9 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
757 "waddhus wr2, wr2, wr10 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
758 "waddhus wr3, wr3, wr11 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
759
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
760 "1: \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
761 // [wr0 wr1 wr2 wr3]
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
762 // [wr4 wr5 wr6 wr7] <= *
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
763 "wldrd wr12, [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
764 "cmp r12, #8 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
765 "wldrd wr13, [%[pixels], #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
766 "wldrd wr14, [%[pixels], #16] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
767 "add %[pixels], %[pixels], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
768 "walignr1 wr6, wr12, wr13 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
769 "pld [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
770 "pld [%[pixels], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
771 "walignr1 wr7, wr13, wr14 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
772 "wmoveq wr10, wr13 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
773 "wmoveq wr11, wr14 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
774 "walignr2ne wr10, wr12, wr13 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
775 "walignr2ne wr11, wr13, wr14 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
776 "wunpckelub wr4, wr6 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
777 "wunpckehub wr5, wr6 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
778 "wunpckelub wr6, wr7 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
779 "wunpckehub wr7, wr7 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
780 "wunpckelub wr8, wr10 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
781 "wunpckehub wr9, wr10 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
782 "wunpckelub wr10, wr11 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
783 "wunpckehub wr11, wr11 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
784 "waddhus wr4, wr4, wr8 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
785 "waddhus wr5, wr5, wr9 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
786 "waddhus wr6, wr6, wr10 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
787 "waddhus wr7, wr7, wr11 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
788 "waddhus wr8, wr0, wr4 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
789 "waddhus wr9, wr1, wr5 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
790 "waddhus wr10, wr2, wr6 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
791 "waddhus wr11, wr3, wr7 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
792 "waddhus wr8, wr8, wr15 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
793 "waddhus wr9, wr9, wr15 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
794 "waddhus wr10, wr10, wr15 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
795 "waddhus wr11, wr11, wr15 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
796 "wsrlhg wr8, wr8, wcgr0 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
797 "wsrlhg wr9, wr9, wcgr0 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
798 "wsrlhg wr10, wr10, wcgr0 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
799 "wsrlhg wr11, wr11, wcgr0 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
800 "wpackhus wr8, wr8, wr9 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
801 "wpackhus wr9, wr10, wr11 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
802 "wstrd wr8, [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
803 "wstrd wr9, [%[block], #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
804 "add %[block], %[block], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
805
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
806 // [wr0 wr1 wr2 wr3] <= *
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
807 // [wr4 wr5 wr6 wr7]
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
808 "wldrd wr12, [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
809 "wldrd wr13, [%[pixels], #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
810 "wldrd wr14, [%[pixels], #16] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
811 "add %[pixels], %[pixels], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
812 "walignr1 wr2, wr12, wr13 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
813 "pld [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
814 "pld [%[pixels], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
815 "walignr1 wr3, wr13, wr14 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
816 "wmoveq wr10, wr13 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
817 "wmoveq wr11, wr14 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
818 "walignr2ne wr10, wr12, wr13 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
819 "walignr2ne wr11, wr13, wr14 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
820 "wunpckelub wr0, wr2 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
821 "wunpckehub wr1, wr2 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
822 "wunpckelub wr2, wr3 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
823 "wunpckehub wr3, wr3 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
824 "wunpckelub wr8, wr10 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
825 "wunpckehub wr9, wr10 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
826 "wunpckelub wr10, wr11 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
827 "wunpckehub wr11, wr11 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
828 "waddhus wr0, wr0, wr8 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
829 "waddhus wr1, wr1, wr9 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
830 "waddhus wr2, wr2, wr10 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
831 "waddhus wr3, wr3, wr11 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
832 "waddhus wr8, wr0, wr4 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
833 "waddhus wr9, wr1, wr5 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
834 "waddhus wr10, wr2, wr6 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
835 "waddhus wr11, wr3, wr7 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
836 "waddhus wr8, wr8, wr15 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
837 "waddhus wr9, wr9, wr15 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
838 "waddhus wr10, wr10, wr15 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
839 "waddhus wr11, wr11, wr15 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
840 "wsrlhg wr8, wr8, wcgr0 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
841 "wsrlhg wr9, wr9, wcgr0 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
842 "wsrlhg wr10, wr10, wcgr0 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
843 "wsrlhg wr11, wr11, wcgr0 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
844 "wpackhus wr8, wr8, wr9 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
845 "wpackhus wr9, wr10, wr11 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
846 "wstrd wr8, [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
847 "wstrd wr9, [%[block], #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
848 "add %[block], %[block], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
849
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
850 "subs %[h], %[h], #2 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
851 "bne 1b \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
852 : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block)
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
853 : [line_size]"r"(line_size)
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
854 : "r12", "memory");
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
855 }
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
856
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
857 void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
858 {
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
859 // [wr0 wr1 wr2 wr3] for previous line
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
860 // [wr4 wr5 wr6 wr7] for current line
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
861 SET_RND(wr15); // =2 for rnd and =1 for no_rnd version
8031
eebc7209c47f Convert asm keyword into __asm__.
flameeyes
parents: 6636
diff changeset
862 __asm__ volatile(
2734
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
863 "pld [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
864 "pld [%[block], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
865 "pld [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
866 "mov r12, #2 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
867 "pld [%[pixels], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
868 "tmcr wcgr0, r12 \n\t" /* for shift value */
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
869 "and r12, %[pixels], #7 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
870 "bic %[pixels], %[pixels], #7 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
871 "tmcr wcgr1, r12 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
872
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
873 // [wr0 wr1 wr2 wr3] <= *
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
874 // [wr4 wr5 wr6 wr7]
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
875 "wldrd wr12, [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
876 "add r12, r12, #1 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
877 "wldrd wr13, [%[pixels], #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
878 "tmcr wcgr2, r12 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
879 "add %[pixels], %[pixels], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
880 "cmp r12, #8 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
881 "pld [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
882 "pld [%[pixels], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
883 "walignr1 wr2, wr12, wr13 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
884 "wmoveq wr10, wr13 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
885 "walignr2ne wr10, wr12, wr13 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
886 "wunpckelub wr0, wr2 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
887 "wunpckehub wr1, wr2 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
888 "wunpckelub wr8, wr10 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
889 "wunpckehub wr9, wr10 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
890 "waddhus wr0, wr0, wr8 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
891 "waddhus wr1, wr1, wr9 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
892
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
893 "1: \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
894 // [wr0 wr1 wr2 wr3]
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
895 // [wr4 wr5 wr6 wr7] <= *
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
896 "wldrd wr12, [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
897 "cmp r12, #8 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
898 "wldrd wr13, [%[pixels], #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
899 "add %[pixels], %[pixels], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
900 "walignr1 wr6, wr12, wr13 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
901 "pld [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
902 "pld [%[pixels], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
903 "wmoveq wr10, wr13 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
904 "walignr2ne wr10, wr12, wr13 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
905 "wunpckelub wr4, wr6 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
906 "wunpckehub wr5, wr6 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
907 "wunpckelub wr8, wr10 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
908 "wunpckehub wr9, wr10 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
909 "waddhus wr4, wr4, wr8 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
910 "waddhus wr5, wr5, wr9 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
911 "waddhus wr8, wr0, wr4 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
912 "waddhus wr9, wr1, wr5 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
913 "waddhus wr8, wr8, wr15 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
914 "waddhus wr9, wr9, wr15 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
915 "wldrd wr12, [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
916 "wsrlhg wr8, wr8, wcgr0 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
917 "wsrlhg wr9, wr9, wcgr0 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
918 "wpackhus wr8, wr8, wr9 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
919 WAVG2B" wr8, wr8, wr12 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
920 "wstrd wr8, [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
921 "add %[block], %[block], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
922 "wldrd wr12, [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
923 "pld [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
924 "pld [%[block], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
925
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
926 // [wr0 wr1 wr2 wr3] <= *
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
927 // [wr4 wr5 wr6 wr7]
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
928 "wldrd wr13, [%[pixels], #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
929 "add %[pixels], %[pixels], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
930 "walignr1 wr2, wr12, wr13 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
931 "pld [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
932 "pld [%[pixels], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
933 "wmoveq wr10, wr13 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
934 "walignr2ne wr10, wr12, wr13 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
935 "wunpckelub wr0, wr2 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
936 "wunpckehub wr1, wr2 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
937 "wunpckelub wr8, wr10 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
938 "wunpckehub wr9, wr10 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
939 "waddhus wr0, wr0, wr8 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
940 "waddhus wr1, wr1, wr9 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
941 "waddhus wr8, wr0, wr4 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
942 "waddhus wr9, wr1, wr5 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
943 "waddhus wr8, wr8, wr15 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
944 "waddhus wr9, wr9, wr15 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
945 "wldrd wr12, [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
946 "wsrlhg wr8, wr8, wcgr0 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
947 "wsrlhg wr9, wr9, wcgr0 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
948 "wpackhus wr8, wr8, wr9 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
949 "subs %[h], %[h], #2 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
950 WAVG2B" wr8, wr8, wr12 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
951 "wstrd wr8, [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
952 "add %[block], %[block], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
953 "pld [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
954 "pld [%[block], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
955 "bne 1b \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
956 : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block)
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
957 : [line_size]"r"(line_size)
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
958 : "r12", "memory");
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
959 }
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
960
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
961 void DEF(avg, pixels16_xy2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
962 {
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
963 // [wr0 wr1 wr2 wr3] for previous line
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
964 // [wr4 wr5 wr6 wr7] for current line
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
965 SET_RND(wr15); // =2 for rnd and =1 for no_rnd version
8031
eebc7209c47f Convert asm keyword into __asm__.
flameeyes
parents: 6636
diff changeset
966 __asm__ volatile(
2734
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
967 "pld [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
968 "pld [%[block], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
969 "pld [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
970 "mov r12, #2 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
971 "pld [%[pixels], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
972 "tmcr wcgr0, r12 \n\t" /* for shift value */
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
973 /* alignment */
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
974 "and r12, %[pixels], #7 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
975 "bic %[pixels], %[pixels], #7 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
976 "tmcr wcgr1, r12 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
977 "add r12, r12, #1 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
978 "tmcr wcgr2, r12 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
979
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
980 // [wr0 wr1 wr2 wr3] <= *
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
981 // [wr4 wr5 wr6 wr7]
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
982 "wldrd wr12, [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
983 "cmp r12, #8 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
984 "wldrd wr13, [%[pixels], #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
985 "wldrd wr14, [%[pixels], #16] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
986 "add %[pixels], %[pixels], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
987 "pld [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
988 "walignr1 wr2, wr12, wr13 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
989 "pld [%[pixels], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
990 "walignr1 wr3, wr13, wr14 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
991 "wmoveq wr10, wr13 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
992 "wmoveq wr11, wr14 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
993 "walignr2ne wr10, wr12, wr13 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
994 "walignr2ne wr11, wr13, wr14 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
995 "wunpckelub wr0, wr2 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
996 "wunpckehub wr1, wr2 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
997 "wunpckelub wr2, wr3 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
998 "wunpckehub wr3, wr3 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
999 "wunpckelub wr8, wr10 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1000 "wunpckehub wr9, wr10 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1001 "wunpckelub wr10, wr11 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1002 "wunpckehub wr11, wr11 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1003 "waddhus wr0, wr0, wr8 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1004 "waddhus wr1, wr1, wr9 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1005 "waddhus wr2, wr2, wr10 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1006 "waddhus wr3, wr3, wr11 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1007
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1008 "1: \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1009 // [wr0 wr1 wr2 wr3]
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1010 // [wr4 wr5 wr6 wr7] <= *
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1011 "wldrd wr12, [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1012 "cmp r12, #8 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1013 "wldrd wr13, [%[pixels], #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1014 "wldrd wr14, [%[pixels], #16] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1015 "add %[pixels], %[pixels], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1016 "walignr1 wr6, wr12, wr13 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1017 "pld [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1018 "pld [%[pixels], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1019 "walignr1 wr7, wr13, wr14 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1020 "wmoveq wr10, wr13 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1021 "wmoveq wr11, wr14 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1022 "walignr2ne wr10, wr12, wr13 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1023 "walignr2ne wr11, wr13, wr14 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1024 "wunpckelub wr4, wr6 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1025 "wunpckehub wr5, wr6 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1026 "wunpckelub wr6, wr7 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1027 "wunpckehub wr7, wr7 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1028 "wunpckelub wr8, wr10 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1029 "wunpckehub wr9, wr10 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1030 "wunpckelub wr10, wr11 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1031 "wunpckehub wr11, wr11 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1032 "waddhus wr4, wr4, wr8 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1033 "waddhus wr5, wr5, wr9 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1034 "waddhus wr6, wr6, wr10 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1035 "waddhus wr7, wr7, wr11 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1036 "waddhus wr8, wr0, wr4 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1037 "waddhus wr9, wr1, wr5 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1038 "waddhus wr10, wr2, wr6 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1039 "waddhus wr11, wr3, wr7 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1040 "waddhus wr8, wr8, wr15 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1041 "waddhus wr9, wr9, wr15 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1042 "waddhus wr10, wr10, wr15 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1043 "waddhus wr11, wr11, wr15 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1044 "wsrlhg wr8, wr8, wcgr0 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1045 "wsrlhg wr9, wr9, wcgr0 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1046 "wldrd wr12, [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1047 "wldrd wr13, [%[block], #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1048 "wsrlhg wr10, wr10, wcgr0 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1049 "wsrlhg wr11, wr11, wcgr0 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1050 "wpackhus wr8, wr8, wr9 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1051 "wpackhus wr9, wr10, wr11 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1052 WAVG2B" wr8, wr8, wr12 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1053 WAVG2B" wr9, wr9, wr13 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1054 "wstrd wr8, [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1055 "wstrd wr9, [%[block], #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1056 "add %[block], %[block], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1057
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1058 // [wr0 wr1 wr2 wr3] <= *
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1059 // [wr4 wr5 wr6 wr7]
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1060 "wldrd wr12, [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1061 "pld [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1062 "wldrd wr13, [%[pixels], #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1063 "pld [%[block], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1064 "wldrd wr14, [%[pixels], #16] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1065 "add %[pixels], %[pixels], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1066 "walignr1 wr2, wr12, wr13 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1067 "pld [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1068 "pld [%[pixels], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1069 "walignr1 wr3, wr13, wr14 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1070 "wmoveq wr10, wr13 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1071 "wmoveq wr11, wr14 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1072 "walignr2ne wr10, wr12, wr13 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1073 "walignr2ne wr11, wr13, wr14 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1074 "wunpckelub wr0, wr2 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1075 "wunpckehub wr1, wr2 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1076 "wunpckelub wr2, wr3 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1077 "wunpckehub wr3, wr3 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1078 "wunpckelub wr8, wr10 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1079 "wunpckehub wr9, wr10 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1080 "wunpckelub wr10, wr11 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1081 "wunpckehub wr11, wr11 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1082 "waddhus wr0, wr0, wr8 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1083 "waddhus wr1, wr1, wr9 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1084 "waddhus wr2, wr2, wr10 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1085 "waddhus wr3, wr3, wr11 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1086 "waddhus wr8, wr0, wr4 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1087 "waddhus wr9, wr1, wr5 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1088 "waddhus wr10, wr2, wr6 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1089 "waddhus wr11, wr3, wr7 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1090 "waddhus wr8, wr8, wr15 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1091 "waddhus wr9, wr9, wr15 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1092 "waddhus wr10, wr10, wr15 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1093 "waddhus wr11, wr11, wr15 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1094 "wsrlhg wr8, wr8, wcgr0 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1095 "wsrlhg wr9, wr9, wcgr0 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1096 "wldrd wr12, [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1097 "wldrd wr13, [%[block], #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1098 "wsrlhg wr10, wr10, wcgr0 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1099 "wsrlhg wr11, wr11, wcgr0 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1100 "wpackhus wr8, wr8, wr9 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1101 "wpackhus wr9, wr10, wr11 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1102 WAVG2B" wr8, wr8, wr12 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1103 WAVG2B" wr9, wr9, wr13 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1104 "wstrd wr8, [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1105 "wstrd wr9, [%[block], #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1106 "add %[block], %[block], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1107 "subs %[h], %[h], #2 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1108 "pld [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1109 "pld [%[block], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1110 "bne 1b \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1111 : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block)
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1112 : [line_size]"r"(line_size)
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1113 : "r12", "memory");
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1114 }