annotate armv4l/dsputil_iwmmxt_rnd.h @ 6323:e6da66f378c7 libavcodec

mpegvideo.h has two function declarations with the 'inline' specifier but no definition for those functions. The C standard requires a definition to appear in the same translation unit for any function declared with 'inline'. Most of the files including mpegvideo.h do not define those functions. Fix this by removing the 'inline' specifiers from the header. patch by Uoti Urpala
author diego
date Sun, 03 Feb 2008 17:54:30 +0000
parents 533bfc034f45
children 43c97362dfa8
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3699
c537a97eec66 Add official LGPL license headers to the files that were missing them.
diego
parents: 2734
diff changeset
1 /*
c537a97eec66 Add official LGPL license headers to the files that were missing them.
diego
parents: 2734
diff changeset
2 * iWMMXt optimized DSP utils
c537a97eec66 Add official LGPL license headers to the files that were missing them.
diego
parents: 2734
diff changeset
3 * copyright (c) 2004 AGAWA Koji
c537a97eec66 Add official LGPL license headers to the files that were missing them.
diego
parents: 2734
diff changeset
4 *
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3699
diff changeset
5 * This file is part of FFmpeg.
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3699
diff changeset
6 *
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3699
diff changeset
7 * FFmpeg is free software; you can redistribute it and/or
3699
c537a97eec66 Add official LGPL license headers to the files that were missing them.
diego
parents: 2734
diff changeset
8 * modify it under the terms of the GNU Lesser General Public
c537a97eec66 Add official LGPL license headers to the files that were missing them.
diego
parents: 2734
diff changeset
9 * License as published by the Free Software Foundation; either
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3699
diff changeset
10 * version 2.1 of the License, or (at your option) any later version.
3699
c537a97eec66 Add official LGPL license headers to the files that were missing them.
diego
parents: 2734
diff changeset
11 *
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3699
diff changeset
12 * FFmpeg is distributed in the hope that it will be useful,
3699
c537a97eec66 Add official LGPL license headers to the files that were missing them.
diego
parents: 2734
diff changeset
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
c537a97eec66 Add official LGPL license headers to the files that were missing them.
diego
parents: 2734
diff changeset
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
c537a97eec66 Add official LGPL license headers to the files that were missing them.
diego
parents: 2734
diff changeset
15 * Lesser General Public License for more details.
c537a97eec66 Add official LGPL license headers to the files that were missing them.
diego
parents: 2734
diff changeset
16 *
c537a97eec66 Add official LGPL license headers to the files that were missing them.
diego
parents: 2734
diff changeset
17 * You should have received a copy of the GNU Lesser General Public
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3699
diff changeset
18 * License along with FFmpeg; if not, write to the Free Software
3699
c537a97eec66 Add official LGPL license headers to the files that were missing them.
diego
parents: 2734
diff changeset
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
c537a97eec66 Add official LGPL license headers to the files that were missing them.
diego
parents: 2734
diff changeset
20 */
c537a97eec66 Add official LGPL license headers to the files that were missing them.
diego
parents: 2734
diff changeset
21
6079
533bfc034f45 Remove mistakenly added multiple inclusion guards.
diego
parents: 5828
diff changeset
22 /* This header intentionally has no multiple inclusion guards. It is meant to
533bfc034f45 Remove mistakenly added multiple inclusion guards.
diego
parents: 5828
diff changeset
23 * be included multiple times and generates different code depending on the
533bfc034f45 Remove mistakenly added multiple inclusion guards.
diego
parents: 5828
diff changeset
24 * value of certain #defines. */
5828
1876bc447aa4 Add missing multiple inclusion guards.
diego
parents: 3947
diff changeset
25
2734
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
/*
 * DEF(put, pixels8): copy an 8-byte-wide block of rows from pixels to block
 * with iWMMXt 64-bit loads and stores, two rows per loop pass.
 *
 * DEF is a macro supplied by the including file (this header is meant to be
 * included several times), so the final function name is not visible here.
 *
 * block     - destination; one 64-bit store per row
 *             (presumably must be 8-byte aligned -- TODO confirm with callers)
 * pixels    - source; may be unaligned: its low 3 bits are moved to wcgr1 and
 *             the pointer itself is rounded down to an 8-byte boundary
 * line_size - byte distance between consecutive rows; copied to a local
 *             because the asm doubles it in place
 * h         - row count; reduced by 2 per pass and the loop ends only when it
 *             reaches exactly 0, so a positive even value is expected
 */
26 void DEF(put, pixels8)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
27 {
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
28 int stride = line_size;
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
29 __asm__ __volatile__ (
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
/* r12 = byte misalignment of pixels (0..7); it is stored in wcgr1 below */
30 "and r12, %[pixels], #7 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
31 "bic %[pixels], %[pixels], #7 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
32 "tmcr wcgr1, r12 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
/* r4 and r5 point at the second source and destination row of each pair */
33 "add r4, %[pixels], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
34 "add r5, %[block], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
/* from here on line_size is a two-row step */
35 "mov %[line_size], %[line_size], lsl #1 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
/* loop: each pass loads 16 aligned bytes per row for two rows */
36 "1: \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
37 "wldrd wr0, [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
/* the flags set here are consumed by the bne at the end of the loop */
38 "subs %[h], %[h], #2 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
39 "wldrd wr1, [%[pixels], #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
40 "add %[pixels], %[pixels], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
41 "wldrd wr3, [r4] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
/* pld lines are cache preload hints for the rows of the next pass */
42 "pld [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
43 "pld [%[pixels], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
44 "wldrd wr4, [r4, #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
45 "add r4, r4, %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
/* walignr1 presumably extracts the 8 wanted bytes from the aligned register
 * pair using the byte offset held in wcgr1 -- confirm against the iWMMXt
 * instruction reference */
46 "walignr1 wr8, wr0, wr1 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
47 "pld [r4] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
48 "pld [r4, #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
49 "walignr1 wr10, wr3, wr4 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
/* store first row via block, second row via r5, then step both by two rows */
50 "wstrd wr8, [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
51 "add %[block], %[block], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
52 "wstrd wr10, [r5] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
53 "add r5, r5, %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
54 "bne 1b \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
/* all four operands are read-write because the asm advances or rewrites them */
55 : [block]"+r"(block), [pixels]"+r"(pixels), [line_size]"+r"(stride), [h]"+r"(h)
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
56 :
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
/* NOTE(review): subs changes the condition flags and the asm writes several
 * wr registers, yet neither cc nor any wr register is listed as a clobber --
 * confirm this is intended for the targeted compilers */
57 : "memory", "r4", "r5", "r12");
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
58 }
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
59
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
/*
 * DEF(avg, pixels8): combine an 8-byte-wide block of source rows with the
 * rows already stored in block and write the result back to block, two rows
 * per loop pass.
 *
 * The combining step is the WAVG2B macro, which is supplied by the including
 * file and pasted in front of the operand string; it presumably expands to a
 * byte-wise averaging mnemonic (rounding variant chosen per inclusion) --
 * TODO confirm at the include site. DEF is likewise defined by the includer.
 *
 * block     - destination; read and then overwritten, one 64-bit access per
 *             row (presumably must be 8-byte aligned -- TODO confirm)
 * pixels    - source; may be unaligned: its low 3 bits are moved to wcgr1 and
 *             the pointer itself is rounded down to an 8-byte boundary
 * line_size - byte distance between consecutive rows; copied to a local
 *             because the asm doubles it in place
 * h         - row count; reduced by 2 per pass and the loop ends only when it
 *             reaches exactly 0, so a positive even value is expected
 */
60 void DEF(avg, pixels8)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
61 {
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
62 int stride = line_size;
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
63 __asm__ __volatile__ (
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
/* r12 = byte misalignment of pixels (0..7); it is stored in wcgr1 below */
64 "and r12, %[pixels], #7 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
65 "bic %[pixels], %[pixels], #7 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
66 "tmcr wcgr1, r12 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
/* r4 and r5 point at the second source and destination row of each pair */
67 "add r4, %[pixels], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
68 "add r5, %[block], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
/* from here on line_size is a two-row step */
69 "mov %[line_size], %[line_size], lsl #1 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
/* loop: each pass loads 16 aligned bytes per row for two rows */
70 "1: \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
71 "wldrd wr0, [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
/* the flags set here are consumed by the bne at the end of the loop */
72 "subs %[h], %[h], #2 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
73 "wldrd wr1, [%[pixels], #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
74 "add %[pixels], %[pixels], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
75 "wldrd wr3, [r4] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
76 "pld [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
77 "pld [%[pixels], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
78 "wldrd wr4, [r4, #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
79 "add r4, r4, %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
/* walignr1 presumably extracts the 8 wanted bytes from the aligned register
 * pair using the byte offset held in wcgr1 -- confirm against the iWMMXt
 * instruction reference */
80 "walignr1 wr8, wr0, wr1 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
/* wr0 is free again after the align above; it and wr2 now take the two
 * current destination rows */
81 "wldrd wr0, [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
82 "wldrd wr2, [r5] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
83 "pld [r4] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
84 "pld [r4, #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
85 "walignr1 wr10, wr3, wr4 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
/* combine each aligned source row with its destination row */
86 WAVG2B" wr8, wr8, wr0 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
87 WAVG2B" wr10, wr10, wr2 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
/* store first row via block, second row via r5, then step both by two rows */
88 "wstrd wr8, [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
89 "add %[block], %[block], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
90 "wstrd wr10, [r5] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
/* the destination rows of the next pass are preloaded because they are read */
91 "pld [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
92 "pld [%[block], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
93 "add r5, r5, %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
94 "pld [r5] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
95 "pld [r5, #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
96 "bne 1b \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
/* all four operands are read-write because the asm advances or rewrites them */
97 : [block]"+r"(block), [pixels]"+r"(pixels), [line_size]"+r"(stride), [h]"+r"(h)
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
98 :
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
/* NOTE(review): subs changes the condition flags and the asm writes several
 * wr registers, yet neither cc nor any wr register is listed as a clobber --
 * confirm this is intended for the targeted compilers */
99 : "memory", "r4", "r5", "r12");
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
100 }
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
101
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
/*
 * DEF(put, pixels16): copy a 16-byte-wide block of rows from pixels to block
 * with iWMMXt 64-bit loads and stores, two rows per loop pass.
 *
 * DEF is a macro supplied by the including file (this header is meant to be
 * included several times), so the final function name is not visible here.
 *
 * block     - destination; two 64-bit stores per row
 *             (presumably must be 8-byte aligned -- TODO confirm with callers)
 * pixels    - source; may be unaligned: its low 3 bits are moved to wcgr1 and
 *             the pointer itself is rounded down to an 8-byte boundary
 * line_size - byte distance between consecutive rows; copied to a local
 *             because the asm doubles it in place
 * h         - row count; reduced by 2 per pass and the loop ends only when it
 *             reaches exactly 0, so a positive even value is expected
 */
102 void DEF(put, pixels16)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
103 {
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
104 int stride = line_size;
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
105 __asm__ __volatile__ (
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
/* r12 = byte misalignment of pixels (0..7); it is stored in wcgr1 below */
106 "and r12, %[pixels], #7 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
107 "bic %[pixels], %[pixels], #7 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
108 "tmcr wcgr1, r12 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
/* r4 and r5 point at the second source and destination row of each pair */
109 "add r4, %[pixels], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
110 "add r5, %[block], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
/* from here on line_size is a two-row step */
111 "mov %[line_size], %[line_size], lsl #1 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
/* loop: each pass loads 24 aligned bytes per row for two rows */
112 "1: \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
113 "wldrd wr0, [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
114 "wldrd wr1, [%[pixels], #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
/* the flags set here are consumed by the bne at the end of the loop */
115 "subs %[h], %[h], #2 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
116 "wldrd wr2, [%[pixels], #16] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
117 "add %[pixels], %[pixels], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
118 "wldrd wr3, [r4] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
/* pld lines are cache preload hints for the rows of the next pass */
119 "pld [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
120 "pld [%[pixels], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
/* walignr1 presumably extracts 8 bytes from the aligned register pair using
 * the byte offset held in wcgr1 -- confirm against the iWMMXt instruction
 * reference; wr8/wr9 form the first row, wr10/wr11 the second */
121 "walignr1 wr8, wr0, wr1 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
122 "wldrd wr4, [r4, #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
123 "walignr1 wr9, wr1, wr2 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
124 "wldrd wr5, [r4, #16] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
125 "add r4, r4, %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
126 "pld [r4] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
127 "pld [r4, #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
128 "walignr1 wr10, wr3, wr4 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
/* store first row via block, second row via r5, then step both by two rows */
129 "wstrd wr8, [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
130 "walignr1 wr11, wr4, wr5 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
131 "wstrd wr9, [%[block], #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
132 "add %[block], %[block], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
133 "wstrd wr10, [r5] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
134 "wstrd wr11, [r5, #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
135 "add r5, r5, %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
136 "bne 1b \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
/* all four operands are read-write because the asm advances or rewrites them */
137 : [block]"+r"(block), [pixels]"+r"(pixels), [line_size]"+r"(stride), [h]"+r"(h)
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
138 :
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
/* NOTE(review): subs changes the condition flags and the asm writes several
 * wr registers, yet neither cc nor any wr register is listed as a clobber --
 * confirm this is intended for the targeted compilers */
139 : "memory", "r4", "r5", "r12");
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
140 }
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
141
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
/*
 * DEF(avg, pixels16): combine a 16-byte-wide block of source rows with the
 * rows already stored in block and write the result back to block, two rows
 * per loop pass.
 *
 * The combining step is the WAVG2B macro, which is supplied by the including
 * file and pasted in front of the operand string; it presumably expands to a
 * byte-wise averaging mnemonic (rounding variant chosen per inclusion) --
 * TODO confirm at the include site. DEF is likewise defined by the includer.
 *
 * block     - destination; read and then overwritten, two 64-bit accesses
 *             per row (presumably must be 8-byte aligned -- TODO confirm)
 * pixels    - source; may be unaligned: its low 3 bits are moved to wcgr1 and
 *             the pointer itself is rounded down to an 8-byte boundary
 * line_size - byte distance between consecutive rows; copied to a local
 *             because the asm doubles it in place
 * h         - row count; reduced by 2 per pass and the loop ends only when it
 *             reaches exactly 0, so a positive even value is expected
 */
142 void DEF(avg, pixels16)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
143 {
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
144 int stride = line_size;
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
145 __asm__ __volatile__ (
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
/* preload hints for the first source and destination rows */
146 "pld [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
147 "pld [%[pixels], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
148 "pld [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
149 "pld [%[block], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
/* r12 = byte misalignment of pixels (0..7); it is stored in wcgr1 below */
150 "and r12, %[pixels], #7 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
151 "bic %[pixels], %[pixels], #7 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
152 "tmcr wcgr1, r12 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
/* r4 and r5 point at the second source and destination row of each pair */
153 "add r4, %[pixels], %[line_size]\n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
154 "add r5, %[block], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
/* from here on line_size is a two-row step */
155 "mov %[line_size], %[line_size], lsl #1 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
/* loop: each pass loads 24 aligned bytes per row for two rows */
156 "1: \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
157 "wldrd wr0, [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
158 "wldrd wr1, [%[pixels], #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
/* the flags set here are consumed by the bne at the end of the loop */
159 "subs %[h], %[h], #2 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
160 "wldrd wr2, [%[pixels], #16] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
161 "add %[pixels], %[pixels], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
162 "wldrd wr3, [r4] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
163 "pld [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
164 "pld [%[pixels], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
/* walignr1 presumably extracts 8 bytes from the aligned register pair using
 * the byte offset held in wcgr1 -- confirm against the iWMMXt instruction
 * reference; wr8/wr9 form the first row, wr10/wr11 the second */
165 "walignr1 wr8, wr0, wr1 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
166 "wldrd wr4, [r4, #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
167 "walignr1 wr9, wr1, wr2 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
168 "wldrd wr5, [r4, #16] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
169 "add r4, r4, %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
/* wr0..wr3 are reloaded with destination data only after the align that
 * consumed their source contents, so the instruction order here matters */
170 "wldrd wr0, [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
171 "pld [r4] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
172 "wldrd wr1, [%[block], #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
173 "pld [r4, #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
174 "wldrd wr2, [r5] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
175 "walignr1 wr10, wr3, wr4 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
176 "wldrd wr3, [r5, #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
/* combine each aligned source doubleword with its destination doubleword */
177 WAVG2B" wr8, wr8, wr0 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
178 WAVG2B" wr9, wr9, wr1 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
179 WAVG2B" wr10, wr10, wr2 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
/* store first row via block, second row via r5, then step both by two rows */
180 "wstrd wr8, [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
181 "walignr1 wr11, wr4, wr5 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
182 WAVG2B" wr11, wr11, wr3 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
183 "wstrd wr9, [%[block], #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
184 "add %[block], %[block], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
185 "wstrd wr10, [r5] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
/* the destination rows of the next pass are preloaded because they are read */
186 "pld [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
187 "pld [%[block], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
188 "wstrd wr11, [r5, #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
189 "add r5, r5, %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
190 "pld [r5] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
191 "pld [r5, #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
192 "bne 1b \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
/* all four operands are read-write because the asm advances or rewrites them */
193 : [block]"+r"(block), [pixels]"+r"(pixels), [line_size]"+r"(stride), [h]"+r"(h)
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
194 :
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
/* NOTE(review): subs changes the condition flags and the asm writes several
 * wr registers, yet neither cc nor any wr register is listed as a clobber --
 * confirm this is intended for the targeted compilers */
195 : "memory", "r4", "r5", "r12");
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
196 }
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
197
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
198 void DEF(put, pixels8_x2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
199 {
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
200 int stride = line_size;
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
201 // [wr0 wr1 wr2 wr3] for previous line
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
202 // [wr4 wr5 wr6 wr7] for current line
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
203 SET_RND(wr15); // =2 for rnd and =1 for no_rnd version
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
204 __asm__ __volatile__(
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
205 "pld [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
206 "pld [%[pixels], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
207 "and r12, %[pixels], #7 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
208 "bic %[pixels], %[pixels], #7 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
209 "tmcr wcgr1, r12 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
210 "add r12, r12, #1 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
211 "add r4, %[pixels], %[line_size]\n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
212 "tmcr wcgr2, r12 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
213 "add r5, %[block], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
214 "mov %[line_size], %[line_size], lsl #1 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
215
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
216 "1: \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
217 "wldrd wr10, [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
218 "cmp r12, #8 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
219 "wldrd wr11, [%[pixels], #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
220 "add %[pixels], %[pixels], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
221 "wldrd wr13, [r4] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
222 "pld [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
223 "wldrd wr14, [r4, #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
224 "pld [%[pixels], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
225 "add r4, r4, %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
226 "walignr1 wr0, wr10, wr11 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
227 "pld [r4] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
228 "pld [r4, #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
229 "walignr1 wr2, wr13, wr14 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
230 "wmoveq wr4, wr11 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
231 "wmoveq wr6, wr14 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
232 "walignr2ne wr4, wr10, wr11 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
233 "walignr2ne wr6, wr13, wr14 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
234 WAVG2B" wr0, wr0, wr4 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
235 WAVG2B" wr2, wr2, wr6 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
236 "wstrd wr0, [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
237 "subs %[h], %[h], #2 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
238 "wstrd wr2, [r5] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
239 "add %[block], %[block], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
240 "add r5, r5, %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
241 "bne 1b \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
242 : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride)
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
243 :
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
244 : "r4", "r5", "r12", "memory");
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
245 }
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
246
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
/**
 * Horizontal half-pel "put" for a 16-byte-wide block: every output byte is
 * WAVG2B(pixels[x], pixels[x + 1]); the result overwrites block.
 *
 * @param block     destination; written with 64-bit wstrd stores
 *                  (NOTE(review): presumably must be 8-byte aligned - confirm)
 * @param pixels    source; any byte alignment, the misalignment is handled
 *                  with walignr1/walignr2 after rounding the pointer down
 * @param line_size byte stride, applied to both source and destination
 * @param h         row count; two rows are handled per iteration and the loop
 *                  only exits when h reaches exactly 0, so h is assumed to be
 *                  positive and even
 */
247 void DEF(put, pixels16_x2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
248 {
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
// writable copy: the asm doubles the stride in place through a "+r" operand
249 int stride = line_size;
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
250 // [wr0 wr1] first row, [wr2 wr3] second row, taken at the source byte offset
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
251 // [wr4 wr5] first row, [wr6 wr7] second row, taken one byte further right
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
// NOTE(review): wr15 is reloaded by a wldrd inside the loop before the asm
// below reads it, so this block does not appear to consume the value set here.
252 SET_RND(wr15); // =2 for rnd and =1 for no_rnd version
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
253 __asm__ __volatile__(
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
254 "pld [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
255 "pld [%[pixels], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
// r12 = byte misalignment of the source; pixels is then rounded down to 8
256 "and r12, %[pixels], #7 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
257 "bic %[pixels], %[pixels], #7 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
// wcgr1 = offset (used by walignr1), wcgr2 = offset + 1 (used by walignr2)
258 "tmcr wcgr1, r12 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
259 "add r12, r12, #1 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
// r4 / r5 track the second source / destination row of each row pair
260 "add r4, %[pixels], %[line_size]\n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
261 "tmcr wcgr2, r12 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
262 "add r5, %[block], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
// every pointer advances by two rows per iteration
263 "mov %[line_size], %[line_size], lsl #1 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
264
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
265 "1: \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
// load 24 aligned bytes from each of the two source rows
266 "wldrd wr10, [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
// flags for the wmoveq / walignr2ne pairs below: EQ when offset + 1 == 8
267 "cmp r12, #8 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
268 "wldrd wr11, [%[pixels], #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
269 "wldrd wr12, [%[pixels], #16] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
270 "add %[pixels], %[pixels], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
271 "wldrd wr13, [r4] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
272 "pld [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
273 "wldrd wr14, [r4, #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
274 "pld [%[pixels], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
275 "wldrd wr15, [r4, #16] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
276 "add r4, r4, %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
// wr0..wr3: both rows realigned to the original (unaligned) source position
277 "walignr1 wr0, wr10, wr11 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
278 "pld [r4] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
279 "pld [r4, #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
280 "walignr1 wr1, wr11, wr12 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
281 "walignr1 wr2, wr13, wr14 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
282 "walignr1 wr3, wr14, wr15 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
// wr4..wr7: the same rows one byte to the right. When offset + 1 == 8 that is
// simply the next doubleword, so it is copied instead of realigned
// (presumably because the alignment field cannot express 8 - confirm in ISA).
283 "wmoveq wr4, wr11 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
284 "wmoveq wr5, wr12 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
285 "wmoveq wr6, wr14 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
286 "wmoveq wr7, wr15 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
287 "walignr2ne wr4, wr10, wr11 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
288 "walignr2ne wr5, wr11, wr12 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
289 "walignr2ne wr6, wr13, wr14 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
290 "walignr2ne wr7, wr14, wr15 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
// combine x and x + 1 (WAVG2B is defined by the including file; presumably
// the rounding or non-rounding byte average) and store both output rows
291 WAVG2B" wr0, wr0, wr4 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
292 WAVG2B" wr1, wr1, wr5 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
293 "wstrd wr0, [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
294 WAVG2B" wr2, wr2, wr6 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
295 "wstrd wr1, [%[block], #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
296 WAVG2B" wr3, wr3, wr7 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
297 "add %[block], %[block], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
298 "wstrd wr2, [r5] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
// two rows done; the flags set here feed the bne that closes the loop
299 "subs %[h], %[h], #2 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
300 "wstrd wr3, [r5, #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
301 "add r5, r5, %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
302 "bne 1b \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
303 : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride)
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
304 :
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
// "cc": cmp/subs above modify the condition flags.
// NOTE(review): wr0-wr7, wr10-wr15 and wcgr1/wcgr2 are also written but not
// listed - confirm whether the toolchain needs them declared.
305 : "r4", "r5", "r12", "cc", "memory");
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
306 }
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
307
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
/**
 * Horizontal half-pel "avg" for an 8-byte-wide block: every output byte is
 * WAVG2B(WAVG2B(pixels[x], pixels[x + 1]), block[x]), i.e. the interpolated
 * source is merged with what is already stored in block.
 *
 * @param block     destination; read and written with 64-bit wldrd/wstrd
 *                  (NOTE(review): presumably must be 8-byte aligned - confirm)
 * @param pixels    source; any byte alignment, the misalignment is handled
 *                  with walignr1/walignr2 after rounding the pointer down
 * @param line_size byte stride, applied to both source and destination
 * @param h         row count; two rows are handled per iteration and the loop
 *                  only exits when h reaches exactly 0, so h is assumed to be
 *                  positive and even
 */
308 void DEF(avg, pixels8_x2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
309 {
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
// writable copy: the asm doubles the stride in place through a "+r" operand
310 int stride = line_size;
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
311 // wr0 / wr2: first / second source row taken at the source byte offset
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
312 // wr4 / wr6: first / second source row taken one byte further right
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
// NOTE(review): wr15 is not named anywhere in the asm block below, so the
// value set here does not appear to be consumed by this function.
313 SET_RND(wr15); // =2 for rnd and =1 for no_rnd version
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
314 __asm__ __volatile__(
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
315 "pld [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
316 "pld [%[pixels], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
317 "pld [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
318 "pld [%[block], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
// r12 = byte misalignment of the source; pixels is then rounded down to 8
319 "and r12, %[pixels], #7 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
320 "bic %[pixels], %[pixels], #7 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
// wcgr1 = offset (used by walignr1), wcgr2 = offset + 1 (used by walignr2)
321 "tmcr wcgr1, r12 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
322 "add r12, r12, #1 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
// r4 / r5 track the second source / destination row of each row pair
323 "add r4, %[pixels], %[line_size]\n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
324 "tmcr wcgr2, r12 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
325 "add r5, %[block], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
// every pointer advances by two rows per iteration
326 "mov %[line_size], %[line_size], lsl #1 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
327 "pld [r5] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
328 "pld [r5, #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
329
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
330 "1: \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
// load 16 aligned bytes from each of the two source rows
331 "wldrd wr10, [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
// flags for the wmoveq / walignr2ne pairs below: EQ when offset + 1 == 8
332 "cmp r12, #8 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
333 "wldrd wr11, [%[pixels], #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
334 "add %[pixels], %[pixels], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
335 "wldrd wr13, [r4] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
336 "pld [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
337 "wldrd wr14, [r4, #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
338 "pld [%[pixels], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
339 "add r4, r4, %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
// wr0 / wr2: both rows realigned to the original (unaligned) source position
340 "walignr1 wr0, wr10, wr11 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
341 "pld [r4] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
342 "pld [r4, #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
343 "walignr1 wr2, wr13, wr14 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
// wr4 / wr6: the same rows one byte to the right. When offset + 1 == 8 that
// is simply the next doubleword, so it is copied instead of realigned
// (presumably because the alignment field cannot express 8 - confirm in ISA).
344 "wmoveq wr4, wr11 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
345 "wmoveq wr6, wr14 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
346 "walignr2ne wr4, wr10, wr11 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
// wr10 / wr12 are reused for the current contents of the two output rows
347 "wldrd wr10, [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
348 "walignr2ne wr6, wr13, wr14 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
349 "wldrd wr12, [r5] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
// first combine x with x + 1, then merge that with the destination
// (WAVG2B is defined by the including file; presumably a byte average)
350 WAVG2B" wr0, wr0, wr4 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
351 WAVG2B" wr2, wr2, wr6 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
352 WAVG2B" wr0, wr0, wr10 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
353 WAVG2B" wr2, wr2, wr12 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
354 "wstrd wr0, [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
// two rows done; the flags set here feed the bne that closes the loop
// (nothing between here and the bne modifies the flags)
355 "subs %[h], %[h], #2 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
356 "wstrd wr2, [r5] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
357 "add %[block], %[block], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
358 "add r5, r5, %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
359 "pld [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
360 "pld [%[block], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
361 "pld [r5] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
362 "pld [r5, #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
363 "bne 1b \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
364 : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride)
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
365 :
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
// "cc": cmp/subs above modify the condition flags.
// NOTE(review): wr0, wr2, wr4, wr6, wr10-wr14 and wcgr1/wcgr2 are also written
// but not listed - confirm whether the toolchain needs them declared.
366 : "r4", "r5", "r12", "cc", "memory");
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
367 }
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
368
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
/**
 * Horizontal half-pel "avg" for a 16-byte-wide block: every output byte is
 * WAVG2B(WAVG2B(pixels[x], pixels[x + 1]), block[x]), i.e. the interpolated
 * source is merged with what is already stored in block.
 *
 * @param block     destination; read and written with 64-bit wldrd/wstrd
 *                  (NOTE(review): presumably must be 8-byte aligned - confirm)
 * @param pixels    source; any byte alignment, the misalignment is handled
 *                  with walignr1/walignr2 after rounding the pointer down
 * @param line_size byte stride, applied to both source and destination
 * @param h         row count; two rows are handled per iteration and the loop
 *                  only exits when h reaches exactly 0, so h is assumed to be
 *                  positive and even
 */
369 void DEF(avg, pixels16_x2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
370 {
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
// writable copy: the asm doubles the stride in place through a "+r" operand
371 int stride = line_size;
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
372 // [wr0 wr1] first row, [wr2 wr3] second row, taken at the source byte offset
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
373 // [wr4 wr5] first row, [wr6 wr7] second row, taken one byte further right
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
// NOTE(review): wr15 is reloaded by a wldrd inside the loop before the asm
// below reads it, so this block does not appear to consume the value set here.
374 SET_RND(wr15); // =2 for rnd and =1 for no_rnd version
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
375 __asm__ __volatile__(
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
376 "pld [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
377 "pld [%[pixels], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
378 "pld [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
379 "pld [%[block], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
// r12 = byte misalignment of the source; pixels is then rounded down to 8
380 "and r12, %[pixels], #7 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
381 "bic %[pixels], %[pixels], #7 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
// wcgr1 = offset (used by walignr1), wcgr2 = offset + 1 (used by walignr2)
382 "tmcr wcgr1, r12 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
383 "add r12, r12, #1 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
// r4 / r5 track the second source / destination row of each row pair
384 "add r4, %[pixels], %[line_size]\n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
385 "tmcr wcgr2, r12 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
386 "add r5, %[block], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
// every pointer advances by two rows per iteration
387 "mov %[line_size], %[line_size], lsl #1 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
388 "pld [r5] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
389 "pld [r5, #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
390
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
391 "1: \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
// load 24 aligned bytes from each of the two source rows
392 "wldrd wr10, [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
// flags for the wmoveq / walignr2ne pairs below: EQ when offset + 1 == 8
393 "cmp r12, #8 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
394 "wldrd wr11, [%[pixels], #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
395 "wldrd wr12, [%[pixels], #16] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
396 "add %[pixels], %[pixels], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
397 "wldrd wr13, [r4] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
398 "pld [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
399 "wldrd wr14, [r4, #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
400 "pld [%[pixels], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
401 "wldrd wr15, [r4, #16] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
402 "add r4, r4, %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
// wr0..wr3: both rows realigned to the original (unaligned) source position
403 "walignr1 wr0, wr10, wr11 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
404 "pld [r4] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
405 "pld [r4, #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
406 "walignr1 wr1, wr11, wr12 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
407 "walignr1 wr2, wr13, wr14 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
408 "walignr1 wr3, wr14, wr15 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
// wr4..wr7: the same rows one byte to the right. When offset + 1 == 8 that is
// simply the next doubleword, so it is copied instead of realigned
// (presumably because the alignment field cannot express 8 - confirm in ISA).
409 "wmoveq wr4, wr11 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
410 "wmoveq wr5, wr12 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
411 "wmoveq wr6, wr14 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
412 "wmoveq wr7, wr15 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
413 "walignr2ne wr4, wr10, wr11 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
414 "walignr2ne wr5, wr11, wr12 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
415 "walignr2ne wr6, wr13, wr14 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
416 "walignr2ne wr7, wr14, wr15 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
// wr10..wr13 are reused for the current contents of the two output rows,
// loaded in between the x / x + 1 combines
// (WAVG2B is defined by the including file; presumably a byte average)
417 "wldrd wr10, [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
418 WAVG2B" wr0, wr0, wr4 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
419 "wldrd wr11, [%[block], #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
420 WAVG2B" wr1, wr1, wr5 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
421 "wldrd wr12, [r5] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
422 WAVG2B" wr2, wr2, wr6 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
423 "wldrd wr13, [r5, #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
424 WAVG2B" wr3, wr3, wr7 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
// merge the interpolated rows with the destination contents
425 WAVG2B" wr0, wr0, wr10 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
426 WAVG2B" wr1, wr1, wr11 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
427 WAVG2B" wr2, wr2, wr12 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
428 WAVG2B" wr3, wr3, wr13 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
429 "wstrd wr0, [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
// two rows done; the flags set here feed the bne that closes the loop
// (nothing between here and the bne modifies the flags)
430 "subs %[h], %[h], #2 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
431 "wstrd wr1, [%[block], #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
432 "add %[block], %[block], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
433 "wstrd wr2, [r5] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
434 "pld [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
435 "wstrd wr3, [r5, #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
436 "add r5, r5, %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
437 "pld [%[block], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
438 "pld [r5] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
439 "pld [r5, #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
440 "bne 1b \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
441 : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride)
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
442 :
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
// "cc": cmp/subs above modify the condition flags.
// NOTE(review): wr0-wr7, wr10-wr15 and wcgr1/wcgr2 are also written but not
// listed - confirm whether the toolchain needs them declared.
443 :"r4", "r5", "r12", "cc", "memory");
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
444 }
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
445
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
446 void DEF(avg, pixels8_y2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
447 {
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
448 int stride = line_size;
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
449 // [wr0 wr1 wr2 wr3] for previous line
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
450 // [wr4 wr5 wr6 wr7] for current line
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
451 __asm__ __volatile__(
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
452 "pld [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
453 "pld [%[pixels], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
454 "and r12, %[pixels], #7 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
455 "tmcr wcgr1, r12 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
456 "bic %[pixels], %[pixels], #7 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
457
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
458 "wldrd wr10, [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
459 "wldrd wr11, [%[pixels], #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
460 "pld [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
461 "add %[pixels], %[pixels], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
462 "walignr1 wr0, wr10, wr11 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
463 "pld [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
464 "pld [%[pixels], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
465
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
466 "1: \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
467 "wldrd wr10, [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
468 "wldrd wr11, [%[pixels], #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
469 "add %[pixels], %[pixels], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
470 "pld [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
471 "pld [%[pixels], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
472 "walignr1 wr4, wr10, wr11 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
473 "wldrd wr10, [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
474 WAVG2B" wr8, wr0, wr4 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
475 WAVG2B" wr8, wr8, wr10 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
476 "wstrd wr8, [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
477 "add %[block], %[block], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
478
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
479 "wldrd wr10, [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
480 "wldrd wr11, [%[pixels], #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
481 "pld [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
482 "add %[pixels], %[pixels], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
483 "pld [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
484 "pld [%[pixels], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
485 "walignr1 wr0, wr10, wr11 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
486 "wldrd wr10, [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
487 WAVG2B" wr8, wr0, wr4 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
488 WAVG2B" wr8, wr8, wr10 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
489 "wstrd wr8, [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
490 "add %[block], %[block], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
491
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
492 "subs %[h], %[h], #2 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
493 "pld [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
494 "bne 1b \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
495 : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride)
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
496 :
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
497 : "cc", "memory", "r12");
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
498 }
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
499
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
500 void DEF(put, pixels16_y2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
501 {
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
502 int stride = line_size;
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
503 // [wr0 wr1 wr2 wr3] for previous line
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
504 // [wr4 wr5 wr6 wr7] for current line
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
505 __asm__ __volatile__(
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
506 "pld [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
507 "pld [%[pixels], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
508 "and r12, %[pixels], #7 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
509 "tmcr wcgr1, r12 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
510 "bic %[pixels], %[pixels], #7 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
511
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
512 "wldrd wr10, [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
513 "wldrd wr11, [%[pixels], #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
514 "wldrd wr12, [%[pixels], #16] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
515 "add %[pixels], %[pixels], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
516 "pld [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
517 "pld [%[pixels], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
518 "walignr1 wr0, wr10, wr11 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
519 "walignr1 wr1, wr11, wr12 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
520
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
521 "1: \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
522 "wldrd wr10, [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
523 "wldrd wr11, [%[pixels], #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
524 "wldrd wr12, [%[pixels], #16] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
525 "add %[pixels], %[pixels], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
526 "pld [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
527 "pld [%[pixels], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
528 "walignr1 wr4, wr10, wr11 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
529 "walignr1 wr5, wr11, wr12 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
530 WAVG2B" wr8, wr0, wr4 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
531 WAVG2B" wr9, wr1, wr5 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
532 "wstrd wr8, [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
533 "wstrd wr9, [%[block], #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
534 "add %[block], %[block], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
535
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
536 "wldrd wr10, [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
537 "wldrd wr11, [%[pixels], #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
538 "wldrd wr12, [%[pixels], #16] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
539 "add %[pixels], %[pixels], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
540 "pld [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
541 "pld [%[pixels], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
542 "walignr1 wr0, wr10, wr11 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
543 "walignr1 wr1, wr11, wr12 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
544 WAVG2B" wr8, wr0, wr4 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
545 WAVG2B" wr9, wr1, wr5 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
546 "wstrd wr8, [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
547 "wstrd wr9, [%[block], #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
548 "add %[block], %[block], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
549
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
550 "subs %[h], %[h], #2 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
551 "bne 1b \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
552 : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride)
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
553 :
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
554 : "r4", "r5", "r12", "memory");
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
555 }
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
556
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
557 void DEF(avg, pixels16_y2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
558 {
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
559 int stride = line_size;
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
560 // [wr0 wr1 wr2 wr3] for previous line
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
561 // [wr4 wr5 wr6 wr7] for current line
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
562 __asm__ __volatile__(
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
563 "pld [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
564 "pld [%[pixels], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
565 "and r12, %[pixels], #7 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
566 "tmcr wcgr1, r12 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
567 "bic %[pixels], %[pixels], #7 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
568
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
569 "wldrd wr10, [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
570 "wldrd wr11, [%[pixels], #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
571 "pld [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
572 "wldrd wr12, [%[pixels], #16] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
573 "add %[pixels], %[pixels], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
574 "pld [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
575 "pld [%[pixels], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
576 "walignr1 wr0, wr10, wr11 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
577 "walignr1 wr1, wr11, wr12 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
578
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
579 "1: \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
580 "wldrd wr10, [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
581 "wldrd wr11, [%[pixels], #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
582 "wldrd wr12, [%[pixels], #16] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
583 "add %[pixels], %[pixels], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
584 "pld [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
585 "pld [%[pixels], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
586 "walignr1 wr4, wr10, wr11 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
587 "walignr1 wr5, wr11, wr12 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
588 "wldrd wr10, [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
589 "wldrd wr11, [%[block], #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
590 WAVG2B" wr8, wr0, wr4 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
591 WAVG2B" wr9, wr1, wr5 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
592 WAVG2B" wr8, wr8, wr10 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
593 WAVG2B" wr9, wr9, wr11 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
594 "wstrd wr8, [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
595 "wstrd wr9, [%[block], #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
596 "add %[block], %[block], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
597
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
598 "wldrd wr10, [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
599 "wldrd wr11, [%[pixels], #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
600 "pld [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
601 "wldrd wr12, [%[pixels], #16] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
602 "add %[pixels], %[pixels], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
603 "pld [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
604 "pld [%[pixels], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
605 "walignr1 wr0, wr10, wr11 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
606 "walignr1 wr1, wr11, wr12 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
607 "wldrd wr10, [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
608 "wldrd wr11, [%[block], #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
609 WAVG2B" wr8, wr0, wr4 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
610 WAVG2B" wr9, wr1, wr5 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
611 WAVG2B" wr8, wr8, wr10 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
612 WAVG2B" wr9, wr9, wr11 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
613 "wstrd wr8, [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
614 "wstrd wr9, [%[block], #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
615 "add %[block], %[block], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
616
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
617 "subs %[h], %[h], #2 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
618 "pld [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
619 "bne 1b \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
620 : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride)
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
621 :
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
622 : "r4", "r5", "r12", "memory");
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
623 }
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
624
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
625 void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
626 {
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
627 // [wr0 wr1 wr2 wr3] for previous line
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
628 // [wr4 wr5 wr6 wr7] for current line
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
629 SET_RND(wr15); // =2 for rnd and =1 for no_rnd version
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
630 __asm__ __volatile__(
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
631 "pld [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
632 "mov r12, #2 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
633 "pld [%[pixels], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
634 "tmcr wcgr0, r12 \n\t" /* for shift value */
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
635 "and r12, %[pixels], #7 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
636 "bic %[pixels], %[pixels], #7 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
637 "tmcr wcgr1, r12 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
638
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
639 // [wr0 wr1 wr2 wr3] <= *
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
640 // [wr4 wr5 wr6 wr7]
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
641 "wldrd wr12, [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
642 "add r12, r12, #1 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
643 "wldrd wr13, [%[pixels], #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
644 "tmcr wcgr2, r12 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
645 "add %[pixels], %[pixels], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
646 "cmp r12, #8 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
647 "pld [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
648 "pld [%[pixels], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
649 "walignr1 wr2, wr12, wr13 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
650 "wmoveq wr10, wr13 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
651 "walignr2ne wr10, wr12, wr13 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
652 "wunpckelub wr0, wr2 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
653 "wunpckehub wr1, wr2 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
654 "wunpckelub wr8, wr10 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
655 "wunpckehub wr9, wr10 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
656 "waddhus wr0, wr0, wr8 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
657 "waddhus wr1, wr1, wr9 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
658
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
659 "1: \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
660 // [wr0 wr1 wr2 wr3]
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
661 // [wr4 wr5 wr6 wr7] <= *
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
662 "wldrd wr12, [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
663 "cmp r12, #8 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
664 "wldrd wr13, [%[pixels], #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
665 "add %[pixels], %[pixels], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
666 "walignr1 wr6, wr12, wr13 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
667 "pld [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
668 "pld [%[pixels], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
669 "wmoveq wr10, wr13 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
670 "walignr2ne wr10, wr12, wr13 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
671 "wunpckelub wr4, wr6 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
672 "wunpckehub wr5, wr6 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
673 "wunpckelub wr8, wr10 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
674 "wunpckehub wr9, wr10 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
675 "waddhus wr4, wr4, wr8 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
676 "waddhus wr5, wr5, wr9 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
677 "waddhus wr8, wr0, wr4 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
678 "waddhus wr9, wr1, wr5 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
679 "waddhus wr8, wr8, wr15 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
680 "waddhus wr9, wr9, wr15 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
681 "wsrlhg wr8, wr8, wcgr0 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
682 "wsrlhg wr9, wr9, wcgr0 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
683 "wpackhus wr8, wr8, wr9 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
684 "wstrd wr8, [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
685 "add %[block], %[block], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
686
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
687 // [wr0 wr1 wr2 wr3] <= *
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
688 // [wr4 wr5 wr6 wr7]
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
689 "wldrd wr12, [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
690 "wldrd wr13, [%[pixels], #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
691 "add %[pixels], %[pixels], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
692 "walignr1 wr2, wr12, wr13 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
693 "pld [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
694 "pld [%[pixels], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
695 "wmoveq wr10, wr13 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
696 "walignr2ne wr10, wr12, wr13 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
697 "wunpckelub wr0, wr2 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
698 "wunpckehub wr1, wr2 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
699 "wunpckelub wr8, wr10 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
700 "wunpckehub wr9, wr10 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
701 "waddhus wr0, wr0, wr8 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
702 "waddhus wr1, wr1, wr9 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
703 "waddhus wr8, wr0, wr4 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
704 "waddhus wr9, wr1, wr5 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
705 "waddhus wr8, wr8, wr15 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
706 "waddhus wr9, wr9, wr15 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
707 "wsrlhg wr8, wr8, wcgr0 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
708 "wsrlhg wr9, wr9, wcgr0 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
709 "wpackhus wr8, wr8, wr9 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
710 "subs %[h], %[h], #2 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
711 "wstrd wr8, [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
712 "add %[block], %[block], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
713 "bne 1b \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
714 : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block)
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
715 : [line_size]"r"(line_size)
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
716 : "r12", "memory");
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
717 }
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
718
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
/**
 * Store a 16-byte-wide block in which each output byte is computed from a
 * 2x2 neighbourhood of source bytes:
 *     (p[x] + p[x + 1] + q[x] + q[x + 1] + wr15) >> 2
 * where p and q are two consecutive source rows, line_size bytes apart.
 *
 * @param block     destination; each row is written with two 8-byte wstrd
 *                  stores (offsets 0 and 8) and the pointer then advances by
 *                  line_size. NOTE(review): presumably has to be 8-byte
 *                  aligned for wstrd - confirm against the callers.
 * @param pixels    source; may be unaligned. Its low three bits are split off
 *                  and the pointer is rounded down to an 8-byte boundary, then
 *                  24 bytes (three wldrd loads) are read per row from that
 *                  rounded-down address. One row is read before the loop and
 *                  two more per loop iteration.
 * @param line_size byte stride, used for both block and pixels.
 * @param h         number of rows to write. Two rows are written per loop
 *                  iteration and the loop only stops when h reaches exactly
 *                  zero, so h has to be a positive even number.
 *
 * wr15 is loaded by SET_RND in a separate statement before the asm block
 * (SET_RND is defined outside this function).
 */
719 void DEF(put, pixels16_xy2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
720 {
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
721 // [wr0 wr1 wr2 wr3] for previous line
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
722 // [wr4 wr5 wr6 wr7] for current line
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
723 SET_RND(wr15); // =2 for rnd and =1 for no_rnd version
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
724 __asm__ __volatile__(
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
725 "pld [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
726 "mov r12, #2 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
727 "pld [%[pixels], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
728 "tmcr wcgr0, r12 \n\t" /* for shift value */
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
729 /* alignment */
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
/* r12 = pixels & 7 (byte misalignment) and pixels is rounded down to an
 * 8-byte boundary. wcgr1 receives the misalignment and wcgr2 the
 * misalignment + 1; they are the byte offsets for walignr1 (samples at x)
 * and walignr2 (samples at x + 1) respectively. */
730 "and r12, %[pixels], #7 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
731 "bic %[pixels], %[pixels], #7 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
732 "tmcr wcgr1, r12 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
733 "add r12, r12, #1 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
734 "tmcr wcgr2, r12 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
735
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
736 // [wr0 wr1 wr2 wr3] <= *
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
737 // [wr4 wr5 wr6 wr7]
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
738 "wldrd wr12, [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
/* r12 == 8 means the misalignment was 7: the x + 1 samples then start
 * exactly at the second and third doublewords, so they are copied with
 * wmoveq below; otherwise they are extracted with walignr2ne. */
739 "cmp r12, #8 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
740 "wldrd wr13, [%[pixels], #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
741 "wldrd wr14, [%[pixels], #16] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
742 "add %[pixels], %[pixels], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
743 "pld [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
744 "walignr1 wr2, wr12, wr13 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
745 "pld [%[pixels], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
746 "walignr1 wr3, wr13, wr14 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
747 "wmoveq wr10, wr13 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
748 "wmoveq wr11, wr14 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
749 "walignr2ne wr10, wr12, wr13 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
750 "walignr2ne wr11, wr13, wr14 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
/* Widen the bytes to halfwords and add the x and x + 1 samples, leaving
 * the horizontal pair sums of the first source row in wr0..wr3. */
751 "wunpckelub wr0, wr2 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
752 "wunpckehub wr1, wr2 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
753 "wunpckelub wr2, wr3 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
754 "wunpckehub wr3, wr3 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
755 "wunpckelub wr8, wr10 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
756 "wunpckehub wr9, wr10 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
757 "wunpckelub wr10, wr11 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
758 "wunpckehub wr11, wr11 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
759 "waddhus wr0, wr0, wr8 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
760 "waddhus wr1, wr1, wr9 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
761 "waddhus wr2, wr2, wr10 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
762 "waddhus wr3, wr3, wr11 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
763
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
764 "1: \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
/* First half of the loop body: load the next source row, build its pair
 * sums in wr4..wr7, combine them with wr0..wr3 and store one output row. */
765 // [wr0 wr1 wr2 wr3]
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
766 // [wr4 wr5 wr6 wr7] <= *
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
767 "wldrd wr12, [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
/* Re-establish the eq/ne flags on every iteration: the subs at the bottom
 * of the loop overwrites them. */
768 "cmp r12, #8 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
769 "wldrd wr13, [%[pixels], #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
770 "wldrd wr14, [%[pixels], #16] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
771 "add %[pixels], %[pixels], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
772 "walignr1 wr6, wr12, wr13 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
773 "pld [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
774 "pld [%[pixels], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
775 "walignr1 wr7, wr13, wr14 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
776 "wmoveq wr10, wr13 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
777 "wmoveq wr11, wr14 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
778 "walignr2ne wr10, wr12, wr13 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
779 "walignr2ne wr11, wr13, wr14 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
780 "wunpckelub wr4, wr6 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
781 "wunpckehub wr5, wr6 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
782 "wunpckelub wr6, wr7 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
783 "wunpckehub wr7, wr7 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
784 "wunpckelub wr8, wr10 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
785 "wunpckehub wr9, wr10 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
786 "wunpckelub wr10, wr11 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
787 "wunpckehub wr11, wr11 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
788 "waddhus wr4, wr4, wr8 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
789 "waddhus wr5, wr5, wr9 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
790 "waddhus wr6, wr6, wr10 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
791 "waddhus wr7, wr7, wr11 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
/* Vertical step: add the two rows of pair sums, add the rounding constant
 * held in wr15, shift right by wcgr0 (2) and pack back to 16 bytes. */
792 "waddhus wr8, wr0, wr4 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
793 "waddhus wr9, wr1, wr5 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
794 "waddhus wr10, wr2, wr6 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
795 "waddhus wr11, wr3, wr7 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
796 "waddhus wr8, wr8, wr15 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
797 "waddhus wr9, wr9, wr15 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
798 "waddhus wr10, wr10, wr15 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
799 "waddhus wr11, wr11, wr15 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
800 "wsrlhg wr8, wr8, wcgr0 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
801 "wsrlhg wr9, wr9, wcgr0 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
802 "wsrlhg wr10, wr10, wcgr0 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
803 "wsrlhg wr11, wr11, wcgr0 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
804 "wpackhus wr8, wr8, wr9 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
805 "wpackhus wr9, wr10, wr11 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
806 "wstrd wr8, [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
807 "wstrd wr9, [%[block], #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
808 "add %[block], %[block], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
809
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
/* Second half: same computation with the register roles swapped. The new
 * source row goes into wr0..wr3 and is combined with wr4..wr7 from the
 * first half. There is no cmp here: the wmoveq / walignr2ne below rely on
 * the flags from the cmp at the top of the loop, which still hold because
 * no instruction in between is a flag-setting one. */
810 // [wr0 wr1 wr2 wr3] <= *
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
811 // [wr4 wr5 wr6 wr7]
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
812 "wldrd wr12, [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
813 "wldrd wr13, [%[pixels], #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
814 "wldrd wr14, [%[pixels], #16] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
815 "add %[pixels], %[pixels], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
816 "walignr1 wr2, wr12, wr13 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
817 "pld [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
818 "pld [%[pixels], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
819 "walignr1 wr3, wr13, wr14 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
820 "wmoveq wr10, wr13 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
821 "wmoveq wr11, wr14 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
822 "walignr2ne wr10, wr12, wr13 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
823 "walignr2ne wr11, wr13, wr14 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
824 "wunpckelub wr0, wr2 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
825 "wunpckehub wr1, wr2 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
826 "wunpckelub wr2, wr3 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
827 "wunpckehub wr3, wr3 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
828 "wunpckelub wr8, wr10 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
829 "wunpckehub wr9, wr10 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
830 "wunpckelub wr10, wr11 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
831 "wunpckehub wr11, wr11 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
832 "waddhus wr0, wr0, wr8 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
833 "waddhus wr1, wr1, wr9 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
834 "waddhus wr2, wr2, wr10 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
835 "waddhus wr3, wr3, wr11 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
836 "waddhus wr8, wr0, wr4 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
837 "waddhus wr9, wr1, wr5 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
838 "waddhus wr10, wr2, wr6 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
839 "waddhus wr11, wr3, wr7 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
840 "waddhus wr8, wr8, wr15 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
841 "waddhus wr9, wr9, wr15 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
842 "waddhus wr10, wr10, wr15 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
843 "waddhus wr11, wr11, wr15 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
844 "wsrlhg wr8, wr8, wcgr0 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
845 "wsrlhg wr9, wr9, wcgr0 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
846 "wsrlhg wr10, wr10, wcgr0 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
847 "wsrlhg wr11, wr11, wcgr0 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
848 "wpackhus wr8, wr8, wr9 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
849 "wpackhus wr9, wr10, wr11 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
850 "wstrd wr8, [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
851 "wstrd wr9, [%[block], #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
852 "add %[block], %[block], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
853
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
/* Two output rows were written in this iteration; repeat until h reaches
 * exactly zero. */
854 "subs %[h], %[h], #2 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
855 "bne 1b \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
856 : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block)
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
857 : [line_size]"r"(line_size)
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
/* NOTE(review): only r12 and memory are declared as clobbered, although
 * the code also changes the condition flags (cmp, subs), wr0..wr14 and
 * wcgr0..wcgr2, and it reads wr15 that was set by a separate statement.
 * Confirm whether the toolchain needs these declared. */
858 : "r12", "memory");
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
859 }
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
860
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
861 void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
862 {
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
863 // [wr0 wr1 wr2 wr3] for previous line
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
864 // [wr4 wr5 wr6 wr7] for current line
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
865 SET_RND(wr15); // =2 for rnd and =1 for no_rnd version
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
866 __asm__ __volatile__(
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
867 "pld [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
868 "pld [%[block], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
869 "pld [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
870 "mov r12, #2 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
871 "pld [%[pixels], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
872 "tmcr wcgr0, r12 \n\t" /* for shift value */
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
873 "and r12, %[pixels], #7 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
874 "bic %[pixels], %[pixels], #7 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
875 "tmcr wcgr1, r12 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
876
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
877 // [wr0 wr1 wr2 wr3] <= *
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
878 // [wr4 wr5 wr6 wr7]
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
879 "wldrd wr12, [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
880 "add r12, r12, #1 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
881 "wldrd wr13, [%[pixels], #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
882 "tmcr wcgr2, r12 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
883 "add %[pixels], %[pixels], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
884 "cmp r12, #8 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
885 "pld [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
886 "pld [%[pixels], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
887 "walignr1 wr2, wr12, wr13 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
888 "wmoveq wr10, wr13 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
889 "walignr2ne wr10, wr12, wr13 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
890 "wunpckelub wr0, wr2 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
891 "wunpckehub wr1, wr2 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
892 "wunpckelub wr8, wr10 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
893 "wunpckehub wr9, wr10 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
894 "waddhus wr0, wr0, wr8 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
895 "waddhus wr1, wr1, wr9 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
896
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
897 "1: \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
898 // [wr0 wr1 wr2 wr3]
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
899 // [wr4 wr5 wr6 wr7] <= *
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
900 "wldrd wr12, [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
901 "cmp r12, #8 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
902 "wldrd wr13, [%[pixels], #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
903 "add %[pixels], %[pixels], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
904 "walignr1 wr6, wr12, wr13 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
905 "pld [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
906 "pld [%[pixels], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
907 "wmoveq wr10, wr13 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
908 "walignr2ne wr10, wr12, wr13 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
909 "wunpckelub wr4, wr6 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
910 "wunpckehub wr5, wr6 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
911 "wunpckelub wr8, wr10 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
912 "wunpckehub wr9, wr10 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
913 "waddhus wr4, wr4, wr8 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
914 "waddhus wr5, wr5, wr9 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
915 "waddhus wr8, wr0, wr4 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
916 "waddhus wr9, wr1, wr5 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
917 "waddhus wr8, wr8, wr15 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
918 "waddhus wr9, wr9, wr15 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
919 "wldrd wr12, [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
920 "wsrlhg wr8, wr8, wcgr0 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
921 "wsrlhg wr9, wr9, wcgr0 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
922 "wpackhus wr8, wr8, wr9 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
923 WAVG2B" wr8, wr8, wr12 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
924 "wstrd wr8, [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
925 "add %[block], %[block], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
926 "wldrd wr12, [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
927 "pld [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
928 "pld [%[block], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
929
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
930 // [wr0 wr1 wr2 wr3] <= *
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
931 // [wr4 wr5 wr6 wr7]
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
932 "wldrd wr13, [%[pixels], #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
933 "add %[pixels], %[pixels], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
934 "walignr1 wr2, wr12, wr13 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
935 "pld [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
936 "pld [%[pixels], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
937 "wmoveq wr10, wr13 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
938 "walignr2ne wr10, wr12, wr13 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
939 "wunpckelub wr0, wr2 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
940 "wunpckehub wr1, wr2 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
941 "wunpckelub wr8, wr10 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
942 "wunpckehub wr9, wr10 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
943 "waddhus wr0, wr0, wr8 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
944 "waddhus wr1, wr1, wr9 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
945 "waddhus wr8, wr0, wr4 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
946 "waddhus wr9, wr1, wr5 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
947 "waddhus wr8, wr8, wr15 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
948 "waddhus wr9, wr9, wr15 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
949 "wldrd wr12, [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
950 "wsrlhg wr8, wr8, wcgr0 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
951 "wsrlhg wr9, wr9, wcgr0 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
952 "wpackhus wr8, wr8, wr9 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
953 "subs %[h], %[h], #2 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
954 WAVG2B" wr8, wr8, wr12 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
955 "wstrd wr8, [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
956 "add %[block], %[block], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
957 "pld [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
958 "pld [%[block], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
959 "bne 1b \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
960 : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block)
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
961 : [line_size]"r"(line_size)
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
962 : "r12", "memory");
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
963 }
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
964
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
965 void DEF(avg, pixels16_xy2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
966 {
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
967 // [wr0 wr1 wr2 wr3] for previous line
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
968 // [wr4 wr5 wr6 wr7] for current line
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
969 SET_RND(wr15); // =2 for rnd and =1 for no_rnd version
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
970 __asm__ __volatile__(
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
971 "pld [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
972 "pld [%[block], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
973 "pld [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
974 "mov r12, #2 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
975 "pld [%[pixels], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
976 "tmcr wcgr0, r12 \n\t" /* for shift value */
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
977 /* alignment */
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
978 "and r12, %[pixels], #7 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
979 "bic %[pixels], %[pixels], #7 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
980 "tmcr wcgr1, r12 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
981 "add r12, r12, #1 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
982 "tmcr wcgr2, r12 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
983
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
984 // [wr0 wr1 wr2 wr3] <= *
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
985 // [wr4 wr5 wr6 wr7]
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
986 "wldrd wr12, [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
987 "cmp r12, #8 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
988 "wldrd wr13, [%[pixels], #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
989 "wldrd wr14, [%[pixels], #16] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
990 "add %[pixels], %[pixels], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
991 "pld [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
992 "walignr1 wr2, wr12, wr13 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
993 "pld [%[pixels], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
994 "walignr1 wr3, wr13, wr14 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
995 "wmoveq wr10, wr13 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
996 "wmoveq wr11, wr14 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
997 "walignr2ne wr10, wr12, wr13 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
998 "walignr2ne wr11, wr13, wr14 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
999 "wunpckelub wr0, wr2 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1000 "wunpckehub wr1, wr2 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1001 "wunpckelub wr2, wr3 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1002 "wunpckehub wr3, wr3 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1003 "wunpckelub wr8, wr10 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1004 "wunpckehub wr9, wr10 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1005 "wunpckelub wr10, wr11 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1006 "wunpckehub wr11, wr11 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1007 "waddhus wr0, wr0, wr8 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1008 "waddhus wr1, wr1, wr9 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1009 "waddhus wr2, wr2, wr10 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1010 "waddhus wr3, wr3, wr11 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1011
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1012 "1: \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1013 // [wr0 wr1 wr2 wr3]
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1014 // [wr4 wr5 wr6 wr7] <= *
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1015 "wldrd wr12, [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1016 "cmp r12, #8 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1017 "wldrd wr13, [%[pixels], #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1018 "wldrd wr14, [%[pixels], #16] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1019 "add %[pixels], %[pixels], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1020 "walignr1 wr6, wr12, wr13 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1021 "pld [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1022 "pld [%[pixels], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1023 "walignr1 wr7, wr13, wr14 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1024 "wmoveq wr10, wr13 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1025 "wmoveq wr11, wr14 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1026 "walignr2ne wr10, wr12, wr13 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1027 "walignr2ne wr11, wr13, wr14 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1028 "wunpckelub wr4, wr6 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1029 "wunpckehub wr5, wr6 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1030 "wunpckelub wr6, wr7 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1031 "wunpckehub wr7, wr7 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1032 "wunpckelub wr8, wr10 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1033 "wunpckehub wr9, wr10 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1034 "wunpckelub wr10, wr11 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1035 "wunpckehub wr11, wr11 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1036 "waddhus wr4, wr4, wr8 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1037 "waddhus wr5, wr5, wr9 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1038 "waddhus wr6, wr6, wr10 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1039 "waddhus wr7, wr7, wr11 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1040 "waddhus wr8, wr0, wr4 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1041 "waddhus wr9, wr1, wr5 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1042 "waddhus wr10, wr2, wr6 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1043 "waddhus wr11, wr3, wr7 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1044 "waddhus wr8, wr8, wr15 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1045 "waddhus wr9, wr9, wr15 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1046 "waddhus wr10, wr10, wr15 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1047 "waddhus wr11, wr11, wr15 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1048 "wsrlhg wr8, wr8, wcgr0 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1049 "wsrlhg wr9, wr9, wcgr0 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1050 "wldrd wr12, [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1051 "wldrd wr13, [%[block], #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1052 "wsrlhg wr10, wr10, wcgr0 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1053 "wsrlhg wr11, wr11, wcgr0 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1054 "wpackhus wr8, wr8, wr9 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1055 "wpackhus wr9, wr10, wr11 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1056 WAVG2B" wr8, wr8, wr12 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1057 WAVG2B" wr9, wr9, wr13 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1058 "wstrd wr8, [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1059 "wstrd wr9, [%[block], #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1060 "add %[block], %[block], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1061
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1062 // [wr0 wr1 wr2 wr3] <= *
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1063 // [wr4 wr5 wr6 wr7]
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1064 "wldrd wr12, [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1065 "pld [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1066 "wldrd wr13, [%[pixels], #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1067 "pld [%[block], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1068 "wldrd wr14, [%[pixels], #16] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1069 "add %[pixels], %[pixels], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1070 "walignr1 wr2, wr12, wr13 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1071 "pld [%[pixels]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1072 "pld [%[pixels], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1073 "walignr1 wr3, wr13, wr14 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1074 "wmoveq wr10, wr13 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1075 "wmoveq wr11, wr14 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1076 "walignr2ne wr10, wr12, wr13 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1077 "walignr2ne wr11, wr13, wr14 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1078 "wunpckelub wr0, wr2 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1079 "wunpckehub wr1, wr2 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1080 "wunpckelub wr2, wr3 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1081 "wunpckehub wr3, wr3 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1082 "wunpckelub wr8, wr10 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1083 "wunpckehub wr9, wr10 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1084 "wunpckelub wr10, wr11 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1085 "wunpckehub wr11, wr11 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1086 "waddhus wr0, wr0, wr8 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1087 "waddhus wr1, wr1, wr9 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1088 "waddhus wr2, wr2, wr10 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1089 "waddhus wr3, wr3, wr11 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1090 "waddhus wr8, wr0, wr4 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1091 "waddhus wr9, wr1, wr5 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1092 "waddhus wr10, wr2, wr6 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1093 "waddhus wr11, wr3, wr7 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1094 "waddhus wr8, wr8, wr15 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1095 "waddhus wr9, wr9, wr15 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1096 "waddhus wr10, wr10, wr15 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1097 "waddhus wr11, wr11, wr15 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1098 "wsrlhg wr8, wr8, wcgr0 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1099 "wsrlhg wr9, wr9, wcgr0 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1100 "wldrd wr12, [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1101 "wldrd wr13, [%[block], #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1102 "wsrlhg wr10, wr10, wcgr0 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1103 "wsrlhg wr11, wr11, wcgr0 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1104 "wpackhus wr8, wr8, wr9 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1105 "wpackhus wr9, wr10, wr11 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1106 WAVG2B" wr8, wr8, wr12 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1107 WAVG2B" wr9, wr9, wr13 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1108 "wstrd wr8, [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1109 "wstrd wr9, [%[block], #8] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1110 "add %[block], %[block], %[line_size] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1111 "subs %[h], %[h], #2 \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1112 "pld [%[block]] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1113 "pld [%[block], #32] \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1114 "bne 1b \n\t"
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1115 : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block)
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1116 : [line_size]"r"(line_size)
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1117 : "r12", "memory");
aeea63c97878 Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff changeset
1118 }