Mercurial > libavcodec.hg
annotate armv4l/dsputil_iwmmxt_rnd.h @ 2791:f191093dc8fe libavcodec
check for CODEC_CAP_DELAY in audio decoders too
author | michael |
---|---|
date | Thu, 14 Jul 2005 15:30:39 +0000 |
parents | aeea63c97878 |
children | c537a97eec66 |
rev | line source |
---|---|
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1 void DEF(put, pixels8)(uint8_t *block, const uint8_t *pixels, const int line_size, int h) |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
2 { |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
3 int stride = line_size; |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
4 __asm__ __volatile__ ( |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
5 "and r12, %[pixels], #7 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
6 "bic %[pixels], %[pixels], #7 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
7 "tmcr wcgr1, r12 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
8 "add r4, %[pixels], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
9 "add r5, %[block], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
10 "mov %[line_size], %[line_size], lsl #1 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
11 "1: \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
12 "wldrd wr0, [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
13 "subs %[h], %[h], #2 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
14 "wldrd wr1, [%[pixels], #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
15 "add %[pixels], %[pixels], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
16 "wldrd wr3, [r4] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
17 "pld [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
18 "pld [%[pixels], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
19 "wldrd wr4, [r4, #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
20 "add r4, r4, %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
21 "walignr1 wr8, wr0, wr1 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
22 "pld [r4] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
23 "pld [r4, #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
24 "walignr1 wr10, wr3, wr4 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
25 "wstrd wr8, [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
26 "add %[block], %[block], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
27 "wstrd wr10, [r5] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
28 "add r5, r5, %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
29 "bne 1b \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
30 : [block]"+r"(block), [pixels]"+r"(pixels), [line_size]"+r"(stride), [h]"+r"(h) |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
31 : |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
32 : "memory", "r4", "r5", "r12"); |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
33 } |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
34 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
35 void DEF(avg, pixels8)(uint8_t *block, const uint8_t *pixels, const int line_size, int h) |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
36 { |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
37 int stride = line_size; |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
38 __asm__ __volatile__ ( |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
39 "and r12, %[pixels], #7 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
40 "bic %[pixels], %[pixels], #7 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
41 "tmcr wcgr1, r12 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
42 "add r4, %[pixels], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
43 "add r5, %[block], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
44 "mov %[line_size], %[line_size], lsl #1 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
45 "1: \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
46 "wldrd wr0, [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
47 "subs %[h], %[h], #2 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
48 "wldrd wr1, [%[pixels], #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
49 "add %[pixels], %[pixels], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
50 "wldrd wr3, [r4] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
51 "pld [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
52 "pld [%[pixels], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
53 "wldrd wr4, [r4, #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
54 "add r4, r4, %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
55 "walignr1 wr8, wr0, wr1 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
56 "wldrd wr0, [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
57 "wldrd wr2, [r5] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
58 "pld [r4] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
59 "pld [r4, #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
60 "walignr1 wr10, wr3, wr4 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
61 WAVG2B" wr8, wr8, wr0 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
62 WAVG2B" wr10, wr10, wr2 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
63 "wstrd wr8, [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
64 "add %[block], %[block], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
65 "wstrd wr10, [r5] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
66 "pld [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
67 "pld [%[block], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
68 "add r5, r5, %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
69 "pld [r5] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
70 "pld [r5, #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
71 "bne 1b \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
72 : [block]"+r"(block), [pixels]"+r"(pixels), [line_size]"+r"(stride), [h]"+r"(h) |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
73 : |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
74 : "memory", "r4", "r5", "r12"); |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
75 } |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
76 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
77 void DEF(put, pixels16)(uint8_t *block, const uint8_t *pixels, const int line_size, int h) |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
78 { |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
79 int stride = line_size; |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
80 __asm__ __volatile__ ( |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
81 "and r12, %[pixels], #7 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
82 "bic %[pixels], %[pixels], #7 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
83 "tmcr wcgr1, r12 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
84 "add r4, %[pixels], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
85 "add r5, %[block], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
86 "mov %[line_size], %[line_size], lsl #1 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
87 "1: \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
88 "wldrd wr0, [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
89 "wldrd wr1, [%[pixels], #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
90 "subs %[h], %[h], #2 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
91 "wldrd wr2, [%[pixels], #16] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
92 "add %[pixels], %[pixels], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
93 "wldrd wr3, [r4] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
94 "pld [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
95 "pld [%[pixels], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
96 "walignr1 wr8, wr0, wr1 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
97 "wldrd wr4, [r4, #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
98 "walignr1 wr9, wr1, wr2 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
99 "wldrd wr5, [r4, #16] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
100 "add r4, r4, %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
101 "pld [r4] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
102 "pld [r4, #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
103 "walignr1 wr10, wr3, wr4 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
104 "wstrd wr8, [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
105 "walignr1 wr11, wr4, wr5 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
106 "wstrd wr9, [%[block], #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
107 "add %[block], %[block], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
108 "wstrd wr10, [r5] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
109 "wstrd wr11, [r5, #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
110 "add r5, r5, %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
111 "bne 1b \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
112 : [block]"+r"(block), [pixels]"+r"(pixels), [line_size]"+r"(stride), [h]"+r"(h) |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
113 : |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
114 : "memory", "r4", "r5", "r12"); |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
115 } |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
116 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
117 void DEF(avg, pixels16)(uint8_t *block, const uint8_t *pixels, const int line_size, int h) |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
118 { |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
119 int stride = line_size; |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
120 __asm__ __volatile__ ( |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
121 "pld [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
122 "pld [%[pixels], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
123 "pld [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
124 "pld [%[block], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
125 "and r12, %[pixels], #7 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
126 "bic %[pixels], %[pixels], #7 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
127 "tmcr wcgr1, r12 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
128 "add r4, %[pixels], %[line_size]\n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
129 "add r5, %[block], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
130 "mov %[line_size], %[line_size], lsl #1 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
131 "1: \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
132 "wldrd wr0, [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
133 "wldrd wr1, [%[pixels], #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
134 "subs %[h], %[h], #2 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
135 "wldrd wr2, [%[pixels], #16] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
136 "add %[pixels], %[pixels], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
137 "wldrd wr3, [r4] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
138 "pld [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
139 "pld [%[pixels], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
140 "walignr1 wr8, wr0, wr1 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
141 "wldrd wr4, [r4, #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
142 "walignr1 wr9, wr1, wr2 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
143 "wldrd wr5, [r4, #16] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
144 "add r4, r4, %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
145 "wldrd wr0, [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
146 "pld [r4] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
147 "wldrd wr1, [%[block], #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
148 "pld [r4, #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
149 "wldrd wr2, [r5] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
150 "walignr1 wr10, wr3, wr4 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
151 "wldrd wr3, [r5, #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
152 WAVG2B" wr8, wr8, wr0 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
153 WAVG2B" wr9, wr9, wr1 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
154 WAVG2B" wr10, wr10, wr2 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
155 "wstrd wr8, [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
156 "walignr1 wr11, wr4, wr5 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
157 WAVG2B" wr11, wr11, wr3 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
158 "wstrd wr9, [%[block], #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
159 "add %[block], %[block], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
160 "wstrd wr10, [r5] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
161 "pld [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
162 "pld [%[block], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
163 "wstrd wr11, [r5, #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
164 "add r5, r5, %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
165 "pld [r5] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
166 "pld [r5, #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
167 "bne 1b \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
168 : [block]"+r"(block), [pixels]"+r"(pixels), [line_size]"+r"(stride), [h]"+r"(h) |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
169 : |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
170 : "memory", "r4", "r5", "r12"); |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
171 } |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
172 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
173 void DEF(put, pixels8_x2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h) |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
174 { |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
175 int stride = line_size; |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
176 // [wr0 wr1 wr2 wr3] for previous line |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
177 // [wr4 wr5 wr6 wr7] for current line |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
178 SET_RND(wr15); // =2 for rnd and =1 for no_rnd version |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
179 __asm__ __volatile__( |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
180 "pld [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
181 "pld [%[pixels], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
182 "and r12, %[pixels], #7 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
183 "bic %[pixels], %[pixels], #7 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
184 "tmcr wcgr1, r12 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
185 "add r12, r12, #1 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
186 "add r4, %[pixels], %[line_size]\n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
187 "tmcr wcgr2, r12 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
188 "add r5, %[block], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
189 "mov %[line_size], %[line_size], lsl #1 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
190 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
191 "1: \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
192 "wldrd wr10, [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
193 "cmp r12, #8 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
194 "wldrd wr11, [%[pixels], #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
195 "add %[pixels], %[pixels], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
196 "wldrd wr13, [r4] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
197 "pld [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
198 "wldrd wr14, [r4, #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
199 "pld [%[pixels], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
200 "add r4, r4, %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
201 "walignr1 wr0, wr10, wr11 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
202 "pld [r4] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
203 "pld [r4, #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
204 "walignr1 wr2, wr13, wr14 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
205 "wmoveq wr4, wr11 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
206 "wmoveq wr6, wr14 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
207 "walignr2ne wr4, wr10, wr11 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
208 "walignr2ne wr6, wr13, wr14 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
209 WAVG2B" wr0, wr0, wr4 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
210 WAVG2B" wr2, wr2, wr6 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
211 "wstrd wr0, [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
212 "subs %[h], %[h], #2 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
213 "wstrd wr2, [r5] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
214 "add %[block], %[block], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
215 "add r5, r5, %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
216 "bne 1b \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
217 : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride) |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
218 : |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
219 : "r4", "r5", "r12", "memory"); |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
220 } |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
221 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
222 void DEF(put, pixels16_x2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h) |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
223 { |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
224 int stride = line_size; |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
225 // [wr0 wr1 wr2 wr3] for previous line |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
226 // [wr4 wr5 wr6 wr7] for current line |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
227 SET_RND(wr15); // =2 for rnd and =1 for no_rnd version |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
228 __asm__ __volatile__( |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
229 "pld [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
230 "pld [%[pixels], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
231 "and r12, %[pixels], #7 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
232 "bic %[pixels], %[pixels], #7 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
233 "tmcr wcgr1, r12 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
234 "add r12, r12, #1 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
235 "add r4, %[pixels], %[line_size]\n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
236 "tmcr wcgr2, r12 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
237 "add r5, %[block], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
238 "mov %[line_size], %[line_size], lsl #1 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
239 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
240 "1: \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
241 "wldrd wr10, [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
242 "cmp r12, #8 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
243 "wldrd wr11, [%[pixels], #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
244 "wldrd wr12, [%[pixels], #16] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
245 "add %[pixels], %[pixels], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
246 "wldrd wr13, [r4] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
247 "pld [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
248 "wldrd wr14, [r4, #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
249 "pld [%[pixels], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
250 "wldrd wr15, [r4, #16] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
251 "add r4, r4, %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
252 "walignr1 wr0, wr10, wr11 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
253 "pld [r4] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
254 "pld [r4, #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
255 "walignr1 wr1, wr11, wr12 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
256 "walignr1 wr2, wr13, wr14 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
257 "walignr1 wr3, wr14, wr15 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
258 "wmoveq wr4, wr11 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
259 "wmoveq wr5, wr12 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
260 "wmoveq wr6, wr14 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
261 "wmoveq wr7, wr15 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
262 "walignr2ne wr4, wr10, wr11 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
263 "walignr2ne wr5, wr11, wr12 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
264 "walignr2ne wr6, wr13, wr14 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
265 "walignr2ne wr7, wr14, wr15 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
266 WAVG2B" wr0, wr0, wr4 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
267 WAVG2B" wr1, wr1, wr5 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
268 "wstrd wr0, [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
269 WAVG2B" wr2, wr2, wr6 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
270 "wstrd wr1, [%[block], #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
271 WAVG2B" wr3, wr3, wr7 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
272 "add %[block], %[block], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
273 "wstrd wr2, [r5] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
274 "subs %[h], %[h], #2 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
275 "wstrd wr3, [r5, #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
276 "add r5, r5, %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
277 "bne 1b \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
278 : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride) |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
279 : |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
280 : "r4", "r5", "r12", "memory"); |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
281 } |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
282 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
283 void DEF(avg, pixels8_x2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h) |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
284 { |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
285 int stride = line_size; |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
286 // [wr0 wr1 wr2 wr3] for previous line |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
287 // [wr4 wr5 wr6 wr7] for current line |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
288 SET_RND(wr15); // =2 for rnd and =1 for no_rnd version |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
289 __asm__ __volatile__( |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
290 "pld [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
291 "pld [%[pixels], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
292 "pld [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
293 "pld [%[block], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
294 "and r12, %[pixels], #7 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
295 "bic %[pixels], %[pixels], #7 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
296 "tmcr wcgr1, r12 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
297 "add r12, r12, #1 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
298 "add r4, %[pixels], %[line_size]\n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
299 "tmcr wcgr2, r12 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
300 "add r5, %[block], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
301 "mov %[line_size], %[line_size], lsl #1 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
302 "pld [r5] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
303 "pld [r5, #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
304 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
305 "1: \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
306 "wldrd wr10, [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
307 "cmp r12, #8 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
308 "wldrd wr11, [%[pixels], #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
309 "add %[pixels], %[pixels], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
310 "wldrd wr13, [r4] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
311 "pld [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
312 "wldrd wr14, [r4, #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
313 "pld [%[pixels], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
314 "add r4, r4, %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
315 "walignr1 wr0, wr10, wr11 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
316 "pld [r4] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
317 "pld [r4, #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
318 "walignr1 wr2, wr13, wr14 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
319 "wmoveq wr4, wr11 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
320 "wmoveq wr6, wr14 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
321 "walignr2ne wr4, wr10, wr11 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
322 "wldrd wr10, [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
323 "walignr2ne wr6, wr13, wr14 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
324 "wldrd wr12, [r5] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
325 WAVG2B" wr0, wr0, wr4 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
326 WAVG2B" wr2, wr2, wr6 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
327 WAVG2B" wr0, wr0, wr10 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
328 WAVG2B" wr2, wr2, wr12 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
329 "wstrd wr0, [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
330 "subs %[h], %[h], #2 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
331 "wstrd wr2, [r5] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
332 "add %[block], %[block], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
333 "add r5, r5, %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
334 "pld [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
335 "pld [%[block], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
336 "pld [r5] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
337 "pld [r5, #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
338 "bne 1b \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
339 : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride) |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
340 : |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
341 : "r4", "r5", "r12", "memory"); |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
342 } |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
343 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
344 void DEF(avg, pixels16_x2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h) |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
345 { |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
346 int stride = line_size; |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
347 // [wr0 wr1 wr2 wr3] for previous line |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
348 // [wr4 wr5 wr6 wr7] for current line |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
349 SET_RND(wr15); // =2 for rnd and =1 for no_rnd version |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
350 __asm__ __volatile__( |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
351 "pld [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
352 "pld [%[pixels], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
353 "pld [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
354 "pld [%[block], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
355 "and r12, %[pixels], #7 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
356 "bic %[pixels], %[pixels], #7 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
357 "tmcr wcgr1, r12 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
358 "add r12, r12, #1 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
359 "add r4, %[pixels], %[line_size]\n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
360 "tmcr wcgr2, r12 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
361 "add r5, %[block], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
362 "mov %[line_size], %[line_size], lsl #1 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
363 "pld [r5] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
364 "pld [r5, #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
365 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
366 "1: \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
367 "wldrd wr10, [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
368 "cmp r12, #8 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
369 "wldrd wr11, [%[pixels], #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
370 "wldrd wr12, [%[pixels], #16] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
371 "add %[pixels], %[pixels], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
372 "wldrd wr13, [r4] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
373 "pld [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
374 "wldrd wr14, [r4, #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
375 "pld [%[pixels], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
376 "wldrd wr15, [r4, #16] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
377 "add r4, r4, %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
378 "walignr1 wr0, wr10, wr11 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
379 "pld [r4] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
380 "pld [r4, #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
381 "walignr1 wr1, wr11, wr12 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
382 "walignr1 wr2, wr13, wr14 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
383 "walignr1 wr3, wr14, wr15 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
384 "wmoveq wr4, wr11 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
385 "wmoveq wr5, wr12 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
386 "wmoveq wr6, wr14 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
387 "wmoveq wr7, wr15 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
388 "walignr2ne wr4, wr10, wr11 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
389 "walignr2ne wr5, wr11, wr12 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
390 "walignr2ne wr6, wr13, wr14 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
391 "walignr2ne wr7, wr14, wr15 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
392 "wldrd wr10, [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
393 WAVG2B" wr0, wr0, wr4 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
394 "wldrd wr11, [%[block], #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
395 WAVG2B" wr1, wr1, wr5 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
396 "wldrd wr12, [r5] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
397 WAVG2B" wr2, wr2, wr6 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
398 "wldrd wr13, [r5, #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
399 WAVG2B" wr3, wr3, wr7 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
400 WAVG2B" wr0, wr0, wr10 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
401 WAVG2B" wr1, wr1, wr11 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
402 WAVG2B" wr2, wr2, wr12 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
403 WAVG2B" wr3, wr3, wr13 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
404 "wstrd wr0, [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
405 "subs %[h], %[h], #2 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
406 "wstrd wr1, [%[block], #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
407 "add %[block], %[block], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
408 "wstrd wr2, [r5] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
409 "pld [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
410 "wstrd wr3, [r5, #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
411 "add r5, r5, %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
412 "pld [%[block], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
413 "pld [r5] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
414 "pld [r5, #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
415 "bne 1b \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
416 : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride) |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
417 : |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
418 :"r4", "r5", "r12", "memory"); |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
419 } |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
420 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
421 void DEF(avg, pixels8_y2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h) |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
422 { |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
423 int stride = line_size; |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
424 // [wr0 wr1 wr2 wr3] for previous line |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
425 // [wr4 wr5 wr6 wr7] for current line |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
426 __asm__ __volatile__( |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
427 "pld [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
428 "pld [%[pixels], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
429 "and r12, %[pixels], #7 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
430 "tmcr wcgr1, r12 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
431 "bic %[pixels], %[pixels], #7 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
432 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
433 "wldrd wr10, [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
434 "wldrd wr11, [%[pixels], #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
435 "pld [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
436 "add %[pixels], %[pixels], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
437 "walignr1 wr0, wr10, wr11 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
438 "pld [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
439 "pld [%[pixels], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
440 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
441 "1: \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
442 "wldrd wr10, [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
443 "wldrd wr11, [%[pixels], #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
444 "add %[pixels], %[pixels], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
445 "pld [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
446 "pld [%[pixels], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
447 "walignr1 wr4, wr10, wr11 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
448 "wldrd wr10, [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
449 WAVG2B" wr8, wr0, wr4 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
450 WAVG2B" wr8, wr8, wr10 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
451 "wstrd wr8, [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
452 "add %[block], %[block], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
453 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
454 "wldrd wr10, [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
455 "wldrd wr11, [%[pixels], #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
456 "pld [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
457 "add %[pixels], %[pixels], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
458 "pld [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
459 "pld [%[pixels], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
460 "walignr1 wr0, wr10, wr11 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
461 "wldrd wr10, [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
462 WAVG2B" wr8, wr0, wr4 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
463 WAVG2B" wr8, wr8, wr10 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
464 "wstrd wr8, [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
465 "add %[block], %[block], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
466 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
467 "subs %[h], %[h], #2 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
468 "pld [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
469 "bne 1b \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
470 : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride) |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
471 : |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
472 : "cc", "memory", "r12"); |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
473 } |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
474 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
475 void DEF(put, pixels16_y2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h) |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
476 { |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
477 int stride = line_size; |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
478 // [wr0 wr1 wr2 wr3] for previous line |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
479 // [wr4 wr5 wr6 wr7] for current line |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
480 __asm__ __volatile__( |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
481 "pld [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
482 "pld [%[pixels], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
483 "and r12, %[pixels], #7 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
484 "tmcr wcgr1, r12 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
485 "bic %[pixels], %[pixels], #7 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
486 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
487 "wldrd wr10, [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
488 "wldrd wr11, [%[pixels], #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
489 "wldrd wr12, [%[pixels], #16] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
490 "add %[pixels], %[pixels], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
491 "pld [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
492 "pld [%[pixels], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
493 "walignr1 wr0, wr10, wr11 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
494 "walignr1 wr1, wr11, wr12 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
495 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
496 "1: \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
497 "wldrd wr10, [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
498 "wldrd wr11, [%[pixels], #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
499 "wldrd wr12, [%[pixels], #16] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
500 "add %[pixels], %[pixels], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
501 "pld [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
502 "pld [%[pixels], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
503 "walignr1 wr4, wr10, wr11 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
504 "walignr1 wr5, wr11, wr12 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
505 WAVG2B" wr8, wr0, wr4 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
506 WAVG2B" wr9, wr1, wr5 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
507 "wstrd wr8, [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
508 "wstrd wr9, [%[block], #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
509 "add %[block], %[block], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
510 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
511 "wldrd wr10, [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
512 "wldrd wr11, [%[pixels], #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
513 "wldrd wr12, [%[pixels], #16] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
514 "add %[pixels], %[pixels], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
515 "pld [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
516 "pld [%[pixels], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
517 "walignr1 wr0, wr10, wr11 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
518 "walignr1 wr1, wr11, wr12 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
519 WAVG2B" wr8, wr0, wr4 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
520 WAVG2B" wr9, wr1, wr5 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
521 "wstrd wr8, [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
522 "wstrd wr9, [%[block], #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
523 "add %[block], %[block], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
524 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
525 "subs %[h], %[h], #2 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
526 "bne 1b \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
527 : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride) |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
528 : |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
529 : "r4", "r5", "r12", "memory"); |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
530 } |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
531 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
532 void DEF(avg, pixels16_y2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h) |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
533 { |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
534 int stride = line_size; |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
535 // [wr0 wr1 wr2 wr3] for previous line |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
536 // [wr4 wr5 wr6 wr7] for current line |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
537 __asm__ __volatile__( |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
538 "pld [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
539 "pld [%[pixels], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
540 "and r12, %[pixels], #7 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
541 "tmcr wcgr1, r12 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
542 "bic %[pixels], %[pixels], #7 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
543 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
544 "wldrd wr10, [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
545 "wldrd wr11, [%[pixels], #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
546 "pld [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
547 "wldrd wr12, [%[pixels], #16] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
548 "add %[pixels], %[pixels], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
549 "pld [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
550 "pld [%[pixels], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
551 "walignr1 wr0, wr10, wr11 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
552 "walignr1 wr1, wr11, wr12 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
553 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
554 "1: \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
555 "wldrd wr10, [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
556 "wldrd wr11, [%[pixels], #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
557 "wldrd wr12, [%[pixels], #16] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
558 "add %[pixels], %[pixels], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
559 "pld [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
560 "pld [%[pixels], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
561 "walignr1 wr4, wr10, wr11 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
562 "walignr1 wr5, wr11, wr12 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
563 "wldrd wr10, [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
564 "wldrd wr11, [%[block], #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
565 WAVG2B" wr8, wr0, wr4 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
566 WAVG2B" wr9, wr1, wr5 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
567 WAVG2B" wr8, wr8, wr10 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
568 WAVG2B" wr9, wr9, wr11 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
569 "wstrd wr8, [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
570 "wstrd wr9, [%[block], #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
571 "add %[block], %[block], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
572 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
573 "wldrd wr10, [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
574 "wldrd wr11, [%[pixels], #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
575 "pld [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
576 "wldrd wr12, [%[pixels], #16] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
577 "add %[pixels], %[pixels], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
578 "pld [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
579 "pld [%[pixels], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
580 "walignr1 wr0, wr10, wr11 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
581 "walignr1 wr1, wr11, wr12 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
582 "wldrd wr10, [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
583 "wldrd wr11, [%[block], #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
584 WAVG2B" wr8, wr0, wr4 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
585 WAVG2B" wr9, wr1, wr5 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
586 WAVG2B" wr8, wr8, wr10 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
587 WAVG2B" wr9, wr9, wr11 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
588 "wstrd wr8, [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
589 "wstrd wr9, [%[block], #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
590 "add %[block], %[block], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
591 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
592 "subs %[h], %[h], #2 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
593 "pld [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
594 "bne 1b \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
595 : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride) |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
596 : |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
597 : "r4", "r5", "r12", "memory"); |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
598 } |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
599 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
600 void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h) |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
601 { |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
602 // [wr0 wr1 wr2 wr3] for previous line |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
603 // [wr4 wr5 wr6 wr7] for current line |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
604 SET_RND(wr15); // =2 for rnd and =1 for no_rnd version |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
605 __asm__ __volatile__( |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
606 "pld [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
607 "mov r12, #2 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
608 "pld [%[pixels], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
609 "tmcr wcgr0, r12 \n\t" /* for shift value */ |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
610 "and r12, %[pixels], #7 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
611 "bic %[pixels], %[pixels], #7 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
612 "tmcr wcgr1, r12 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
613 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
614 // [wr0 wr1 wr2 wr3] <= * |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
615 // [wr4 wr5 wr6 wr7] |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
616 "wldrd wr12, [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
617 "add r12, r12, #1 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
618 "wldrd wr13, [%[pixels], #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
619 "tmcr wcgr2, r12 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
620 "add %[pixels], %[pixels], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
621 "cmp r12, #8 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
622 "pld [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
623 "pld [%[pixels], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
624 "walignr1 wr2, wr12, wr13 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
625 "wmoveq wr10, wr13 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
626 "walignr2ne wr10, wr12, wr13 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
627 "wunpckelub wr0, wr2 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
628 "wunpckehub wr1, wr2 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
629 "wunpckelub wr8, wr10 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
630 "wunpckehub wr9, wr10 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
631 "waddhus wr0, wr0, wr8 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
632 "waddhus wr1, wr1, wr9 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
633 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
634 "1: \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
635 // [wr0 wr1 wr2 wr3] |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
636 // [wr4 wr5 wr6 wr7] <= * |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
637 "wldrd wr12, [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
638 "cmp r12, #8 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
639 "wldrd wr13, [%[pixels], #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
640 "add %[pixels], %[pixels], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
641 "walignr1 wr6, wr12, wr13 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
642 "pld [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
643 "pld [%[pixels], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
644 "wmoveq wr10, wr13 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
645 "walignr2ne wr10, wr12, wr13 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
646 "wunpckelub wr4, wr6 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
647 "wunpckehub wr5, wr6 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
648 "wunpckelub wr8, wr10 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
649 "wunpckehub wr9, wr10 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
650 "waddhus wr4, wr4, wr8 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
651 "waddhus wr5, wr5, wr9 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
652 "waddhus wr8, wr0, wr4 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
653 "waddhus wr9, wr1, wr5 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
654 "waddhus wr8, wr8, wr15 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
655 "waddhus wr9, wr9, wr15 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
656 "wsrlhg wr8, wr8, wcgr0 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
657 "wsrlhg wr9, wr9, wcgr0 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
658 "wpackhus wr8, wr8, wr9 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
659 "wstrd wr8, [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
660 "add %[block], %[block], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
661 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
662 // [wr0 wr1 wr2 wr3] <= * |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
663 // [wr4 wr5 wr6 wr7] |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
664 "wldrd wr12, [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
665 "wldrd wr13, [%[pixels], #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
666 "add %[pixels], %[pixels], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
667 "walignr1 wr2, wr12, wr13 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
668 "pld [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
669 "pld [%[pixels], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
670 "wmoveq wr10, wr13 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
671 "walignr2ne wr10, wr12, wr13 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
672 "wunpckelub wr0, wr2 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
673 "wunpckehub wr1, wr2 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
674 "wunpckelub wr8, wr10 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
675 "wunpckehub wr9, wr10 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
676 "waddhus wr0, wr0, wr8 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
677 "waddhus wr1, wr1, wr9 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
678 "waddhus wr8, wr0, wr4 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
679 "waddhus wr9, wr1, wr5 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
680 "waddhus wr8, wr8, wr15 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
681 "waddhus wr9, wr9, wr15 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
682 "wsrlhg wr8, wr8, wcgr0 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
683 "wsrlhg wr9, wr9, wcgr0 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
684 "wpackhus wr8, wr8, wr9 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
685 "subs %[h], %[h], #2 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
686 "wstrd wr8, [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
687 "add %[block], %[block], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
688 "bne 1b \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
689 : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block) |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
690 : [line_size]"r"(line_size) |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
691 : "r12", "memory"); |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
692 } |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
693 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
694 void DEF(put, pixels16_xy2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h) |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
695 { |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
696 // [wr0 wr1 wr2 wr3] for previous line |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
697 // [wr4 wr5 wr6 wr7] for current line |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
698 SET_RND(wr15); // =2 for rnd and =1 for no_rnd version |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
699 __asm__ __volatile__( |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
700 "pld [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
701 "mov r12, #2 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
702 "pld [%[pixels], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
703 "tmcr wcgr0, r12 \n\t" /* for shift value */ |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
704 /* alignment */ |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
705 "and r12, %[pixels], #7 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
706 "bic %[pixels], %[pixels], #7 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
707 "tmcr wcgr1, r12 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
708 "add r12, r12, #1 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
709 "tmcr wcgr2, r12 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
710 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
711 // [wr0 wr1 wr2 wr3] <= * |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
712 // [wr4 wr5 wr6 wr7] |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
713 "wldrd wr12, [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
714 "cmp r12, #8 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
715 "wldrd wr13, [%[pixels], #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
716 "wldrd wr14, [%[pixels], #16] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
717 "add %[pixels], %[pixels], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
718 "pld [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
719 "walignr1 wr2, wr12, wr13 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
720 "pld [%[pixels], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
721 "walignr1 wr3, wr13, wr14 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
722 "wmoveq wr10, wr13 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
723 "wmoveq wr11, wr14 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
724 "walignr2ne wr10, wr12, wr13 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
725 "walignr2ne wr11, wr13, wr14 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
726 "wunpckelub wr0, wr2 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
727 "wunpckehub wr1, wr2 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
728 "wunpckelub wr2, wr3 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
729 "wunpckehub wr3, wr3 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
730 "wunpckelub wr8, wr10 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
731 "wunpckehub wr9, wr10 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
732 "wunpckelub wr10, wr11 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
733 "wunpckehub wr11, wr11 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
734 "waddhus wr0, wr0, wr8 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
735 "waddhus wr1, wr1, wr9 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
736 "waddhus wr2, wr2, wr10 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
737 "waddhus wr3, wr3, wr11 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
738 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
739 "1: \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
740 // [wr0 wr1 wr2 wr3] |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
741 // [wr4 wr5 wr6 wr7] <= * |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
742 "wldrd wr12, [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
743 "cmp r12, #8 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
744 "wldrd wr13, [%[pixels], #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
745 "wldrd wr14, [%[pixels], #16] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
746 "add %[pixels], %[pixels], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
747 "walignr1 wr6, wr12, wr13 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
748 "pld [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
749 "pld [%[pixels], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
750 "walignr1 wr7, wr13, wr14 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
751 "wmoveq wr10, wr13 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
752 "wmoveq wr11, wr14 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
753 "walignr2ne wr10, wr12, wr13 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
754 "walignr2ne wr11, wr13, wr14 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
755 "wunpckelub wr4, wr6 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
756 "wunpckehub wr5, wr6 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
757 "wunpckelub wr6, wr7 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
758 "wunpckehub wr7, wr7 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
759 "wunpckelub wr8, wr10 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
760 "wunpckehub wr9, wr10 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
761 "wunpckelub wr10, wr11 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
762 "wunpckehub wr11, wr11 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
763 "waddhus wr4, wr4, wr8 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
764 "waddhus wr5, wr5, wr9 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
765 "waddhus wr6, wr6, wr10 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
766 "waddhus wr7, wr7, wr11 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
767 "waddhus wr8, wr0, wr4 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
768 "waddhus wr9, wr1, wr5 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
769 "waddhus wr10, wr2, wr6 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
770 "waddhus wr11, wr3, wr7 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
771 "waddhus wr8, wr8, wr15 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
772 "waddhus wr9, wr9, wr15 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
773 "waddhus wr10, wr10, wr15 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
774 "waddhus wr11, wr11, wr15 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
775 "wsrlhg wr8, wr8, wcgr0 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
776 "wsrlhg wr9, wr9, wcgr0 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
777 "wsrlhg wr10, wr10, wcgr0 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
778 "wsrlhg wr11, wr11, wcgr0 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
779 "wpackhus wr8, wr8, wr9 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
780 "wpackhus wr9, wr10, wr11 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
781 "wstrd wr8, [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
782 "wstrd wr9, [%[block], #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
783 "add %[block], %[block], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
784 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
785 // [wr0 wr1 wr2 wr3] <= * |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
786 // [wr4 wr5 wr6 wr7] |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
787 "wldrd wr12, [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
788 "wldrd wr13, [%[pixels], #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
789 "wldrd wr14, [%[pixels], #16] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
790 "add %[pixels], %[pixels], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
791 "walignr1 wr2, wr12, wr13 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
792 "pld [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
793 "pld [%[pixels], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
794 "walignr1 wr3, wr13, wr14 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
795 "wmoveq wr10, wr13 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
796 "wmoveq wr11, wr14 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
797 "walignr2ne wr10, wr12, wr13 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
798 "walignr2ne wr11, wr13, wr14 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
799 "wunpckelub wr0, wr2 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
800 "wunpckehub wr1, wr2 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
801 "wunpckelub wr2, wr3 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
802 "wunpckehub wr3, wr3 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
803 "wunpckelub wr8, wr10 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
804 "wunpckehub wr9, wr10 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
805 "wunpckelub wr10, wr11 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
806 "wunpckehub wr11, wr11 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
807 "waddhus wr0, wr0, wr8 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
808 "waddhus wr1, wr1, wr9 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
809 "waddhus wr2, wr2, wr10 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
810 "waddhus wr3, wr3, wr11 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
811 "waddhus wr8, wr0, wr4 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
812 "waddhus wr9, wr1, wr5 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
813 "waddhus wr10, wr2, wr6 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
814 "waddhus wr11, wr3, wr7 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
815 "waddhus wr8, wr8, wr15 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
816 "waddhus wr9, wr9, wr15 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
817 "waddhus wr10, wr10, wr15 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
818 "waddhus wr11, wr11, wr15 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
819 "wsrlhg wr8, wr8, wcgr0 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
820 "wsrlhg wr9, wr9, wcgr0 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
821 "wsrlhg wr10, wr10, wcgr0 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
822 "wsrlhg wr11, wr11, wcgr0 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
823 "wpackhus wr8, wr8, wr9 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
824 "wpackhus wr9, wr10, wr11 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
825 "wstrd wr8, [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
826 "wstrd wr9, [%[block], #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
827 "add %[block], %[block], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
828 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
829 "subs %[h], %[h], #2 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
830 "bne 1b \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
831 : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block) |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
832 : [line_size]"r"(line_size) |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
833 : "r12", "memory"); |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
834 } |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
835 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
836 void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h) |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
837 { |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
838 // [wr0 wr1 wr2 wr3] for previous line |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
839 // [wr4 wr5 wr6 wr7] for current line |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
840 SET_RND(wr15); // =2 for rnd and =1 for no_rnd version |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
841 __asm__ __volatile__( |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
842 "pld [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
843 "pld [%[block], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
844 "pld [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
845 "mov r12, #2 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
846 "pld [%[pixels], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
847 "tmcr wcgr0, r12 \n\t" /* for shift value */ |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
848 "and r12, %[pixels], #7 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
849 "bic %[pixels], %[pixels], #7 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
850 "tmcr wcgr1, r12 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
851 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
852 // [wr0 wr1 wr2 wr3] <= * |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
853 // [wr4 wr5 wr6 wr7] |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
854 "wldrd wr12, [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
855 "add r12, r12, #1 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
856 "wldrd wr13, [%[pixels], #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
857 "tmcr wcgr2, r12 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
858 "add %[pixels], %[pixels], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
859 "cmp r12, #8 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
860 "pld [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
861 "pld [%[pixels], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
862 "walignr1 wr2, wr12, wr13 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
863 "wmoveq wr10, wr13 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
864 "walignr2ne wr10, wr12, wr13 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
865 "wunpckelub wr0, wr2 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
866 "wunpckehub wr1, wr2 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
867 "wunpckelub wr8, wr10 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
868 "wunpckehub wr9, wr10 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
869 "waddhus wr0, wr0, wr8 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
870 "waddhus wr1, wr1, wr9 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
871 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
872 "1: \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
873 // [wr0 wr1 wr2 wr3] |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
874 // [wr4 wr5 wr6 wr7] <= * |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
875 "wldrd wr12, [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
876 "cmp r12, #8 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
877 "wldrd wr13, [%[pixels], #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
878 "add %[pixels], %[pixels], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
879 "walignr1 wr6, wr12, wr13 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
880 "pld [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
881 "pld [%[pixels], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
882 "wmoveq wr10, wr13 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
883 "walignr2ne wr10, wr12, wr13 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
884 "wunpckelub wr4, wr6 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
885 "wunpckehub wr5, wr6 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
886 "wunpckelub wr8, wr10 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
887 "wunpckehub wr9, wr10 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
888 "waddhus wr4, wr4, wr8 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
889 "waddhus wr5, wr5, wr9 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
890 "waddhus wr8, wr0, wr4 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
891 "waddhus wr9, wr1, wr5 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
892 "waddhus wr8, wr8, wr15 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
893 "waddhus wr9, wr9, wr15 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
894 "wldrd wr12, [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
895 "wsrlhg wr8, wr8, wcgr0 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
896 "wsrlhg wr9, wr9, wcgr0 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
897 "wpackhus wr8, wr8, wr9 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
898 WAVG2B" wr8, wr8, wr12 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
899 "wstrd wr8, [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
900 "add %[block], %[block], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
901 "wldrd wr12, [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
902 "pld [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
903 "pld [%[block], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
904 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
905 // [wr0 wr1 wr2 wr3] <= * |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
906 // [wr4 wr5 wr6 wr7] |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
907 "wldrd wr13, [%[pixels], #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
908 "add %[pixels], %[pixels], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
909 "walignr1 wr2, wr12, wr13 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
910 "pld [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
911 "pld [%[pixels], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
912 "wmoveq wr10, wr13 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
913 "walignr2ne wr10, wr12, wr13 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
914 "wunpckelub wr0, wr2 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
915 "wunpckehub wr1, wr2 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
916 "wunpckelub wr8, wr10 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
917 "wunpckehub wr9, wr10 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
918 "waddhus wr0, wr0, wr8 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
919 "waddhus wr1, wr1, wr9 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
920 "waddhus wr8, wr0, wr4 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
921 "waddhus wr9, wr1, wr5 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
922 "waddhus wr8, wr8, wr15 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
923 "waddhus wr9, wr9, wr15 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
924 "wldrd wr12, [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
925 "wsrlhg wr8, wr8, wcgr0 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
926 "wsrlhg wr9, wr9, wcgr0 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
927 "wpackhus wr8, wr8, wr9 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
928 "subs %[h], %[h], #2 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
929 WAVG2B" wr8, wr8, wr12 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
930 "wstrd wr8, [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
931 "add %[block], %[block], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
932 "pld [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
933 "pld [%[block], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
934 "bne 1b \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
935 : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block) |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
936 : [line_size]"r"(line_size) |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
937 : "r12", "memory"); |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
938 } |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
939 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
940 void DEF(avg, pixels16_xy2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h) |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
941 { |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
942 // [wr0 wr1 wr2 wr3] for previous line |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
943 // [wr4 wr5 wr6 wr7] for current line |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
944 SET_RND(wr15); // =2 for rnd and =1 for no_rnd version |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
945 __asm__ __volatile__( |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
946 "pld [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
947 "pld [%[block], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
948 "pld [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
949 "mov r12, #2 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
950 "pld [%[pixels], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
951 "tmcr wcgr0, r12 \n\t" /* for shift value */ |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
952 /* alignment */ |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
953 "and r12, %[pixels], #7 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
954 "bic %[pixels], %[pixels], #7 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
955 "tmcr wcgr1, r12 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
956 "add r12, r12, #1 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
957 "tmcr wcgr2, r12 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
958 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
959 // [wr0 wr1 wr2 wr3] <= * |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
960 // [wr4 wr5 wr6 wr7] |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
961 "wldrd wr12, [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
962 "cmp r12, #8 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
963 "wldrd wr13, [%[pixels], #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
964 "wldrd wr14, [%[pixels], #16] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
965 "add %[pixels], %[pixels], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
966 "pld [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
967 "walignr1 wr2, wr12, wr13 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
968 "pld [%[pixels], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
969 "walignr1 wr3, wr13, wr14 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
970 "wmoveq wr10, wr13 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
971 "wmoveq wr11, wr14 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
972 "walignr2ne wr10, wr12, wr13 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
973 "walignr2ne wr11, wr13, wr14 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
974 "wunpckelub wr0, wr2 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
975 "wunpckehub wr1, wr2 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
976 "wunpckelub wr2, wr3 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
977 "wunpckehub wr3, wr3 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
978 "wunpckelub wr8, wr10 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
979 "wunpckehub wr9, wr10 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
980 "wunpckelub wr10, wr11 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
981 "wunpckehub wr11, wr11 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
982 "waddhus wr0, wr0, wr8 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
983 "waddhus wr1, wr1, wr9 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
984 "waddhus wr2, wr2, wr10 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
985 "waddhus wr3, wr3, wr11 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
986 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
987 "1: \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
988 // [wr0 wr1 wr2 wr3] |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
989 // [wr4 wr5 wr6 wr7] <= * |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
990 "wldrd wr12, [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
991 "cmp r12, #8 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
992 "wldrd wr13, [%[pixels], #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
993 "wldrd wr14, [%[pixels], #16] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
994 "add %[pixels], %[pixels], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
995 "walignr1 wr6, wr12, wr13 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
996 "pld [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
997 "pld [%[pixels], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
998 "walignr1 wr7, wr13, wr14 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
999 "wmoveq wr10, wr13 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1000 "wmoveq wr11, wr14 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1001 "walignr2ne wr10, wr12, wr13 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1002 "walignr2ne wr11, wr13, wr14 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1003 "wunpckelub wr4, wr6 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1004 "wunpckehub wr5, wr6 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1005 "wunpckelub wr6, wr7 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1006 "wunpckehub wr7, wr7 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1007 "wunpckelub wr8, wr10 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1008 "wunpckehub wr9, wr10 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1009 "wunpckelub wr10, wr11 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1010 "wunpckehub wr11, wr11 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1011 "waddhus wr4, wr4, wr8 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1012 "waddhus wr5, wr5, wr9 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1013 "waddhus wr6, wr6, wr10 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1014 "waddhus wr7, wr7, wr11 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1015 "waddhus wr8, wr0, wr4 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1016 "waddhus wr9, wr1, wr5 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1017 "waddhus wr10, wr2, wr6 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1018 "waddhus wr11, wr3, wr7 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1019 "waddhus wr8, wr8, wr15 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1020 "waddhus wr9, wr9, wr15 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1021 "waddhus wr10, wr10, wr15 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1022 "waddhus wr11, wr11, wr15 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1023 "wsrlhg wr8, wr8, wcgr0 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1024 "wsrlhg wr9, wr9, wcgr0 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1025 "wldrd wr12, [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1026 "wldrd wr13, [%[block], #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1027 "wsrlhg wr10, wr10, wcgr0 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1028 "wsrlhg wr11, wr11, wcgr0 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1029 "wpackhus wr8, wr8, wr9 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1030 "wpackhus wr9, wr10, wr11 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1031 WAVG2B" wr8, wr8, wr12 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1032 WAVG2B" wr9, wr9, wr13 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1033 "wstrd wr8, [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1034 "wstrd wr9, [%[block], #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1035 "add %[block], %[block], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1036 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1037 // [wr0 wr1 wr2 wr3] <= * |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1038 // [wr4 wr5 wr6 wr7] |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1039 "wldrd wr12, [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1040 "pld [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1041 "wldrd wr13, [%[pixels], #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1042 "pld [%[block], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1043 "wldrd wr14, [%[pixels], #16] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1044 "add %[pixels], %[pixels], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1045 "walignr1 wr2, wr12, wr13 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1046 "pld [%[pixels]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1047 "pld [%[pixels], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1048 "walignr1 wr3, wr13, wr14 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1049 "wmoveq wr10, wr13 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1050 "wmoveq wr11, wr14 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1051 "walignr2ne wr10, wr12, wr13 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1052 "walignr2ne wr11, wr13, wr14 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1053 "wunpckelub wr0, wr2 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1054 "wunpckehub wr1, wr2 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1055 "wunpckelub wr2, wr3 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1056 "wunpckehub wr3, wr3 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1057 "wunpckelub wr8, wr10 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1058 "wunpckehub wr9, wr10 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1059 "wunpckelub wr10, wr11 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1060 "wunpckehub wr11, wr11 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1061 "waddhus wr0, wr0, wr8 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1062 "waddhus wr1, wr1, wr9 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1063 "waddhus wr2, wr2, wr10 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1064 "waddhus wr3, wr3, wr11 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1065 "waddhus wr8, wr0, wr4 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1066 "waddhus wr9, wr1, wr5 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1067 "waddhus wr10, wr2, wr6 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1068 "waddhus wr11, wr3, wr7 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1069 "waddhus wr8, wr8, wr15 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1070 "waddhus wr9, wr9, wr15 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1071 "waddhus wr10, wr10, wr15 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1072 "waddhus wr11, wr11, wr15 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1073 "wsrlhg wr8, wr8, wcgr0 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1074 "wsrlhg wr9, wr9, wcgr0 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1075 "wldrd wr12, [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1076 "wldrd wr13, [%[block], #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1077 "wsrlhg wr10, wr10, wcgr0 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1078 "wsrlhg wr11, wr11, wcgr0 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1079 "wpackhus wr8, wr8, wr9 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1080 "wpackhus wr9, wr10, wr11 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1081 WAVG2B" wr8, wr8, wr12 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1082 WAVG2B" wr9, wr9, wr13 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1083 "wstrd wr8, [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1084 "wstrd wr9, [%[block], #8] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1085 "add %[block], %[block], %[line_size] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1086 "subs %[h], %[h], #2 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1087 "pld [%[block]] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1088 "pld [%[block], #32] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1089 "bne 1b \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1090 : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block) |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1091 : [line_size]"r"(line_size) |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1092 : "r12", "memory"); |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1093 } |