annotate armv4l/simple_idct_armv6.S @ 6323:e6da66f378c7 libavcodec

mpegvideo.h has two function declarations with the 'inline' specifier but no definition for those functions. The C standard requires a definition to appear in the same translation unit for any function declared with 'inline'. Most of the files including mpegvideo.h do not define those functions. Fix this by removing the 'inline' specifiers from the header. patch by Uoti Urpala
author diego
date Sun, 03 Feb 2008 17:54:30 +0000
parents 744e91a36a23
children 316762ae96a7
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
4427
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
1 /*
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
2 * Simple IDCT
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
3 *
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
4 * Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at>
5220
744e91a36a23 update my email address
mru
parents: 4867
diff changeset
5 * Copyright (c) 2007 Mans Rullgard <mans@mansr.com>
4427
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
6 *
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
7 * This file is part of FFmpeg.
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
8 *
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
9 * FFmpeg is free software; you can redistribute it and/or
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
10 * modify it under the terms of the GNU Lesser General Public
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
11 * License as published by the Free Software Foundation; either
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
12 * version 2.1 of the License, or (at your option) any later version.
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
13 *
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
14 * FFmpeg is distributed in the hope that it will be useful,
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
17 * Lesser General Public License for more details.
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
18 *
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
19 * You should have received a copy of the GNU Lesser General Public
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
20 * License along with FFmpeg; if not, write to the Free Software
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
22 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
23
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
24 #define W1 22725 /* cos(1*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
25 #define W2 21407 /* cos(2*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
26 #define W3 19266 /* cos(3*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
27 #define W4 16383 /* cos(4*M_PI/16)*sqrt(2)*(1<<14) + 0.5 would be 16384; NOTE(review): 16383 is presumably deliberate - confirm before changing */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
28 #define W5 12873 /* cos(5*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
29 #define W6 8867 /* cos(6*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
30 #define W7 4520 /* cos(7*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
31 #define ROW_SHIFT 11 /* right shift applied to results of the row pass */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
32 #define COL_SHIFT 20 /* right shift applied to results of the column pass */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
33
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
34 #define W13 (W1 | (W3 << 16)) /* packed pair: low halfword W1, high halfword W3 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
35 #define W26 (W2 | (W6 << 16)) /* packed pair: low halfword W2, high halfword W6 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
36 #define W42 (W4 | (W2 << 16)) /* packed pair: low halfword W4, high halfword W2 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
37 #define W42n (-W4&0xffff | (-W2 << 16)) /* negated pair: low halfword -W4 (masked to 16 bits), high halfword -W2 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
38 #define W46 (W4 | (W6 << 16)) /* packed pair: low halfword W4, high halfword W6 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
39 #define W57 (W5 | (W7 << 16)) /* packed pair: low halfword W5, high halfword W7 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
40
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
41 .text
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
42 .align
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
43 w13: .long W13 /* literal words below are read with pc-relative ldr, e.g. [pc, #(w13-.-8)] */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
44 w26: .long W26
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
45 w42: .long W42
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
46 w42n: .long W42n
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
47 w46: .long W46
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
48 w57: .long W57
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
49
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
50 /*
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
51 Compute partial IDCT of single row.
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
52 shift = right-shift amount applied later to the results (used here only for the rounding term 1 << (shift-1))
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
53 a1 = source address
4458
9efb3639d7fe save one cycle
mru
parents: 4457
diff changeset
54 a3 = row[2,0] <= 2 cycles
4457
99e960878498 remove redundant loads
mru
parents: 4452
diff changeset
55 a4 = row[3,1]
4458
9efb3639d7fe save one cycle
mru
parents: 4457
diff changeset
56 ip = w42 <= 2 cycles
4427
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
57
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
58 Output in registers v1--v8
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
59 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
60 .macro idct_row shift
4483
7a56dc39adef oops, revert accidental checkin
mru
parents: 4482
diff changeset
61 ldr lr, [pc, #(w46-.-8)] /* lr = W4 | (W6 << 16) */
7a56dc39adef oops, revert accidental checkin
mru
parents: 4482
diff changeset
62 mov a2, #(1<<(\shift-1)) /* a2 = rounding term, 1 << (shift-1); overwrites the caller's a2 */
7a56dc39adef oops, revert accidental checkin
mru
parents: 4482
diff changeset
63 smlad v1, a3, ip, a2 /* v1 = A0 = W4*row[0] + W2*row[2] + rounding */
7a56dc39adef oops, revert accidental checkin
mru
parents: 4482
diff changeset
64 smlsd v4, a3, ip, a2 /* v4 = A3 = W4*row[0] - W2*row[2] + rounding */
7a56dc39adef oops, revert accidental checkin
mru
parents: 4482
diff changeset
65 ldr ip, [pc, #(w13-.-8)] /* ip = W1 | (W3 << 16) */
7a56dc39adef oops, revert accidental checkin
mru
parents: 4482
diff changeset
66 ldr v7, [pc, #(w57-.-8)] /* v7 = W5 | (W7 << 16) */
7a56dc39adef oops, revert accidental checkin
mru
parents: 4482
diff changeset
67 smlad v2, a3, lr, a2 /* v2 = A1 = W4*row[0] + W6*row[2] + rounding */
7a56dc39adef oops, revert accidental checkin
mru
parents: 4482
diff changeset
68 smlsd v3, a3, lr, a2 /* v3 = A2 = W4*row[0] - W6*row[2] + rounding */
7a56dc39adef oops, revert accidental checkin
mru
parents: 4482
diff changeset
69
7a56dc39adef oops, revert accidental checkin
mru
parents: 4482
diff changeset
70 smuad v5, a4, ip /* v5 = B0 = W1*row[1] + W3*row[3] */
7a56dc39adef oops, revert accidental checkin
mru
parents: 4482
diff changeset
71 smusdx fp, a4, v7 /* fp = B3 = W7*row[1] - W5*row[3] */
4427
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
72 ldr lr, [a1, #12] /* lr = row[7,5] */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
73 pkhtb a3, ip, v7, asr #16 /* a3 = W7 | (W3 << 16) */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
74 pkhbt a2, ip, v7, lsl #16 /* a2 = W1 | (W5 << 16) */
4434
cab2986ffc0b theoretically save one cycle
mru
parents: 4427
diff changeset
75 smusdx v6, a3, a4 /* v6 = -B1 = W7*row[3] - W3*row[1] */
4427
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
76 smlad v5, lr, v7, v5 /* B0 += W5*row[5] + W7*row[7] */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
77 smusdx v7, a4, a2 /* v7 = B2 = W5*row[1] - W1*row[3] */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
78
4434
cab2986ffc0b theoretically save one cycle
mru
parents: 4427
diff changeset
79 ldr a4, [pc, #(w42n-.-8)] /* a4 = -W4 | (-W2 << 16) */
4427
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
80 smlad v7, lr, a3, v7 /* B2 += W7*row[5] + W3*row[7] */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
81 ldr a3, [a1, #4] /* a3 = row[6,4] */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
82 smlsdx fp, lr, ip, fp /* B3 += W3*row[5] - W1*row[7] */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
83 ldr ip, [pc, #(w46-.-8)] /* ip = W4 | (W6 << 16) */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
84 smlad v6, lr, a2, v6 /* B1 -= W1*row[5] + W5*row[7] (v6 holds -B1, so this adds) */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
85
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
86 smlad v2, a3, a4, v2 /* A1 += -W4*row[4] - W2*row[6] */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
87 smlsd v3, a3, a4, v3 /* A2 += -W4*row[4] + W2*row[6] */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
88 smlad v1, a3, ip, v1 /* A0 += W4*row[4] + W6*row[6] */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
89 smlsd v4, a3, ip, v4 /* A3 += W4*row[4] - W6*row[6] */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
90 .endm
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
91
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
92 /*
4452
c66326f1f635 optimize IDCT of rows with mostly zero coefficients
mru
parents: 4434
diff changeset
93 Compute partial IDCT of half row.
c66326f1f635 optimize IDCT of rows with mostly zero coefficients
mru
parents: 4434
diff changeset
94 shift = right-shift amount applied later to the results (used here only for the rounding term 1 << (shift-1))
c66326f1f635 optimize IDCT of rows with mostly zero coefficients
mru
parents: 4434
diff changeset
95 a3 = row[2,0]
c66326f1f635 optimize IDCT of rows with mostly zero coefficients
mru
parents: 4434
diff changeset
96 a4 = row[3,1]
4460
e7f56ad89720 missed a redundant load
mru
parents: 4458
diff changeset
97 ip = w42
4452
c66326f1f635 optimize IDCT of rows with mostly zero coefficients
mru
parents: 4434
diff changeset
98
c66326f1f635 optimize IDCT of rows with mostly zero coefficients
mru
parents: 4434
diff changeset
99 Output in registers v1--v8
c66326f1f635 optimize IDCT of rows with mostly zero coefficients
mru
parents: 4434
diff changeset
100 */
c66326f1f635 optimize IDCT of rows with mostly zero coefficients
mru
parents: 4434
diff changeset
101 .macro idct_row4 shift
4483
7a56dc39adef oops, revert accidental checkin
mru
parents: 4482
diff changeset
102 ldr lr, [pc, #(w46-.-8)] /* lr = W4 | (W6 << 16) */
7a56dc39adef oops, revert accidental checkin
mru
parents: 4482
diff changeset
103 ldr v7, [pc, #(w57-.-8)] /* v7 = W5 | (W7 << 16) */
7a56dc39adef oops, revert accidental checkin
mru
parents: 4482
diff changeset
104 mov a2, #(1<<(\shift-1)) /* a2 = rounding term, 1 << (shift-1); overwrites the caller's a2 */
7a56dc39adef oops, revert accidental checkin
mru
parents: 4482
diff changeset
105 smlad v1, a3, ip, a2 /* v1 = A0 = W4*row[0] + W2*row[2] + rounding */
7a56dc39adef oops, revert accidental checkin
mru
parents: 4482
diff changeset
106 smlsd v4, a3, ip, a2 /* v4 = A3 = W4*row[0] - W2*row[2] + rounding */
7a56dc39adef oops, revert accidental checkin
mru
parents: 4482
diff changeset
107 ldr ip, [pc, #(w13-.-8)] /* ip = W1 | (W3 << 16) */
7a56dc39adef oops, revert accidental checkin
mru
parents: 4482
diff changeset
108 smlad v2, a3, lr, a2 /* v2 = A1 = W4*row[0] + W6*row[2] + rounding */
7a56dc39adef oops, revert accidental checkin
mru
parents: 4482
diff changeset
109 smlsd v3, a3, lr, a2 /* v3 = A2 = W4*row[0] - W6*row[2] + rounding */
7a56dc39adef oops, revert accidental checkin
mru
parents: 4482
diff changeset
110 smusdx fp, a4, v7 /* fp = B3 = W7*row[1] - W5*row[3] */
7a56dc39adef oops, revert accidental checkin
mru
parents: 4482
diff changeset
111 smuad v5, a4, ip /* v5 = B0 = W1*row[1] + W3*row[3] */
4452
c66326f1f635 optimize IDCT of rows with mostly zero coefficients
mru
parents: 4434
diff changeset
112 pkhtb a3, ip, v7, asr #16 /* a3 = W7 | (W3 << 16) */
c66326f1f635 optimize IDCT of rows with mostly zero coefficients
mru
parents: 4434
diff changeset
113 pkhbt a2, ip, v7, lsl #16 /* a2 = W1 | (W5 << 16) */
c66326f1f635 optimize IDCT of rows with mostly zero coefficients
mru
parents: 4434
diff changeset
114 smusdx v6, a3, a4 /* v6 = -B1 = W7*row[3] - W3*row[1] */
c66326f1f635 optimize IDCT of rows with mostly zero coefficients
mru
parents: 4434
diff changeset
115 smusdx v7, a4, a2 /* v7 = B2 = W5*row[1] - W1*row[3] */
c66326f1f635 optimize IDCT of rows with mostly zero coefficients
mru
parents: 4434
diff changeset
116 .endm
c66326f1f635 optimize IDCT of rows with mostly zero coefficients
mru
parents: 4434
diff changeset
117
c66326f1f635 optimize IDCT of rows with mostly zero coefficients
mru
parents: 4434
diff changeset
118 /*
4427
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
119 Compute final part of IDCT single row without shift.
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
120 Input in registers v1--v8
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
121 Output in registers ip, v1--v3, lr, v5--v7
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
122 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
123 .macro idct_finish
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
124 add ip, v1, v5 /* ip = A0 + B0 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
125 sub lr, v1, v5 /* lr = A0 - B0 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
126 sub v1, v2, v6 /* v1 = A1 + B1 (v6 holds -B1, hence the sub) */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
127 add v5, v2, v6 /* v5 = A1 - B1 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
128 add v2, v3, v7 /* v2 = A2 + B2 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
129 sub v6, v3, v7 /* v6 = A2 - B2 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
130 add v3, v4, fp /* v3 = A3 + B3 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
131 sub v7, v4, fp /* v7 = A3 - B3 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
132 .endm
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
133
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
134 /*
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
135 Compute final part of IDCT single row.
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
136 shift = right-shift amount
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
137 Input/output in registers v1--v8
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
138 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
139 .macro idct_finish_shift shift
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
140 add a4, v1, v5 /* a4 = A0 + B0 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
141 sub a3, v1, v5 /* a3 = A0 - B0 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
142 mov v1, a4, asr #\shift /* v1 = (A0 + B0) >> shift (arithmetic) */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
143 mov v5, a3, asr #\shift /* v5 = (A0 - B0) >> shift */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
144
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
145 sub a4, v2, v6 /* a4 = A1 + B1 (v6 holds -B1, hence the sub) */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
146 add a3, v2, v6 /* a3 = A1 - B1 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
147 mov v2, a4, asr #\shift /* v2 = (A1 + B1) >> shift */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
148 mov v6, a3, asr #\shift /* v6 = (A1 - B1) >> shift */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
149
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
150 add a4, v3, v7 /* a4 = A2 + B2 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
151 sub a3, v3, v7 /* a3 = A2 - B2 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
152 mov v3, a4, asr #\shift /* v3 = (A2 + B2) >> shift */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
153 mov v7, a3, asr #\shift /* v7 = (A2 - B2) >> shift */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
154
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
155 add a4, v4, fp /* a4 = A3 + B3 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
156 sub a3, v4, fp /* a3 = A3 - B3 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
157 mov v4, a4, asr #\shift /* v4 = (A3 + B3) >> shift */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
158 mov fp, a3, asr #\shift /* fp = (A3 - B3) >> shift */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
159 .endm
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
160
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
161 /*
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
162 Compute final part of IDCT single row, saturating results at 8 bits.
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
163 shift = right-shift amount
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
164 Input/output in registers v1--v8
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
165 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
166 .macro idct_finish_shift_sat shift
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
167 add a4, v1, v5 /* a4 = A0 + B0 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
168 sub ip, v1, v5 /* ip = A0 - B0 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
169 usat v1, #8, a4, asr #\shift /* v1 = (A0 + B0) >> shift, saturated to 0..255 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
170 usat v5, #8, ip, asr #\shift /* v5 = (A0 - B0) >> shift, saturated to 0..255 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
171
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
172 sub a4, v2, v6 /* a4 = A1 + B1 (v6 holds -B1, hence the sub) */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
173 add ip, v2, v6 /* ip = A1 - B1 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
174 usat v2, #8, a4, asr #\shift /* v2 = (A1 + B1) >> shift, saturated to 0..255 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
175 usat v6, #8, ip, asr #\shift /* v6 = (A1 - B1) >> shift, saturated to 0..255 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
176
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
177 add a4, v3, v7 /* a4 = A2 + B2 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
178 sub ip, v3, v7 /* ip = A2 - B2 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
179 usat v3, #8, a4, asr #\shift /* v3 = (A2 + B2) >> shift, saturated to 0..255 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
180 usat v7, #8, ip, asr #\shift /* v7 = (A2 - B2) >> shift, saturated to 0..255 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
181
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
182 add a4, v4, fp /* a4 = A3 + B3 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
183 sub ip, v4, fp /* ip = A3 - B3 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
184 usat v4, #8, a4, asr #\shift /* v4 = (A3 + B3) >> shift, saturated to 0..255 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
185 usat fp, #8, ip, asr #\shift /* fp = (A3 - B3) >> shift, saturated to 0..255 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
186 .endm
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
187
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
188 /*
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
189 Compute IDCT of single row, storing as column.
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
190 a1 = source
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
191 a2 = dest
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
192 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
193 .align
4867
97d82c7585b4 add .type foo, %function directives for the benefit of debuggers
mru
parents: 4483
diff changeset
194 .type idct_row_armv6, %function
4427
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
195 .func idct_row_armv6
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
196 idct_row_armv6: /* NOTE(review): a3, a4, ip, v1-v7 and fp are overwritten and not saved here - presumably the caller preserves them; confirm */
4483
7a56dc39adef oops, revert accidental checkin
mru
parents: 4482
diff changeset
197 str lr, [sp, #-4]! /* push return address */
7a56dc39adef oops, revert accidental checkin
mru
parents: 4482
diff changeset
198
7a56dc39adef oops, revert accidental checkin
mru
parents: 4482
diff changeset
199 ldr lr, [a1, #12] /* lr = row[7,5] */
7a56dc39adef oops, revert accidental checkin
mru
parents: 4482
diff changeset
200 ldr ip, [a1, #4] /* ip = row[6,4] */
4452
c66326f1f635 optimize IDCT of rows with mostly zero coefficients
mru
parents: 4434
diff changeset
201 ldr a4, [a1, #8] /* a4 = row[3,1] */
c66326f1f635 optimize IDCT of rows with mostly zero coefficients
mru
parents: 4434
diff changeset
202 ldr a3, [a1] /* a3 = row[2,0] */
4483
7a56dc39adef oops, revert accidental checkin
mru
parents: 4482
diff changeset
203 orrs lr, lr, ip /* lr = row[7,5] | row[6,4]; Z set when all four are zero */
7a56dc39adef oops, revert accidental checkin
mru
parents: 4482
diff changeset
204 cmpeq lr, a4 /* if still zero: Z stays set only when row[3,1] is zero too */
7a56dc39adef oops, revert accidental checkin
mru
parents: 4482
diff changeset
205 cmpeq lr, a3, lsr #16 /* if still zero: Z stays set only when the upper half of a3 (row[2]) is zero */
4452
c66326f1f635 optimize IDCT of rows with mostly zero coefficients
mru
parents: 4434
diff changeset
206 beq 1f /* row[1..7] all zero: take the shortcut at 1: */
4483
7a56dc39adef oops, revert accidental checkin
mru
parents: 4482
diff changeset
207 str a2, [sp, #-4]! /* save dest; a2 is overwritten inside idct_row/idct_row4 */
7a56dc39adef oops, revert accidental checkin
mru
parents: 4482
diff changeset
208 ldr ip, [pc, #(w42-.-8)] /* ip = W4 | (W2 << 16) */
7a56dc39adef oops, revert accidental checkin
mru
parents: 4482
diff changeset
209 cmp lr, #0 /* lr still holds row[7,5] | row[6,4] */
7a56dc39adef oops, revert accidental checkin
mru
parents: 4482
diff changeset
210 beq 2f /* row[4..7] all zero: use the shorter idct_row4 */
4427
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
211
4483
7a56dc39adef oops, revert accidental checkin
mru
parents: 4482
diff changeset
212 idct_row ROW_SHIFT
7a56dc39adef oops, revert accidental checkin
mru
parents: 4482
diff changeset
213 b 3f
4452
c66326f1f635 optimize IDCT of rows with mostly zero coefficients
mru
parents: 4434
diff changeset
214
4483
7a56dc39adef oops, revert accidental checkin
mru
parents: 4482
diff changeset
215 2: idct_row4 ROW_SHIFT
4452
c66326f1f635 optimize IDCT of rows with mostly zero coefficients
mru
parents: 4434
diff changeset
216
c66326f1f635 optimize IDCT of rows with mostly zero coefficients
mru
parents: 4434
diff changeset
217 3: ldr a2, [sp], #4 /* restore dest */
4483
7a56dc39adef oops, revert accidental checkin
mru
parents: 4482
diff changeset
218 idct_finish_shift ROW_SHIFT
4427
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
219
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
220 strh v1, [a2] /* (A0 + B0) >> ROW_SHIFT; halfword stores, 16 bytes apart, in interleaved slot order 0,2,4,6,1,3,5,7 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
221 strh v2, [a2, #(16*2)] /* (A1 + B1) >> ROW_SHIFT */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
222 strh v3, [a2, #(16*4)] /* (A2 + B2) >> ROW_SHIFT */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
223 strh v4, [a2, #(16*6)] /* (A3 + B3) >> ROW_SHIFT */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
224 strh fp, [a2, #(16*1)] /* (A3 - B3) >> ROW_SHIFT */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
225 strh v7, [a2, #(16*3)] /* (A2 - B2) >> ROW_SHIFT */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
226 strh v6, [a2, #(16*5)] /* (A1 - B1) >> ROW_SHIFT */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
227 strh v5, [a2, #(16*7)] /* (A0 - B0) >> ROW_SHIFT */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
228
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
229 ldr pc, [sp], #4 /* return: pop the saved lr straight into pc */
4452
c66326f1f635 optimize IDCT of rows with mostly zero coefficients
mru
parents: 4434
diff changeset
230
c66326f1f635 optimize IDCT of rows with mostly zero coefficients
mru
parents: 4434
diff changeset
231 1: mov a3, a3, lsl #3 /* shortcut: every output is row[0] << 3 (strh keeps the low 16 bits) */
c66326f1f635 optimize IDCT of rows with mostly zero coefficients
mru
parents: 4434
diff changeset
232 strh a3, [a2]
c66326f1f635 optimize IDCT of rows with mostly zero coefficients
mru
parents: 4434
diff changeset
233 strh a3, [a2, #(16*2)]
c66326f1f635 optimize IDCT of rows with mostly zero coefficients
mru
parents: 4434
diff changeset
234 strh a3, [a2, #(16*4)]
c66326f1f635 optimize IDCT of rows with mostly zero coefficients
mru
parents: 4434
diff changeset
235 strh a3, [a2, #(16*6)]
c66326f1f635 optimize IDCT of rows with mostly zero coefficients
mru
parents: 4434
diff changeset
236 strh a3, [a2, #(16*1)]
c66326f1f635 optimize IDCT of rows with mostly zero coefficients
mru
parents: 4434
diff changeset
237 strh a3, [a2, #(16*3)]
c66326f1f635 optimize IDCT of rows with mostly zero coefficients
mru
parents: 4434
diff changeset
238 strh a3, [a2, #(16*5)]
c66326f1f635 optimize IDCT of rows with mostly zero coefficients
mru
parents: 4434
diff changeset
239 strh a3, [a2, #(16*7)]
4483
7a56dc39adef oops, revert accidental checkin
mru
parents: 4482
diff changeset
240 ldr pc, [sp], #4 /* return; only lr was pushed on this path */
4427
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
241 .endfunc
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
242
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
243 /*
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
244 Compute IDCT of single column, read as row.
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
245 a1 = source
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
246 a2 = dest
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
247 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
248 .align
4867
97d82c7585b4 add .type foo, %function directives for the benefit of debuggers
mru
parents: 4483
diff changeset
249 .type idct_col_armv6, %function
4427
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
250 .func idct_col_armv6
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
251 idct_col_armv6: /* NOTE(review): a3, a4, ip, v1-v7 and fp are overwritten and not saved here - presumably the caller preserves them; confirm */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
252 stmfd sp!, {a2, lr} /* save dest (overwritten inside idct_row) and return address */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
253
4457
99e960878498 remove redundant loads
mru
parents: 4452
diff changeset
254 ldr a3, [a1] /* a3 = row[2,0] */
4483
7a56dc39adef oops, revert accidental checkin
mru
parents: 4482
diff changeset
255 ldr ip, [pc, #(w42-.-8)] /* ip = W4 | (W2 << 16) */
4457
99e960878498 remove redundant loads
mru
parents: 4452
diff changeset
256 ldr a4, [a1, #8] /* a4 = row[3,1] */
4427
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
257 idct_row COL_SHIFT
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
258 ldr a2, [sp], #4 /* restore dest; the return address stays on the stack */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
259 idct_finish_shift COL_SHIFT
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
260
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
261 strh v1, [a2] /* (A0 + B0) >> COL_SHIFT; halfword stores, 16 bytes apart, in natural order 0..7 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
262 strh v2, [a2, #(16*1)] /* (A1 + B1) >> COL_SHIFT */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
263 strh v3, [a2, #(16*2)] /* (A2 + B2) >> COL_SHIFT */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
264 strh v4, [a2, #(16*3)] /* (A3 + B3) >> COL_SHIFT */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
265 strh fp, [a2, #(16*4)] /* (A3 - B3) >> COL_SHIFT */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
266 strh v7, [a2, #(16*5)] /* (A2 - B2) >> COL_SHIFT */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
267 strh v6, [a2, #(16*6)] /* (A1 - B1) >> COL_SHIFT */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
268 strh v5, [a2, #(16*7)] /* (A0 - B0) >> COL_SHIFT */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
269
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
270 ldr pc, [sp], #4 /* return: pop the saved lr straight into pc */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
271 .endfunc
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
272
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
273 /*
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
274 Compute IDCT of single column, read as row, store saturated 8-bit.
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
275 a1 = source
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
276 a2 = dest
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
277 a3 = line size
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
278 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
279 .align
4867
97d82c7585b4 add .type foo, %function directives for the benefit of debuggers
mru
parents: 4483
diff changeset
280 .type idct_col_put_armv6, %function
4427
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
281 .func idct_col_put_armv6
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
282 idct_col_put_armv6: /* NOTE(review): a4, ip, v1-v7 and fp are overwritten and not saved here - presumably the caller preserves them; confirm */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
283 stmfd sp!, {a2, a3, lr} /* save dest, line size (both overwritten inside idct_row) and return address */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
284
4457
99e960878498 remove redundant loads
mru
parents: 4452
diff changeset
285 ldr a3, [a1] /* a3 = row[2,0] */
4483
7a56dc39adef oops, revert accidental checkin
mru
parents: 4482
diff changeset
286 ldr ip, [pc, #(w42-.-8)] /* ip = W4 | (W2 << 16) */
4457
99e960878498 remove redundant loads
mru
parents: 4452
diff changeset
287 ldr a4, [a1, #8] /* a4 = row[3,1] */
4427
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
288 idct_row COL_SHIFT
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
289 ldmfd sp!, {a2, a3} /* restore dest and line size; the return address stays on the stack */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
290 idct_finish_shift_sat COL_SHIFT
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
291
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
292 strb v1, [a2], a3 /* store one saturated byte, then advance dest by the line size */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
293 strb v2, [a2], a3
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
294 strb v3, [a2], a3
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
295 strb v4, [a2], a3
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
296 strb fp, [a2], a3
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
297 strb v7, [a2], a3
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
298 strb v6, [a2], a3
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
299 strb v5, [a2], a3
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
300
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
301 sub a2, a2, a3, lsl #3 /* undo the eight post-increments: a2 is back at its entry value */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
302
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
303 ldr pc, [sp], #4 /* return: pop the saved lr straight into pc */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
304 .endfunc
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
305
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
306 /*
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
307 Compute IDCT of single column, read as row, add/store saturated 8-bit.
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
308 a1 = source
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
309 a2 = dest
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
310 a3 = line size
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
311 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
312 .align
4867
97d82c7585b4 add .type foo, %function directives for the benefit of debuggers
mru
parents: 4483
diff changeset
/*
 * idct_col_add_armv6: transform one column and add it to an 8-bit destination.
 *
 * In:   a1 = source coefficients (the words at offsets 0 and 8 are preloaded
 *            into a3 and a4 before the idct_row macro runs)
 *       a2 = destination byte pointer
 *       a3 = destination stride in bytes
 * Out:  the bytes at a2, a2 + a3, ... a2 + 7 * a3 are updated. a2 and a3 hold
 *       their entry values again on return.
 *
 * a2, a3 and lr are parked on the stack while the idct_row and idct_finish
 * macros run (both macros are defined elsewhere in this file). Afterwards the
 * eight results are taken from ip, v1, v2, v3, v7, v6, v5 and lr in that row
 * order. Each one is shifted right by COL_SHIFT, added to the destination
 * byte of its row and clamped to 0..255 with usat before being stored.
 * Loads, adds and stores are interleaved by hand. Keep the order.
 *
 * Writes a4, v1-v7, fp, ip and lr in the code below.
 * NOTE(review): registers touched by idct_row and idct_finish are not visible
 * here - confirm against the macro definitions.
 *
 * Change: an earlier load of fp from row 4, placed right after the row 1
 * load, was removed. fp was overwritten by the row 5 load before any read.
 */
313 .type idct_col_add_armv6, %function
4427
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
314 .func idct_col_add_armv6
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
315 idct_col_add_armv6:
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
316 stmfd sp!, {a2, a3, lr} /* park dest, stride and return address */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
317
4457
99e960878498 remove redundant loads
mru
parents: 4452
diff changeset
318 ldr a3, [a1] /* a3 = row[2,0] */
4483
7a56dc39adef oops, revert accidental checkin
mru
parents: 4482
diff changeset
319 ldr ip, [pc, #(w42-.-8)] /* ip = W4 | (W2 << 16) */
4457
99e960878498 remove redundant loads
mru
parents: 4452
diff changeset
320 ldr a4, [a1, #8] /* a4 = row[3,1] */
4427
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
321 idct_row COL_SHIFT
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
322 ldmfd sp!, {a2, a3} /* dest and stride back, lr stays on the stack */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
323 idct_finish
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
324
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
325 ldrb a4, [a2] /* a4 = dest row 0 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
326 ldrb v4, [a2, a3] /* v4 = dest row 1 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
328 add ip, a4, ip, asr #COL_SHIFT /* row 0 sum */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
329 usat ip, #8, ip /* clamp to 0..255 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
330 add v1, v4, v1, asr #COL_SHIFT /* row 1 sum */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
331 strb ip, [a2], a3 /* store row 0, a2 now points at row 1 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
332 ldrb ip, [a2, a3] /* ip = dest row 2 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
333 usat v1, #8, v1
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
334 ldrb fp, [a2, a3, lsl #2] /* fp = dest row 5 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
335 add v2, ip, v2, asr #COL_SHIFT /* row 2 sum */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
336 usat v2, #8, v2
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
337 strb v1, [a2], a3 /* store row 1, a2 now points at row 2 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
338 ldrb a4, [a2, a3] /* a4 = dest row 3 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
339 ldrb ip, [a2, a3, lsl #2] /* ip = dest row 6 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
340 strb v2, [a2], a3 /* store row 2, a2 now points at row 3 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
341 ldrb v4, [a2, a3] /* v4 = dest row 4 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
342 ldrb v1, [a2, a3, lsl #2] /* v1 = dest row 7, its row 1 value is already stored */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
343 add v3, a4, v3, asr #COL_SHIFT /* row 3 sum */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
344 usat v3, #8, v3
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
345 add v7, v4, v7, asr #COL_SHIFT /* row 4 sum */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
346 usat v7, #8, v7
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
347 add v6, fp, v6, asr #COL_SHIFT /* row 5 sum */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
348 usat v6, #8, v6
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
349 add v5, ip, v5, asr #COL_SHIFT /* row 6 sum */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
350 usat v5, #8, v5
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
351 add lr, v1, lr, asr #COL_SHIFT /* row 7 sum */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
352 usat lr, #8, lr
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
353 strb v3, [a2], a3 /* store rows 3 to 7, a2 steps one stride per store */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
354 strb v7, [a2], a3
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
355 strb v6, [a2], a3
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
356 strb v5, [a2], a3
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
357 strb lr, [a2], a3
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
358
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
359 sub a2, a2, a3, lsl #3 /* undo the eight post-increments, a2 is dest again */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
360
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
361 ldr pc, [sp], #4 /* return by popping the saved lr into pc */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
362 .endfunc
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
363
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
364 /*
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
365 Compute 8 IDCT row transforms.
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
366 func = IDCT row->col function
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
367 width = width of columns in bytes
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
368 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
/*
 * idct_rows expands to eight bl calls of the function named by func.
 * Between calls a2 advances by width bytes. a1 steps by +32 three times,
 * then by -80, then by +32 three times, so the eight calls see a1 at entry
 * offsets 0, 32, 64, 96, 16, 48, 80 and 112. The final sub brings a1 back to
 * its entry value. a2 is left at its entry value plus 7 * width.
 * lr is overwritten by bl, so the caller must have saved it.
 * NOTE(review): the offsets above assume the called function preserves a1
 * and a2 - confirm for every function passed in.
 */
369 .macro idct_rows func width
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
370 bl \func
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
371 add a1, a1, #(16*2) /* a1 at entry + 32 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
372 add a2, a2, #\width
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
373 bl \func
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
374 add a1, a1, #(16*2) /* a1 at entry + 64 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
375 add a2, a2, #\width
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
376 bl \func
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
377 add a1, a1, #(16*2) /* a1 at entry + 96 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
378 add a2, a2, #\width
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
379 bl \func
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
380 sub a1, a1, #(16*5) /* 96 - 80: a1 at entry + 16, start of the second group */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
381 add a2, a2, #\width
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
382 bl \func
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
383 add a1, a1, #(16*2) /* a1 at entry + 48 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
384 add a2, a2, #\width
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
385 bl \func
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
386 add a1, a1, #(16*2) /* a1 at entry + 80 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
387 add a2, a2, #\width
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
388 bl \func
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
389 add a1, a1, #(16*2) /* a1 at entry + 112 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
390 add a2, a2, #\width
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
391 bl \func
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
392
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
393 sub a1, a1, #(16*7) /* 112 - 112: a1 back at its entry value */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
394 .endm
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
395
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
/*
 * ff_simple_idct_armv6: exported entry point, a1 = data on entry.
 *
 * Pass 1: idct_rows calls idct_row_armv6 eight times with a1 walking through
 *         data and a2 walking through a 128-byte scratch area on the stack,
 *         2 bytes further per call.
 * Pass 2: idct_rows calls idct_col_armv6 eight times with a1 walking through
 *         the scratch area and a2 walking through data, 2 bytes per call.
 * v1-v7, fp and lr are saved on entry. The final ldmfd restores them and
 * returns by loading the saved lr into pc.
 * NOTE(review): idct_row_armv6 and idct_col_armv6 are defined outside this
 * section - presumably each reads from a1 and writes through a2, confirm.
 */
396 .align
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
397 .global ff_simple_idct_armv6
4867
97d82c7585b4 add .type foo, %function directives for the benefit of debuggers
mru
parents: 4483
diff changeset
398 .type ff_simple_idct_armv6, %function
4427
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
399 .func ff_simple_idct_armv6
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
400 /* void ff_simple_idct_armv6(DCTELEM *data); */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
401 ff_simple_idct_armv6:
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
402 stmfd sp!, {v1, v2, v3, v4, v5, v6, v7, fp, lr} /* save registers used by the passes */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
403 sub sp, sp, #128 /* reserve the scratch area */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
404
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
405 mov a2, sp /* pass 1: a1 = data, a2 = scratch */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
406 idct_rows idct_row_armv6, 2
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
407 mov a2, a1 /* pass 2: a2 = data, idct_rows left a1 at its entry value */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
408 mov a1, sp /* pass 2: a1 = scratch */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
409 idct_rows idct_col_armv6, 2
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
410
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
411 add sp, sp, #128 /* release the scratch area */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
412 ldmfd sp!, {v1, v2, v3, v4, v5, v6, v7, fp, pc} /* restore and return */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
413 .endfunc
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
414
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
/*
 * ff_simple_idct_add_armv6: exported entry point.
 * On entry a1 = dest, a2 = line_size, a3 = data (see the prototype below).
 *
 * dest and line_size are pushed together with the saved registers, so after
 * the 128-byte scratch area is reserved they sit at sp + 128 and sp + 128 + 4.
 * Pass 1: idct_rows calls idct_row_armv6 eight times with a1 walking through
 *         data and a2 walking through the scratch area, 2 bytes per call.
 * Pass 2: idct_rows calls idct_col_add_armv6 eight times with a1 walking
 *         through the scratch area, a2 = dest advancing 1 byte per call and
 *         a3 = line_size.
 * The epilogue drops the scratch area plus the two saved arguments, then
 * restores v1-v7 and fp and returns by loading the saved lr into pc.
 * NOTE(review): idct_row_armv6 is defined outside this section - presumably
 * it reads from a1 and writes through a2, confirm.
 */
415 .align
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
416 .global ff_simple_idct_add_armv6
4867
97d82c7585b4 add .type foo, %function directives for the benefit of debuggers
mru
parents: 4483
diff changeset
417 .type ff_simple_idct_add_armv6, %function
4427
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
418 .func ff_simple_idct_add_armv6
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
419 /* ff_simple_idct_add_armv6(uint8_t *dest, int line_size, DCTELEM *data); */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
420 ff_simple_idct_add_armv6:
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
421 stmfd sp!, {a1, a2, v1, v2, v3, v4, v5, v6, v7, fp, lr} /* a1 and a2 land lowest on the stack */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
422 sub sp, sp, #128 /* reserve the scratch area */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
423
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
424 mov a1, a3 /* pass 1: a1 = data */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
425 mov a2, sp /* pass 1: a2 = scratch */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
426 idct_rows idct_row_armv6, 2
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
427 mov a1, sp /* pass 2: a1 = scratch */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
428 ldr a2, [sp, #128] /* a2 = saved dest */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
429 ldr a3, [sp, #(128+4)] /* a3 = saved line_size */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
430 idct_rows idct_col_add_armv6, 1
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
431
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
432 add sp, sp, #(128+8) /* drop scratch plus the saved a1 and a2 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
433 ldmfd sp!, {v1, v2, v3, v4, v5, v6, v7, fp, pc} /* restore and return */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
434 .endfunc
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
435
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
/*
 * ff_simple_idct_put_armv6: exported entry point.
 * On entry a1 = dest, a2 = line_size, a3 = data (see the prototype below).
 *
 * dest and line_size are pushed together with the saved registers, so after
 * the 128-byte scratch area is reserved they sit at sp + 128 and sp + 128 + 4.
 * Pass 1: idct_rows calls idct_row_armv6 eight times with a1 walking through
 *         data and a2 walking through the scratch area, 2 bytes per call.
 * Pass 2: idct_rows calls idct_col_put_armv6 eight times with a1 walking
 *         through the scratch area, a2 = dest advancing 1 byte per call and
 *         a3 = line_size.
 * The epilogue drops the scratch area plus the two saved arguments, then
 * restores v1-v7 and fp and returns by loading the saved lr into pc.
 * NOTE(review): idct_row_armv6 and idct_col_put_armv6 are defined outside
 * this section - their exact register contract should be confirmed there.
 */
436 .align
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
437 .global ff_simple_idct_put_armv6
4867
97d82c7585b4 add .type foo, %function directives for the benefit of debuggers
mru
parents: 4483
diff changeset
438 .type ff_simple_idct_put_armv6, %function
4427
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
439 .func ff_simple_idct_put_armv6
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
440 /* ff_simple_idct_put_armv6(uint8_t *dest, int line_size, DCTELEM *data); */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
441 ff_simple_idct_put_armv6:
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
442 stmfd sp!, {a1, a2, v1, v2, v3, v4, v5, v6, v7, fp, lr} /* a1 and a2 land lowest on the stack */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
443 sub sp, sp, #128 /* reserve the scratch area */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
444
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
445 mov a1, a3 /* pass 1: a1 = data */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
446 mov a2, sp /* pass 1: a2 = scratch */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
447 idct_rows idct_row_armv6, 2
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
448 mov a1, sp /* pass 2: a1 = scratch */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
449 ldr a2, [sp, #128] /* a2 = saved dest */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
450 ldr a3, [sp, #(128+4)] /* a3 = saved line_size */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
451 idct_rows idct_col_put_armv6, 1
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
452
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
453 add sp, sp, #(128+8) /* drop scratch plus the saved a1 and a2 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
454 ldmfd sp!, {v1, v2, v3, v4, v5, v6, v7, fp, pc} /* restore and return */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
455 .endfunc