annotate armv4l/simple_idct_armv6.S @ 4580:55d7ebd2d699 libavcodec

fix chroma mc2 bug, this is based on a patch by (Oleg Metelitsa oleg hitron co kr) and does slow the mc2 chroma put down, avg interestingly seems unaffected speedwise on duron; this of course should rather be done in a way which doesn't slow it down, but it's better to be a few % slower but correct than incorrect
author michael
date Fri, 23 Feb 2007 14:29:13 +0000
parents 7a56dc39adef
children 97d82c7585b4
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
4427
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
1 /*
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
2 * Simple IDCT
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
3 *
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
4 * Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at>
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
5 * Copyright (c) 2007 Mans Rullgard <mru@inprovide.com>
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
6 *
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
7 * This file is part of FFmpeg.
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
8 *
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
9 * FFmpeg is free software; you can redistribute it and/or
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
10 * modify it under the terms of the GNU Lesser General Public
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
11 * License as published by the Free Software Foundation; either
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
12 * version 2.1 of the License, or (at your option) any later version.
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
13 *
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
14 * FFmpeg is distributed in the hope that it will be useful,
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
17 * Lesser General Public License for more details.
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
18 *
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
19 * You should have received a copy of the GNU Lesser General Public
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
20 * License along with FFmpeg; if not, write to the Free Software
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
22 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
23
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
24 #define W1 22725 /* cos(1*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
25 #define W2 21407 /* cos(2*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
26 #define W3 19266 /* cos(3*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
27 #define W4 16383 /* cos(4*M_PI/16)*sqrt(2)*(1<<14) + 0.5 would give 16384; NOTE(review): 16383 is presumably deliberate -- confirm */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
28 #define W5 12873 /* cos(5*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
29 #define W6 8867 /* cos(6*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
30 #define W7 4520 /* cos(7*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
31 #define ROW_SHIFT 11 /* shift amount used by idct_row_armv6 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
32 #define COL_SHIFT 20 /* shift amount used by idct_col_armv6 and its put/add variants */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
33
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
34 #define W13 (W1 | (W3 << 16)) /* packed pair: W1 in bits 0-15, W3 in bits 16-31 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
35 #define W26 (W2 | (W6 << 16)) /* W2 in bits 0-15, W6 in bits 16-31 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
36 #define W42 (W4 | (W2 << 16)) /* W4 in bits 0-15, W2 in bits 16-31 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
37 #define W42n (-W4&0xffff | (-W2 << 16)) /* -W4 in bits 0-15, -W2 in bits 16-31 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
38 #define W46 (W4 | (W6 << 16)) /* W4 in bits 0-15, W6 in bits 16-31 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
39 #define W57 (W5 | (W7 << 16)) /* W5 in bits 0-15, W7 in bits 16-31 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
40
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
41 .text
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
42 .align
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
43 w13: .long W13 /* literal words; the code below loads them pc-relative */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
44 w26: .long W26
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
45 w42: .long W42
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
46 w42n: .long W42n
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
47 w46: .long W46
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
48 w57: .long W57
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
49
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
50 /*
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
51 Compute partial IDCT of single row.
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
52 shift = left-shift amount
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
53 a1 = source address
4458
9efb3639d7fe save one cycle
mru
parents: 4457
diff changeset
54 a3 = row[2,0] <= 2 cycles
4457
99e960878498 remove redundant loads
mru
parents: 4452
diff changeset
55 a4 = row[3,1]
4458
9efb3639d7fe save one cycle
mru
parents: 4457
diff changeset
56 ip = w42 <= 2 cycles
4427
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
57
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
58 Output in registers v1--v8
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
59 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
60 .macro idct_row shift /* full 8-coefficient row: computes A0..A3 and B0..B3 (v6 holds -B1) */
4483
7a56dc39adef oops, revert accidental checkin
mru
parents: 4482
diff changeset
61 ldr lr, [pc, #(w46-.-8)] /* lr = W4 | (W6 << 16) */
7a56dc39adef oops, revert accidental checkin
mru
parents: 4482
diff changeset
62 mov a2, #(1<<(\shift-1)) /* a2 = rounding term 1 << (shift - 1) */
7a56dc39adef oops, revert accidental checkin
mru
parents: 4482
diff changeset
63 smlad v1, a3, ip, a2 /* v1 = A0 = W4*row[0] + W2*row[2] + rounding */
7a56dc39adef oops, revert accidental checkin
mru
parents: 4482
diff changeset
64 smlsd v4, a3, ip, a2 /* v4 = A3 = W4*row[0] - W2*row[2] + rounding */
7a56dc39adef oops, revert accidental checkin
mru
parents: 4482
diff changeset
65 ldr ip, [pc, #(w13-.-8)] /* ip = W1 | (W3 << 16) */
7a56dc39adef oops, revert accidental checkin
mru
parents: 4482
diff changeset
66 ldr v7, [pc, #(w57-.-8)] /* v7 = W5 | (W7 << 16) */
7a56dc39adef oops, revert accidental checkin
mru
parents: 4482
diff changeset
67 smlad v2, a3, lr, a2 /* v2 = A1 = W4*row[0] + W6*row[2] + rounding */
7a56dc39adef oops, revert accidental checkin
mru
parents: 4482
diff changeset
68 smlsd v3, a3, lr, a2 /* v3 = A2 = W4*row[0] - W6*row[2] + rounding */
7a56dc39adef oops, revert accidental checkin
mru
parents: 4482
diff changeset
69
7a56dc39adef oops, revert accidental checkin
mru
parents: 4482
diff changeset
70 smuad v5, a4, ip /* v5 = B0 = W1*row[1] + W3*row[3] */
7a56dc39adef oops, revert accidental checkin
mru
parents: 4482
diff changeset
71 smusdx fp, a4, v7 /* fp = B3 = W7*row[1] - W5*row[3] */
4427
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
72 ldr lr, [a1, #12] /* lr = row[7,5] */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
73 pkhtb a3, ip, v7, asr #16 /* a3 = W7 | (W3 << 16) */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
74 pkhbt a2, ip, v7, lsl #16 /* a2 = W1 | (W5 << 16) */
4434
cab2986ffc0b theoretically save one cycle
mru
parents: 4427
diff changeset
75 smusdx v6, a3, a4 /* v6 = -B1 = W7*row[3] - W3*row[1] */
4427
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
76 smlad v5, lr, v7, v5 /* B0 += W5*row[5] + W7*row[7] */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
77 smusdx v7, a4, a2 /* v7 = B2 = W5*row[1] - W1*row[3] */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
78
4434
cab2986ffc0b theoretically save one cycle
mru
parents: 4427
diff changeset
79 ldr a4, [pc, #(w42n-.-8)] /* a4 = -W4 | (-W2 << 16) */
4427
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
80 smlad v7, lr, a3, v7 /* B2 += W7*row[5] + W3*row[7] */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
81 ldr a3, [a1, #4] /* a3 = row[6,4] */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
82 smlsdx fp, lr, ip, fp /* B3 += W3*row[5] - W1*row[7] */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
83 ldr ip, [pc, #(w46-.-8)] /* ip = W4 | (W6 << 16) */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
84 smlad v6, lr, a2, v6 /* B1 -= W1*row[5] + W5*row[7] (added to v6, which holds -B1) */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
85
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
86 smlad v2, a3, a4, v2 /* A1 += -W4*row[4] - W2*row[6] */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
87 smlsd v3, a3, a4, v3 /* A2 += -W4*row[4] + W2*row[6] */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
88 smlad v1, a3, ip, v1 /* A0 += W4*row[4] + W6*row[6] */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
89 smlsd v4, a3, ip, v4 /* A3 += W4*row[4] - W6*row[6] */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
90 .endm
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
91
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
92 /*
4452
c66326f1f635 optimize IDCT of rows with mostly zero coefficients
mru
parents: 4434
diff changeset
93 Compute partial IDCT of half row.
c66326f1f635 optimize IDCT of rows with mostly zero coefficients
mru
parents: 4434
diff changeset
94 shift = left-shift amount
c66326f1f635 optimize IDCT of rows with mostly zero coefficients
mru
parents: 4434
diff changeset
95 a3 = row[2,0]
c66326f1f635 optimize IDCT of rows with mostly zero coefficients
mru
parents: 4434
diff changeset
96 a4 = row[3,1]
4460
e7f56ad89720 missed a redundant load
mru
parents: 4458
diff changeset
97 ip = w42
4452
c66326f1f635 optimize IDCT of rows with mostly zero coefficients
mru
parents: 4434
diff changeset
98
c66326f1f635 optimize IDCT of rows with mostly zero coefficients
mru
parents: 4434
diff changeset
99 Output in registers v1--v8
c66326f1f635 optimize IDCT of rows with mostly zero coefficients
mru
parents: 4434
diff changeset
100 */
c66326f1f635 optimize IDCT of rows with mostly zero coefficients
mru
parents: 4434
diff changeset
101 .macro idct_row4 shift /* half row: uses only row[0..3]; no row[4..7] terms are added */
4483
7a56dc39adef oops, revert accidental checkin
mru
parents: 4482
diff changeset
102 ldr lr, [pc, #(w46-.-8)] /* lr = W4 | (W6 << 16) */
7a56dc39adef oops, revert accidental checkin
mru
parents: 4482
diff changeset
103 ldr v7, [pc, #(w57-.-8)] /* v7 = W5 | (W7 << 16) */
7a56dc39adef oops, revert accidental checkin
mru
parents: 4482
diff changeset
104 mov a2, #(1<<(\shift-1)) /* a2 = rounding term 1 << (shift - 1) */
7a56dc39adef oops, revert accidental checkin
mru
parents: 4482
diff changeset
105 smlad v1, a3, ip, a2 /* v1 = A0 = W4*row[0] + W2*row[2] + rounding */
7a56dc39adef oops, revert accidental checkin
mru
parents: 4482
diff changeset
106 smlsd v4, a3, ip, a2 /* v4 = A3 = W4*row[0] - W2*row[2] + rounding */
7a56dc39adef oops, revert accidental checkin
mru
parents: 4482
diff changeset
107 ldr ip, [pc, #(w13-.-8)] /* ip = W1 | (W3 << 16) */
7a56dc39adef oops, revert accidental checkin
mru
parents: 4482
diff changeset
108 smlad v2, a3, lr, a2 /* v2 = A1 = W4*row[0] + W6*row[2] + rounding */
7a56dc39adef oops, revert accidental checkin
mru
parents: 4482
diff changeset
109 smlsd v3, a3, lr, a2 /* v3 = A2 = W4*row[0] - W6*row[2] + rounding */
7a56dc39adef oops, revert accidental checkin
mru
parents: 4482
diff changeset
110 smusdx fp, a4, v7 /* fp = B3 = W7*row[1] - W5*row[3] */
7a56dc39adef oops, revert accidental checkin
mru
parents: 4482
diff changeset
111 smuad v5, a4, ip /* v5 = B0 = W1*row[1] + W3*row[3] */
4452
c66326f1f635 optimize IDCT of rows with mostly zero coefficients
mru
parents: 4434
diff changeset
112 pkhtb a3, ip, v7, asr #16 /* a3 = W7 | (W3 << 16) */
c66326f1f635 optimize IDCT of rows with mostly zero coefficients
mru
parents: 4434
diff changeset
113 pkhbt a2, ip, v7, lsl #16 /* a2 = W1 | (W5 << 16) */
c66326f1f635 optimize IDCT of rows with mostly zero coefficients
mru
parents: 4434
diff changeset
114 smusdx v6, a3, a4 /* v6 = -B1 = W7*row[3] - W3*row[1] */
c66326f1f635 optimize IDCT of rows with mostly zero coefficients
mru
parents: 4434
diff changeset
115 smusdx v7, a4, a2 /* v7 = B2 = W5*row[1] - W1*row[3] */
c66326f1f635 optimize IDCT of rows with mostly zero coefficients
mru
parents: 4434
diff changeset
116 .endm
c66326f1f635 optimize IDCT of rows with mostly zero coefficients
mru
parents: 4434
diff changeset
117
c66326f1f635 optimize IDCT of rows with mostly zero coefficients
mru
parents: 4434
diff changeset
118 /*
4427
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
119 Compute final part of IDCT single row without shift.
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
120 Input in registers v1--v8
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
121 Output in registers ip, v1--v3, lr, v5--v7
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
122 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
123 .macro idct_finish /* combine A0..A3 (v1..v4) with B0..B3 (v5, -B1 in v6, v7, fp); no shift applied */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
124 add ip, v1, v5 /* ip = A0 + B0 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
125 sub lr, v1, v5 /* lr = A0 - B0 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
126 sub v1, v2, v6 /* v1 = A1 + B1 (v6 holds -B1) */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
127 add v5, v2, v6 /* v5 = A1 - B1 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
128 add v2, v3, v7 /* v2 = A2 + B2 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
129 sub v6, v3, v7 /* v6 = A2 - B2 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
130 add v3, v4, fp /* v3 = A3 + B3 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
131 sub v7, v4, fp /* v7 = A3 - B3 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
132 .endm
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
133
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
134 /*
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
135 Compute final part of IDCT single row.
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
136 shift = right-shift amount
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
137 Input/output in registers v1--v8
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
138 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
139 .macro idct_finish_shift shift /* combine A and B terms, arithmetic-shift each result right by shift; a3/a4 are scratch */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
140 add a4, v1, v5 /* a4 = A0 + B0 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
141 sub a3, v1, v5 /* a3 = A0 - B0 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
142 mov v1, a4, asr #\shift /* v1 = (A0 + B0) >> shift */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
143 mov v5, a3, asr #\shift /* v5 = (A0 - B0) >> shift */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
144
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
145 sub a4, v2, v6 /* a4 = A1 + B1 (v6 holds -B1) */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
146 add a3, v2, v6 /* a3 = A1 - B1 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
147 mov v2, a4, asr #\shift /* v2 = (A1 + B1) >> shift */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
148 mov v6, a3, asr #\shift /* v6 = (A1 - B1) >> shift */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
149
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
150 add a4, v3, v7 /* a4 = A2 + B2 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
151 sub a3, v3, v7 /* a3 = A2 - B2 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
152 mov v3, a4, asr #\shift /* v3 = (A2 + B2) >> shift */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
153 mov v7, a3, asr #\shift /* v7 = (A2 - B2) >> shift */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
154
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
155 add a4, v4, fp /* a4 = A3 + B3 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
156 sub a3, v4, fp /* a3 = A3 - B3 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
157 mov v4, a4, asr #\shift /* v4 = (A3 + B3) >> shift */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
158 mov fp, a3, asr #\shift /* fp = (A3 - B3) >> shift */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
159 .endm
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
160
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
161 /*
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
162 Compute final part of IDCT single row, saturating results at 8 bits.
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
163 shift = right-shift amount
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
164 Input/output in registers v1--v8
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
165 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
166 .macro idct_finish_shift_sat shift /* as idct_finish_shift, but each result is saturated to 0..255; a4/ip are scratch */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
167 add a4, v1, v5 /* a4 = A0 + B0 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
168 sub ip, v1, v5 /* ip = A0 - B0 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
169 usat v1, #8, a4, asr #\shift /* v1 = sat8((A0 + B0) >> shift) */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
170 usat v5, #8, ip, asr #\shift /* v5 = sat8((A0 - B0) >> shift) */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
171
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
172 sub a4, v2, v6 /* a4 = A1 + B1 (v6 holds -B1) */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
173 add ip, v2, v6 /* ip = A1 - B1 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
174 usat v2, #8, a4, asr #\shift /* v2 = sat8((A1 + B1) >> shift) */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
175 usat v6, #8, ip, asr #\shift /* v6 = sat8((A1 - B1) >> shift) */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
176
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
177 add a4, v3, v7 /* a4 = A2 + B2 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
178 sub ip, v3, v7 /* ip = A2 - B2 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
179 usat v3, #8, a4, asr #\shift /* v3 = sat8((A2 + B2) >> shift) */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
180 usat v7, #8, ip, asr #\shift /* v7 = sat8((A2 - B2) >> shift) */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
181
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
182 add a4, v4, fp /* a4 = A3 + B3 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
183 sub ip, v4, fp /* ip = A3 - B3 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
184 usat v4, #8, a4, asr #\shift /* v4 = sat8((A3 + B3) >> shift) */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
185 usat fp, #8, ip, asr #\shift /* fp = sat8((A3 - B3) >> shift) */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
186 .endm
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
187
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
188 /*
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
189 Compute IDCT of single row, storing as column.
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
190 a1 = source
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
191 a2 = dest
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
192 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
193 .align
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
194 .func idct_row_armv6
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
195 idct_row_armv6:
4483
7a56dc39adef oops, revert accidental checkin
mru
parents: 4482
diff changeset
196 str lr, [sp, #-4]! /* push return address */
7a56dc39adef oops, revert accidental checkin
mru
parents: 4482
diff changeset
197
7a56dc39adef oops, revert accidental checkin
mru
parents: 4482
diff changeset
198 ldr lr, [a1, #12] /* lr = row[7,5] */
7a56dc39adef oops, revert accidental checkin
mru
parents: 4482
diff changeset
199 ldr ip, [a1, #4] /* ip = row[6,4] */
4452
c66326f1f635 optimize IDCT of rows with mostly zero coefficients
mru
parents: 4434
diff changeset
200 ldr a4, [a1, #8] /* a4 = row[3,1] */
c66326f1f635 optimize IDCT of rows with mostly zero coefficients
mru
parents: 4434
diff changeset
201 ldr a3, [a1] /* a3 = row[2,0] */
4483
7a56dc39adef oops, revert accidental checkin
mru
parents: 4482
diff changeset
202 orrs lr, lr, ip /* lr = row[7,5] | row[6,4]; Z set if row[4..7] are all zero */
7a56dc39adef oops, revert accidental checkin
mru
parents: 4482
diff changeset
203 cmpeq lr, a4 /* if so (lr == 0): Z stays set only if row[3,1] is zero too */
7a56dc39adef oops, revert accidental checkin
mru
parents: 4482
diff changeset
204 cmpeq lr, a3, lsr #16 /* if so: Z stays set only if row[2] is zero too */
4452
c66326f1f635 optimize IDCT of rows with mostly zero coefficients
mru
parents: 4434
diff changeset
205 beq 1f /* everything except row[0] is zero: take the DC-only path */
4483
7a56dc39adef oops, revert accidental checkin
mru
parents: 4482
diff changeset
206 str a2, [sp, #-4]! /* save dest; the row macros overwrite a2 */
7a56dc39adef oops, revert accidental checkin
mru
parents: 4482
diff changeset
207 ldr ip, [pc, #(w42-.-8)] /* ip = W4 | (W2 << 16) */
7a56dc39adef oops, revert accidental checkin
mru
parents: 4482
diff changeset
208 cmp lr, #0 /* lr still holds row[7,5] | row[6,4]: are row[4..7] all zero? */
7a56dc39adef oops, revert accidental checkin
mru
parents: 4482
diff changeset
209 beq 2f /* yes: use the half-row variant */
4427
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
210
4483
7a56dc39adef oops, revert accidental checkin
mru
parents: 4482
diff changeset
211 idct_row ROW_SHIFT
7a56dc39adef oops, revert accidental checkin
mru
parents: 4482
diff changeset
212 b 3f
4452
c66326f1f635 optimize IDCT of rows with mostly zero coefficients
mru
parents: 4434
diff changeset
213
4483
7a56dc39adef oops, revert accidental checkin
mru
parents: 4482
diff changeset
214 2: idct_row4 ROW_SHIFT
4452
c66326f1f635 optimize IDCT of rows with mostly zero coefficients
mru
parents: 4434
diff changeset
215
c66326f1f635 optimize IDCT of rows with mostly zero coefficients
mru
parents: 4434
diff changeset
216 3: ldr a2, [sp], #4 /* restore dest */
4483
7a56dc39adef oops, revert accidental checkin
mru
parents: 4482
diff changeset
217 idct_finish_shift ROW_SHIFT
4427
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
218
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
219 strh v1, [a2] /* v1 = (A0 + B0) >> ROW_SHIFT */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
220 strh v2, [a2, #(16*2)] /* v2 = (A1 + B1) >> ROW_SHIFT */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
221 strh v3, [a2, #(16*4)] /* v3 = (A2 + B2) >> ROW_SHIFT */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
222 strh v4, [a2, #(16*6)] /* v4 = (A3 + B3) >> ROW_SHIFT */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
223 strh fp, [a2, #(16*1)] /* fp = (A3 - B3) >> ROW_SHIFT */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
224 strh v7, [a2, #(16*3)] /* v7 = (A2 - B2) >> ROW_SHIFT */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
225 strh v6, [a2, #(16*5)] /* v6 = (A1 - B1) >> ROW_SHIFT */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
226 strh v5, [a2, #(16*7)] /* v5 = (A0 - B0) >> ROW_SHIFT */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
227
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
228 ldr pc, [sp], #4 /* return: pop the saved lr into pc */
4452
c66326f1f635 optimize IDCT of rows with mostly zero coefficients
mru
parents: 4434
diff changeset
229
c66326f1f635 optimize IDCT of rows with mostly zero coefficients
mru
parents: 4434
diff changeset
230 1: mov a3, a3, lsl #3 /* DC-only path: low halfword of a3 = row[0] << 3 */
c66326f1f635 optimize IDCT of rows with mostly zero coefficients
mru
parents: 4434
diff changeset
231 strh a3, [a2] /* the same halfword is written to all eight output slots */
c66326f1f635 optimize IDCT of rows with mostly zero coefficients
mru
parents: 4434
diff changeset
232 strh a3, [a2, #(16*2)]
c66326f1f635 optimize IDCT of rows with mostly zero coefficients
mru
parents: 4434
diff changeset
233 strh a3, [a2, #(16*4)]
c66326f1f635 optimize IDCT of rows with mostly zero coefficients
mru
parents: 4434
diff changeset
234 strh a3, [a2, #(16*6)]
c66326f1f635 optimize IDCT of rows with mostly zero coefficients
mru
parents: 4434
diff changeset
235 strh a3, [a2, #(16*1)]
c66326f1f635 optimize IDCT of rows with mostly zero coefficients
mru
parents: 4434
diff changeset
236 strh a3, [a2, #(16*3)]
c66326f1f635 optimize IDCT of rows with mostly zero coefficients
mru
parents: 4434
diff changeset
237 strh a3, [a2, #(16*5)]
c66326f1f635 optimize IDCT of rows with mostly zero coefficients
mru
parents: 4434
diff changeset
238 strh a3, [a2, #(16*7)]
4483
7a56dc39adef oops, revert accidental checkin
mru
parents: 4482
diff changeset
239 ldr pc, [sp], #4 /* return (a2 was never pushed on this path) */
4427
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
240 .endfunc
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
241
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
242 /*
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
243 Compute IDCT of single column, read as row.
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
244 a1 = source
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
245 a2 = dest
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
246 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
247 .align
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
248 .func idct_col_armv6
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
249 idct_col_armv6:
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
250 stmfd sp!, {a2, lr} /* save dest and return address; idct_row overwrites a2 and lr */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
251
4457
99e960878498 remove redundant loads
mru
parents: 4452
diff changeset
252 ldr a3, [a1] /* a3 = row[2,0] */
4483
7a56dc39adef oops, revert accidental checkin
mru
parents: 4482
diff changeset
253 ldr ip, [pc, #(w42-.-8)] /* ip = W4 | (W2 << 16) */
4457
99e960878498 remove redundant loads
mru
parents: 4452
diff changeset
254 ldr a4, [a1, #8] /* a4 = row[3,1] */
4427
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
255 idct_row COL_SHIFT
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
256 ldr a2, [sp], #4 /* restore dest; the return address stays on the stack */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
257 idct_finish_shift COL_SHIFT
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
258
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
259 strh v1, [a2] /* v1 = (A0 + B0) >> COL_SHIFT */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
260 strh v2, [a2, #(16*1)] /* v2 = (A1 + B1) >> COL_SHIFT */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
261 strh v3, [a2, #(16*2)] /* v3 = (A2 + B2) >> COL_SHIFT */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
262 strh v4, [a2, #(16*3)] /* v4 = (A3 + B3) >> COL_SHIFT */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
263 strh fp, [a2, #(16*4)] /* fp = (A3 - B3) >> COL_SHIFT */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
264 strh v7, [a2, #(16*5)] /* v7 = (A2 - B2) >> COL_SHIFT */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
265 strh v6, [a2, #(16*6)] /* v6 = (A1 - B1) >> COL_SHIFT */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
266 strh v5, [a2, #(16*7)] /* v5 = (A0 - B0) >> COL_SHIFT */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
267
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
268 ldr pc, [sp], #4 /* return: pop the saved lr into pc */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
269 .endfunc
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
270
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
271 /*
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
272 Compute IDCT of single column, read as row, store saturated 8-bit.
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
273 a1 = source
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
274 a2 = dest
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
275 a3 = line size
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
276 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
277 .align
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
278 .func idct_col_put_armv6
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
279 idct_col_put_armv6:
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
280 stmfd sp!, {a2, a3, lr} /* save dest, line size and return address; idct_row overwrites all three */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
281
4457
99e960878498 remove redundant loads
mru
parents: 4452
diff changeset
282 ldr a3, [a1] /* a3 = row[2,0] */
4483
7a56dc39adef oops, revert accidental checkin
mru
parents: 4482
diff changeset
283 ldr ip, [pc, #(w42-.-8)] /* ip = W4 | (W2 << 16) */
4457
99e960878498 remove redundant loads
mru
parents: 4452
diff changeset
284 ldr a4, [a1, #8] /* a4 = row[3,1] */
4427
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
285 idct_row COL_SHIFT
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
286 ldmfd sp!, {a2, a3} /* restore dest and line size; the return address stays on the stack */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
287 idct_finish_shift_sat COL_SHIFT
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
288
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
289 strb v1, [a2], a3 /* store sat8((A0 + B0) >> COL_SHIFT), then a2 += line size */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
290 strb v2, [a2], a3 /* sat8((A1 + B1) >> COL_SHIFT) */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
291 strb v3, [a2], a3 /* sat8((A2 + B2) >> COL_SHIFT) */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
292 strb v4, [a2], a3 /* sat8((A3 + B3) >> COL_SHIFT) */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
293 strb fp, [a2], a3 /* sat8((A3 - B3) >> COL_SHIFT) */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
294 strb v7, [a2], a3 /* sat8((A2 - B2) >> COL_SHIFT) */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
295 strb v6, [a2], a3 /* sat8((A1 - B1) >> COL_SHIFT) */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
296 strb v5, [a2], a3 /* sat8((A0 - B0) >> COL_SHIFT) */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
297
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
298 sub a2, a2, a3, lsl #3 /* a2 -= 8 * line size, undoing the eight post-increments */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
299
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
300 ldr pc, [sp], #4 /* return: pop the saved lr into pc */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
301 .endfunc
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
302
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
303 /*
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
304 Compute IDCT of single column, read as row, add/store saturated 8-bit.
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
305 a1 = source
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
306 a2 = dest
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
307 a3 = line size
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
308 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
309 .align
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
310 .func idct_col_add_armv6
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
311 idct_col_add_armv6:
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
312 stmfd sp!, {a2, a3, lr} /* save dest, line size and return address; idct_row overwrites all three */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
313
4457
99e960878498 remove redundant loads
mru
parents: 4452
diff changeset
314 ldr a3, [a1] /* a3 = row[2,0] */
4483
7a56dc39adef oops, revert accidental checkin
mru
parents: 4482
diff changeset
315 ldr ip, [pc, #(w42-.-8)] /* ip = W4 | (W2 << 16) */
4457
99e960878498 remove redundant loads
mru
parents: 4452
diff changeset
316 ldr a4, [a1, #8] /* a4 = row[3,1] */
4427
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
317 idct_row COL_SHIFT
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
318 ldmfd sp!, {a2, a3} /* restore dest and line size; the return address stays on the stack */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
319 idct_finish
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
320
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
321 ldrb a4, [a2] /* a4 = p[0], where p[k] is the byte at dest + k * line size */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
322 ldrb v4, [a2, a3] /* v4 = p[1] */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
323 ldrb fp, [a2, a3, lsl #2] /* fp = p[4]; NOTE(review): fp is reloaded below before being read, so this load looks redundant */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
324 add ip, a4, ip, asr #COL_SHIFT /* ip = p[0] + ((A0 + B0) >> COL_SHIFT) */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
325 usat ip, #8, ip /* saturate to 0..255 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
326 add v1, v4, v1, asr #COL_SHIFT /* v1 = p[1] + ((A1 + B1) >> COL_SHIFT) */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
327 strb ip, [a2], a3 /* store p[0]; a2 now points at p[1] */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
328 ldrb ip, [a2, a3] /* ip = p[2] */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
329 usat v1, #8, v1
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
330 ldrb fp, [a2, a3, lsl #2] /* fp = p[5] */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
331 add v2, ip, v2, asr #COL_SHIFT /* v2 = p[2] + ((A2 + B2) >> COL_SHIFT) */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
332 usat v2, #8, v2
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
333 strb v1, [a2], a3 /* store p[1]; a2 now points at p[2] */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
334 ldrb a4, [a2, a3] /* a4 = p[3] */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
335 ldrb ip, [a2, a3, lsl #2] /* ip = p[6] */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
336 strb v2, [a2], a3 /* store p[2]; a2 now points at p[3] */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
337 ldrb v4, [a2, a3] /* v4 = p[4] */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
338 ldrb v1, [a2, a3, lsl #2] /* v1 = p[7] */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
339 add v3, a4, v3, asr #COL_SHIFT /* v3 = p[3] + ((A3 + B3) >> COL_SHIFT) */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
340 usat v3, #8, v3
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
341 add v7, v4, v7, asr #COL_SHIFT /* v7 = p[4] + ((A3 - B3) >> COL_SHIFT) */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
342 usat v7, #8, v7
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
343 add v6, fp, v6, asr #COL_SHIFT /* v6 = p[5] + ((A2 - B2) >> COL_SHIFT) */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
344 usat v6, #8, v6
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
345 add v5, ip, v5, asr #COL_SHIFT /* v5 = p[6] + ((A1 - B1) >> COL_SHIFT) */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
346 usat v5, #8, v5
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
347 add lr, v1, lr, asr #COL_SHIFT /* lr = p[7] + ((A0 - B0) >> COL_SHIFT) */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
348 usat lr, #8, lr
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
349 strb v3, [a2], a3 /* store p[3] */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
350 strb v7, [a2], a3 /* store p[4] */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
351 strb v6, [a2], a3 /* store p[5] */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
352 strb v5, [a2], a3 /* store p[6] */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
353 strb lr, [a2], a3 /* store p[7] */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
354
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
355 sub a2, a2, a3, lsl #3 /* a2 -= 8 * line size, undoing the eight post-increments */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
356
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
357 ldr pc, [sp], #4 /* return: pop the saved lr into pc */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
358 .endfunc
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
359
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
/*
  idct_rows func width

  Call \func eight times, once per 16-byte-spaced position in the
  buffer addressed by a1.

    func  = IDCT row->col function, invoked with "bl"
    width = number of bytes a2 is advanced between consecutive calls

  a1 takes the byte offsets 0, 32, 64, 96, 16, 48, 80, 112 relative to
  its value on entry (i.e. 16-byte units 0,2,4,6,1,3,5,7) and is moved
  back to its entry value at the end.  a2 is advanced by \width before
  each call except the first and is NOT rewound, so it ends up
  7*\width beyond its entry value.

  This bookkeeping assumes \func returns with a1 and a2 unchanged.
  lr is overwritten by every "bl".
*/
        .macro idct_rows func width
        bl     \func                   /* offset 0 */

        .rept  3                       /* offsets 32, 64, 96 */
        add    a1, a1, #(16*2)
        add    a2, a2, #\width
        bl     \func
        .endr

        sub    a1, a1, #(16*5)         /* 96 - 80 = offset 16 */
        add    a2, a2, #\width
        bl     \func

        .rept  3                       /* offsets 48, 80, 112 */
        add    a1, a1, #(16*2)
        add    a2, a2, #\width
        bl     \func
        .endr

        sub    a1, a1, #(16*7)         /* 112 - 112: a1 back to entry value */
        .endm
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
391
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
/*
 * void ff_simple_idct_armv6(DCTELEM *data);
 *
 * In:  a1 = data
 *
 * Two passes over the block using a 128-byte scratch buffer on the
 * stack: idct_row_armv6 is run with a1 = data, a2 = scratch, then
 * idct_col_armv6 is run with a1 = scratch, a2 = data.  In both passes
 * a2 is stepped by 2 bytes between calls.
 *
 * v1-v7, fp and lr are saved on entry and restored on return.
 */
        .align
        .global ff_simple_idct_armv6
        .func   ff_simple_idct_armv6
ff_simple_idct_armv6:
        stmfd  sp!, {v1-v7, fp, lr}
        sub    sp, sp, #128            /* reserve scratch buffer */

        mov    a2, sp                  /* pass 1: data -> scratch */
        idct_rows idct_row_armv6, 2
        mov    a2, a1                  /* pass 2: scratch -> data; a1 is */
        mov    a1, sp                  /* back at data after idct_rows   */
        idct_rows idct_col_armv6, 2

        add    sp, sp, #128            /* release scratch buffer */
        ldmfd  sp!, {v1-v7, fp, pc}    /* restore registers and return */
        .endfunc
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
409
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
/*
 * ff_simple_idct_add_armv6(uint8_t *dest, int line_size, DCTELEM *data);
 *
 * In:  a1 = dest, a2 = line_size, a3 = data
 *
 * dest and line_size are pushed together with the saved registers so
 * they can be reloaded after the first pass.  Pass 1 runs
 * idct_row_armv6 with a1 = data and a2 = a 128-byte stack scratch
 * buffer (a2 stepped by 2 bytes per call).  Pass 2 runs
 * idct_col_add_armv6 with a1 = scratch, a2 = dest, a3 = line_size
 * (a2 stepped by 1 byte per call).
 *
 * v1-v7, fp and lr are saved on entry and restored on return.
 */
        .align
        .global ff_simple_idct_add_armv6
        .func   ff_simple_idct_add_armv6
ff_simple_idct_add_armv6:
        stmfd  sp!, {a1-a2, v1-v7, fp, lr}
        sub    sp, sp, #128            /* scratch buffer; saved a1/a2 now at sp+128 */

        mov    a1, a3                  /* a1 = data */
        mov    a2, sp                  /* a2 = scratch */
        idct_rows idct_row_armv6, 2
        mov    a1, sp                  /* a1 = scratch */
        ldr    a2, [sp, #128]          /* a2 = dest (saved a1) */
        ldr    a3, [sp, #(128+4)]      /* a3 = line_size (saved a2) */
        idct_rows idct_col_add_armv6, 1

        add    sp, sp, #(128+8)        /* drop scratch plus saved a1/a2 */
        ldmfd  sp!, {v1-v7, fp, pc}    /* restore registers and return */
        .endfunc
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
429
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
/*
 * ff_simple_idct_put_armv6(uint8_t *dest, int line_size, DCTELEM *data);
 *
 * In:  a1 = dest, a2 = line_size, a3 = data
 *
 * dest and line_size are pushed together with the saved registers so
 * they can be reloaded after the first pass.  Pass 1 runs
 * idct_row_armv6 with a1 = data and a2 = a 128-byte stack scratch
 * buffer (a2 stepped by 2 bytes per call).  Pass 2 runs
 * idct_col_put_armv6 with a1 = scratch, a2 = dest, a3 = line_size
 * (a2 stepped by 1 byte per call).
 *
 * v1-v7, fp and lr are saved on entry and restored on return.
 */
        .align
        .global ff_simple_idct_put_armv6
        .func   ff_simple_idct_put_armv6
ff_simple_idct_put_armv6:
        stmfd  sp!, {a1-a2, v1-v7, fp, lr}
        sub    sp, sp, #128            /* scratch buffer; saved a1/a2 now at sp+128 */

        mov    a1, a3                  /* a1 = data */
        mov    a2, sp                  /* a2 = scratch */
        idct_rows idct_row_armv6, 2
        mov    a1, sp                  /* a1 = scratch */
        ldr    a2, [sp, #128]          /* a2 = dest (saved a1) */
        ldr    a3, [sp, #(128+4)]      /* a3 = line_size (saved a2) */
        idct_rows idct_col_put_armv6, 1

        add    sp, sp, #(128+8)        /* drop scratch plus saved a1/a2 */
        ldmfd  sp!, {v1-v7, fp, pc}    /* restore registers and return */
        .endfunc