annotate arm/simple_idct_armv6.S @ 11277:c12d6c6c027e libavcodec

Change mvd_cache & mvd_table to 8bit, this is overall a bit faster for high resolution videos. about 20cycles faster per MB for cathederal.
author michael
date Wed, 24 Feb 2010 20:43:06 +0000
parents db79dcbd5161
children 361a5fcb4393
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
4427
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
1 /*
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
2 * Simple IDCT
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
3 *
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
4 * Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at>
5220
744e91a36a23 update my email address
mru
parents: 4867
diff changeset
5 * Copyright (c) 2007 Mans Rullgard <mans@mansr.com>
4427
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
6 *
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
7 * This file is part of FFmpeg.
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
8 *
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
9 * FFmpeg is free software; you can redistribute it and/or
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
10 * modify it under the terms of the GNU Lesser General Public
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
11 * License as published by the Free Software Foundation; either
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
12 * version 2.1 of the License, or (at your option) any later version.
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
13 *
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
14 * FFmpeg is distributed in the hope that it will be useful,
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
17 * Lesser General Public License for more details.
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
18 *
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
19 * You should have received a copy of the GNU Lesser General Public
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
20 * License along with FFmpeg; if not, write to the Free Software
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
22 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
23
8069
316762ae96a7 ARM: use new macros for assembler function labels
mru
parents: 5220
diff changeset
24 #include "asm.S"
316762ae96a7 ARM: use new macros for assembler function labels
mru
parents: 5220
diff changeset
25
4427
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
26 #define W1 22725 /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
27 #define W2 21407 /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
28 #define W3 19266 /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
29 #define W4 16383 /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5; NOTE(review): for i = 4 this formula evaluates to 16384, so 16383 is presumably deliberate -- confirm */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
30 #define W5 12873 /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
31 #define W6 8867 /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
32 #define W7 4520 /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
33 #define ROW_SHIFT 11 /* shift argument used for the row pass (idct_row_armv6) */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
34 #define COL_SHIFT 20 /* shift argument used for the column passes (idct_col_armv6, idct_col_put_armv6) */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
35
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
36 #define W13 (W1 | (W3 << 16)) /* W1 in the low halfword, W3 in the high halfword */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
37 #define W26 (W2 | (W6 << 16)) /* W2 in the low halfword, W6 in the high halfword */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
38 #define W42 (W4 | (W2 << 16)) /* W4 in the low halfword, W2 in the high halfword */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
39 #define W42n (-W4&0xffff | (-W2 << 16)) /* -W4 masked to the low halfword, -W2 in the high halfword */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
40 #define W46 (W4 | (W6 << 16)) /* W4 in the low halfword, W6 in the high halfword */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
41 #define W57 (W5 | (W7 << 16)) /* W5 in the low halfword, W7 in the high halfword */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
42
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
43 .text
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
44 .align
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
45 w13: .long W13 /* packed coefficient word W1 | (W3 << 16), fetched with "ldr rX, w13" */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
46 w26: .long W26 /* packed coefficient word W2 | (W6 << 16) */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
47 w42: .long W42 /* packed coefficient word W4 | (W2 << 16) */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
48 w42n: .long W42n /* packed coefficient word -W4 | (-W2 << 16) */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
49 w46: .long W46 /* packed coefficient word W4 | (W6 << 16) */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
50 w57: .long W57 /* packed coefficient word W5 | (W7 << 16) */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
51
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
52 /*
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
53 Compute partial IDCT of single row.
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
54 shift = shift amount; used here only to form the rounding term 1 << (shift-1)
8575
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
55 r0 = source address
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
56 r2 = row[2,0] <= 2 cycles
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
57 r3 = row[3,1]
4458
9efb3639d7fe save one cycle
mru
parents: 4457
diff changeset
58 ip = w42 <= 2 cycles
4427
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
59
8575
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
60 Output in registers r4--r11
4427
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
61 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
62 .macro idct_row shift
8577
34facb1ab4da ARM: simple_idct_armv6.S whitespace cosmetics
mru
parents: 8576
diff changeset
63 ldr lr, w46 /* lr = W4 | (W6 << 16) */
8575
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
64 mov r1, #(1<<(\shift-1)) /* r1 = rounding term 1 << (shift-1) */
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
65 smlad r4, r2, ip, r1 /* r4 = A0 = W4*row[0] + W2*row[2] + rounding */
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
66 smlsd r7, r2, ip, r1 /* r7 = A3 = W4*row[0] - W2*row[2] + rounding */
8577
34facb1ab4da ARM: simple_idct_armv6.S whitespace cosmetics
mru
parents: 8576
diff changeset
67 ldr ip, w13 /* ip = W1 | (W3 << 16) */
8576
636dc45f4779 ARM: clean up pc-relative references in simple_idct_armv6.S
mru
parents: 8575
diff changeset
68 ldr r10,w57 /* r10 = W5 | (W7 << 16) */
8575
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
69 smlad r5, r2, lr, r1 /* r5 = A1 = W4*row[0] + W6*row[2] + rounding */
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
70 smlsd r6, r2, lr, r1 /* r6 = A2 = W4*row[0] - W6*row[2] + rounding */
4483
7a56dc39adef oops, revert accidental checkin
mru
parents: 4482
diff changeset
71
8577
34facb1ab4da ARM: simple_idct_armv6.S whitespace cosmetics
mru
parents: 8576
diff changeset
72 smuad r8, r3, ip /* r8 = B0 = W1*row[1] + W3*row[3] */
34facb1ab4da ARM: simple_idct_armv6.S whitespace cosmetics
mru
parents: 8576
diff changeset
73 smusdx r11,r3, r10 /* r11 = B3 = W7*row[1] - W5*row[3] */
34facb1ab4da ARM: simple_idct_armv6.S whitespace cosmetics
mru
parents: 8576
diff changeset
74 ldr lr, [r0, #12] /* lr = row[7,5] */
34facb1ab4da ARM: simple_idct_armv6.S whitespace cosmetics
mru
parents: 8576
diff changeset
75 pkhtb r2, ip, r10,asr #16 /* r2 = W7 | (W3 << 16); row[2,0] is no longer needed */
34facb1ab4da ARM: simple_idct_armv6.S whitespace cosmetics
mru
parents: 8576
diff changeset
76 pkhbt r1, ip, r10,lsl #16 /* r1 = W1 | (W5 << 16) */
34facb1ab4da ARM: simple_idct_armv6.S whitespace cosmetics
mru
parents: 8576
diff changeset
77 smusdx r9, r2, r3 /* r9 = -B1 = W7*row[3] - W3*row[1] */
34facb1ab4da ARM: simple_idct_armv6.S whitespace cosmetics
mru
parents: 8576
diff changeset
78 smlad r8, lr, r10,r8 /* B0 += W5*row[5] + W7*row[7] */
34facb1ab4da ARM: simple_idct_armv6.S whitespace cosmetics
mru
parents: 8576
diff changeset
79 smusdx r10,r3, r1 /* r10 = B2 = W5*row[1] - W1*row[3] */
4427
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
80
8577
34facb1ab4da ARM: simple_idct_armv6.S whitespace cosmetics
mru
parents: 8576
diff changeset
81 ldr r3, w42n /* r3 = -W4 | (-W2 << 16) */
34facb1ab4da ARM: simple_idct_armv6.S whitespace cosmetics
mru
parents: 8576
diff changeset
82 smlad r10,lr, r2, r10 /* B2 += W7*row[5] + W3*row[7] */
34facb1ab4da ARM: simple_idct_armv6.S whitespace cosmetics
mru
parents: 8576
diff changeset
83 ldr r2, [r0, #4] /* r2 = row[6,4] */
34facb1ab4da ARM: simple_idct_armv6.S whitespace cosmetics
mru
parents: 8576
diff changeset
84 smlsdx r11,lr, ip, r11 /* B3 += W3*row[5] - W1*row[7] */
34facb1ab4da ARM: simple_idct_armv6.S whitespace cosmetics
mru
parents: 8576
diff changeset
85 ldr ip, w46 /* ip = W4 | (W6 << 16) */
34facb1ab4da ARM: simple_idct_armv6.S whitespace cosmetics
mru
parents: 8576
diff changeset
86 smlad r9, lr, r1, r9 /* B1 -= W1*row[5] + W5*row[7] (r9 holds -B1, hence the add) */
4427
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
87
8575
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
88 smlad r5, r2, r3, r5 /* A1 += -W4*row[4] - W2*row[6] */
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
89 smlsd r6, r2, r3, r6 /* A2 += -W4*row[4] + W2*row[6] */
8577
34facb1ab4da ARM: simple_idct_armv6.S whitespace cosmetics
mru
parents: 8576
diff changeset
90 smlad r4, r2, ip, r4 /* A0 += W4*row[4] + W6*row[6] */
34facb1ab4da ARM: simple_idct_armv6.S whitespace cosmetics
mru
parents: 8576
diff changeset
91 smlsd r7, r2, ip, r7 /* A3 += W4*row[4] - W6*row[6] */
4427
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
92 .endm
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
93
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
94 /*
4452
c66326f1f635 optimize IDCT of rows with mostly zero coefficients
mru
parents: 4434
diff changeset
95 Compute partial IDCT of half row.
c66326f1f635 optimize IDCT of rows with mostly zero coefficients
mru
parents: 4434
diff changeset
96 shift = shift amount; used here only to form the rounding term 1 << (shift-1)
8575
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
97 r2 = row[2,0]
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
98 r3 = row[3,1]
4460
e7f56ad89720 missed a redundant load
mru
parents: 4458
diff changeset
99 ip = w42
4452
c66326f1f635 optimize IDCT of rows with mostly zero coefficients
mru
parents: 4434
diff changeset
100
8575
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
101 Output in registers r4--r11
4452
c66326f1f635 optimize IDCT of rows with mostly zero coefficients
mru
parents: 4434
diff changeset
102 */
c66326f1f635 optimize IDCT of rows with mostly zero coefficients
mru
parents: 4434
diff changeset
103 .macro idct_row4 shift
8577
34facb1ab4da ARM: simple_idct_armv6.S whitespace cosmetics
mru
parents: 8576
diff changeset
104 ldr lr, w46 /* lr = W4 | (W6 << 16) */
8576
636dc45f4779 ARM: clean up pc-relative references in simple_idct_armv6.S
mru
parents: 8575
diff changeset
105 ldr r10,w57 /* r10 = W5 | (W7 << 16) */
8575
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
106 mov r1, #(1<<(\shift-1)) /* r1 = rounding term 1 << (shift-1) */
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
107 smlad r4, r2, ip, r1 /* r4 = A0 = W4*row[0] + W2*row[2] + rounding */
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
108 smlsd r7, r2, ip, r1 /* r7 = A3 = W4*row[0] - W2*row[2] + rounding */
8577
34facb1ab4da ARM: simple_idct_armv6.S whitespace cosmetics
mru
parents: 8576
diff changeset
109 ldr ip, w13 /* ip = W1 | (W3 << 16) */
8575
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
110 smlad r5, r2, lr, r1 /* r5 = A1 = W4*row[0] + W6*row[2] + rounding */
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
111 smlsd r6, r2, lr, r1 /* r6 = A2 = W4*row[0] - W6*row[2] + rounding */
8577
34facb1ab4da ARM: simple_idct_armv6.S whitespace cosmetics
mru
parents: 8576
diff changeset
112 smusdx r11,r3, r10 /* r11 = B3 = W7*row[1] - W5*row[3] */
34facb1ab4da ARM: simple_idct_armv6.S whitespace cosmetics
mru
parents: 8576
diff changeset
113 smuad r8, r3, ip /* r8 = B0 = W1*row[1] + W3*row[3] */
34facb1ab4da ARM: simple_idct_armv6.S whitespace cosmetics
mru
parents: 8576
diff changeset
114 pkhtb r2, ip, r10,asr #16 /* r2 = W7 | (W3 << 16); row[2,0] is no longer needed */
34facb1ab4da ARM: simple_idct_armv6.S whitespace cosmetics
mru
parents: 8576
diff changeset
115 pkhbt r1, ip, r10,lsl #16 /* r1 = W1 | (W5 << 16) */
34facb1ab4da ARM: simple_idct_armv6.S whitespace cosmetics
mru
parents: 8576
diff changeset
116 smusdx r9, r2, r3 /* r9 = -B1 = W7*row[3] - W3*row[1] */
34facb1ab4da ARM: simple_idct_armv6.S whitespace cosmetics
mru
parents: 8576
diff changeset
117 smusdx r10,r3, r1 /* r10 = B2 = W5*row[1] - W1*row[3] */
4452
c66326f1f635 optimize IDCT of rows with mostly zero coefficients
mru
parents: 4434
diff changeset
118 .endm
c66326f1f635 optimize IDCT of rows with mostly zero coefficients
mru
parents: 4434
diff changeset
119
c66326f1f635 optimize IDCT of rows with mostly zero coefficients
mru
parents: 4434
diff changeset
120 /*
4427
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
121 Compute final part of IDCT single row without shift.
8575
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
122 Input in registers r4--r11
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
123 Output in registers ip, r4--r6, lr, r8--r10
4427
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
124 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
125 .macro idct_finish
8575
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
126 add ip, r4, r8 /* ip = A0 + B0 */
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
127 sub lr, r4, r8 /* lr = A0 - B0 */
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
128 sub r4, r5, r9 /* r4 = A1 + B1 (r9 holds -B1) */
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
129 add r8, r5, r9 /* r8 = A1 - B1 (r9 holds -B1) */
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
130 add r5, r6, r10 /* r5 = A2 + B2 */
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
131 sub r9, r6, r10 /* r9 = A2 - B2 */
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
132 add r6, r7, r11 /* r6 = A3 + B3 */
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
133 sub r10,r7, r11 /* r10 = A3 - B3 */
4427
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
134 .endm
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
135
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
136 /*
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
137 Compute final part of IDCT single row.
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
138 shift = right-shift amount
8575
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
139 Input/output in registers r4--r11
4427
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
140 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
141 .macro idct_finish_shift shift
8575
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
142 add r3, r4, r8 /* r3 = A0 + B0 */
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
143 sub r2, r4, r8 /* r2 = A0 - B0 */
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
144 mov r4, r3, asr #\shift /* r4 = (A0 + B0) >> shift (arithmetic) */
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
145 mov r8, r2, asr #\shift /* r8 = (A0 - B0) >> shift (arithmetic) */
4427
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
146
8575
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
147 sub r3, r5, r9 /* r3 = A1 + B1 (r9 holds -B1) */
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
148 add r2, r5, r9 /* r2 = A1 - B1 (r9 holds -B1) */
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
149 mov r5, r3, asr #\shift /* r5 = (A1 + B1) >> shift (arithmetic) */
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
150 mov r9, r2, asr #\shift /* r9 = (A1 - B1) >> shift (arithmetic) */
4427
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
151
8575
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
152 add r3, r6, r10 /* r3 = A2 + B2 */
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
153 sub r2, r6, r10 /* r2 = A2 - B2 */
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
154 mov r6, r3, asr #\shift /* r6 = (A2 + B2) >> shift (arithmetic) */
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
155 mov r10,r2, asr #\shift /* r10 = (A2 - B2) >> shift (arithmetic) */
4427
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
156
8575
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
157 add r3, r7, r11 /* r3 = A3 + B3 */
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
158 sub r2, r7, r11 /* r2 = A3 - B3 */
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
159 mov r7, r3, asr #\shift /* r7 = (A3 + B3) >> shift (arithmetic) */
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
160 mov r11,r2, asr #\shift /* r11 = (A3 - B3) >> shift (arithmetic) */
4427
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
161 .endm
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
162
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
163 /*
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
164 Compute final part of IDCT single row, saturating results at 8 bits.
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
165 shift = right-shift amount
8575
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
166 Input/output in registers r4--r11
4427
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
167 */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
168 .macro idct_finish_shift_sat shift
8575
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
169 add r3, r4, r8 /* r3 = A0 + B0 */
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
170 sub ip, r4, r8 /* ip = A0 - B0 */
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
171 usat r4, #8, r3, asr #\shift /* r4 = (A0 + B0) >> shift, saturated to unsigned 8 bits */
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
172 usat r8, #8, ip, asr #\shift /* r8 = (A0 - B0) >> shift, saturated to unsigned 8 bits */
4427
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
173
8575
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
174 sub r3, r5, r9 /* r3 = A1 + B1 (r9 holds -B1) */
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
175 add ip, r5, r9 /* ip = A1 - B1 (r9 holds -B1) */
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
176 usat r5, #8, r3, asr #\shift /* r5 = (A1 + B1) >> shift, saturated to unsigned 8 bits */
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
177 usat r9, #8, ip, asr #\shift /* r9 = (A1 - B1) >> shift, saturated to unsigned 8 bits */
4427
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
178
8575
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
179 add r3, r6, r10 /* r3 = A2 + B2 */
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
180 sub ip, r6, r10 /* ip = A2 - B2 */
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
181 usat r6, #8, r3, asr #\shift /* r6 = (A2 + B2) >> shift, saturated to unsigned 8 bits */
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
182 usat r10,#8, ip, asr #\shift /* r10 = (A2 - B2) >> shift, saturated to unsigned 8 bits */
4427
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
183
8575
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
184 add r3, r7, r11 /* r3 = A3 + B3 */
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
185 sub ip, r7, r11 /* ip = A3 - B3 */
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
186 usat r7, #8, r3, asr #\shift /* r7 = (A3 + B3) >> shift, saturated to unsigned 8 bits */
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
187 usat r11,#8, ip, asr #\shift /* r11 = (A3 - B3) >> shift, saturated to unsigned 8 bits */
4427
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
188 .endm
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
189
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
190 /*
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
191 Compute IDCT of single row, storing as column.
8575
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
192 r0 = source
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
193 r1 = dest
4427
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
194 */
8069
316762ae96a7 ARM: use new macros for assembler function labels
mru
parents: 5220
diff changeset
195 function idct_row_armv6
8578
db79dcbd5161 ARM: use push/pop pseudo-instructions in simple_idct_armv6.S
mru
parents: 8577
diff changeset
196 push {lr} /* save return address; lr is used as scratch below */
4483
7a56dc39adef oops, revert accidental checkin
mru
parents: 4482
diff changeset
197
8575
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
198 ldr lr, [r0, #12] /* lr = row[7,5] */
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
199 ldr ip, [r0, #4] /* ip = row[6,4] */
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
200 ldr r3, [r0, #8] /* r3 = row[3,1] */
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
201 ldr r2, [r0] /* r2 = row[2,0] */
4483
7a56dc39adef oops, revert accidental checkin
mru
parents: 4482
diff changeset
202 orrs lr, lr, ip /* lr = row[7,5] | row[6,4]; Z set if row[4..7] are all zero */
8575
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
203 cmpeq lr, r3 /* if so (lr is 0 here): Z stays set only if row[3,1] is zero too */
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
204 cmpeq lr, r2, lsr #16 /* if so: Z stays set only if row[2] (high half of r2) is zero too */
4452
c66326f1f635 optimize IDCT of rows with mostly zero coefficients
mru
parents: 4434
diff changeset
205 beq 1f /* row[1..7] are all zero: take the row[0]-only shortcut */
8578
db79dcbd5161 ARM: use push/pop pseudo-instructions in simple_idct_armv6.S
mru
parents: 8577
diff changeset
206 push {r1} /* idct_row and idct_row4 overwrite r1, so keep dest on the stack */
8576
636dc45f4779 ARM: clean up pc-relative references in simple_idct_armv6.S
mru
parents: 8575
diff changeset
207 ldr ip, w42 /* ip = W4 | (W2 << 16) */
4483
7a56dc39adef oops, revert accidental checkin
mru
parents: 4482
diff changeset
208 cmp lr, #0 /* lr still holds row[7,5] | row[6,4] */
7a56dc39adef oops, revert accidental checkin
mru
parents: 4482
diff changeset
209 beq 2f /* row[4..7] are all zero: the half-row macro is enough */
4427
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
210
4483
7a56dc39adef oops, revert accidental checkin
mru
parents: 4482
diff changeset
211 idct_row ROW_SHIFT /* full row: A0..A3 in r4..r7, B0, -B1, B2, B3 in r8..r11 */
7a56dc39adef oops, revert accidental checkin
mru
parents: 4482
diff changeset
212 b 3f
4452
c66326f1f635 optimize IDCT of rows with mostly zero coefficients
mru
parents: 4434
diff changeset
213
4483
7a56dc39adef oops, revert accidental checkin
mru
parents: 4482
diff changeset
214 2: idct_row4 ROW_SHIFT /* same outputs, computed from row[0..3] only */
4452
c66326f1f635 optimize IDCT of rows with mostly zero coefficients
mru
parents: 4434
diff changeset
215
8578
db79dcbd5161 ARM: use push/pop pseudo-instructions in simple_idct_armv6.S
mru
parents: 8577
diff changeset
216 3: pop {r1} /* r1 = dest again */
4483
7a56dc39adef oops, revert accidental checkin
mru
parents: 4482
diff changeset
217 idct_finish_shift ROW_SHIFT /* combine A and B terms and shift right by ROW_SHIFT */
4427
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
218
8575
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
219 strh r4, [r1] /* (A0 + B0) >> ROW_SHIFT */
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
220 strh r5, [r1, #(16*2)] /* (A1 + B1) >> ROW_SHIFT */
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
221 strh r6, [r1, #(16*4)] /* (A2 + B2) >> ROW_SHIFT */
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
222 strh r7, [r1, #(16*6)] /* (A3 + B3) >> ROW_SHIFT */
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
223 strh r11,[r1, #(16*1)] /* (A3 - B3) >> ROW_SHIFT */
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
224 strh r10,[r1, #(16*3)] /* (A2 - B2) >> ROW_SHIFT */
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
225 strh r9, [r1, #(16*5)] /* (A1 - B1) >> ROW_SHIFT */
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
226 strh r8, [r1, #(16*7)] /* (A0 - B0) >> ROW_SHIFT */
4427
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
227
8578
db79dcbd5161 ARM: use push/pop pseudo-instructions in simple_idct_armv6.S
mru
parents: 8577
diff changeset
228 pop {pc} /* return via the saved lr */
4452
c66326f1f635 optimize IDCT of rows with mostly zero coefficients
mru
parents: 4434
diff changeset
229
8575
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
230 1: mov r2, r2, lsl #3 /* low halfword of r2 = row[0] << 3 (row[2] is zero on this path) */
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
231 strh r2, [r1] /* the same value goes to all eight output slots */
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
232 strh r2, [r1, #(16*2)]
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
233 strh r2, [r1, #(16*4)]
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
234 strh r2, [r1, #(16*6)]
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
235 strh r2, [r1, #(16*1)]
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
236 strh r2, [r1, #(16*3)]
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
237 strh r2, [r1, #(16*5)]
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
238 strh r2, [r1, #(16*7)]
8578
db79dcbd5161 ARM: use push/pop pseudo-instructions in simple_idct_armv6.S
mru
parents: 8577
diff changeset
239 pop {pc} /* return via the saved lr (r1 was not pushed on this path) */
4427
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
240 .endfunc
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
241
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
242 /*
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
243 Compute IDCT of single column, read as row.
8575
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
244 r0 = source
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
245 r1 = dest
4427
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
246 */
8069
316762ae96a7 ARM: use new macros for assembler function labels
mru
parents: 5220
diff changeset
247 function idct_col_armv6
8578
db79dcbd5161 ARM: use push/pop pseudo-instructions in simple_idct_armv6.S
mru
parents: 8577
diff changeset
248 push {r1, lr} /* save dest (idct_row overwrites r1) and the return address */
4427
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
249
8575
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
250 ldr r2, [r0] /* r2 = row[2,0] */
8576
636dc45f4779 ARM: clean up pc-relative references in simple_idct_armv6.S
mru
parents: 8575
diff changeset
251 ldr ip, w42 /* ip = W4 | (W2 << 16) */
8575
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
252 ldr r3, [r0, #8] /* r3 = row[3,1] */
4427
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
253 idct_row COL_SHIFT /* A0..A3 in r4..r7, B0, -B1, B2, B3 in r8..r11 */
8578
db79dcbd5161 ARM: use push/pop pseudo-instructions in simple_idct_armv6.S
mru
parents: 8577
diff changeset
254 pop {r1} /* r1 = dest again; the return address stays on the stack */
4427
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
255 idct_finish_shift COL_SHIFT /* combine A and B terms and shift right by COL_SHIFT */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
256
8575
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
257 strh r4, [r1] /* (A0 + B0) >> COL_SHIFT */
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
258 strh r5, [r1, #(16*1)] /* (A1 + B1) >> COL_SHIFT */
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
259 strh r6, [r1, #(16*2)] /* (A2 + B2) >> COL_SHIFT */
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
260 strh r7, [r1, #(16*3)] /* (A3 + B3) >> COL_SHIFT */
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
261 strh r11,[r1, #(16*4)] /* (A3 - B3) >> COL_SHIFT */
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
262 strh r10,[r1, #(16*5)] /* (A2 - B2) >> COL_SHIFT */
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
263 strh r9, [r1, #(16*6)] /* (A1 - B1) >> COL_SHIFT */
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
264 strh r8, [r1, #(16*7)] /* (A0 - B0) >> COL_SHIFT */
4427
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
265
8578
db79dcbd5161 ARM: use push/pop pseudo-instructions in simple_idct_armv6.S
mru
parents: 8577
diff changeset
266 pop {pc} /* return via the saved lr */
4427
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
267 .endfunc
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
268
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
269 /*
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
270 Compute IDCT of single column, read as row, store saturated 8-bit.
8575
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
271 r0 = source
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
272 r1 = dest
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
273 r2 = line size
4427
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
274 */
8069
316762ae96a7 ARM: use new macros for assembler function labels
mru
parents: 5220
diff changeset
275 function idct_col_put_armv6
8578
db79dcbd5161 ARM: use push/pop pseudo-instructions in simple_idct_armv6.S
mru
parents: 8577
diff changeset
276 push {r1, r2, lr} /* save dest and line size (both overwritten below) and the return address */
4427
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
277
8575
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
278 ldr r2, [r0] /* r2 = row[2,0] */
8576
636dc45f4779 ARM: clean up pc-relative references in simple_idct_armv6.S
mru
parents: 8575
diff changeset
279 ldr ip, w42 /* ip = W4 | (W2 << 16) */
8575
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
280 ldr r3, [r0, #8] /* r3 = row[3,1] */
4427
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
281 idct_row COL_SHIFT /* A0..A3 in r4..r7, B0, -B1, B2, B3 in r8..r11 */
8578
db79dcbd5161 ARM: use push/pop pseudo-instructions in simple_idct_armv6.S
mru
parents: 8577
diff changeset
282 pop {r1, r2} /* r1 = dest, r2 = line size again */
4427
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
283 idct_finish_shift_sat COL_SHIFT /* combine, shift right by COL_SHIFT and saturate to unsigned 8 bits */
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
284
8575
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
285 strb r4, [r1], r2 /* store one byte, then advance dest by the line size */
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
286 strb r5, [r1], r2
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
287 strb r6, [r1], r2
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
288 strb r7, [r1], r2
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
289 strb r11,[r1], r2
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
290 strb r10,[r1], r2
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
291 strb r9, [r1], r2
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
292 strb r8, [r1], r2
4427
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
293
8575
0b9dff3a1ce2 ARM: use rX register names in simple_idct_armv6.S
mru
parents: 8359
diff changeset
294 sub r1, r1, r2, lsl #3 /* r1 -= 8 * line size, undoing the eight post-increments */
4427
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
295
8578
db79dcbd5161 ARM: use push/pop pseudo-instructions in simple_idct_armv6.S
mru
parents: 8577
diff changeset
296 pop {pc} /* return via the saved lr */
4427
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
297 .endfunc
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
298
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
/*
  idct_col_add_armv6

  Compute IDCT of single column, read as row, add/store saturated 8-bit.

  In:
    r0 = source
    r1 = dest
    r2 = line size

  Each of the eight results is shifted right by COL_SHIFT, added to the
  byte already present in the matching destination line, saturated to
  0..255 with usat and written back.  Results are consumed from
  ip, r4, r5, r6, r10, r9, r8, lr (lines 0..7), which is presumably how
  idct_finish (defined elsewhere) leaves them.  r3, r7, r11 and, once
  their own result has been stored, ip and r4 serve as temporaries for
  the destination pixels.  Every destination line is loaded exactly once.

  lr carries the last result, so the return address is kept on the stack
  and the function returns with pop {pc}.  r1 is moved back by
  8 * line size before returning.
*/
function idct_col_add_armv6
        push    {r1, r2, lr}            /* r2 is reused for input data below */

        ldr     r2, [r0]                /* r2 = row[2,0] */
        ldr     ip, w42                 /* ip = W4 | (W2 << 16) */
        ldr     r3, [r0, #8]            /* r3 = row[3,1] */
        idct_row COL_SHIFT
        pop     {r1, r2}                /* dest and line size are needed again */
        idct_finish

        /* r1 -> line 0 */
        ldrb    r3, [r1]                /* r3  = pixel, line 0 */
        ldrb    r7, [r1, r2]            /* r7  = pixel, line 1 */
        add     ip, r3, ip, asr #COL_SHIFT
        usat    ip, #8, ip
        add     r4, r7, r4, asr #COL_SHIFT
        strb    ip, [r1], r2            /* line 0 written, r1 -> line 1 */
        ldrb    ip, [r1, r2]            /* ip  = pixel, line 2 */
        usat    r4, #8, r4
        ldrb    r11,[r1, r2, lsl #2]    /* r11 = pixel, line 5 */
        add     r5, ip, r5, asr #COL_SHIFT
        usat    r5, #8, r5
        strb    r4, [r1], r2            /* line 1 written, r1 -> line 2 */
        ldrb    r3, [r1, r2]            /* r3  = pixel, line 3 */
        ldrb    ip, [r1, r2, lsl #2]    /* ip  = pixel, line 6 */
        strb    r5, [r1], r2            /* line 2 written, r1 -> line 3 */
        ldrb    r7, [r1, r2]            /* r7  = pixel, line 4 */
        ldrb    r4, [r1, r2, lsl #2]    /* r4  = pixel, line 7 */
        add     r6, r3, r6, asr #COL_SHIFT
        usat    r6, #8, r6
        add     r10,r7, r10,asr #COL_SHIFT
        usat    r10,#8, r10
        add     r9, r11,r9, asr #COL_SHIFT
        usat    r9, #8, r9
        add     r8, ip, r8, asr #COL_SHIFT
        usat    r8, #8, r8
        add     lr, r4, lr, asr #COL_SHIFT
        usat    lr, #8, lr
        strb    r6, [r1], r2            /* line 3 */
        strb    r10,[r1], r2            /* line 4 */
        strb    r9, [r1], r2            /* line 5 */
        strb    r8, [r1], r2            /* line 6 */
        strb    lr, [r1], r2            /* line 7 */

        sub     r1, r1, r2, lsl #3      /* step back over the eight lines */

        pop     {pc}
.endfunc
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
353
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
/*
  idct_rows func, width

  Compute 8 IDCT row transforms.
    func  = IDCT row->col function
    width = width of columns in bytes

  \func is called eight times with r0 as source and r1 as destination.
  Relative to its entry value r0 takes the byte offsets
      0, 32, 64, 96, 16, 48, 80, 112
  in that order, and is back at its entry value when the macro ends.
  r1 is advanced by \width before every call except the first and is
  left 7 * \width past its entry value.

  This assumes \func returns with r0 and r1 unchanged.  The calls use bl,
  so lr is overwritten; the enclosing function must have saved it.
*/
.macro idct_rows func width
        bl      \func                   /* source offset 0 */

        .rept   3                       /* source offsets 32, 64, 96 */
        add     r0, r0, #(16*2)
        add     r1, r1, #\width
        bl      \func
        .endr

        sub     r0, r0, #(16*5)         /* 96 - 80: continue at offset 16 */
        add     r1, r1, #\width
        bl      \func

        .rept   3                       /* source offsets 48, 80, 112 */
        add     r0, r0, #(16*2)
        add     r1, r1, #\width
        bl      \func
        .endr

        sub     r0, r0, #(16*7)         /* back to the entry value of r0 */
.endm
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
385
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
/*
  void ff_simple_idct_armv6(DCTELEM *data);

  In-place transform of the block at r0 (data), done in two passes
  through a 128-byte scratch buffer on the stack:
    pass 1: idct_row_armv6, data    -> scratch
    pass 2: idct_col_armv6, scratch -> data
  r4-r11 and lr are saved on entry and restored on return.
*/
function ff_simple_idct_armv6, export=1
        push    {r4-r11, lr}
        sub     sp, sp, #128            /* scratch buffer */

        mov     r1, sp                  /* pass 1 writes to the scratch buffer */
        idct_rows idct_row_armv6, 2
        mov     r1, r0                  /* idct_rows left r0 = data: pass 2 writes there */
        mov     r0, sp                  /* ...and reads the scratch buffer */
        idct_rows idct_col_armv6, 2

        add     sp, sp, #128            /* release the scratch buffer */
        pop     {r4-r11, pc}
.endfunc
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
400
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
/*
  ff_simple_idct_add_armv6(uint8_t *dest, int line_size, DCTELEM *data);

  In:
    r0 = dest
    r1 = line_size
    r2 = data

  Pass 1 runs idct_row_armv6 from data into a 128-byte scratch buffer on
  the stack; pass 2 runs idct_col_add_armv6 from the scratch buffer onto
  dest, one byte column per call.

  Stack after the prologue:
    [sp, #0 .. #127]   scratch buffer
    [sp, #128]         dest       (pushed r0)
    [sp, #132]         line_size  (pushed r1)
    above that         r4-r11, lr
*/
function ff_simple_idct_add_armv6, export=1
        push    {r0, r1, r4-r11, lr}    /* r0/r1 are parked here for pass 2 */
        sub     sp, sp, #128            /* scratch buffer */

        mov     r0, r2                  /* pass 1: source = data */
        mov     r1, sp                  /*         dest   = scratch */
        idct_rows idct_row_armv6, 2
        mov     r0, sp                  /* pass 2: source = scratch */
        ldr     r1, [sp, #128]          /*         dest */
        ldr     r2, [sp, #(128+4)]      /*         line_size */
        idct_rows idct_col_add_armv6, 1

        add     sp, sp, #(128+8)        /* drop scratch plus the saved r0, r1 */
        pop     {r4-r11, pc}
.endfunc
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
417
765df9cbb2b3 ARMv6 SIMD IDCT
mru
parents:
diff changeset
/*
  ff_simple_idct_put_armv6(uint8_t *dest, int line_size, DCTELEM *data);

  In:
    r0 = dest
    r1 = line_size
    r2 = data

  Pass 1 runs idct_row_armv6 from data into a 128-byte scratch buffer on
  the stack; pass 2 runs idct_col_put_armv6 from the scratch buffer into
  dest, one byte column per call.

  Stack after the prologue:
    [sp, #0 .. #127]   scratch buffer
    [sp, #128]         dest       (pushed r0)
    [sp, #132]         line_size  (pushed r1)
    above that         r4-r11, lr
*/
function ff_simple_idct_put_armv6, export=1
        push    {r0, r1, r4-r11, lr}    /* r0/r1 are parked here for pass 2 */
        sub     sp, sp, #128            /* scratch buffer */

        mov     r0, r2                  /* pass 1: source = data */
        mov     r1, sp                  /*         dest   = scratch */
        idct_rows idct_row_armv6, 2
        mov     r0, sp                  /* pass 2: source = scratch */
        ldr     r1, [sp, #128]          /*         dest */
        ldr     r2, [sp, #(128+4)]      /*         line_size */
        idct_rows idct_col_put_armv6, 1

        add     sp, sp, #(128+8)        /* drop scratch plus the saved r0, r1 */
        pop     {r4-r11, pc}
.endfunc