Mercurial > libavcodec.hg
annotate bfin/vp3_idct_bfin.S @ 12483:0159a19bfff7 libavcodec
aacdec: Rework channel mapping compatibility hacks.
For a PCE based configuration map the channels solely based on tags.
For an indexed configuration map the channels solely based on position.
This works with all known exotic samples including al17, elem_id0, bad_concat,
and lfe_is_sce.
author | alexc |
---|---|
date | Fri, 10 Sep 2010 18:01:48 +0000 |
parents | 8327c5b4df9b |
children |
rev | line source |
---|---|
5776
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
1 /* |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
2 * vp3_idct BlackFin |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
3 * |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
4 * Copyright (C) 2007 Marc Hoffman <marc.hoffman@analog.com> |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
5 * |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
6 * This file is part of FFmpeg. |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
7 * |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
8 * FFmpeg is free software; you can redistribute it and/or |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
9 * modify it under the terms of the GNU Lesser General Public |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
10 * License as published by the Free Software Foundation; either |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
11 * version 2.1 of the License, or (at your option) any later version. |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
12 * |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
13 * FFmpeg is distributed in the hope that it will be useful, |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
16 * Lesser General Public License for more details. |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
17 * |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
18 * You should have received a copy of the GNU Lesser General Public |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
19 * License along with FFmpeg; if not, write to the Free Software |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
21 */ |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
22 /* |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
23 This blackfin DSP code implements an 8x8 inverse type II DCT. |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
24 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
25 Prototype : void ff_bfin_vp3_idct(DCTELEM *in) |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
26 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
27 Registers Used : A0, A1, R0-R7, I0-I3, B0, B2, B3, M0-M2, L0-L3, P0-P5, LC0. |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
28 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
29 */ |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
30 |
11063 | 31 #include "config.h" |
5776
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
32 #include "config_bfin.h" |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
33 |
11063 | 34 #if defined(__FDPIC__) && CONFIG_SRAM |
5776
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
35 .section .l1.data.B,"aw",@progbits |
6362
78aa57eba353
FLAT objects cannot have multiple sections, so using the L1 attributes breaks
diego
parents:
5776
diff
changeset
|
36 #else |
78aa57eba353
FLAT objects cannot have multiple sections, so using the L1 attributes breaks
diego
parents:
5776
diff
changeset
|
37 .data |
78aa57eba353
FLAT objects cannot have multiple sections, so using the L1 attributes breaks
diego
parents:
5776
diff
changeset
|
38 #endif |
5776
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
39 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
40 .align 4; |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
41 coefs: |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
42 .short 0x5a82; // C4 = cos(pi/4) |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
43 .short 0x5a82; // C4 = cos(pi/4) |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
44 .short 0x30FC; //cos(3pi/8) C6 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
45 .short 0x7642; //cos(pi/8) C2 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
46 .short 0x18F9; //cos(7pi/16) |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
47 .short 0x7D8A; //cos(pi/16) |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
48 .short 0x471D; //cos(5pi/16) |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
49 .short 0x6A6E; //cos(3pi/16) |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
50 .short 0x18F9; //cos(7pi/16) |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
51 .short 0x7D8A; //cos(pi/16) |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
52 |
11063 | 53 #if defined(__FDPIC__) && CONFIG_SRAM |
5776
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
54 .section .l1.data.A |
6362
78aa57eba353
FLAT objects cannot have multiple sections, so using the L1 attributes breaks
diego
parents:
5776
diff
changeset
|
55 #endif |
5776
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
56 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
57 vtmp: .space 256 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
58 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
59 #define TMP0 FP-8 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
60 #define TMP1 FP-12 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
61 #define TMP2 FP-16 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
62 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
63 |
6362
78aa57eba353
FLAT objects cannot have multiple sections, so using the L1 attributes breaks
diego
parents:
5776
diff
changeset
|
64 .text |
5776
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
65 DEFUN(vp3_idct,mL1, |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
66 (DCTELEM *block)): |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
67 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
68 /********************** Function Prologue *********************************/ |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
69 link 16; |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
70 [--SP] = (R7:4, P5:3); // Push the registers onto the stack. |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
71 B0 = R0; // Pointer to Input matrix |
6362
78aa57eba353
FLAT objects cannot have multiple sections, so using the L1 attributes breaks
diego
parents:
5776
diff
changeset
|
72 RELOC(R1, P3, coefs); // Pointer to Coefficients |
78aa57eba353
FLAT objects cannot have multiple sections, so using the L1 attributes breaks
diego
parents:
5776
diff
changeset
|
73 RELOC(R2, P3, vtmp); // Pointer to Temporary matrix |
5776
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
74 B3 = R1; |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
75 B2 = R2; |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
76 L3 = 20; // L3 is used for making the coefficient array |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
77 // circular. |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
78 // MUST BE RESTORED TO ZERO at function exit. |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
79 M1 = 16 (X); // All these registers are initialized for |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
80 M3 = 8(X); // modifying address offsets. |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
81 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
82 I0 = B0; // I0 points to Input Element (0, 0). |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
83 I2 = B0; // I2 points to Input Element (0, 0). |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
84 I2 += M3 || R0.H = W[I0]; |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
85 // Element 0 is read into R0.H |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
86 I1 = I2; // I1 points to input Element (0, 6). |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
87 I1 += 4 || R0.L = W[I2++]; |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
88 // I2 points to input Element (0, 4). |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
89 // Element 4 is read into R0.L. |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
90 P2 = 8 (X); |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
91 P3 = 32 (X); |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
92 P4 = -32 (X); |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
93 P5 = 98 (X); |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
94 R7 = 0x8000(Z); |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
95 I3 = B3; // I3 points to Coefficients |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
96 P0 = B2; // P0 points to array Element (0, 0) of temp |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
97 P1 = B2; |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
98 R7 = [I3++] || [TMP2]=R7; // Coefficient C4 is read into R7.H and R7.L. |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
99 MNOP; |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
100 NOP; |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
101 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
102 /* |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
103 * A1 = Y0 * cos(pi/4) |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
104 * A0 = Y0 * cos(pi/4) |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
105 * A1 = A1 + Y4 * cos(pi/4) |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
106 * A0 = A0 - Y4 * cos(pi/4) |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
107 * load: |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
108 * R1=(Y2,Y6) |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
109 * R7=(C2,C6) |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
110 * res: |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
111 * R3=Y0, R2=Y4 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
112 */ |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
113 A1=R7.H*R0.H, A0=R7.H*R0.H (IS) || I0+= 4 || R1.L=W[I1++]; |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
114 R3=(A1+=R7.H*R0.L), R2=(A0-=R7.H*R0.L) (IS) || R1.H=W[I0--] || R7=[I3++]; |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
115 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
116 LSETUP (.0, .1) LC0 = P2; // perform 8 1d idcts |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
117 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
118 P2 = 112 (X); |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
119 P1 = P1 + P2; // P1 points to element (7, 0) of temp buffer. |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
120 P2 = -94(X); |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
121 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
122 .0: |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
123 /* |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
124 * A1 = Y2 * cos(3pi/8) |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
125 * A0 = Y2 * cos(pi/8) |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
126 * A1 = A1 - Y6 * cos(pi/8) |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
127 * A0 = A0 + Y6 * cos(3pi/8) |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
128 * R5 = (Y1,Y7) |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
129 * R7 = (C1,C7) |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
130 * res: |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
131 * R1=Y2, R0=Y6 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
132 */ |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
133 A1=R7.L*R1.H, A0=R7.H*R1.H (IS) || I0+=4 || R5.H=W[I0]; |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
134 R1=(A1-=R7.H*R1.L), R0=(A0+=R7.L*R1.L) (IS) || R5.L=W[I1--] || R7=[I3++]; |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
135 /* |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
136 * Y0 = Y0 + Y6. |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
137 * Y4 = Y4 + Y2. |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
138 * Y2 = Y4 - Y2. |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
139 * Y6 = Y0 - Y6. |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
140 * R3 is saved |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
141 * R6.l=Y3 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
142 * note: R3: Y0, R2: Y4, R1: Y2, R0: Y6 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
143 */ |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
144 R3=R3+R0, R0=R3-R0; |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
145 R2=R2+R1, R1=R2-R1 || [TMP0]=R3 || R6.L=W[I0--]; |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
146 /* |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
147 * Compute the odd portion (1,3,5,7) even is done. |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
148 * |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
149 * Y1 = C7 * Y1 - C1 * Y7 + C3 * Y5 - C5 * Y3. |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
150 * Y7 = C1 * Y1 + C7 * Y7 + C5 * Y5 + C3 * Y3. |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
151 * Y5 = C5 * Y1 + C3 * Y7 + C7 * Y5 - C1 * Y3. |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
152 * Y3 = C3 * Y1 - C5 * Y7 - C1 * Y5 - C7 * Y3. |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
153 */ |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
154 // R5=(Y1,Y7) R6=(Y5,Y3) // R7=(C1,C7) |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
155 A1 =R7.L*R5.H, A0 =R7.H*R5.H (IS) || [TMP1]=R2 || R6.H=W[I2--]; |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
156 A1-=R7.H*R5.L, A0+=R7.L*R5.L (IS) || I0-=4 || R7=[I3++]; |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
157 A1+=R7.H*R6.H, A0+=R7.L*R6.H (IS) || I0+=M1; // R7=(C3,C5) |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
158 R3 =(A1-=R7.L*R6.L), R2 =(A0+=R7.H*R6.L) (IS); |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
159 A1 =R7.L*R5.H, A0 =R7.H*R5.H (IS) || R4=[TMP0]; |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
160 A1+=R7.H*R5.L, A0-=R7.L*R5.L (IS) || I1+=M1 || R7=[I3++]; // R7=(C1,C7) |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
161 A1+=R7.L*R6.H, A0-=R7.H*R6.H (IS); |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
162 R7 =(A1-=R7.H*R6.L), R6 =(A0-=R7.L*R6.L) (IS) || I2+=M1; |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
163 // R3=Y1, R2=Y7, R7=Y5, R6=Y3 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
164 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
165 /* Transpose write column. */ |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
166 R5.H=R4+R2 (RND12); // Y0=Y0+Y7 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
167 R5.L=R4-R2 (RND12) || R4 = [TMP1]; // Y7=Y0-Y7 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
168 R2.H=R1+R7 (RND12) || W[P0++P3]=R5.H; // Y2=Y2+Y5 st Y0 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
169 R2.L=R1-R7 (RND12) || W[P1++P4]=R5.L || R7=[I3++]; // Y5=Y2-Y5 st Y7 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
170 R5.H=R0-R3 (RND12) || W[P0++P3]=R2.H || R1.L=W[I1++]; // Y1=Y6-Y1 st Y2 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
171 R5.L=R0+R3 (RND12) || W[P1++P4]=R2.L || R0.H=W[I0++]; // Y6=Y6+Y1 st Y5 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
172 R3.H=R4-R6 (RND12) || W[P0++P3]=R5.H || R0.L=W[I2++]; // Y3=Y4-Y3 st Y1 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
173 R3.L=R4+R6 (RND12) || W[P1++P4]=R5.L || R1.H=W[I0++]; // Y4=Y3+Y4 st Y6 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
174 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
175 /* pipeline loop start, + drain Y3, Y4 */ |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
176 A1=R7.H*R0.H, A0=R7.H*R0.H (IS) || W[P0++P2]= R3.H || R1.H = W[I0--]; |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
177 .1: R3=(A1+=R7.H*R0.L), R2=(A0-=R7.H*R0.L) (IS) || W[P1++P5]= R3.L || R7 = [I3++]; |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
178 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
179 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
180 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
181 I0 = B2; // I0 points to Input Element (0, 0) |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
182 I2 = B2; // I2 points to Input Element (0, 0) |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
183 I2 += M3 || R0.H = W[I0]; |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
184 // Y0 is read in R0.H |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
185 I1 = I2; // I1 points to input Element (0, 6) |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
186 I1 += 4 || R0.L = W[I2++]; |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
187 // I2 points to input Element (0, 4) |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
188 // Y4 is read in R0.L |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
189 P2 = 8 (X); |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
190 I3 = B3; // I3 points to Coefficients |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
191 P0 = B0; // P0 points to array Element (0, 0) for writing |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
192 // output |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
193 P1 = B0; |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
194 R7 = [I3++]; // R7.H = C4 and R7.L = C4 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
195 NOP; |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
196 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
197 /* |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
198 * A1 = Y0 * cos(pi/4) |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
199 * A0 = Y0 * cos(pi/4) |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
200 * A1 = A1 + Y4 * cos(pi/4) |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
201 * A0 = A0 - Y4 * cos(pi/4) |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
202 * load: |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
203 * R1=(Y2,Y6) |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
204 * R7=(C2,C6) |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
205 * res: |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
206 * R3=Y0, R2=Y4 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
207 */ |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
208 A1=R7.H*R0.H, A0=R7.H*R0.H (IS) || I0+=4 || R1.L=W[I1++]; |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
209 R3=(A1+=R7.H*R0.L), R2=(A0-=R7.H*R0.L) (IS) || R1.H=W[I0--] || R7=[I3++]; |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
210 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
211 LSETUP (.2, .3) LC0 = P2; // perform 8 1d idcts |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
212 P2 = 112 (X); |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
213 P1 = P1 + P2; |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
214 P2 = -94(X); |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
215 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
216 .2: |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
217 /* |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
218 * A1 = Y2 * cos(3pi/8) |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
219 * A0 = Y2 * cos(pi/8) |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
220 * A1 = A1 - Y6 * cos(pi/8) |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
221 * A0 = A0 + Y6 * cos(3pi/8) |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
222 * R5 = (Y1,Y7) |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
223 * R7 = (C1,C7) |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
224 * res: |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
225 * R1=Y2, R0=Y6 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
226 */ |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
227 A1=R7.L*R1.H, A0=R7.H*R1.H (IS) || I0+=4 || R5.H=W[I0]; |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
228 R1=(A1-=R7.H*R1.L), R0=(A0+=R7.L*R1.L) (IS) || R5.L=W[I1--] || R7=[I3++]; |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
229 /* |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
230 * Y0 = Y0 + Y6. |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
231 * Y4 = Y4 + Y2. |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
232 * Y2 = Y4 - Y2. |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
233 * Y6 = Y0 - Y6. |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
234 * R3 is saved |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
235 * R6.l=Y3 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
236 * note: R3: Y0, R2: Y4, R1: Y2, R0: Y6 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
237 */ |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
238 R3=R3+R0, R0=R3-R0; |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
239 R2=R2+R1, R1=R2-R1 || [TMP0]=R3 || R6.L=W[I0--]; |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
240 /* |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
241 * Compute the odd portion (1,3,5,7); the even portion is done. |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
242 * |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
243 * Y1 = C7 * Y1 - C1 * Y7 + C3 * Y5 - C5 * Y3. |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
244 * Y7 = C1 * Y1 + C7 * Y7 + C5 * Y5 + C3 * Y3. |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
245 * Y5 = C5 * Y1 + C3 * Y7 + C7 * Y5 - C1 * Y3. |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
246 * Y3 = C3 * Y1 - C5 * Y7 - C1 * Y5 - C7 * Y3. |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
247 */ |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
248 // R5=(Y1,Y7) R6=(Y5,Y3) // R7=(C1,C7) |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
249 A1 =R7.L*R5.H, A0 =R7.H*R5.H (IS) || [TMP1]=R2 || R6.H=W[I2--]; |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
250 A1-=R7.H*R5.L, A0+=R7.L*R5.L (IS) || I0-=4 || R7=[I3++]; |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
251 A1+=R7.H*R6.H, A0+=R7.L*R6.H (IS) || I0+=M1; // R7=(C3,C5) |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
252 R3 =(A1-=R7.L*R6.L), R2 =(A0+=R7.H*R6.L) (IS); |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
253 A1 =R7.L*R5.H, A0 =R7.H*R5.H (IS) || R4=[TMP0]; |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
254 A1+=R7.H*R5.L, A0-=R7.L*R5.L (IS) || I1+=M1 || R7=[I3++]; // R7=(C1,C7) |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
255 A1+=R7.L*R6.H, A0-=R7.H*R6.H (IS); |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
256 R7 =(A1-=R7.H*R6.L), R6 =(A0-=R7.L*R6.L) (IS) || I2+=M1; |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
257 // R3=Y1, R2=Y7, R7=Y5, R6=Y3 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
258 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
259 /* Transpose write column. */ |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
260 R5.H=R4+R2 (RND20); // Y0=Y0+Y7 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
261 R5.L=R4-R2 (RND20) || R4 = [TMP1]; // Y7=Y0-Y7 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
262 R5=R5>>>2(v); |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
263 R2.H=R1+R7 (RND20) || W[P0++P3]=R5.H; // Y2=Y2+Y5 st Y0 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
264 R2.L=R1-R7 (RND20) || W[P1++P4]=R5.L || R7=[I3++]; // Y5=Y2-Y5 st Y7 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
265 R2=R2>>>2(v); |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
266 R5.H=R0-R3 (RND20) || W[P0++P3]=R2.H || R1.L=W[I1++]; // Y1=Y6-Y1 st Y2 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
267 R5.L=R0+R3 (RND20) || W[P1++P4]=R2.L || R0.H=W[I0++]; // Y6=Y6+Y1 st Y5 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
268 R5=R5>>>2(v); |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
269 R3.H=R4-R6 (RND20) || W[P0++P3]=R5.H || R0.L=W[I2++]; // Y3=Y4-Y3 st Y1 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
270 R3.L=R4+R6 (RND20) || W[P1++P4]=R5.L || R1.H=W[I0++]; // Y4=Y3+Y4 st Y6 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
271 R3=R3>>>2(v); |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
272 /* pipeline loop start, + drain Y3, Y4 */ |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
273 A1=R7.H*R0.H, A0=R7.H*R0.H (IS) || W[P0++P2]= R3.H || R1.H = W[I0--]; |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
274 .3: R3=(A1+=R7.H*R0.L), R2=(A0-=R7.H*R0.L) (IS) || W[P1++P5]= R3.L || R7 = [I3++]; |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
275 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
276 L3 = 0; |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
277 (R7:4,P5:3)=[SP++]; |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
278 unlink; |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
279 RTS; |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
280 DEFUN_END(vp3_idct) |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
281 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
282 |