Mercurial > libavcodec.hg
annotate bfin/vp3_idct_bfin.S @ 8404:60b6a780100b libavcodec
Port x264 deblocking code to libavcodec.
This includes SSE2 luma deblocking code and both MMXEXT and SSE2 luma
intra deblocking code for H.264 decoding. This assembly is available
under --enable-gpl and speeds decoding of Cathedral by 7%.
author | darkshikari |
---|---|
date | Fri, 19 Dec 2008 13:45:13 +0000 |
parents | 78aa57eba353 |
children | 8327c5b4df9b |
rev | line source |
---|---|
5776
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
1 /* |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
2 * vp3_idct BlackFin |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
3 * |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
4 * Copyright (C) 2007 Marc Hoffman <marc.hoffman@analog.com> |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
5 * |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
6 * This file is part of FFmpeg. |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
7 * |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
8 * FFmpeg is free software; you can redistribute it and/or |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
9 * modify it under the terms of the GNU Lesser General Public |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
10 * License as published by the Free Software Foundation; either |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
11 * version 2.1 of the License, or (at your option) any later version. |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
12 * |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
13 * FFmpeg is distributed in the hope that it will be useful, |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
16 * Lesser General Public License for more details. |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
17 * |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
18 * You should have received a copy of the GNU Lesser General Public |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
19 * License along with FFmpeg; if not, write to the Free Software |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
21 */ |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
22 /* |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
23 This blackfin DSP code implements an 8x8 inverse type II DCT. |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
24 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
25 Prototype : void ff_bfin_vp3_idct(DCTELEM *block) |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
26 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
27 Registers Used : A0, A1, R0-R7, I0-I3, B0, B2, B3, M0-M3, L0-L3, P0-P5, LC0. |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
28 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
29 */ |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
30 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
31 #include "config_bfin.h" |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
32 |
6362
78aa57eba353
FLAT objects cannot have multiple sections, so using the L1 attributes breaks
diego
parents:
5776
diff
changeset
|
33 #ifdef __FDPIC__ |
5776
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
34 .section .l1.data.B,"aw",@progbits |
6362
78aa57eba353
FLAT objects cannot have multiple sections, so using the L1 attributes breaks
diego
parents:
5776
diff
changeset
|
35 #else |
78aa57eba353
FLAT objects cannot have multiple sections, so using the L1 attributes breaks
diego
parents:
5776
diff
changeset
|
36 .data |
78aa57eba353
FLAT objects cannot have multiple sections, so using the L1 attributes breaks
diego
parents:
5776
diff
changeset
|
37 #endif |
5776
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
38 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
39 .align 4; |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
40 coefs: // cosine table, each entry = round(32768 * cos(angle)); read two .shorts at a time with R7=[I3++] |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
41 .short 0x5a82; // C4 = cos(pi/4) |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
42 .short 0x5a82; // C4 = cos(pi/4), duplicated so one 32-bit load puts C4 in both R7.H and R7.L |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
43 .short 0x30FC; // C6 = cos(3pi/8) |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
44 .short 0x7642; // C2 = cos(pi/8) |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
45 .short 0x18F9; // C7 = cos(7pi/16) |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
46 .short 0x7D8A; // C1 = cos(pi/16) |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
47 .short 0x471D; // C5 = cos(5pi/16) |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
48 .short 0x6A6E; // C3 = cos(3pi/16) |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
49 .short 0x18F9; // C7 = cos(7pi/16), repeated: the odd part reloads the (C1,C7) pair after (C3,C5) |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
50 .short 0x7D8A; // C1 = cos(pi/16), repeated; last of 10 entries (20 bytes, matching L3 = 20 for the circular read) |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
51 |
6362
78aa57eba353
FLAT objects cannot have multiple sections, so using the L1 attributes breaks
diego
parents:
5776
diff
changeset
|
52 #ifdef __FDPIC__ |
5776
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
53 .section .l1.data.A |
6362
78aa57eba353
FLAT objects cannot have multiple sections, so using the L1 attributes breaks
diego
parents:
5776
diff
changeset
|
54 #endif |
5776
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
55 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
56 vtmp: .space 256 // temporary matrix: written by the first 1-D pass, read back as input by the second |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
57 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
58 #define TMP0 FP-8 /* frame slot: R3 is stored here after the even-part butterflies and reloaded into R4 for write-out */ |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
59 #define TMP1 FP-12 /* frame slot: R2 is stored here at the start of the odd part and reloaded into R4 for write-out */ |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
60 #define TMP2 FP-16 /* frame slot: receives 0x8000 in the prologue; NOTE(review): no read of it is visible in this part of the file */ |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
61 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
62 |
6362
78aa57eba353
FLAT objects cannot have multiple sections, so using the L1 attributes breaks
diego
parents:
5776
diff
changeset
|
63 .text |
5776
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
64 DEFUN(vp3_idct,mL1, |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
65 (DCTELEM *block)): |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
66 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
67 /********************** Function Prologue *********************************/ |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
68 link 16; |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
69 [--SP] = (R7:4, P5:3); // Push the registers onto the stack. |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
70 B0 = R0; // Pointer to Input matrix |
6362
78aa57eba353
FLAT objects cannot have multiple sections, so using the L1 attributes breaks
diego
parents:
5776
diff
changeset
|
71 RELOC(R1, P3, coefs); // Pointer to Coefficients |
78aa57eba353
FLAT objects cannot have multiple sections, so using the L1 attributes breaks
diego
parents:
5776
diff
changeset
|
72 RELOC(R2, P3, vtmp); // Pointer to Temporary matrix |
5776
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
73 B3 = R1; |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
74 B2 = R2; |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
75 L3 = 20; // L3 is used for making the coefficient array |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
76 // circular. |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
77 // MUST BE RESTORED TO ZERO at function exit. |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
78 M1 = 16 (X); // All these registers are initialized for |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
79 M3 = 8(X); // modifying address offsets. |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
80 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
81 I0 = B0; // I0 points to Input Element (0, 0). |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
82 I2 = B0; // I2 points to Input Element (0, 0). |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
83 I2 += M3 || R0.H = W[I0]; |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
84 // Element 0 is read into R0.H |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
85 I1 = I2; // I1 points to input Element (0, 6). |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
86 I1 += 4 || R0.L = W[I2++]; |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
87 // I2 points to input Element (0, 4). |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
88 // Element 4 is read into R0.L. |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
89 P2 = 8 (X); |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
90 P3 = 32 (X); |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
91 P4 = -32 (X); |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
92 P5 = 98 (X); |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
93 R7 = 0x8000(Z); |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
94 I3 = B3; // I3 points to Coefficients |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
95 P0 = B2; // P0 points to array Element (0, 0) of temp |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
96 P1 = B2; |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
97 R7 = [I3++] || [TMP2]=R7; // Coefficient C4 is read into R7.H and R7.L. |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
98 MNOP; |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
99 NOP; |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
100 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
101 /* |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
102 * A1 = Y0 * cos(pi/4) |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
103 * A0 = Y0 * cos(pi/4) |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
104 * A1 = A1 + Y4 * cos(pi/4) |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
105 * A0 = A0 - Y4 * cos(pi/4) |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
106 * load: |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
107 * R1=(Y2,Y6) |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
108 * R7=(C2,C6) |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
109 * res: |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
110 * R3=Y0, R2=Y4 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
111 */ |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
112 A1=R7.H*R0.H, A0=R7.H*R0.H (IS) || I0+= 4 || R1.L=W[I1++]; |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
113 R3=(A1+=R7.H*R0.L), R2=(A0-=R7.H*R0.L) (IS) || R1.H=W[I0--] || R7=[I3++]; |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
114 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
115 LSETUP (.0, .1) LC0 = P2; // perform 8 1d idcts |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
116 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
117 P2 = 112 (X); |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
118 P1 = P1 + P2; // P1 points to element (7, 0) of temp buffer. |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
119 P2 = -94(X); |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
120 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
121 .0: |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
122 /* |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
123 * A1 = Y2 * cos(3pi/8) |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
124 * A0 = Y2 * cos(pi/8) |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
125 * A1 = A1 - Y6 * cos(pi/8) |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
126 * A0 = A0 + Y6 * cos(3pi/8) |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
127 * R5 = (Y1,Y7) |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
128 * R7 = (C1,C7) |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
129 * res: |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
130 * R1=Y2, R0=Y6 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
131 */ |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
132 A1=R7.L*R1.H, A0=R7.H*R1.H (IS) || I0+=4 || R5.H=W[I0]; |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
133 R1=(A1-=R7.H*R1.L), R0=(A0+=R7.L*R1.L) (IS) || R5.L=W[I1--] || R7=[I3++]; |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
134 /* |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
135 * Y0 = Y0 + Y6. |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
136 * Y4 = Y4 + Y2. |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
137 * Y2 = Y4 - Y2. |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
138 * Y6 = Y0 - Y6. |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
139 * R3 is saved |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
140 * R6.l=Y3 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
141 * note: R3: Y0, R2: Y4, R1: Y2, R0: Y6 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
142 */ |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
143 R3=R3+R0, R0=R3-R0; |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
144 R2=R2+R1, R1=R2-R1 || [TMP0]=R3 || R6.L=W[I0--]; |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
145 /* |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
146 * Compute the odd portion (1,3,5,7) even is done. |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
147 * |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
148 * Y1 = C7 * Y1 - C1 * Y7 + C3 * Y5 - C5 * Y3. |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
149 * Y7 = C1 * Y1 + C7 * Y7 + C5 * Y5 + C3 * Y3. |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
150 * Y5 = C5 * Y1 + C3 * Y7 + C7 * Y5 - C1 * Y3. |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
151 * Y3 = C3 * Y1 - C5 * Y7 - C1 * Y5 - C7 * Y3. |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
152 */ |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
153 // R5=(Y1,Y7) R6=(Y5,Y3) // R7=(C1,C7) |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
154 A1 =R7.L*R5.H, A0 =R7.H*R5.H (IS) || [TMP1]=R2 || R6.H=W[I2--]; |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
155 A1-=R7.H*R5.L, A0+=R7.L*R5.L (IS) || I0-=4 || R7=[I3++]; |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
156 A1+=R7.H*R6.H, A0+=R7.L*R6.H (IS) || I0+=M1; // R7=(C3,C5) |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
157 R3 =(A1-=R7.L*R6.L), R2 =(A0+=R7.H*R6.L) (IS); |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
158 A1 =R7.L*R5.H, A0 =R7.H*R5.H (IS) || R4=[TMP0]; |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
159 A1+=R7.H*R5.L, A0-=R7.L*R5.L (IS) || I1+=M1 || R7=[I3++]; // R7=(C1,C7) |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
160 A1+=R7.L*R6.H, A0-=R7.H*R6.H (IS); |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
161 R7 =(A1-=R7.H*R6.L), R6 =(A0-=R7.L*R6.L) (IS) || I2+=M1; |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
162 // R3=Y1, R2=Y7, R7=Y5, R6=Y3 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
163 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
164 /* Transpose write column. */ |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
165 R5.H=R4+R2 (RND12); // Y0=Y0+Y7 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
166 R5.L=R4-R2 (RND12) || R4 = [TMP1]; // Y7=Y7-Y0 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
167 R2.H=R1+R7 (RND12) || W[P0++P3]=R5.H; // Y2=Y2+Y5 st Y0 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
168 R2.L=R1-R7 (RND12) || W[P1++P4]=R5.L || R7=[I3++]; // Y5=Y2-Y5 st Y7 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
169 R5.H=R0-R3 (RND12) || W[P0++P3]=R2.H || R1.L=W[I1++]; // Y1=Y6-Y1 st Y2 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
170 R5.L=R0+R3 (RND12) || W[P1++P4]=R2.L || R0.H=W[I0++]; // Y6=Y6+Y1 st Y5 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
171 R3.H=R4-R6 (RND12) || W[P0++P3]=R5.H || R0.L=W[I2++]; // Y3=Y3-Y4 st Y1 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
172 R3.L=R4+R6 (RND12) || W[P1++P4]=R5.L || R1.H=W[I0++]; // Y4=Y3+Y4 st Y6 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
173 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
174 /* pipeline loop start, + drain Y3, Y4 */ |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
175 A1=R7.H*R0.H, A0=R7.H*R0.H (IS) || W[P0++P2]= R3.H || R1.H = W[I0--]; |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
176 .1: R3=(A1+=R7.H*R0.L), R2=(A0-=R7.H*R0.L) (IS) || W[P1++P5]= R3.L || R7 = [I3++]; |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
177 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
178 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
179 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
180 I0 = B2; // I0 points to Input Element (0, 0) |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
181 I2 = B2; // I2 points to Input Element (0, 0) |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
182 I2 += M3 || R0.H = W[I0]; |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
183 // Y0 is read in R0.H |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
184 I1 = I2; // I1 points to input Element (0, 6) |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
185 I1 += 4 || R0.L = W[I2++]; |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
186 // I2 points to input Element (0, 4) |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
187 // Y4 is read in R0.L |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
188 P2 = 8 (X); |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
189 I3 = B3; // I3 points to Coefficients |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
190 P0 = B0; // P0 points to array Element (0, 0) for writing |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
191 // output |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
192 P1 = B0; |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
193 R7 = [I3++]; // R7.H = C4 and R7.L = C4 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
194 NOP; |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
195 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
196 /* |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
197 * A1 = Y0 * cos(pi/4) |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
198 * A0 = Y0 * cos(pi/4) |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
199 * A1 = A1 + Y4 * cos(pi/4) |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
200 * A0 = A0 - Y4 * cos(pi/4) |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
201 * load: |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
202 * R1=(Y2,Y6) |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
203 * R7=(C2,C6) |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
204 * res: |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
205 * R3=Y0, R2=Y4 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
206 */ |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
207 A1=R7.H*R0.H, A0=R7.H*R0.H (IS) || I0+=4 || R1.L=W[I1++]; |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
208 R3=(A1+=R7.H*R0.L), R2=(A0-=R7.H*R0.L) (IS) || R1.H=W[I0--] || R7=[I3++]; |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
209 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
210 LSETUP (.2, .3) LC0 = P2; // perform 8 1d idcts |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
211 P2 = 112 (X); |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
212 P1 = P1 + P2; |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
213 P2 = -94(X); |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
214 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
215 .2: |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
216 /* |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
217 * A1 = Y2 * cos(3pi/8) |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
218 * A0 = Y2 * cos(pi/8) |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
219 * A1 = A1 - Y6 * cos(pi/8) |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
220 * A0 = A0 + Y6 * cos(3pi/8) |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
221 * R5 = (Y1,Y7) |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
222 * R7 = (C1,C7) |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
223 * res: |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
224 * R1=Y2, R0=Y6 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
225 */ |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
226 A1=R7.L*R1.H, A0=R7.H*R1.H (IS) || I0+=4 || R5.H=W[I0]; |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
227 R1=(A1-=R7.H*R1.L), R0=(A0+=R7.L*R1.L) (IS) || R5.L=W[I1--] || R7=[I3++]; |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
228 /* |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
229 * Y0 = Y0 + Y6. |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
230 * Y4 = Y4 + Y2. |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
231 * Y2 = Y4 - Y2. |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
232 * Y6 = Y0 - Y6. |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
233 * R3 is saved |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
234 * R6.l=Y3 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
235 * note: R3: Y0, R2: Y4, R1: Y2, R0: Y6 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
236 */ |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
237 R3=R3+R0, R0=R3-R0; |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
238 R2=R2+R1, R1=R2-R1 || [TMP0]=R3 || R6.L=W[I0--]; |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
239 /* |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
240 * Compute the odd portion (1,3,5,7) even is done. |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
241 * |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
242 * Y1 = C7 * Y1 - C1 * Y7 + C3 * Y5 - C5 * Y3. |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
243 * Y7 = C1 * Y1 + C7 * Y7 + C5 * Y5 + C3 * Y3. |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
244 * Y5 = C5 * Y1 + C3 * Y7 + C7 * Y5 - C1 * Y3. |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
245 * Y3 = C3 * Y1 - C5 * Y7 - C1 * Y5 - C7 * Y3. |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
246 */ |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
247 // R5=(Y1,Y7) R6=(Y5,Y3) // R7=(C1,C7) |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
248 A1 =R7.L*R5.H, A0 =R7.H*R5.H (IS) || [TMP1]=R2 || R6.H=W[I2--]; |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
249 A1-=R7.H*R5.L, A0+=R7.L*R5.L (IS) || I0-=4 || R7=[I3++]; |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
250 A1+=R7.H*R6.H, A0+=R7.L*R6.H (IS) || I0+=M1; // R7=(C3,C5) |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
251 R3 =(A1-=R7.L*R6.L), R2 =(A0+=R7.H*R6.L) (IS); |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
252 A1 =R7.L*R5.H, A0 =R7.H*R5.H (IS) || R4=[TMP0]; |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
253 A1+=R7.H*R5.L, A0-=R7.L*R5.L (IS) || I1+=M1 || R7=[I3++]; // R7=(C1,C7) |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
254 A1+=R7.L*R6.H, A0-=R7.H*R6.H (IS); |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
255 R7 =(A1-=R7.H*R6.L), R6 =(A0-=R7.L*R6.L) (IS) || I2+=M1; |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
256 // R3=Y1, R2=Y7, R7=Y5, R6=Y3 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
257 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
258 /* Transpose write column. */ |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
259 R5.H=R4+R2 (RND20); // Y0=Y0+Y7 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
260 R5.L=R4-R2 (RND20) || R4 = [TMP1]; // Y7=Y0-Y7 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
261 R5=R5>>>2(v); |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
262 R2.H=R1+R7 (RND20) || W[P0++P3]=R5.H; // Y2=Y2+Y5 st Y0 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
263 R2.L=R1-R7 (RND20) || W[P1++P4]=R5.L || R7=[I3++]; // Y5=Y2-Y5 st Y7 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
264 R2=R2>>>2(v); |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
265 R5.H=R0-R3 (RND20) || W[P0++P3]=R2.H || R1.L=W[I1++]; // Y1=Y6-Y1 st Y2 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
266 R5.L=R0+R3 (RND20) || W[P1++P4]=R2.L || R0.H=W[I0++]; // Y6=Y6+Y1 st Y5 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
267 R5=R5>>>2(v); |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
268 R3.H=R4-R6 (RND20) || W[P0++P3]=R5.H || R0.L=W[I2++]; // Y3=Y4-Y3 st Y1 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
269 R3.L=R4+R6 (RND20) || W[P1++P4]=R5.L || R1.H=W[I0++]; // Y4=Y3+Y4 st Y6 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
270 R3=R3>>>2(v); |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
271 /* pipeline loop start, + drain Y3, Y4 */ |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
272 A1=R7.H*R0.H, A0=R7.H*R0.H (IS) || W[P0++P2]= R3.H || R1.H = W[I0--]; |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
273 .3: R3=(A1+=R7.H*R0.L), R2=(A0-=R7.H*R0.L) (IS) || W[P1++P5]= R3.L || R7 = [I3++]; |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
274 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
275 L3 = 0; |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
276 (R7:4,P5:3)=[SP++]; |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
277 unlink; |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
278 RTS; |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
279 DEFUN_END(vp3_idct) |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
280 |
73ec16dbcbc1
blackfin optimized vp3 transform and infastructure for idct
mhoffman
parents:
diff
changeset
|
281 |