Mercurial > libavcodec.hg
annotate bfin/fdct_bfin.S @ 12197:fbf4d5b1b664 libavcodec
Remove FF_MM_SSE2/3 flags for CPUs where SSE2/3 code is generally not faster than
regular MMX code. One example of this is the Core1 CPU. Instead, set a new flag,
FF_MM_SSE2/3SLOW, which can be checked for particular SSE2/3 functions that
have been checked specifically on such CPUs and are actually faster than
their MMX counterparts.
In addition, use this flag to enable particular VP8 and LPC SSE2 functions
that are faster than their MMX counterparts.
Based on a patch by Loren Merritt <lorenm AT u washington edu>.
author | rbultje |
---|---|
date | Mon, 19 Jul 2010 22:38:23 +0000 |
parents | 8327c5b4df9b |
children |
rev | line source |
---|---|
4765
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
1 /* |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
2 * fdct BlackFin |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
3 * |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
4 * Copyright (C) 2007 Marc Hoffman <marc.hoffman@analog.com> |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
5 * |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
6 * This file is part of FFmpeg. |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
7 * |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
8 * FFmpeg is free software; you can redistribute it and/or |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
9 * modify it under the terms of the GNU Lesser General Public |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
10 * License as published by the Free Software Foundation; either |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
11 * version 2.1 of the License, or (at your option) any later version. |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
12 * |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
13 * FFmpeg is distributed in the hope that it will be useful, |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
16 * Lesser General Public License for more details. |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
17 * |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
18 * You should have received a copy of the GNU Lesser General Public |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
19 * License along with FFmpeg; if not, write to the Free Software |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
21 */ |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
22 /* |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
23 void ff_bfin_fdct (DCTELEM *buf); |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
24 |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
25 This implementation works only for 8x8 input. The range of input |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
26 must be -256 to 255 i.e. 8bit input represented in a 16bit data |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
27 word. The original data must be sign extended into the 16bit data |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
28 words. |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
29 |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
30 |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
31 Chen factorization of |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
32 |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
33 7 |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
34 X(m) = sum (x(n) * cos ((2n+1)*m*pi/16)) |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
35 n=0 |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
36 |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
37 C4 |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
38 0 --*-------------*0+7---*-----*0+3-------*-*-------------------> 0 |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
39 \ / \ / X S4,S4 |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
40 1 --*-\---------/-*1+6---*-\-/-*1+2-------*-*-------------------> 4 |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
41 \ / \ -C4 C3 |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
42 2 --*---\-----/---*2+5---*-/-\-*1-2---------------*-*-----------> 2 |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
43 \ / / \ X S3,-S3 |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
44 3 --*-----\-/-----*3+4---*-----*0-3---------------*-*-----------> 6 |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
45 / C7 C3 |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
46 4 --*-----/-\-----*3-4------------*-*4+5--*-----*---------------> 1 |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
47 / \ -C4 X \ /S7 C3 |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
48 5 --*---/-----\---*2-5---*-*------*=*4-5----\-/------*-*--------> 5 |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
49 / \ X S4,S4 / X S3,-S3 |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
50 6 --*-/---------\-*1-6---*-*------*=*7-6----/-\------*-*--------> 3 |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
51 / \ C4 X / \-S7 C3 |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
52 7 --*-------------*0-7------------*-*7+6--*-----*---------------> 7 |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
53 C7 |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
54 |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
55 Notation |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
56 Cn = cos(n*pi/16) and Sn = sin(n*pi/16) are used throughout the code. |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
57 |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
58 |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
59 Registers used: |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
60 R0, R1, R2, R3, R4, R5, R6,R7, P0, P1, P2, P3, P4, P5, A0, A1. |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
61 Other registers used: |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
62 I0, I1, I2, I3, B0, B2, B3, M0, M1, L3 registers and LC0. |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
63 |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
64 Input - r0 - pointer to start of DCTELEM *block |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
65 |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
66 Output - The DCT output coefficients in the DCTELEM *block |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
67 |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
68 Register constraint: |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
69 This code is called from jpeg_encode. |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
70 R6, R5, R4 if modified should be stored and restored. |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
71 |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
72 |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
73 Performance: (Timer version 0.6.33) |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
74 Code Size : 240 Bytes. |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
75 Memory Required : |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
76 Input Matrix : 8 * 8 * 2 Bytes. |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
77 Coefficients : 16 Bytes |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
78 Temporary matrix: 8 * 8 * 2 Bytes. |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
79 Cycle Count :26+{18+8*(14+2S)}*2 where S -> Stalls |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
80 (7.45 c/pel) |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
81 ----------------------------------------- |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
82 | Size | Forward DCT | Inverse DCT | |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
83 ----------------------------------------- |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
84 | 8x8 | 284 Cycles | 311 Cycles | |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
85 ----------------------------------------- |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
86 |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
87 Ck = int16(cos(k/16*pi)*32767+.5)/2 (except C4, which is stored without the /2) |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
88 #define C4 23170 |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
89 #define C3 13623 |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
90 #define C6 6270 |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
91 #define C7 3196 |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
92 |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
93 Sk = int16(sin(k/16*pi)*32767+.5)/2 |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
94 #define S4 11585 |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
95 #define S3 9102 |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
96 #define S6 15137 |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
97 #define S7 16069 |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
98 |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
99 the coefficients are ordered as follows: |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
100 short dct_coef[] |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
101 C4,S4, |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
102 C6,S6, |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
103 C7,S7, |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
104 S3,C3, |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
105 |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
106 ----------------------------------------------------------- |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
107 FFMPEG conformance testing results |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
108 ----------------------------------------------------------- |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
109 dct-test: modified with the following |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
110 dct_error("BFINfdct", 0, ff_bfin_fdct, fdct, test); |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
111 produces the following output: |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
112 |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
113 root:/u/ffmpeg/bhead/libavcodec> ./dct-test |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
114 ffmpeg DCT/IDCT test |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
115 |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
116 2 -131 -6 -48 -36 33 -83 24 |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
117 34 52 -24 -15 5 92 57 143 |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
118 -67 -43 -1 74 -16 5 -71 32 |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
119 -78 106 92 -34 -38 81 20 -18 |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
120 7 -62 40 2 -15 90 -62 -83 |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
121 -83 1 -104 -13 43 -19 7 11 |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
122 -63 31 12 -29 83 72 21 10 |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
123 -17 -63 -15 73 50 -91 159 -14 |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
124 DCT BFINfdct: err_inf=2 err2=0.16425938 syserr=0.00795000 maxout=2098 blockSumErr=27 |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
125 DCT BFINfdct: 92.1 kdct/s |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
126 root:/u/ffmpeg/bhead/libavcodec> |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
127 |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
128 */ |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
129 |
11063 | 130 #include "config.h" |
4765
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
131 #include "config_bfin.h" |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
132 |
11063 | 133 #if defined(__FDPIC__) && CONFIG_SRAM |
4765
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
134 .section .l1.data.B,"aw",@progbits |
6362
78aa57eba353
FLAT objects cannot have multiple sections, so using the L1 attributes breaks
diego
parents:
5001
diff
changeset
|
135 #else |
78aa57eba353
FLAT objects cannot have multiple sections, so using the L1 attributes breaks
diego
parents:
5001
diff
changeset
|
136 .data |
78aa57eba353
FLAT objects cannot have multiple sections, so using the L1 attributes breaks
diego
parents:
5001
diff
changeset
|
137 #endif |
4765
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
138 .align 4; |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
139 dct_coeff: |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
140 .short 0x5a82, 0x2d41, 0x187e, 0x3b21, 0x0c7c, 0x3ec5, 0x238e, 0x3537; |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
141 |
11063 | 142 #if defined(__FDPIC__) && CONFIG_SRAM |
4765
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
143 .section .l1.data.A,"aw",@progbits |
6362
78aa57eba353
FLAT objects cannot have multiple sections, so using the L1 attributes breaks
diego
parents:
5001
diff
changeset
|
144 #endif |
4765
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
145 .align 4 |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
146 vtmp: .space 128 |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
147 |
6362
78aa57eba353
FLAT objects cannot have multiple sections, so using the L1 attributes breaks
diego
parents:
5001
diff
changeset
|
148 .text |
4765
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
149 DEFUN(fdct,mL1, |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
150 (DCTELEM *block)): |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
151 [--SP] = (R7:4, P5:3); // Push the registers onto the stack. |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
152 |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
153 b0 = r0; |
6362
78aa57eba353
FLAT objects cannot have multiple sections, so using the L1 attributes breaks
diego
parents:
5001
diff
changeset
|
154 RELOC(r0, P3, dct_coeff); |
4765
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
155 b3 = r0; |
6362
78aa57eba353
FLAT objects cannot have multiple sections, so using the L1 attributes breaks
diego
parents:
5001
diff
changeset
|
156 RELOC(r0, P3, vtmp); |
4765
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
157 b2 = r0; |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
158 |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
159 L3 = 16; // L3 is set to 16 to make the coefficient |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
160 // array Circular. |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
161 |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
162 |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
163 //---------------------------------------------------------------------------- |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
164 |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
165 /* |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
166 * I0, I1, and I2 registers are used to read the input data. I3 register is used |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
167 * to read the coefficients. P0 and P1 registers are used for writing the output |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
168 * data. |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
169 */ |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
170 M0 = 12 (X); // All these initializations are used in the |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
171 M1 = 16 (X); // modification of address offsets. |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
172 |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
173 M2 = 128 (X); |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
174 |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
175 P2 = 16; |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
176 P3 = 32 (X); |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
177 P4 = -110 (X); |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
178 P5 = -62 (X); |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
179 P0 = 2(X); |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
180 |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
181 |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
182 // Prescale the input to get the correct precision. |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
183 i0=b0; |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
184 i1=b0; |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
185 |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
186 lsetup (.0, .1) LC0 = P3; |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
187 r0=[i0++]; |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
188 .0: r1=r0<<3 (v) || r0=[i0++] ; |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
189 .1: [i1++]=r1; |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
190 |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
191 /* |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
192 * B0 points to the "in" buffer. |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
193 * B2 points to "temp" buffer in the first iteration. |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
194 */ |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
195 |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
196 lsetup (.2, .3) LC0 = P0; |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
197 .2: |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
198 I0 = B0; // I0 points to Input Element (0, 0). |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
199 I1 = B0; // Element 1 and 0 is read in R0. |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
200 I1 += M0 || R0 = [I0++]; // I1 points to Input Element (0, 6). |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
201 I2 = I1; // Element 6 is read into R3.H. |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
202 I2 -= 4 || R3.H = W[I1++]; // I2 points to Input Element (0, 4). |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
203 |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
204 I3 = B3; // I3 points to Coefficients. |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
205 P0 = B2; // P0 points to temporary array Element |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
206 // (0, 0). |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
207 P1 = B2; // P1 points to temporary array. |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
208 R7 = [P1++P2] || R2 = [I2++]; // P1 points to temporary array |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
209 // Element (1, 0). |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
210 // R7 is a dummy read. X4,X5 |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
211 // are read into R2. |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
212 R3.L = W[I1--]; // X7 is read into R3.L. |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
213 R1.H = W[I0++]; // X2 is read into R1.H. |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
214 |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
215 |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
216 /* |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
217 * X0 = (X0 + X7) / 2. |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
218 * X1 = (X1 + X6) / 2. |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
219 * X6 = (X1 - X6) / 2. |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
220 * X7 = (X0 - X7) / 2. |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
221 * X3 is read into R1.L. |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
222 */ |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
223 |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
224 R0 = R0 +|+ R3, R3 = R0 -|- R3 || R1.L = W[I0++] || NOP; |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
225 |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
226 /* |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
227 * X2 = (X2 + X5) / 2. |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
228 * X3 = (X3 + X4) / 2. |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
229 * X4 = (X3 - X4) / 2. |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
230 * X5 = (X2 - X5) / 2. |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
231 * R7 = C4 = cos(4*pi/16) |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
232 */ |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
233 |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
234 R1 = R1 +|+ R2, R2 = R1 -|- R2 (CO) || NOP || R7 = [I3++]; |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
235 |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
236 /* |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
237 * At the end of stage 1 R0 has (1,0), R1 has (2,3), R2 has (4, 5) and |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
238 * R3 has (6,7). |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
239 * Where the notation (x, y) represents upper/lower half pairs. |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
240 */ |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
241 |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
242 /* |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
243 * X0 = X0 + X3. |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
244 * X1 = X1 + X2. |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
245 * X2 = X1 - X2. |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
246 * X3 = X0 - X3. |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
247 */ |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
248 R0 = R0 +|+ R1, R1 = R0 -|- R1; |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
249 |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
250 lsetup (.row0, .row1) LC1 = P2 >> 1; // 1d dct, loops 8x |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
251 .row0: |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
252 |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
253 /* |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
254 * This is part 2 computation continued..... |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
255 * A1 = X6 * cos(pi/4) |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
256 * A0 = X6 * cos(pi/4) |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
257 * A1 = A1 - X5 * cos(pi/4) |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
258 * A0 = A0 + X5 * cos(pi/4). |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
259 * The instruction W[I0] = R3.L is used for packing it to R2.L. |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
260 */ |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
261 |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
262 A1=R3.H*R7.l, A0=R3.H*R7.l || I1+=M1 || W[I0] = R3.L; |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
263 R4.H=(A1-=R2.L*R7.l), R4.L=(A0+=R2.L*R7.l) || I2+=M0 || NOP; |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
264 |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
265 /* R0 = (X1,X0) R1 = (X2,X3) R4 = (X5, X6). */ |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
266 |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
267 /* |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
268 * A1 = X0 * cos(pi/4) |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
269 * A0 = X0 * cos(pi/4) |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
270 * A1 = A1 - X1 * cos(pi/4) |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
271 * A0 = A0 + X1 * cos(pi/4) |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
272 * R7 = (C2,C6) |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
273 */ |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
274 A1=R0.L*R7.h, A0=R0.L*R7.h || NOP || R3.H=W[I1++]; |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
275 R5.H=(A1-=R0.H*R7.h),R5.L=(A0+=R0.H*R7.h) || R7=[I3++] || NOP; |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
276 |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
277 /* |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
278 * A1 = X2 * cos(3pi/8) |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
279 * A0 = X3 * cos(3pi/8) |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
280 * A1 = A1 + X3 * cos(pi/8) |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
281 * A0 = A0 - X2 * cos(pi/8) |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
282 * R3 = cos(pi/4) |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
283 * R7 = (cos(7pi/8),cos(pi/8)) |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
284 * X4 = X4 + X5. |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
285 * X5 = X4 - X5. |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
286 * X6 = X7 - X6. |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
287 * X7 = X7 + X6. |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
288 */ |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
289 A1=R1.H*R7.L, A0=R1.L*R7.L || W[P0++P3]=R5.L || R2.L=W[I0]; |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
290 R2=R2+|+R4, R4=R2-|-R4 || I0+=4 || R3.L=W[I1--]; |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
291 R6.H=(A1+=R1.L*R7.H),R6.L=(A0 -= R1.H * R7.H) || I0+=4 || R7=[I3++]; |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
292 |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
293 /* R2 = (X4, X7) R4 = (X5,X6) R5 = (X1, X0) R6 = (X2,X3). */ |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
294 |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
295 /* |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
296 * A1 = X4 * cos(7pi/16) |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
297 * A0 = X7 * cos(7pi/16) |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
298 * A1 = A1 + X7 * cos(pi/16) |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
299 * A0 = A0 - X4 * cos(pi/16) |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
300 */ |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
301 |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
302 A1=R2.H*R7.L, A0=R2.L*R7.L || W[P0++P3]=R6.H || R0=[I0++]; |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
303 R2.H=(A1+=R2.L*R7.H),R2.L=(A0-=R2.H*R7.H) || W[P0++P3]=R5.H || R7=[I3++]; |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
304 |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
305 /* |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
306 * A1 = X5 * cos(3pi/16) |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
307 * A0 = X6 * cos(3pi/16) |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
308 * A1 = A1 + X6 * cos(5pi/16) |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
309 * A0 = A0 - X5 * cos(5pi/16) |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
310 * The output values are written. |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
311 */ |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
312 |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
313 A1=R4.H*R7.H, A0=R4.L*R7.H || W[P0++P2]=R6.L || R1.H=W[I0++]; |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
314 R4.H=(A1+=R4.L*R7.L),R4.L=(A0-=R4.H*R7.L) || W[P0++P4]=R2.L || R1.L=W[I0++]; |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
315 |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
316 |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
317 /* Beginning of next stage, **pipelined** + drain and store the |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
318 rest of the column store. */ |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
319 |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
320 R0=R0+|+R3,R3=R0-|-R3 || W[P1++P3]=R2.H || R2=[I2++]; |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
321 R1=R1+|+R2,R2=R1-|-R2 (CO) || W[P1++P3]=R4.L || R7=[I3++]; |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
322 .row1: R0=R0+|+R1,R1=R0-|-R1 || W[P1++P5]=R4.H || NOP; |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
323 |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
324 // Exchange input with output. |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
325 B1 = B0; |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
326 B0 = B2; |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
327 .3: B2 = B1; |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
328 |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
329 L3=0; |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
330 (r7:4,p5:3) = [sp++]; |
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
331 RTS; |
5001 | 332 DEFUN_END(fdct) |
4765
85298e8c55c4
bfin dsputils, basic pixel operations sads, diffs, motion compensation
diego
parents:
diff
changeset
|
333 |