Mercurial > libavcodec.hg
annotate h264idct.c @ 3198:6b9f0c4fbdbe libavcodec
First part of a series of speed-enhancing patches.
This one sets up a snow.h and makes snow use the dsputil function pointer
framework to access the three functions that will be implemented in asm
in the other parts of the patchset.
Patch by Robert Edele < yartrebo AH earthlink POIS net>
Original thread:
Subject: [Ffmpeg-devel] [PATCH] Snow mmx+sse2 asm optimizations
Date: Sun, 05 Feb 2006 12:47:14 -0500
author | gpoirier |
---|---|
date | Thu, 16 Mar 2006 19:18:18 +0000 |
parents | 2d35fb3cb940 |
children | c8c591fe26f8 |
rev | line source |
---|---|
2272
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
1 /* |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
2 * H.264 IDCT |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
3 * Copyright (c) 2004 Michael Niedermayer <michaelni@gmx.at> |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
4 * |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
5 * This library is free software; you can redistribute it and/or |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
6 * modify it under the terms of the GNU Lesser General Public |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
7 * License as published by the Free Software Foundation; either |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
8 * version 2 of the License, or (at your option) any later version. |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
9 * |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
10 * This library is distributed in the hope that it will be useful, |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
13 * Lesser General Public License for more details. |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
14 * |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
15 * You should have received a copy of the GNU Lesser General Public |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
16 * License along with this library; if not, write to the Free Software |
3036
0b546eab515d
Update licensing information: The FSF changed postal address.
diego
parents:
2967
diff
changeset
|
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
2272
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
18 * |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
19 */ |
2967 | 20 |
2272
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
21 /** |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
22 * @file h264idct.c |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
23 * H.264 IDCT. |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
24 * @author Michael Niedermayer <michaelni@gmx.at> |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
25 */ |
2967 | 26 |
2272
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
27 #include "dsputil.h" |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
28 |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
29 static always_inline void idct_internal(uint8_t *dst, DCTELEM *block, int stride, int block_stride, int shift, int add){ |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
30 int i; |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
31 uint8_t *cm = cropTbl + MAX_NEG_CROP; |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
32 |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
33 block[0] += 1<<(shift-1); |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
34 |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
35 for(i=0; i<4; i++){ |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
36 const int z0= block[0 + block_stride*i] + block[2 + block_stride*i]; |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
37 const int z1= block[0 + block_stride*i] - block[2 + block_stride*i]; |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
38 const int z2= (block[1 + block_stride*i]>>1) - block[3 + block_stride*i]; |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
39 const int z3= block[1 + block_stride*i] + (block[3 + block_stride*i]>>1); |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
40 |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
41 block[0 + block_stride*i]= z0 + z3; |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
42 block[1 + block_stride*i]= z1 + z2; |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
43 block[2 + block_stride*i]= z1 - z2; |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
44 block[3 + block_stride*i]= z0 - z3; |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
45 } |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
46 |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
47 for(i=0; i<4; i++){ |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
48 const int z0= block[i + block_stride*0] + block[i + block_stride*2]; |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
49 const int z1= block[i + block_stride*0] - block[i + block_stride*2]; |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
50 const int z2= (block[i + block_stride*1]>>1) - block[i + block_stride*3]; |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
51 const int z3= block[i + block_stride*1] + (block[i + block_stride*3]>>1); |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
52 |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
53 dst[i + 0*stride]= cm[ add*dst[i + 0*stride] + ((z0 + z3) >> shift) ]; |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
54 dst[i + 1*stride]= cm[ add*dst[i + 1*stride] + ((z1 + z2) >> shift) ]; |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
55 dst[i + 2*stride]= cm[ add*dst[i + 2*stride] + ((z1 - z2) >> shift) ]; |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
56 dst[i + 3*stride]= cm[ add*dst[i + 3*stride] + ((z0 - z3) >> shift) ]; |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
57 } |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
58 } |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
59 |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
60 void ff_h264_idct_add_c(uint8_t *dst, DCTELEM *block, int stride){ |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
61 idct_internal(dst, block, stride, 4, 6, 1); |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
62 } |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
63 |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
64 void ff_h264_lowres_idct_add_c(uint8_t *dst, int stride, DCTELEM *block){ |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
65 idct_internal(dst, block, stride, 8, 3, 1); |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
66 } |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
67 |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
68 void ff_h264_lowres_idct_put_c(uint8_t *dst, int stride, DCTELEM *block){ |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
69 idct_internal(dst, block, stride, 8, 3, 0); |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
70 } |
2755 | 71 |
72 void ff_h264_idct8_add_c(uint8_t *dst, DCTELEM *block, int stride){ | |
73 int i; | |
74 DCTELEM (*src)[8] = (DCTELEM(*)[8])block; | |
75 uint8_t *cm = cropTbl + MAX_NEG_CROP; | |
76 | |
77 block[0] += 32; | |
78 | |
79 for( i = 0; i < 8; i++ ) | |
80 { | |
81 const int a0 = src[i][0] + src[i][4]; | |
82 const int a2 = src[i][0] - src[i][4]; | |
83 const int a4 = (src[i][2]>>1) - src[i][6]; | |
84 const int a6 = (src[i][6]>>1) + src[i][2]; | |
85 | |
86 const int b0 = a0 + a6; | |
87 const int b2 = a2 + a4; | |
88 const int b4 = a2 - a4; | |
89 const int b6 = a0 - a6; | |
90 | |
91 const int a1 = -src[i][3] + src[i][5] - src[i][7] - (src[i][7]>>1); | |
92 const int a3 = src[i][1] + src[i][7] - src[i][3] - (src[i][3]>>1); | |
93 const int a5 = -src[i][1] + src[i][7] + src[i][5] + (src[i][5]>>1); | |
94 const int a7 = src[i][3] + src[i][5] + src[i][1] + (src[i][1]>>1); | |
95 | |
96 const int b1 = (a7>>2) + a1; | |
97 const int b3 = a3 + (a5>>2); | |
98 const int b5 = (a3>>2) - a5; | |
99 const int b7 = a7 - (a1>>2); | |
100 | |
101 src[i][0] = b0 + b7; | |
102 src[i][7] = b0 - b7; | |
103 src[i][1] = b2 + b5; | |
104 src[i][6] = b2 - b5; | |
105 src[i][2] = b4 + b3; | |
106 src[i][5] = b4 - b3; | |
107 src[i][3] = b6 + b1; | |
108 src[i][4] = b6 - b1; | |
109 } | |
110 for( i = 0; i < 8; i++ ) | |
111 { | |
112 const int a0 = src[0][i] + src[4][i]; | |
113 const int a2 = src[0][i] - src[4][i]; | |
114 const int a4 = (src[2][i]>>1) - src[6][i]; | |
115 const int a6 = (src[6][i]>>1) + src[2][i]; | |
116 | |
117 const int b0 = a0 + a6; | |
118 const int b2 = a2 + a4; | |
119 const int b4 = a2 - a4; | |
120 const int b6 = a0 - a6; | |
121 | |
122 const int a1 = -src[3][i] + src[5][i] - src[7][i] - (src[7][i]>>1); | |
123 const int a3 = src[1][i] + src[7][i] - src[3][i] - (src[3][i]>>1); | |
124 const int a5 = -src[1][i] + src[7][i] + src[5][i] + (src[5][i]>>1); | |
125 const int a7 = src[3][i] + src[5][i] + src[1][i] + (src[1][i]>>1); | |
126 | |
127 const int b1 = (a7>>2) + a1; | |
128 const int b3 = a3 + (a5>>2); | |
129 const int b5 = (a3>>2) - a5; | |
130 const int b7 = a7 - (a1>>2); | |
131 | |
132 dst[i + 0*stride] = cm[ dst[i + 0*stride] + ((b0 + b7) >> 6) ]; | |
133 dst[i + 1*stride] = cm[ dst[i + 1*stride] + ((b2 + b5) >> 6) ]; | |
134 dst[i + 2*stride] = cm[ dst[i + 2*stride] + ((b4 + b3) >> 6) ]; | |
135 dst[i + 3*stride] = cm[ dst[i + 3*stride] + ((b6 + b1) >> 6) ]; | |
136 dst[i + 4*stride] = cm[ dst[i + 4*stride] + ((b6 - b1) >> 6) ]; | |
137 dst[i + 5*stride] = cm[ dst[i + 5*stride] + ((b4 - b3) >> 6) ]; | |
138 dst[i + 6*stride] = cm[ dst[i + 6*stride] + ((b2 - b5) >> 6) ]; | |
139 dst[i + 7*stride] = cm[ dst[i + 7*stride] + ((b0 - b7) >> 6) ]; | |
140 } | |
141 } | |
3105
2d35fb3cb940
h264: special case dc-only idct. ~1% faster overall
lorenm
parents:
3036
diff
changeset
|
142 |
2d35fb3cb940
h264: special case dc-only idct. ~1% faster overall
lorenm
parents:
3036
diff
changeset
|
143 // assumes all AC coefs are 0 |
2d35fb3cb940
h264: special case dc-only idct. ~1% faster overall
lorenm
parents:
3036
diff
changeset
|
144 void ff_h264_idct_dc_add_c(uint8_t *dst, DCTELEM *block, int stride){ |
2d35fb3cb940
h264: special case dc-only idct. ~1% faster overall
lorenm
parents:
3036
diff
changeset
|
145 int i, j; |
2d35fb3cb940
h264: special case dc-only idct. ~1% faster overall
lorenm
parents:
3036
diff
changeset
|
146 uint8_t *cm = cropTbl + MAX_NEG_CROP; |
2d35fb3cb940
h264: special case dc-only idct. ~1% faster overall
lorenm
parents:
3036
diff
changeset
|
147 int dc = (block[0] + 32) >> 6; |
2d35fb3cb940
h264: special case dc-only idct. ~1% faster overall
lorenm
parents:
3036
diff
changeset
|
148 for( j = 0; j < 4; j++ ) |
2d35fb3cb940
h264: special case dc-only idct. ~1% faster overall
lorenm
parents:
3036
diff
changeset
|
149 { |
2d35fb3cb940
h264: special case dc-only idct. ~1% faster overall
lorenm
parents:
3036
diff
changeset
|
150 for( i = 0; i < 4; i++ ) |
2d35fb3cb940
h264: special case dc-only idct. ~1% faster overall
lorenm
parents:
3036
diff
changeset
|
151 dst[i] = cm[ dst[i] + dc ]; |
2d35fb3cb940
h264: special case dc-only idct. ~1% faster overall
lorenm
parents:
3036
diff
changeset
|
152 dst += stride; |
2d35fb3cb940
h264: special case dc-only idct. ~1% faster overall
lorenm
parents:
3036
diff
changeset
|
153 } |
2d35fb3cb940
h264: special case dc-only idct. ~1% faster overall
lorenm
parents:
3036
diff
changeset
|
154 } |
2d35fb3cb940
h264: special case dc-only idct. ~1% faster overall
lorenm
parents:
3036
diff
changeset
|
155 |
2d35fb3cb940
h264: special case dc-only idct. ~1% faster overall
lorenm
parents:
3036
diff
changeset
|
156 void ff_h264_idct8_dc_add_c(uint8_t *dst, DCTELEM *block, int stride){ |
2d35fb3cb940
h264: special case dc-only idct. ~1% faster overall
lorenm
parents:
3036
diff
changeset
|
157 int i, j; |
2d35fb3cb940
h264: special case dc-only idct. ~1% faster overall
lorenm
parents:
3036
diff
changeset
|
158 uint8_t *cm = cropTbl + MAX_NEG_CROP; |
2d35fb3cb940
h264: special case dc-only idct. ~1% faster overall
lorenm
parents:
3036
diff
changeset
|
159 int dc = (block[0] + 32) >> 6; |
2d35fb3cb940
h264: special case dc-only idct. ~1% faster overall
lorenm
parents:
3036
diff
changeset
|
160 for( j = 0; j < 8; j++ ) |
2d35fb3cb940
h264: special case dc-only idct. ~1% faster overall
lorenm
parents:
3036
diff
changeset
|
161 { |
2d35fb3cb940
h264: special case dc-only idct. ~1% faster overall
lorenm
parents:
3036
diff
changeset
|
162 for( i = 0; i < 8; i++ ) |
2d35fb3cb940
h264: special case dc-only idct. ~1% faster overall
lorenm
parents:
3036
diff
changeset
|
163 dst[i] = cm[ dst[i] + dc ]; |
2d35fb3cb940
h264: special case dc-only idct. ~1% faster overall
lorenm
parents:
3036
diff
changeset
|
164 dst += stride; |
2d35fb3cb940
h264: special case dc-only idct. ~1% faster overall
lorenm
parents:
3036
diff
changeset
|
165 } |
2d35fb3cb940
h264: special case dc-only idct. ~1% faster overall
lorenm
parents:
3036
diff
changeset
|
166 } |