Mercurial > libavcodec.hg
annotate h264idct.c @ 3990:746a60ba3177 libavcodec
enable CMOV_IS_FAST as its faster or equal speed on every cpu (duron, athlon, PM, P3) from which ive seen benchmarks, it might be slower on P4 but noone has posted benchmarks ...
author | michael |
---|---|
date | Wed, 11 Oct 2006 12:23:40 +0000 |
parents | c8c591fe26f8 |
children | 23da44e8fd05 |
rev | line source |
---|---|
2272
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
1 /* |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
2 * H.264 IDCT |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
3 * Copyright (c) 2004 Michael Niedermayer <michaelni@gmx.at> |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
4 * |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3105
diff
changeset
|
5 * This file is part of FFmpeg. |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3105
diff
changeset
|
6 * |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3105
diff
changeset
|
7 * FFmpeg is free software; you can redistribute it and/or |
2272
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
8 * modify it under the terms of the GNU Lesser General Public |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
9 * License as published by the Free Software Foundation; either |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3105
diff
changeset
|
10 * version 2.1 of the License, or (at your option) any later version. |
2272
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
11 * |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3105
diff
changeset
|
12 * FFmpeg is distributed in the hope that it will be useful, |
2272
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
15 * Lesser General Public License for more details. |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
16 * |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
17 * You should have received a copy of the GNU Lesser General Public |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3105
diff
changeset
|
18 * License along with FFmpeg; if not, write to the Free Software |
3036
0b546eab515d
Update licensing information: The FSF changed postal address.
diego
parents:
2967
diff
changeset
|
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
2272
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
20 * |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
21 */ |
2967 | 22 |
2272
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
23 /** |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
24 * @file h264idct.c |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
25 * H.264 IDCT. |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
26 * @author Michael Niedermayer <michaelni@gmx.at> |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
27 */ |
2967 | 28 |
2272
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
29 #include "dsputil.h" |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
30 |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
31 static always_inline void idct_internal(uint8_t *dst, DCTELEM *block, int stride, int block_stride, int shift, int add){ |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
32 int i; |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
33 uint8_t *cm = cropTbl + MAX_NEG_CROP; |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
34 |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
35 block[0] += 1<<(shift-1); |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
36 |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
37 for(i=0; i<4; i++){ |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
38 const int z0= block[0 + block_stride*i] + block[2 + block_stride*i]; |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
39 const int z1= block[0 + block_stride*i] - block[2 + block_stride*i]; |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
40 const int z2= (block[1 + block_stride*i]>>1) - block[3 + block_stride*i]; |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
41 const int z3= block[1 + block_stride*i] + (block[3 + block_stride*i]>>1); |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
42 |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
43 block[0 + block_stride*i]= z0 + z3; |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
44 block[1 + block_stride*i]= z1 + z2; |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
45 block[2 + block_stride*i]= z1 - z2; |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
46 block[3 + block_stride*i]= z0 - z3; |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
47 } |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
48 |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
49 for(i=0; i<4; i++){ |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
50 const int z0= block[i + block_stride*0] + block[i + block_stride*2]; |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
51 const int z1= block[i + block_stride*0] - block[i + block_stride*2]; |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
52 const int z2= (block[i + block_stride*1]>>1) - block[i + block_stride*3]; |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
53 const int z3= block[i + block_stride*1] + (block[i + block_stride*3]>>1); |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
54 |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
55 dst[i + 0*stride]= cm[ add*dst[i + 0*stride] + ((z0 + z3) >> shift) ]; |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
56 dst[i + 1*stride]= cm[ add*dst[i + 1*stride] + ((z1 + z2) >> shift) ]; |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
57 dst[i + 2*stride]= cm[ add*dst[i + 2*stride] + ((z1 - z2) >> shift) ]; |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
58 dst[i + 3*stride]= cm[ add*dst[i + 3*stride] + ((z0 - z3) >> shift) ]; |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
59 } |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
60 } |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
61 |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
62 void ff_h264_idct_add_c(uint8_t *dst, DCTELEM *block, int stride){ |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
63 idct_internal(dst, block, stride, 4, 6, 1); |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
64 } |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
65 |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
66 void ff_h264_lowres_idct_add_c(uint8_t *dst, int stride, DCTELEM *block){ |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
67 idct_internal(dst, block, stride, 8, 3, 1); |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
68 } |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
69 |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
70 void ff_h264_lowres_idct_put_c(uint8_t *dst, int stride, DCTELEM *block){ |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
71 idct_internal(dst, block, stride, 8, 3, 0); |
cd43603c46f9
move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff
changeset
|
72 } |
2755 | 73 |
74 void ff_h264_idct8_add_c(uint8_t *dst, DCTELEM *block, int stride){ | |
75 int i; | |
76 DCTELEM (*src)[8] = (DCTELEM(*)[8])block; | |
77 uint8_t *cm = cropTbl + MAX_NEG_CROP; | |
78 | |
79 block[0] += 32; | |
80 | |
81 for( i = 0; i < 8; i++ ) | |
82 { | |
83 const int a0 = src[i][0] + src[i][4]; | |
84 const int a2 = src[i][0] - src[i][4]; | |
85 const int a4 = (src[i][2]>>1) - src[i][6]; | |
86 const int a6 = (src[i][6]>>1) + src[i][2]; | |
87 | |
88 const int b0 = a0 + a6; | |
89 const int b2 = a2 + a4; | |
90 const int b4 = a2 - a4; | |
91 const int b6 = a0 - a6; | |
92 | |
93 const int a1 = -src[i][3] + src[i][5] - src[i][7] - (src[i][7]>>1); | |
94 const int a3 = src[i][1] + src[i][7] - src[i][3] - (src[i][3]>>1); | |
95 const int a5 = -src[i][1] + src[i][7] + src[i][5] + (src[i][5]>>1); | |
96 const int a7 = src[i][3] + src[i][5] + src[i][1] + (src[i][1]>>1); | |
97 | |
98 const int b1 = (a7>>2) + a1; | |
99 const int b3 = a3 + (a5>>2); | |
100 const int b5 = (a3>>2) - a5; | |
101 const int b7 = a7 - (a1>>2); | |
102 | |
103 src[i][0] = b0 + b7; | |
104 src[i][7] = b0 - b7; | |
105 src[i][1] = b2 + b5; | |
106 src[i][6] = b2 - b5; | |
107 src[i][2] = b4 + b3; | |
108 src[i][5] = b4 - b3; | |
109 src[i][3] = b6 + b1; | |
110 src[i][4] = b6 - b1; | |
111 } | |
112 for( i = 0; i < 8; i++ ) | |
113 { | |
114 const int a0 = src[0][i] + src[4][i]; | |
115 const int a2 = src[0][i] - src[4][i]; | |
116 const int a4 = (src[2][i]>>1) - src[6][i]; | |
117 const int a6 = (src[6][i]>>1) + src[2][i]; | |
118 | |
119 const int b0 = a0 + a6; | |
120 const int b2 = a2 + a4; | |
121 const int b4 = a2 - a4; | |
122 const int b6 = a0 - a6; | |
123 | |
124 const int a1 = -src[3][i] + src[5][i] - src[7][i] - (src[7][i]>>1); | |
125 const int a3 = src[1][i] + src[7][i] - src[3][i] - (src[3][i]>>1); | |
126 const int a5 = -src[1][i] + src[7][i] + src[5][i] + (src[5][i]>>1); | |
127 const int a7 = src[3][i] + src[5][i] + src[1][i] + (src[1][i]>>1); | |
128 | |
129 const int b1 = (a7>>2) + a1; | |
130 const int b3 = a3 + (a5>>2); | |
131 const int b5 = (a3>>2) - a5; | |
132 const int b7 = a7 - (a1>>2); | |
133 | |
134 dst[i + 0*stride] = cm[ dst[i + 0*stride] + ((b0 + b7) >> 6) ]; | |
135 dst[i + 1*stride] = cm[ dst[i + 1*stride] + ((b2 + b5) >> 6) ]; | |
136 dst[i + 2*stride] = cm[ dst[i + 2*stride] + ((b4 + b3) >> 6) ]; | |
137 dst[i + 3*stride] = cm[ dst[i + 3*stride] + ((b6 + b1) >> 6) ]; | |
138 dst[i + 4*stride] = cm[ dst[i + 4*stride] + ((b6 - b1) >> 6) ]; | |
139 dst[i + 5*stride] = cm[ dst[i + 5*stride] + ((b4 - b3) >> 6) ]; | |
140 dst[i + 6*stride] = cm[ dst[i + 6*stride] + ((b2 - b5) >> 6) ]; | |
141 dst[i + 7*stride] = cm[ dst[i + 7*stride] + ((b0 - b7) >> 6) ]; | |
142 } | |
143 } | |
3105
2d35fb3cb940
h264: special case dc-only idct. ~1% faster overall
lorenm
parents:
3036
diff
changeset
|
144 |
2d35fb3cb940
h264: special case dc-only idct. ~1% faster overall
lorenm
parents:
3036
diff
changeset
|
145 // Assumes all AC coefficients are 0: only the DC term block[0] is read. |
2d35fb3cb940
h264: special case dc-only idct. ~1% faster overall
lorenm
parents:
3036
diff
changeset
|
146 void ff_h264_idct_dc_add_c(uint8_t *dst, DCTELEM *block, int stride){ |
2d35fb3cb940
h264: special case dc-only idct. ~1% faster overall
lorenm
parents:
3036
diff
changeset
|
147 int i, j; |
2d35fb3cb940
h264: special case dc-only idct. ~1% faster overall
lorenm
parents:
3036
diff
changeset
|
148 uint8_t *cm = cropTbl + MAX_NEG_CROP; |
2d35fb3cb940
h264: special case dc-only idct. ~1% faster overall
lorenm
parents:
3036
diff
changeset
|
149 int dc = (block[0] + 32) >> 6; |
2d35fb3cb940
h264: special case dc-only idct. ~1% faster overall
lorenm
parents:
3036
diff
changeset
|
150 for( j = 0; j < 4; j++ ) |
2d35fb3cb940
h264: special case dc-only idct. ~1% faster overall
lorenm
parents:
3036
diff
changeset
|
151 { |
2d35fb3cb940
h264: special case dc-only idct. ~1% faster overall
lorenm
parents:
3036
diff
changeset
|
152 for( i = 0; i < 4; i++ ) |
2d35fb3cb940
h264: special case dc-only idct. ~1% faster overall
lorenm
parents:
3036
diff
changeset
|
153 dst[i] = cm[ dst[i] + dc ]; |
2d35fb3cb940
h264: special case dc-only idct. ~1% faster overall
lorenm
parents:
3036
diff
changeset
|
154 dst += stride; |
2d35fb3cb940
h264: special case dc-only idct. ~1% faster overall
lorenm
parents:
3036
diff
changeset
|
155 } |
2d35fb3cb940
h264: special case dc-only idct. ~1% faster overall
lorenm
parents:
3036
diff
changeset
|
156 } |
2d35fb3cb940
h264: special case dc-only idct. ~1% faster overall
lorenm
parents:
3036
diff
changeset
|
157 |
2d35fb3cb940
h264: special case dc-only idct. ~1% faster overall
lorenm
parents:
3036
diff
changeset
|
158 void ff_h264_idct8_dc_add_c(uint8_t *dst, DCTELEM *block, int stride){ |
2d35fb3cb940
h264: special case dc-only idct. ~1% faster overall
lorenm
parents:
3036
diff
changeset
|
159 int i, j; |
2d35fb3cb940
h264: special case dc-only idct. ~1% faster overall
lorenm
parents:
3036
diff
changeset
|
160 uint8_t *cm = cropTbl + MAX_NEG_CROP; |
2d35fb3cb940
h264: special case dc-only idct. ~1% faster overall
lorenm
parents:
3036
diff
changeset
|
161 int dc = (block[0] + 32) >> 6; |
2d35fb3cb940
h264: special case dc-only idct. ~1% faster overall
lorenm
parents:
3036
diff
changeset
|
162 for( j = 0; j < 8; j++ ) |
2d35fb3cb940
h264: special case dc-only idct. ~1% faster overall
lorenm
parents:
3036
diff
changeset
|
163 { |
2d35fb3cb940
h264: special case dc-only idct. ~1% faster overall
lorenm
parents:
3036
diff
changeset
|
164 for( i = 0; i < 8; i++ ) |
2d35fb3cb940
h264: special case dc-only idct. ~1% faster overall
lorenm
parents:
3036
diff
changeset
|
165 dst[i] = cm[ dst[i] + dc ]; |
2d35fb3cb940
h264: special case dc-only idct. ~1% faster overall
lorenm
parents:
3036
diff
changeset
|
166 dst += stride; |
2d35fb3cb940
h264: special case dc-only idct. ~1% faster overall
lorenm
parents:
3036
diff
changeset
|
167 } |
2d35fb3cb940
h264: special case dc-only idct. ~1% faster overall
lorenm
parents:
3036
diff
changeset
|
168 } |