annotate h264idct.c @ 3990:746a60ba3177 libavcodec

enable CMOV_IS_FAST as its faster or equal speed on every cpu (duron, athlon, PM, P3) from which ive seen benchmarks, it might be slower on P4 but noone has posted benchmarks ...
author michael
date Wed, 11 Oct 2006 12:23:40 +0000
parents c8c591fe26f8
children 23da44e8fd05
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2272
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
1 /*
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
2 * H.264 IDCT
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
3 * Copyright (c) 2004 Michael Niedermayer <michaelni@gmx.at>
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
4 *
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3105
diff changeset
5 * This file is part of FFmpeg.
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3105
diff changeset
6 *
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3105
diff changeset
7 * FFmpeg is free software; you can redistribute it and/or
2272
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
8 * modify it under the terms of the GNU Lesser General Public
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
9 * License as published by the Free Software Foundation; either
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3105
diff changeset
10 * version 2.1 of the License, or (at your option) any later version.
2272
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
11 *
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3105
diff changeset
12 * FFmpeg is distributed in the hope that it will be useful,
2272
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
15 * Lesser General Public License for more details.
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
16 *
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
17 * You should have received a copy of the GNU Lesser General Public
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3105
diff changeset
18 * License along with FFmpeg; if not, write to the Free Software
3036
0b546eab515d Update licensing information: The FSF changed postal address.
diego
parents: 2967
diff changeset
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
2272
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
20 *
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
21 */
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 2755
diff changeset
22
2272
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
23 /**
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
24 * @file h264idct.c
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
25 * H.264 IDCT.
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
26 * @author Michael Niedermayer <michaelni@gmx.at>
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
27 */
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 2755
diff changeset
28
2272
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
29 #include "dsputil.h"
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
30
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
31 static always_inline void idct_internal(uint8_t *dst, DCTELEM *block, int stride, int block_stride, int shift, int add){
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
32 int i;
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
33 uint8_t *cm = cropTbl + MAX_NEG_CROP;
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
34
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
35 block[0] += 1<<(shift-1);
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
36
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
37 for(i=0; i<4; i++){
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
38 const int z0= block[0 + block_stride*i] + block[2 + block_stride*i];
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
39 const int z1= block[0 + block_stride*i] - block[2 + block_stride*i];
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
40 const int z2= (block[1 + block_stride*i]>>1) - block[3 + block_stride*i];
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
41 const int z3= block[1 + block_stride*i] + (block[3 + block_stride*i]>>1);
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
42
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
43 block[0 + block_stride*i]= z0 + z3;
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
44 block[1 + block_stride*i]= z1 + z2;
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
45 block[2 + block_stride*i]= z1 - z2;
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
46 block[3 + block_stride*i]= z0 - z3;
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
47 }
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
48
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
49 for(i=0; i<4; i++){
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
50 const int z0= block[i + block_stride*0] + block[i + block_stride*2];
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
51 const int z1= block[i + block_stride*0] - block[i + block_stride*2];
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
52 const int z2= (block[i + block_stride*1]>>1) - block[i + block_stride*3];
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
53 const int z3= block[i + block_stride*1] + (block[i + block_stride*3]>>1);
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
54
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
55 dst[i + 0*stride]= cm[ add*dst[i + 0*stride] + ((z0 + z3) >> shift) ];
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
56 dst[i + 1*stride]= cm[ add*dst[i + 1*stride] + ((z1 + z2) >> shift) ];
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
57 dst[i + 2*stride]= cm[ add*dst[i + 2*stride] + ((z1 - z2) >> shift) ];
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
58 dst[i + 3*stride]= cm[ add*dst[i + 3*stride] + ((z0 - z3) >> shift) ];
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
59 }
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
60 }
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
61
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
62 void ff_h264_idct_add_c(uint8_t *dst, DCTELEM *block, int stride){
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
63 idct_internal(dst, block, stride, 4, 6, 1);
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
64 }
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
65
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
66 void ff_h264_lowres_idct_add_c(uint8_t *dst, int stride, DCTELEM *block){
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
67 idct_internal(dst, block, stride, 8, 3, 1);
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
68 }
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
69
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
70 void ff_h264_lowres_idct_put_c(uint8_t *dst, int stride, DCTELEM *block){
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
71 idct_internal(dst, block, stride, 8, 3, 0);
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
72 }
2755
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
73
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
74 void ff_h264_idct8_add_c(uint8_t *dst, DCTELEM *block, int stride){
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
75 int i;
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
76 DCTELEM (*src)[8] = (DCTELEM(*)[8])block;
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
77 uint8_t *cm = cropTbl + MAX_NEG_CROP;
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
78
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
79 block[0] += 32;
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
80
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
81 for( i = 0; i < 8; i++ )
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
82 {
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
83 const int a0 = src[i][0] + src[i][4];
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
84 const int a2 = src[i][0] - src[i][4];
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
85 const int a4 = (src[i][2]>>1) - src[i][6];
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
86 const int a6 = (src[i][6]>>1) + src[i][2];
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
87
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
88 const int b0 = a0 + a6;
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
89 const int b2 = a2 + a4;
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
90 const int b4 = a2 - a4;
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
91 const int b6 = a0 - a6;
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
92
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
93 const int a1 = -src[i][3] + src[i][5] - src[i][7] - (src[i][7]>>1);
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
94 const int a3 = src[i][1] + src[i][7] - src[i][3] - (src[i][3]>>1);
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
95 const int a5 = -src[i][1] + src[i][7] + src[i][5] + (src[i][5]>>1);
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
96 const int a7 = src[i][3] + src[i][5] + src[i][1] + (src[i][1]>>1);
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
97
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
98 const int b1 = (a7>>2) + a1;
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
99 const int b3 = a3 + (a5>>2);
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
100 const int b5 = (a3>>2) - a5;
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
101 const int b7 = a7 - (a1>>2);
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
102
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
103 src[i][0] = b0 + b7;
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
104 src[i][7] = b0 - b7;
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
105 src[i][1] = b2 + b5;
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
106 src[i][6] = b2 - b5;
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
107 src[i][2] = b4 + b3;
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
108 src[i][5] = b4 - b3;
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
109 src[i][3] = b6 + b1;
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
110 src[i][4] = b6 - b1;
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
111 }
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
112 for( i = 0; i < 8; i++ )
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
113 {
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
114 const int a0 = src[0][i] + src[4][i];
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
115 const int a2 = src[0][i] - src[4][i];
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
116 const int a4 = (src[2][i]>>1) - src[6][i];
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
117 const int a6 = (src[6][i]>>1) + src[2][i];
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
118
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
119 const int b0 = a0 + a6;
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
120 const int b2 = a2 + a4;
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
121 const int b4 = a2 - a4;
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
122 const int b6 = a0 - a6;
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
123
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
124 const int a1 = -src[3][i] + src[5][i] - src[7][i] - (src[7][i]>>1);
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
125 const int a3 = src[1][i] + src[7][i] - src[3][i] - (src[3][i]>>1);
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
126 const int a5 = -src[1][i] + src[7][i] + src[5][i] + (src[5][i]>>1);
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
127 const int a7 = src[3][i] + src[5][i] + src[1][i] + (src[1][i]>>1);
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
128
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
129 const int b1 = (a7>>2) + a1;
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
130 const int b3 = a3 + (a5>>2);
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
131 const int b5 = (a3>>2) - a5;
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
132 const int b7 = a7 - (a1>>2);
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
133
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
134 dst[i + 0*stride] = cm[ dst[i + 0*stride] + ((b0 + b7) >> 6) ];
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
135 dst[i + 1*stride] = cm[ dst[i + 1*stride] + ((b2 + b5) >> 6) ];
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
136 dst[i + 2*stride] = cm[ dst[i + 2*stride] + ((b4 + b3) >> 6) ];
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
137 dst[i + 3*stride] = cm[ dst[i + 3*stride] + ((b6 + b1) >> 6) ];
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
138 dst[i + 4*stride] = cm[ dst[i + 4*stride] + ((b6 - b1) >> 6) ];
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
139 dst[i + 5*stride] = cm[ dst[i + 5*stride] + ((b4 - b3) >> 6) ];
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
140 dst[i + 6*stride] = cm[ dst[i + 6*stride] + ((b2 - b5) >> 6) ];
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
141 dst[i + 7*stride] = cm[ dst[i + 7*stride] + ((b0 - b7) >> 6) ];
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
142 }
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
143 }
3105
2d35fb3cb940 h264: special case dc-only idct. ~1% faster overall
lorenm
parents: 3036
diff changeset
144
2d35fb3cb940 h264: special case dc-only idct. ~1% faster overall
lorenm
parents: 3036
diff changeset
145 // assumes all AC coefs are 0
2d35fb3cb940 h264: special case dc-only idct. ~1% faster overall
lorenm
parents: 3036
diff changeset
146 void ff_h264_idct_dc_add_c(uint8_t *dst, DCTELEM *block, int stride){
2d35fb3cb940 h264: special case dc-only idct. ~1% faster overall
lorenm
parents: 3036
diff changeset
147 int i, j;
2d35fb3cb940 h264: special case dc-only idct. ~1% faster overall
lorenm
parents: 3036
diff changeset
148 uint8_t *cm = cropTbl + MAX_NEG_CROP;
2d35fb3cb940 h264: special case dc-only idct. ~1% faster overall
lorenm
parents: 3036
diff changeset
149 int dc = (block[0] + 32) >> 6;
2d35fb3cb940 h264: special case dc-only idct. ~1% faster overall
lorenm
parents: 3036
diff changeset
150 for( j = 0; j < 4; j++ )
2d35fb3cb940 h264: special case dc-only idct. ~1% faster overall
lorenm
parents: 3036
diff changeset
151 {
2d35fb3cb940 h264: special case dc-only idct. ~1% faster overall
lorenm
parents: 3036
diff changeset
152 for( i = 0; i < 4; i++ )
2d35fb3cb940 h264: special case dc-only idct. ~1% faster overall
lorenm
parents: 3036
diff changeset
153 dst[i] = cm[ dst[i] + dc ];
2d35fb3cb940 h264: special case dc-only idct. ~1% faster overall
lorenm
parents: 3036
diff changeset
154 dst += stride;
2d35fb3cb940 h264: special case dc-only idct. ~1% faster overall
lorenm
parents: 3036
diff changeset
155 }
2d35fb3cb940 h264: special case dc-only idct. ~1% faster overall
lorenm
parents: 3036
diff changeset
156 }
2d35fb3cb940 h264: special case dc-only idct. ~1% faster overall
lorenm
parents: 3036
diff changeset
157
2d35fb3cb940 h264: special case dc-only idct. ~1% faster overall
lorenm
parents: 3036
diff changeset
158 void ff_h264_idct8_dc_add_c(uint8_t *dst, DCTELEM *block, int stride){
2d35fb3cb940 h264: special case dc-only idct. ~1% faster overall
lorenm
parents: 3036
diff changeset
159 int i, j;
2d35fb3cb940 h264: special case dc-only idct. ~1% faster overall
lorenm
parents: 3036
diff changeset
160 uint8_t *cm = cropTbl + MAX_NEG_CROP;
2d35fb3cb940 h264: special case dc-only idct. ~1% faster overall
lorenm
parents: 3036
diff changeset
161 int dc = (block[0] + 32) >> 6;
2d35fb3cb940 h264: special case dc-only idct. ~1% faster overall
lorenm
parents: 3036
diff changeset
162 for( j = 0; j < 8; j++ )
2d35fb3cb940 h264: special case dc-only idct. ~1% faster overall
lorenm
parents: 3036
diff changeset
163 {
2d35fb3cb940 h264: special case dc-only idct. ~1% faster overall
lorenm
parents: 3036
diff changeset
164 for( i = 0; i < 8; i++ )
2d35fb3cb940 h264: special case dc-only idct. ~1% faster overall
lorenm
parents: 3036
diff changeset
165 dst[i] = cm[ dst[i] + dc ];
2d35fb3cb940 h264: special case dc-only idct. ~1% faster overall
lorenm
parents: 3036
diff changeset
166 dst += stride;
2d35fb3cb940 h264: special case dc-only idct. ~1% faster overall
lorenm
parents: 3036
diff changeset
167 }
2d35fb3cb940 h264: special case dc-only idct. ~1% faster overall
lorenm
parents: 3036
diff changeset
168 }