annotate h264idct.c @ 10323:1f5c4d2ce77f libavcodec

Simplify stereo rematrixing by only using one temporary variable. It is also about 1.8% faster on my system.
author jbr
date Wed, 30 Sep 2009 01:25:04 +0000
parents e9d9d946f213
children 7dd2a45249a9
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2272
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
1 /*
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
2 * H.264 IDCT
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
3 * Copyright (c) 2004 Michael Niedermayer <michaelni@gmx.at>
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
4 *
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3105
diff changeset
5 * This file is part of FFmpeg.
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3105
diff changeset
6 *
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3105
diff changeset
7 * FFmpeg is free software; you can redistribute it and/or
2272
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
8 * modify it under the terms of the GNU Lesser General Public
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
9 * License as published by the Free Software Foundation; either
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3105
diff changeset
10 * version 2.1 of the License, or (at your option) any later version.
2272
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
11 *
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3105
diff changeset
12 * FFmpeg is distributed in the hope that it will be useful,
2272
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
15 * Lesser General Public License for more details.
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
16 *
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
17 * You should have received a copy of the GNU Lesser General Public
3947
c8c591fe26f8 Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents: 3105
diff changeset
18 * License along with FFmpeg; if not, write to the Free Software
3036
0b546eab515d Update licensing information: The FSF changed postal address.
diego
parents: 2967
diff changeset
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
2272
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
20 */
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 2755
diff changeset
21
2272
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
22 /**
8718
e9d9d946f213 Use full internal pathname in doxygen @file directives.
diego
parents: 8673
diff changeset
23 * @file libavcodec/h264idct.c
2272
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
24 * H.264 IDCT.
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
25 * @author Michael Niedermayer <michaelni@gmx.at>
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
26 */
2967
ef2149182f1c COSMETICS: Remove all trailing whitespace.
diego
parents: 2755
diff changeset
27
2272
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
28 #include "dsputil.h"
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
29
4283
d6f83e2f8804 rename always_inline to av_always_inline and move to common.h
mru
parents: 4176
diff changeset
30 static av_always_inline void idct_internal(uint8_t *dst, DCTELEM *block, int stride, int block_stride, int shift, int add){
2272
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
31 int i;
4176
23da44e8fd05 rename cropTbl -> ff_cropTbl
mru
parents: 3947
diff changeset
32 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
2272
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
33
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
34 block[0] += 1<<(shift-1);
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
35
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
36 for(i=0; i<4; i++){
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
37 const int z0= block[0 + block_stride*i] + block[2 + block_stride*i];
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
38 const int z1= block[0 + block_stride*i] - block[2 + block_stride*i];
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
39 const int z2= (block[1 + block_stride*i]>>1) - block[3 + block_stride*i];
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
40 const int z3= block[1 + block_stride*i] + (block[3 + block_stride*i]>>1);
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
41
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
42 block[0 + block_stride*i]= z0 + z3;
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
43 block[1 + block_stride*i]= z1 + z2;
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
44 block[2 + block_stride*i]= z1 - z2;
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
45 block[3 + block_stride*i]= z0 - z3;
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
46 }
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
47
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
48 for(i=0; i<4; i++){
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
49 const int z0= block[i + block_stride*0] + block[i + block_stride*2];
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
50 const int z1= block[i + block_stride*0] - block[i + block_stride*2];
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
51 const int z2= (block[i + block_stride*1]>>1) - block[i + block_stride*3];
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
52 const int z3= block[i + block_stride*1] + (block[i + block_stride*3]>>1);
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
53
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
54 dst[i + 0*stride]= cm[ add*dst[i + 0*stride] + ((z0 + z3) >> shift) ];
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
55 dst[i + 1*stride]= cm[ add*dst[i + 1*stride] + ((z1 + z2) >> shift) ];
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
56 dst[i + 2*stride]= cm[ add*dst[i + 2*stride] + ((z1 - z2) >> shift) ];
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
57 dst[i + 3*stride]= cm[ add*dst[i + 3*stride] + ((z0 - z3) >> shift) ];
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
58 }
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
59 }
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
60
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
61 void ff_h264_idct_add_c(uint8_t *dst, DCTELEM *block, int stride){
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
62 idct_internal(dst, block, stride, 4, 6, 1);
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
63 }
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
64
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
65 void ff_h264_lowres_idct_add_c(uint8_t *dst, int stride, DCTELEM *block){
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
66 idct_internal(dst, block, stride, 8, 3, 1);
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
67 }
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
68
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
69 void ff_h264_lowres_idct_put_c(uint8_t *dst, int stride, DCTELEM *block){
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
70 idct_internal(dst, block, stride, 8, 3, 0);
cd43603c46f9 move h264 idct to its own file and call via function pointer in DspContext
michael
parents:
diff changeset
71 }
2755
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
72
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
73 void ff_h264_idct8_add_c(uint8_t *dst, DCTELEM *block, int stride){
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
74 int i;
4176
23da44e8fd05 rename cropTbl -> ff_cropTbl
mru
parents: 3947
diff changeset
75 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
2755
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
76
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
77 block[0] += 32;
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
78
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
79 for( i = 0; i < 8; i++ )
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
80 {
8457
fa07932f2c89 flatten an array, since gcc fails at optimizing multidimensional arrays
lorenm
parents: 8375
diff changeset
81 const int a0 = block[0+i*8] + block[4+i*8];
fa07932f2c89 flatten an array, since gcc fails at optimizing multidimensional arrays
lorenm
parents: 8375
diff changeset
82 const int a2 = block[0+i*8] - block[4+i*8];
fa07932f2c89 flatten an array, since gcc fails at optimizing multidimensional arrays
lorenm
parents: 8375
diff changeset
83 const int a4 = (block[2+i*8]>>1) - block[6+i*8];
fa07932f2c89 flatten an array, since gcc fails at optimizing multidimensional arrays
lorenm
parents: 8375
diff changeset
84 const int a6 = (block[6+i*8]>>1) + block[2+i*8];
2755
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
85
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
86 const int b0 = a0 + a6;
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
87 const int b2 = a2 + a4;
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
88 const int b4 = a2 - a4;
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
89 const int b6 = a0 - a6;
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
90
8457
fa07932f2c89 flatten an array, since gcc fails at optimizing multidimensional arrays
lorenm
parents: 8375
diff changeset
91 const int a1 = -block[3+i*8] + block[5+i*8] - block[7+i*8] - (block[7+i*8]>>1);
fa07932f2c89 flatten an array, since gcc fails at optimizing multidimensional arrays
lorenm
parents: 8375
diff changeset
92 const int a3 = block[1+i*8] + block[7+i*8] - block[3+i*8] - (block[3+i*8]>>1);
fa07932f2c89 flatten an array, since gcc fails at optimizing multidimensional arrays
lorenm
parents: 8375
diff changeset
93 const int a5 = -block[1+i*8] + block[7+i*8] + block[5+i*8] + (block[5+i*8]>>1);
fa07932f2c89 flatten an array, since gcc fails at optimizing multidimensional arrays
lorenm
parents: 8375
diff changeset
94 const int a7 = block[3+i*8] + block[5+i*8] + block[1+i*8] + (block[1+i*8]>>1);
2755
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
95
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
96 const int b1 = (a7>>2) + a1;
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
97 const int b3 = a3 + (a5>>2);
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
98 const int b5 = (a3>>2) - a5;
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
99 const int b7 = a7 - (a1>>2);
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
100
8457
fa07932f2c89 flatten an array, since gcc fails at optimizing multidimensional arrays
lorenm
parents: 8375
diff changeset
101 block[0+i*8] = b0 + b7;
fa07932f2c89 flatten an array, since gcc fails at optimizing multidimensional arrays
lorenm
parents: 8375
diff changeset
102 block[7+i*8] = b0 - b7;
fa07932f2c89 flatten an array, since gcc fails at optimizing multidimensional arrays
lorenm
parents: 8375
diff changeset
103 block[1+i*8] = b2 + b5;
fa07932f2c89 flatten an array, since gcc fails at optimizing multidimensional arrays
lorenm
parents: 8375
diff changeset
104 block[6+i*8] = b2 - b5;
fa07932f2c89 flatten an array, since gcc fails at optimizing multidimensional arrays
lorenm
parents: 8375
diff changeset
105 block[2+i*8] = b4 + b3;
fa07932f2c89 flatten an array, since gcc fails at optimizing multidimensional arrays
lorenm
parents: 8375
diff changeset
106 block[5+i*8] = b4 - b3;
fa07932f2c89 flatten an array, since gcc fails at optimizing multidimensional arrays
lorenm
parents: 8375
diff changeset
107 block[3+i*8] = b6 + b1;
fa07932f2c89 flatten an array, since gcc fails at optimizing multidimensional arrays
lorenm
parents: 8375
diff changeset
108 block[4+i*8] = b6 - b1;
2755
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
109 }
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
110 for( i = 0; i < 8; i++ )
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
111 {
8457
fa07932f2c89 flatten an array, since gcc fails at optimizing multidimensional arrays
lorenm
parents: 8375
diff changeset
112 const int a0 = block[i+0*8] + block[i+4*8];
fa07932f2c89 flatten an array, since gcc fails at optimizing multidimensional arrays
lorenm
parents: 8375
diff changeset
113 const int a2 = block[i+0*8] - block[i+4*8];
fa07932f2c89 flatten an array, since gcc fails at optimizing multidimensional arrays
lorenm
parents: 8375
diff changeset
114 const int a4 = (block[i+2*8]>>1) - block[i+6*8];
fa07932f2c89 flatten an array, since gcc fails at optimizing multidimensional arrays
lorenm
parents: 8375
diff changeset
115 const int a6 = (block[i+6*8]>>1) + block[i+2*8];
2755
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
116
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
117 const int b0 = a0 + a6;
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
118 const int b2 = a2 + a4;
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
119 const int b4 = a2 - a4;
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
120 const int b6 = a0 - a6;
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
121
8457
fa07932f2c89 flatten an array, since gcc fails at optimizing multidimensional arrays
lorenm
parents: 8375
diff changeset
122 const int a1 = -block[i+3*8] + block[i+5*8] - block[i+7*8] - (block[i+7*8]>>1);
fa07932f2c89 flatten an array, since gcc fails at optimizing multidimensional arrays
lorenm
parents: 8375
diff changeset
123 const int a3 = block[i+1*8] + block[i+7*8] - block[i+3*8] - (block[i+3*8]>>1);
fa07932f2c89 flatten an array, since gcc fails at optimizing multidimensional arrays
lorenm
parents: 8375
diff changeset
124 const int a5 = -block[i+1*8] + block[i+7*8] + block[i+5*8] + (block[i+5*8]>>1);
fa07932f2c89 flatten an array, since gcc fails at optimizing multidimensional arrays
lorenm
parents: 8375
diff changeset
125 const int a7 = block[i+3*8] + block[i+5*8] + block[i+1*8] + (block[i+1*8]>>1);
2755
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
126
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
127 const int b1 = (a7>>2) + a1;
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
128 const int b3 = a3 + (a5>>2);
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
129 const int b5 = (a3>>2) - a5;
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
130 const int b7 = a7 - (a1>>2);
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
131
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
132 dst[i + 0*stride] = cm[ dst[i + 0*stride] + ((b0 + b7) >> 6) ];
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
133 dst[i + 1*stride] = cm[ dst[i + 1*stride] + ((b2 + b5) >> 6) ];
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
134 dst[i + 2*stride] = cm[ dst[i + 2*stride] + ((b4 + b3) >> 6) ];
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
135 dst[i + 3*stride] = cm[ dst[i + 3*stride] + ((b6 + b1) >> 6) ];
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
136 dst[i + 4*stride] = cm[ dst[i + 4*stride] + ((b6 - b1) >> 6) ];
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
137 dst[i + 5*stride] = cm[ dst[i + 5*stride] + ((b4 - b3) >> 6) ];
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
138 dst[i + 6*stride] = cm[ dst[i + 6*stride] + ((b2 - b5) >> 6) ];
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
139 dst[i + 7*stride] = cm[ dst[i + 7*stride] + ((b0 - b7) >> 6) ];
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
140 }
975074f04b95 decode H.264 with 8x8 transform.
lorenm
parents: 2272
diff changeset
141 }
3105
2d35fb3cb940 h264: special case dc-only idct. ~1% faster overall
lorenm
parents: 3036
diff changeset
142
2d35fb3cb940 h264: special case dc-only idct. ~1% faster overall
lorenm
parents: 3036
diff changeset
143 // assumes all AC coefs are 0
2d35fb3cb940 h264: special case dc-only idct. ~1% faster overall
lorenm
parents: 3036
diff changeset
144 void ff_h264_idct_dc_add_c(uint8_t *dst, DCTELEM *block, int stride){
2d35fb3cb940 h264: special case dc-only idct. ~1% faster overall
lorenm
parents: 3036
diff changeset
145 int i, j;
4176
23da44e8fd05 rename cropTbl -> ff_cropTbl
mru
parents: 3947
diff changeset
146 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
3105
2d35fb3cb940 h264: special case dc-only idct. ~1% faster overall
lorenm
parents: 3036
diff changeset
147 int dc = (block[0] + 32) >> 6;
2d35fb3cb940 h264: special case dc-only idct. ~1% faster overall
lorenm
parents: 3036
diff changeset
148 for( j = 0; j < 4; j++ )
2d35fb3cb940 h264: special case dc-only idct. ~1% faster overall
lorenm
parents: 3036
diff changeset
149 {
2d35fb3cb940 h264: special case dc-only idct. ~1% faster overall
lorenm
parents: 3036
diff changeset
150 for( i = 0; i < 4; i++ )
2d35fb3cb940 h264: special case dc-only idct. ~1% faster overall
lorenm
parents: 3036
diff changeset
151 dst[i] = cm[ dst[i] + dc ];
2d35fb3cb940 h264: special case dc-only idct. ~1% faster overall
lorenm
parents: 3036
diff changeset
152 dst += stride;
2d35fb3cb940 h264: special case dc-only idct. ~1% faster overall
lorenm
parents: 3036
diff changeset
153 }
2d35fb3cb940 h264: special case dc-only idct. ~1% faster overall
lorenm
parents: 3036
diff changeset
154 }
2d35fb3cb940 h264: special case dc-only idct. ~1% faster overall
lorenm
parents: 3036
diff changeset
155
2d35fb3cb940 h264: special case dc-only idct. ~1% faster overall
lorenm
parents: 3036
diff changeset
156 void ff_h264_idct8_dc_add_c(uint8_t *dst, DCTELEM *block, int stride){
2d35fb3cb940 h264: special case dc-only idct. ~1% faster overall
lorenm
parents: 3036
diff changeset
157 int i, j;
4176
23da44e8fd05 rename cropTbl -> ff_cropTbl
mru
parents: 3947
diff changeset
158 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
3105
2d35fb3cb940 h264: special case dc-only idct. ~1% faster overall
lorenm
parents: 3036
diff changeset
159 int dc = (block[0] + 32) >> 6;
2d35fb3cb940 h264: special case dc-only idct. ~1% faster overall
lorenm
parents: 3036
diff changeset
160 for( j = 0; j < 8; j++ )
2d35fb3cb940 h264: special case dc-only idct. ~1% faster overall
lorenm
parents: 3036
diff changeset
161 {
2d35fb3cb940 h264: special case dc-only idct. ~1% faster overall
lorenm
parents: 3036
diff changeset
162 for( i = 0; i < 8; i++ )
2d35fb3cb940 h264: special case dc-only idct. ~1% faster overall
lorenm
parents: 3036
diff changeset
163 dst[i] = cm[ dst[i] + dc ];
2d35fb3cb940 h264: special case dc-only idct. ~1% faster overall
lorenm
parents: 3036
diff changeset
164 dst += stride;
2d35fb3cb940 h264: special case dc-only idct. ~1% faster overall
lorenm
parents: 3036
diff changeset
165 }
2d35fb3cb940 h264: special case dc-only idct. ~1% faster overall
lorenm
parents: 3036
diff changeset
166 }
8375
de2509cf3c44 H.264 idct functions that include the chroma, inter luma and intra16 luma loops
michael
parents: 5215
diff changeset
167
de2509cf3c44 H.264 idct functions that include the chroma, inter luma and intra16 luma loops
michael
parents: 5215
diff changeset
168 //FIXME this table is a duplicate from h264data.h, and will be removed once the tables from h264 have been split
de2509cf3c44 H.264 idct functions that include the chroma, inter luma and intra16 luma loops
michael
parents: 5215
diff changeset
/*
 * Maps a 4x4 block index to its position in the nnzc[6*8] array passed to
 * the add16/add8 functions below. Positions are written as column x and
 * row y of an 8-entry-wide grid. Entries 0..15 are used by the 16-block
 * loops, entries 16..23 by ff_h264_idct_add8_c().
 */
#define SCAN8_XY(x, y) ((x) + (y)*8)
static const uint8_t scan8[16 + 2*4]={
    /* blocks 0..15 */
    SCAN8_XY(4,1), SCAN8_XY(5,1), SCAN8_XY(4,2), SCAN8_XY(5,2),
    SCAN8_XY(6,1), SCAN8_XY(7,1), SCAN8_XY(6,2), SCAN8_XY(7,2),
    SCAN8_XY(4,3), SCAN8_XY(5,3), SCAN8_XY(4,4), SCAN8_XY(5,4),
    SCAN8_XY(6,3), SCAN8_XY(7,3), SCAN8_XY(6,4), SCAN8_XY(7,4),
    /* blocks 16..19 */
    SCAN8_XY(1,1), SCAN8_XY(2,1),
    SCAN8_XY(1,2), SCAN8_XY(2,2),
    /* blocks 20..23 */
    SCAN8_XY(1,4), SCAN8_XY(2,4),
    SCAN8_XY(1,5), SCAN8_XY(2,5),
};
#undef SCAN8_XY
de2509cf3c44 H.264 idct functions that include the chroma, inter luma and intra16 luma loops
michael
parents: 5215
diff changeset
179
de2509cf3c44 H.264 idct functions that include the chroma, inter luma and intra16 luma loops
michael
parents: 5215
diff changeset
180 void ff_h264_idct_add16_c(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){
de2509cf3c44 H.264 idct functions that include the chroma, inter luma and intra16 luma loops
michael
parents: 5215
diff changeset
181 int i;
de2509cf3c44 H.264 idct functions that include the chroma, inter luma and intra16 luma loops
michael
parents: 5215
diff changeset
182 for(i=0; i<16; i++){
de2509cf3c44 H.264 idct functions that include the chroma, inter luma and intra16 luma loops
michael
parents: 5215
diff changeset
183 int nnz = nnzc[ scan8[i] ];
de2509cf3c44 H.264 idct functions that include the chroma, inter luma and intra16 luma loops
michael
parents: 5215
diff changeset
184 if(nnz){
de2509cf3c44 H.264 idct functions that include the chroma, inter luma and intra16 luma loops
michael
parents: 5215
diff changeset
185 if(nnz==1 && block[i*16]) ff_h264_idct_dc_add_c(dst + block_offset[i], block + i*16, stride);
de2509cf3c44 H.264 idct functions that include the chroma, inter luma and intra16 luma loops
michael
parents: 5215
diff changeset
186 else idct_internal (dst + block_offset[i], block + i*16, stride, 4, 6, 1);
de2509cf3c44 H.264 idct functions that include the chroma, inter luma and intra16 luma loops
michael
parents: 5215
diff changeset
187 }
de2509cf3c44 H.264 idct functions that include the chroma, inter luma and intra16 luma loops
michael
parents: 5215
diff changeset
188 }
de2509cf3c44 H.264 idct functions that include the chroma, inter luma and intra16 luma loops
michael
parents: 5215
diff changeset
189 }
de2509cf3c44 H.264 idct functions that include the chroma, inter luma and intra16 luma loops
michael
parents: 5215
diff changeset
190
de2509cf3c44 H.264 idct functions that include the chroma, inter luma and intra16 luma loops
michael
parents: 5215
diff changeset
191 void ff_h264_idct_add16intra_c(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){
de2509cf3c44 H.264 idct functions that include the chroma, inter luma and intra16 luma loops
michael
parents: 5215
diff changeset
192 int i;
de2509cf3c44 H.264 idct functions that include the chroma, inter luma and intra16 luma loops
michael
parents: 5215
diff changeset
193 for(i=0; i<16; i++){
de2509cf3c44 H.264 idct functions that include the chroma, inter luma and intra16 luma loops
michael
parents: 5215
diff changeset
194 if(nnzc[ scan8[i] ]) idct_internal (dst + block_offset[i], block + i*16, stride, 4, 6, 1);
de2509cf3c44 H.264 idct functions that include the chroma, inter luma and intra16 luma loops
michael
parents: 5215
diff changeset
195 else if(block[i*16]) ff_h264_idct_dc_add_c(dst + block_offset[i], block + i*16, stride);
de2509cf3c44 H.264 idct functions that include the chroma, inter luma and intra16 luma loops
michael
parents: 5215
diff changeset
196 }
de2509cf3c44 H.264 idct functions that include the chroma, inter luma and intra16 luma loops
michael
parents: 5215
diff changeset
197 }
de2509cf3c44 H.264 idct functions that include the chroma, inter luma and intra16 luma loops
michael
parents: 5215
diff changeset
198
de2509cf3c44 H.264 idct functions that include the chroma, inter luma and intra16 luma loops
michael
parents: 5215
diff changeset
199 void ff_h264_idct8_add4_c(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){
de2509cf3c44 H.264 idct functions that include the chroma, inter luma and intra16 luma loops
michael
parents: 5215
diff changeset
200 int i;
de2509cf3c44 H.264 idct functions that include the chroma, inter luma and intra16 luma loops
michael
parents: 5215
diff changeset
201 for(i=0; i<16; i+=4){
de2509cf3c44 H.264 idct functions that include the chroma, inter luma and intra16 luma loops
michael
parents: 5215
diff changeset
202 int nnz = nnzc[ scan8[i] ];
de2509cf3c44 H.264 idct functions that include the chroma, inter luma and intra16 luma loops
michael
parents: 5215
diff changeset
203 if(nnz){
de2509cf3c44 H.264 idct functions that include the chroma, inter luma and intra16 luma loops
michael
parents: 5215
diff changeset
204 if(nnz==1 && block[i*16]) ff_h264_idct8_dc_add_c(dst + block_offset[i], block + i*16, stride);
de2509cf3c44 H.264 idct functions that include the chroma, inter luma and intra16 luma loops
michael
parents: 5215
diff changeset
205 else ff_h264_idct8_add_c (dst + block_offset[i], block + i*16, stride);
de2509cf3c44 H.264 idct functions that include the chroma, inter luma and intra16 luma loops
michael
parents: 5215
diff changeset
206 }
de2509cf3c44 H.264 idct functions that include the chroma, inter luma and intra16 luma loops
michael
parents: 5215
diff changeset
207 }
de2509cf3c44 H.264 idct functions that include the chroma, inter luma and intra16 luma loops
michael
parents: 5215
diff changeset
208 }
de2509cf3c44 H.264 idct functions that include the chroma, inter luma and intra16 luma loops
michael
parents: 5215
diff changeset
209
de2509cf3c44 H.264 idct functions that include the chroma, inter luma and intra16 luma loops
michael
parents: 5215
diff changeset
210 void ff_h264_idct_add8_c(uint8_t **dest, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){
de2509cf3c44 H.264 idct functions that include the chroma, inter luma and intra16 luma loops
michael
parents: 5215
diff changeset
211 int i;
de2509cf3c44 H.264 idct functions that include the chroma, inter luma and intra16 luma loops
michael
parents: 5215
diff changeset
212 for(i=16; i<16+8; i++){
de2509cf3c44 H.264 idct functions that include the chroma, inter luma and intra16 luma loops
michael
parents: 5215
diff changeset
213 if(nnzc[ scan8[i] ])
de2509cf3c44 H.264 idct functions that include the chroma, inter luma and intra16 luma loops
michael
parents: 5215
diff changeset
214 ff_h264_idct_add_c (dest[(i&4)>>2] + block_offset[i], block + i*16, stride);
de2509cf3c44 H.264 idct functions that include the chroma, inter luma and intra16 luma loops
michael
parents: 5215
diff changeset
215 else if(block[i*16])
de2509cf3c44 H.264 idct functions that include the chroma, inter luma and intra16 luma loops
michael
parents: 5215
diff changeset
216 ff_h264_idct_dc_add_c(dest[(i&4)>>2] + block_offset[i], block + i*16, stride);
de2509cf3c44 H.264 idct functions that include the chroma, inter luma and intra16 luma loops
michael
parents: 5215
diff changeset
217 }
de2509cf3c44 H.264 idct functions that include the chroma, inter luma and intra16 luma loops
michael
parents: 5215
diff changeset
218 }