Mercurial > libavcodec.hg
annotate snow.c @ 9472:05663d250d5b libavcodec
Remove unused variable from wma_decode_block() found by CSA.
author | michael |
---|---|
date | Fri, 17 Apr 2009 15:35:32 +0000 |
parents | 5c58923d47d2 |
children | e1d76267dce8 |
rev | line source |
---|---|
2138 | 1 /* |
2 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at> | |
3 * | |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3920
diff
changeset
|
4 * This file is part of FFmpeg. |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3920
diff
changeset
|
5 * |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3920
diff
changeset
|
6 * FFmpeg is free software; you can redistribute it and/or |
2138 | 7 * modify it under the terms of the GNU Lesser General Public |
8 * License as published by the Free Software Foundation; either | |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3920
diff
changeset
|
9 * version 2.1 of the License, or (at your option) any later version. |
2138 | 10 * |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3920
diff
changeset
|
11 * FFmpeg is distributed in the hope that it will be useful, |
2138 | 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 * Lesser General Public License for more details. | |
15 * | |
16 * You should have received a copy of the GNU Lesser General Public | |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3920
diff
changeset
|
17 * License along with FFmpeg; if not, write to the Free Software |
3036
0b546eab515d
Update licensing information: The FSF changed postal address.
diego
parents:
3035
diff
changeset
|
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
2138 | 19 */ |
20 | |
21 #include "avcodec.h" | |
22 #include "dsputil.h" | |
3198
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
23 #include "snow.h" |
2335 | 24 |
25 #include "rangecoder.h" | |
8627
d6bab465b82c
moves mid_pred() into mathops.h (with arch specific code split by directory)
aurel
parents:
8611
diff
changeset
|
26 #include "mathops.h" |
2138 | 27 |
28 #include "mpegvideo.h" | |
29 | |
30 #undef NDEBUG | |
31 #include <assert.h> | |
32 | |
33 static const int8_t quant3[256]={ | |
34 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
35 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
36 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
37 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
38 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
39 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
40 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
41 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
42 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
43 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
44 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
45 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
46 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
47 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
48 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
49 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0, | |
50 }; | |
51 static const int8_t quant3b[256]={ | |
52 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
53 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
54 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
55 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
56 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
57 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
58 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
59 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
60 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
61 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
62 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
63 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
64 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
65 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
66 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
67 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
68 }; | |
2596 | 69 static const int8_t quant3bA[256]={ |
70 0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
71 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
72 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
73 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
74 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
75 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
76 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
77 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
78 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
79 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
80 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
81 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
82 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
83 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
84 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
85 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
86 }; | |
2138 | 87 static const int8_t quant5[256]={ |
88 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
89 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
90 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
91 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
92 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
93 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
94 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
95 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
96 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, | |
97 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, | |
98 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, | |
99 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, | |
100 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, | |
101 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, | |
102 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, | |
103 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1, | |
104 }; | |
105 static const int8_t quant7[256]={ | |
106 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
107 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
108 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, | |
109 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, | |
110 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, | |
111 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, | |
112 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, | |
113 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, | |
114 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3, | |
115 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3, | |
116 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3, | |
117 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3, | |
118 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3, | |
119 -3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2, | |
120 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, | |
121 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1, | |
122 }; | |
123 static const int8_t quant9[256]={ | |
124 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, | |
125 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, | |
126 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, | |
127 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, | |
128 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, | |
129 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, | |
130 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, | |
131 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, | |
132 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, | |
133 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, | |
134 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, | |
135 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, | |
136 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, | |
137 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, | |
138 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3, | |
139 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1, | |
140 }; | |
141 static const int8_t quant11[256]={ | |
142 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, | |
143 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, | |
144 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, | |
145 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, | |
146 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, | |
147 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, | |
148 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, | |
149 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, | |
150 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5, | |
151 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5, | |
152 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5, | |
153 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5, | |
154 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5, | |
155 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4, | |
156 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, | |
157 -4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1, | |
158 }; | |
159 static const int8_t quant13[256]={ | |
160 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, | |
161 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, | |
162 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, | |
163 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, | |
164 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, | |
165 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, | |
166 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, | |
167 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, | |
168 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6, | |
169 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6, | |
170 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6, | |
171 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6, | |
172 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5, | |
173 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5, | |
174 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5, | |
175 -4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1, | |
176 }; | |
177 | |
178 #if 0 //64*cubic | |
179 static const uint8_t obmc32[1024]={ | |
5659
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
180 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
181 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
182 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 16, 16, 16, 16, 16, 16, 16, 16, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
183 0, 0, 4, 4, 8, 8, 12, 16, 16, 20, 24, 24, 28, 28, 32, 32, 32, 32, 28, 28, 24, 24, 20, 16, 16, 12, 8, 8, 4, 4, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
184 0, 0, 4, 8, 8, 12, 16, 24, 28, 32, 36, 40, 44, 48, 48, 48, 48, 48, 48, 44, 40, 36, 32, 28, 24, 16, 12, 8, 8, 4, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
185 0, 4, 4, 8, 12, 20, 24, 32, 40, 44, 52, 56, 60, 64, 68, 72, 72, 68, 64, 60, 56, 52, 44, 40, 32, 24, 20, 12, 8, 4, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
186 0, 4, 4, 12, 16, 24, 32, 40, 52, 60, 68, 76, 80, 88, 88, 92, 92, 88, 88, 80, 76, 68, 60, 52, 40, 32, 24, 16, 12, 4, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
187 0, 4, 8, 16, 24, 32, 40, 52, 64, 76, 84, 92,100,108,112,116,116,112,108,100, 92, 84, 76, 64, 52, 40, 32, 24, 16, 8, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
188 0, 4, 8, 16, 28, 40, 52, 64, 76, 88,100,112,124,132,136,140,140,136,132,124,112,100, 88, 76, 64, 52, 40, 28, 16, 8, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
189 0, 4, 12, 20, 32, 44, 60, 76, 88,104,120,132,144,152,160,164,164,160,152,144,132,120,104, 88, 76, 60, 44, 32, 20, 12, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
190 0, 4, 12, 24, 36, 48, 68, 84,100,120,136,152,164,176,180,184,184,180,176,164,152,136,120,100, 84, 68, 48, 36, 24, 12, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
191 0, 4, 12, 24, 40, 56, 76, 92,112,132,152,168,180,192,204,208,208,204,192,180,168,152,132,112, 92, 76, 56, 40, 24, 12, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
192 0, 4, 16, 28, 44, 60, 80,100,124,144,164,180,196,208,220,224,224,220,208,196,180,164,144,124,100, 80, 60, 44, 28, 16, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
193 0, 8, 16, 28, 48, 64, 88,108,132,152,176,192,208,224,232,240,240,232,224,208,192,176,152,132,108, 88, 64, 48, 28, 16, 8, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
194 0, 4, 16, 32, 48, 68, 88,112,136,160,180,204,220,232,244,248,248,244,232,220,204,180,160,136,112, 88, 68, 48, 32, 16, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
195 1, 8, 16, 32, 48, 72, 92,116,140,164,184,208,224,240,248,255,255,248,240,224,208,184,164,140,116, 92, 72, 48, 32, 16, 8, 1, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
196 1, 8, 16, 32, 48, 72, 92,116,140,164,184,208,224,240,248,255,255,248,240,224,208,184,164,140,116, 92, 72, 48, 32, 16, 8, 1, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
197 0, 4, 16, 32, 48, 68, 88,112,136,160,180,204,220,232,244,248,248,244,232,220,204,180,160,136,112, 88, 68, 48, 32, 16, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
198 0, 8, 16, 28, 48, 64, 88,108,132,152,176,192,208,224,232,240,240,232,224,208,192,176,152,132,108, 88, 64, 48, 28, 16, 8, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
199 0, 4, 16, 28, 44, 60, 80,100,124,144,164,180,196,208,220,224,224,220,208,196,180,164,144,124,100, 80, 60, 44, 28, 16, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
200 0, 4, 12, 24, 40, 56, 76, 92,112,132,152,168,180,192,204,208,208,204,192,180,168,152,132,112, 92, 76, 56, 40, 24, 12, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
201 0, 4, 12, 24, 36, 48, 68, 84,100,120,136,152,164,176,180,184,184,180,176,164,152,136,120,100, 84, 68, 48, 36, 24, 12, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
202 0, 4, 12, 20, 32, 44, 60, 76, 88,104,120,132,144,152,160,164,164,160,152,144,132,120,104, 88, 76, 60, 44, 32, 20, 12, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
203 0, 4, 8, 16, 28, 40, 52, 64, 76, 88,100,112,124,132,136,140,140,136,132,124,112,100, 88, 76, 64, 52, 40, 28, 16, 8, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
204 0, 4, 8, 16, 24, 32, 40, 52, 64, 76, 84, 92,100,108,112,116,116,112,108,100, 92, 84, 76, 64, 52, 40, 32, 24, 16, 8, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
205 0, 4, 4, 12, 16, 24, 32, 40, 52, 60, 68, 76, 80, 88, 88, 92, 92, 88, 88, 80, 76, 68, 60, 52, 40, 32, 24, 16, 12, 4, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
206 0, 4, 4, 8, 12, 20, 24, 32, 40, 44, 52, 56, 60, 64, 68, 72, 72, 68, 64, 60, 56, 52, 44, 40, 32, 24, 20, 12, 8, 4, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
207 0, 0, 4, 8, 8, 12, 16, 24, 28, 32, 36, 40, 44, 48, 48, 48, 48, 48, 48, 44, 40, 36, 32, 28, 24, 16, 12, 8, 8, 4, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
208 0, 0, 4, 4, 8, 8, 12, 16, 16, 20, 24, 24, 28, 28, 32, 32, 32, 32, 28, 28, 24, 24, 20, 16, 16, 12, 8, 8, 4, 4, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
209 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 16, 16, 16, 16, 16, 16, 16, 16, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
210 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
211 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
2138 | 212 //error:0.000022 |
213 }; | |
214 static const uint8_t obmc16[256]={ | |
5659
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
215 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
216 0, 4, 4, 8, 16, 20, 20, 24, 24, 20, 20, 16, 8, 4, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
217 0, 4, 16, 24, 36, 44, 52, 60, 60, 52, 44, 36, 24, 16, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
218 0, 8, 24, 44, 60, 80, 96,104,104, 96, 80, 60, 44, 24, 8, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
219 0, 16, 36, 60, 92,116,136,152,152,136,116, 92, 60, 36, 16, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
220 0, 20, 44, 80,116,152,180,196,196,180,152,116, 80, 44, 20, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
221 4, 20, 52, 96,136,180,212,228,228,212,180,136, 96, 52, 20, 4, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
222 4, 24, 60,104,152,196,228,248,248,228,196,152,104, 60, 24, 4, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
223 4, 24, 60,104,152,196,228,248,248,228,196,152,104, 60, 24, 4, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
224 4, 20, 52, 96,136,180,212,228,228,212,180,136, 96, 52, 20, 4, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
225 0, 20, 44, 80,116,152,180,196,196,180,152,116, 80, 44, 20, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
226 0, 16, 36, 60, 92,116,136,152,152,136,116, 92, 60, 36, 16, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
227 0, 8, 24, 44, 60, 80, 96,104,104, 96, 80, 60, 44, 24, 8, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
228 0, 4, 16, 24, 36, 44, 52, 60, 60, 52, 44, 36, 24, 16, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
229 0, 4, 4, 8, 16, 20, 20, 24, 24, 20, 20, 16, 8, 4, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
230 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, |
2138 | 231 //error:0.000033 |
232 }; | |
233 #elif 1 // 64*linear | |
234 static const uint8_t obmc32[1024]={ | |
3206 | 235 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, |
236 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0, | |
237 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0, | |
238 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0, | |
239 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4, | |
240 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4, | |
241 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4, | |
242 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4, | |
243 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4, | |
244 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4, | |
245 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4, | |
246 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4, | |
247 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8, | |
248 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8, | |
249 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8, | |
250 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8, | |
251 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8, | |
252 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8, | |
253 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8, | |
254 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8, | |
255 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4, | |
256 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4, | |
257 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4, | |
258 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4, | |
259 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4, | |
260 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4, | |
261 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4, | |
262 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4, | |
263 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0, | |
264 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0, | |
265 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0, | |
266 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, | |
2138 | 267 //error:0.000020 |
268 }; | |
269 static const uint8_t obmc16[256]={ | |
3206 | 270 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0, |
271 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4, | |
272 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4, | |
273 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8, | |
274 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8, | |
275 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12, | |
276 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12, | |
277 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16, | |
278 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16, | |
279 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12, | |
280 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12, | |
281 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8, | |
282 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8, | |
283 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4, | |
284 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4, | |
285 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0, | |
2138 | 286 //error:0.000015 |
287 }; | |
288 #else //64*cos | |
289 static const uint8_t obmc32[1024]={ | |
5659
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
290 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
291 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 4, 4, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
292 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 12, 16, 16, 16, 16, 16, 16, 12, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
293 0, 0, 4, 4, 4, 8, 8, 12, 16, 20, 20, 24, 28, 28, 28, 28, 28, 28, 28, 28, 24, 20, 20, 16, 12, 8, 8, 4, 4, 4, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
294 0, 0, 4, 4, 8, 12, 16, 20, 24, 28, 36, 40, 44, 44, 48, 48, 48, 48, 44, 44, 40, 36, 28, 24, 20, 16, 12, 8, 4, 4, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
295 0, 0, 4, 8, 12, 20, 24, 32, 36, 44, 48, 56, 60, 64, 68, 68, 68, 68, 64, 60, 56, 48, 44, 36, 32, 24, 20, 12, 8, 4, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
296 0, 4, 4, 8, 16, 24, 32, 40, 48, 60, 68, 76, 80, 84, 88, 92, 92, 88, 84, 80, 76, 68, 60, 48, 40, 32, 24, 16, 8, 4, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
297 0, 4, 8, 12, 20, 32, 40, 52, 64, 76, 84, 96,104,108,112,116,116,112,108,104, 96, 84, 76, 64, 52, 40, 32, 20, 12, 8, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
298 0, 4, 8, 16, 24, 36, 48, 64, 76, 92,104,116,124,132,136,140,140,136,132,124,116,104, 92, 76, 64, 48, 36, 24, 16, 8, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
299 0, 4, 12, 20, 28, 44, 60, 76, 92,104,120,136,148,156,160,164,164,160,156,148,136,120,104, 92, 76, 60, 44, 28, 20, 12, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
300 0, 4, 12, 20, 36, 48, 68, 84,104,120,140,152,168,176,184,188,188,184,176,168,152,140,120,104, 84, 68, 48, 36, 20, 12, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
301 0, 4, 12, 24, 36, 56, 76, 96,116,136,152,172,184,196,204,208,208,204,196,184,172,152,136,116, 96, 76, 56, 36, 24, 12, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
302 0, 4, 12, 24, 44, 60, 80,104,124,148,168,184,200,212,224,228,228,224,212,200,184,168,148,124,104, 80, 60, 44, 24, 12, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
303 0, 4, 12, 28, 44, 64, 84,108,132,156,176,196,212,228,236,240,240,236,228,212,196,176,156,132,108, 84, 64, 44, 28, 12, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
304 0, 4, 16, 28, 48, 68, 88,112,136,160,184,204,224,236,244,252,252,244,236,224,204,184,160,136,112, 88, 68, 48, 28, 16, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
305 1, 4, 16, 28, 48, 68, 92,116,140,164,188,208,228,240,252,255,255,252,240,228,208,188,164,140,116, 92, 68, 48, 28, 16, 4, 1, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
306 1, 4, 16, 28, 48, 68, 92,116,140,164,188,208,228,240,252,255,255,252,240,228,208,188,164,140,116, 92, 68, 48, 28, 16, 4, 1, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
307 0, 4, 16, 28, 48, 68, 88,112,136,160,184,204,224,236,244,252,252,244,236,224,204,184,160,136,112, 88, 68, 48, 28, 16, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
308 0, 4, 12, 28, 44, 64, 84,108,132,156,176,196,212,228,236,240,240,236,228,212,196,176,156,132,108, 84, 64, 44, 28, 12, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
309 0, 4, 12, 24, 44, 60, 80,104,124,148,168,184,200,212,224,228,228,224,212,200,184,168,148,124,104, 80, 60, 44, 24, 12, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
310 0, 4, 12, 24, 36, 56, 76, 96,116,136,152,172,184,196,204,208,208,204,196,184,172,152,136,116, 96, 76, 56, 36, 24, 12, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
311 0, 4, 12, 20, 36, 48, 68, 84,104,120,140,152,168,176,184,188,188,184,176,168,152,140,120,104, 84, 68, 48, 36, 20, 12, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
312 0, 4, 12, 20, 28, 44, 60, 76, 92,104,120,136,148,156,160,164,164,160,156,148,136,120,104, 92, 76, 60, 44, 28, 20, 12, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
313 0, 4, 8, 16, 24, 36, 48, 64, 76, 92,104,116,124,132,136,140,140,136,132,124,116,104, 92, 76, 64, 48, 36, 24, 16, 8, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
314 0, 4, 8, 12, 20, 32, 40, 52, 64, 76, 84, 96,104,108,112,116,116,112,108,104, 96, 84, 76, 64, 52, 40, 32, 20, 12, 8, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
315 0, 4, 4, 8, 16, 24, 32, 40, 48, 60, 68, 76, 80, 84, 88, 92, 92, 88, 84, 80, 76, 68, 60, 48, 40, 32, 24, 16, 8, 4, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
316 0, 0, 4, 8, 12, 20, 24, 32, 36, 44, 48, 56, 60, 64, 68, 68, 68, 68, 64, 60, 56, 48, 44, 36, 32, 24, 20, 12, 8, 4, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
317 0, 0, 4, 4, 8, 12, 16, 20, 24, 28, 36, 40, 44, 44, 48, 48, 48, 48, 44, 44, 40, 36, 28, 24, 20, 16, 12, 8, 4, 4, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
318 0, 0, 4, 4, 4, 8, 8, 12, 16, 20, 20, 24, 28, 28, 28, 28, 28, 28, 28, 28, 24, 20, 20, 16, 12, 8, 8, 4, 4, 4, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
319 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 12, 16, 16, 16, 16, 16, 16, 12, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
320 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 4, 4, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
321 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
2138 | 322 //error:0.000022 |
323 }; | |
324 static const uint8_t obmc16[256]={ | |
5659
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
325 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
326 0, 0, 4, 8, 12, 16, 20, 20, 20, 20, 16, 12, 8, 4, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
327 0, 4, 12, 24, 32, 44, 52, 56, 56, 52, 44, 32, 24, 12, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
328 0, 8, 24, 40, 60, 80, 96,104,104, 96, 80, 60, 40, 24, 8, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
329 0, 12, 32, 64, 92,120,140,152,152,140,120, 92, 64, 32, 12, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
330 4, 16, 44, 80,120,156,184,196,196,184,156,120, 80, 44, 16, 4, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
331 4, 20, 52, 96,140,184,216,232,232,216,184,140, 96, 52, 20, 4, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
332 0, 20, 56,104,152,196,232,252,252,232,196,152,104, 56, 20, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
333 0, 20, 56,104,152,196,232,252,252,232,196,152,104, 56, 20, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
334 4, 20, 52, 96,140,184,216,232,232,216,184,140, 96, 52, 20, 4, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
335 4, 16, 44, 80,120,156,184,196,196,184,156,120, 80, 44, 16, 4, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
336 0, 12, 32, 64, 92,120,140,152,152,140,120, 92, 64, 32, 12, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
337 0, 8, 24, 40, 60, 80, 96,104,104, 96, 80, 60, 40, 24, 8, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
338 0, 4, 12, 24, 32, 44, 52, 56, 56, 52, 44, 32, 24, 12, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
339 0, 0, 4, 8, 12, 16, 20, 20, 20, 20, 16, 12, 8, 4, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
340 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, |
2138 | 341 //error:0.000022 |
342 }; | |
5910 | 343 #endif /* 0 */ |
2138 | 344 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
345 //linear *64 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
346 static const uint8_t obmc8[64]={ |
3206 | 347 4, 12, 20, 28, 28, 20, 12, 4, |
348 12, 36, 60, 84, 84, 60, 36, 12, | |
349 20, 60,100,140,140,100, 60, 20, | |
350 28, 84,140,196,196,140, 84, 28, | |
351 28, 84,140,196,196,140, 84, 28, | |
352 20, 60,100,140,140,100, 60, 20, | |
353 12, 36, 60, 84, 84, 60, 36, 12, | |
354 4, 12, 20, 28, 28, 20, 12, 4, | |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
355 //error:0.000000 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
356 }; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
357 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
358 //linear *64 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
359 static const uint8_t obmc4[16]={ |
3206 | 360 16, 48, 48, 16, |
361 48,144,144, 48, | |
362 48,144,144, 48, | |
363 16, 48, 48, 16, | |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
364 //error:0.000000 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
365 }; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
366 |
7129 | 367 static const uint8_t * const obmc_tab[4]={ |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
368 obmc32, obmc16, obmc8, obmc4 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
369 }; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
370 |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
371 static int scale_mv_ref[MAX_REF_FRAMES][MAX_REF_FRAMES]; |
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
372 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
373 typedef struct BlockNode{ |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
374 int16_t mx; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
375 int16_t my; |
3314 | 376 uint8_t ref; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
377 uint8_t color[3]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
378 uint8_t type; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
379 //#define TYPE_SPLIT 1 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
380 #define BLOCK_INTRA 1 |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
381 #define BLOCK_OPT 2 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
382 //#define TYPE_NOCOLOR 4 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
383 uint8_t level; //FIXME merge into type? |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
384 }BlockNode; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
385 |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
386 static const BlockNode null_block= { //FIXME add border maybe |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
387 .color= {128,128,128}, |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
388 .mx= 0, |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
389 .my= 0, |
3314 | 390 .ref= 0, |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
391 .type= 0, |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
392 .level= 0, |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
393 }; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
394 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
395 #define LOG2_MB_SIZE 4 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
396 #define MB_SIZE (1<<LOG2_MB_SIZE) |
5575 | 397 #define ENCODER_EXTRA_BITS 4 |
5652
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
398 #define HTAPS_MAX 8 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
399 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
400 typedef struct x_and_coeff{ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
401 int16_t x; |
2596 | 402 uint16_t coeff; |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
403 } x_and_coeff; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
404 |
2138 | 405 typedef struct SubBand{ |
406 int level; | |
407 int stride; | |
408 int width; | |
409 int height; | |
6412 | 410 int qlog; ///< log(qscale)/log[2^(1/6)] |
2138 | 411 DWTELEM *buf; |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
412 IDWTELEM *ibuf; |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
413 int buf_x_offset; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
414 int buf_y_offset; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
415 int stride_line; ///< Stride measured in lines, not pixels. |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
416 x_and_coeff * x_coeff; |
2138 | 417 struct SubBand *parent; |
418 uint8_t state[/*7*2*/ 7 + 512][32]; | |
419 }SubBand; | |
420 | |
421 typedef struct Plane{ | |
422 int width; | |
423 int height; | |
424 SubBand band[MAX_DECOMPOSITIONS][4]; | |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
425 |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
426 int htaps; |
5652
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
427 int8_t hcoeff[HTAPS_MAX/2]; |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
428 int diag_mc; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
429 int fast_mc; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
430 |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
431 int last_htaps; |
5652
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
432 int8_t last_hcoeff[HTAPS_MAX/2]; |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
433 int last_diag_mc; |
2138 | 434 }Plane; |
435 | |
436 typedef struct SnowContext{ | |
6412 | 437 // MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to eventually make the motion estimation independent of MpegEncContext, so this will be removed then (FIXME/XXX) |
2138 | 438 |
439 AVCodecContext *avctx; | |
2335 | 440 RangeCoder c; |
2138 | 441 DSPContext dsp; |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
442 AVFrame new_picture; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
443 AVFrame input_picture; ///< new_picture with the internal linesizes |
2138 | 444 AVFrame current_picture; |
3314 | 445 AVFrame last_picture[MAX_REF_FRAMES]; |
5633
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
446 uint8_t *halfpel_plane[MAX_REF_FRAMES][4][4]; |
2138 | 447 AVFrame mconly_picture; |
448 // uint8_t q_context[16]; | |
449 uint8_t header_state[32]; | |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
450 uint8_t block_state[128 + 32*128]; |
2138 | 451 int keyframe; |
2199 | 452 int always_reset; |
2138 | 453 int version; |
454 int spatial_decomposition_type; | |
4331
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
455 int last_spatial_decomposition_type; |
2138 | 456 int temporal_decomposition_type; |
457 int spatial_decomposition_count; | |
5670
2f3431b34f0f
make changing spatial_decomposition_count per frame work
michael
parents:
5668
diff
changeset
|
458 int last_spatial_decomposition_count; |
2138 | 459 int temporal_decomposition_count; |
3314 | 460 int max_ref_frames; |
461 int ref_frames; | |
462 int16_t (*ref_mvs[MAX_REF_FRAMES])[2]; | |
463 uint32_t *ref_scores[MAX_REF_FRAMES]; | |
2138 | 464 DWTELEM *spatial_dwt_buffer; |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
465 IDWTELEM *spatial_idwt_buffer; |
2138 | 466 int colorspace_type; |
467 int chroma_h_shift; | |
468 int chroma_v_shift; | |
469 int spatial_scalability; | |
470 int qlog; | |
4331
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
471 int last_qlog; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
472 int lambda; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
473 int lambda2; |
3313 | 474 int pass1_rc; |
2138 | 475 int mv_scale; |
4331
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
476 int last_mv_scale; |
2138 | 477 int qbias; |
4331
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
478 int last_qbias; |
2138 | 479 #define QBIAS_SHIFT 3 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
480 int b_width; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
481 int b_height; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
482 int block_max_depth; |
4331
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
483 int last_block_max_depth; |
2138 | 484 Plane plane[MAX_PLANES]; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
485 BlockNode *block; |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
486 #define ME_CACHE_SIZE 1024 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
487 int me_cache[ME_CACHE_SIZE]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
488 int me_cache_generation; |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
489 slice_buffer sb; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
490 |
6412 | 491 MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to eventually make the motion estimation independent of MpegEncContext, so this will be removed then (FIXME/XXX) |
8214
c5276ad92ff8
snow: move scratch buffer from stack to malloced buffer in context
mru
parents:
7622
diff
changeset
|
492 |
c5276ad92ff8
snow: move scratch buffer from stack to malloced buffer in context
mru
parents:
7622
diff
changeset
|
493 uint8_t *scratchbuf; |
2138 | 494 }SnowContext; |
495 | |
2562 | 496 typedef struct { |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
497 IDWTELEM *b0; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
498 IDWTELEM *b1; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
499 IDWTELEM *b2; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
500 IDWTELEM *b3; |
2562 | 501 int y; |
8308 | 502 } DWTCompose; |
2562 | 503 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
504 #define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num))) |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
505 //#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num))) |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
506 |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
507 static void iterative_me(SnowContext *s); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
508 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
509 static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, IDWTELEM * base_buffer) |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
510 { |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
511 int i; |
2967 | 512 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
513 buf->base_buffer = base_buffer; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
514 buf->line_count = line_count; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
515 buf->line_width = line_width; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
516 buf->data_count = max_allocated_lines; |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
517 buf->line = av_mallocz (sizeof(IDWTELEM *) * line_count); |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
518 buf->data_stack = av_malloc (sizeof(IDWTELEM *) * max_allocated_lines); |
2967 | 519 |
6474
e6995f3fbf7f
cosmetics: Normalize {} placement after for, while, if.
diego
parents:
6437
diff
changeset
|
520 for(i = 0; i < max_allocated_lines; i++){ |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
521 buf->data_stack[i] = av_malloc (sizeof(IDWTELEM) * line_width); |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
522 } |
2967 | 523 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
524 buf->data_stack_top = max_allocated_lines - 1; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
525 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
526 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
527 static IDWTELEM * slice_buffer_load_line(slice_buffer * buf, int line) |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
528 { |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
529 IDWTELEM * buffer; |
2967 | 530 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
531 assert(buf->data_stack_top >= 0); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
532 // assert(!buf->line[line]); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
533 if (buf->line[line]) |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
534 return buf->line[line]; |
2967 | 535 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
536 buffer = buf->data_stack[buf->data_stack_top]; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
537 buf->data_stack_top--; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
538 buf->line[line] = buffer; |
2967 | 539 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
540 return buffer; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
541 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
542 |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
543 static void slice_buffer_release(slice_buffer * buf, int line) |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
544 { |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
545 int offset; |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
546 IDWTELEM * buffer; |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
547 |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
548 assert(line >= 0 && line < buf->line_count); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
549 assert(buf->line[line]); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
550 |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
551 offset = buf->line_width * line; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
552 buffer = buf->line[line]; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
553 buf->data_stack_top++; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
554 buf->data_stack[buf->data_stack_top] = buffer; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
555 buf->line[line] = NULL; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
556 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
557 |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
558 static void slice_buffer_flush(slice_buffer * buf) |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
559 { |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
560 int i; |
6474
e6995f3fbf7f
cosmetics: Normalize {} placement after for, while, if.
diego
parents:
6437
diff
changeset
|
561 for(i = 0; i < buf->line_count; i++){ |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
562 if (buf->line[i]) |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
563 slice_buffer_release(buf, i); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
564 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
565 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
566 |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
567 static void slice_buffer_destroy(slice_buffer * buf) |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
568 { |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
569 int i; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
570 slice_buffer_flush(buf); |
2967 | 571 |
6474
e6995f3fbf7f
cosmetics: Normalize {} placement after for, while, if.
diego
parents:
6437
diff
changeset
|
572 for(i = buf->data_count - 1; i >= 0; i--){ |
3190 | 573 av_freep(&buf->data_stack[i]); |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
574 } |
3190 | 575 av_freep(&buf->data_stack); |
576 av_freep(&buf->line); | |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
577 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
578 |
2979 | 579 #ifdef __sgi |
2368
a7ac68734a91
fix for build on IRIX by (Michel Bardiaux {mbardiaux peaktime be})
michael
parents:
2335
diff
changeset
|
580 // Avoid a name clash on SGI IRIX |
2979 | 581 #undef qexp |
2368
a7ac68734a91
fix for build on IRIX by (Michel Bardiaux {mbardiaux peaktime be})
michael
parents:
2335
diff
changeset
|
582 #endif |
2246 | 583 #define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0 |
2600 | 584 static uint8_t qexp[QROOT]; |
2138 | 585 |
586 static inline int mirror(int v, int m){ | |
2998 | 587 while((unsigned)v > (unsigned)m){ |
588 v=-v; | |
589 if(v<0) v+= 2*m; | |
590 } | |
591 return v; | |
2138 | 592 } |
593 | |
2335 | 594 static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){ |
2138 | 595 int i; |
596 | |
597 if(v){ | |
4001 | 598 const int a= FFABS(v); |
2138 | 599 const int e= av_log2(a); |
600 #if 1 | |
2967 | 601 const int el= FFMIN(e, 10); |
2335 | 602 put_rac(c, state+0, 0); |
2138 | 603 |
604 for(i=0; i<el; i++){ | |
2335 | 605 put_rac(c, state+1+i, 1); //1..10 |
2138 | 606 } |
607 for(; i<e; i++){ | |
2335 | 608 put_rac(c, state+1+9, 1); //1..10 |
2138 | 609 } |
2335 | 610 put_rac(c, state+1+FFMIN(i,9), 0); |
2138 | 611 |
612 for(i=e-1; i>=el; i--){ | |
2335 | 613 put_rac(c, state+22+9, (a>>i)&1); //22..31 |
2138 | 614 } |
615 for(; i>=0; i--){ | |
2335 | 616 put_rac(c, state+22+i, (a>>i)&1); //22..31 |
2138 | 617 } |
618 | |
619 if(is_signed) | |
2335 | 620 put_rac(c, state+11 + el, v < 0); //11..21 |
2138 | 621 #else |
2967 | 622 |
2335 | 623 put_rac(c, state+0, 0); |
2138 | 624 if(e<=9){ |
625 for(i=0; i<e; i++){ | |
2335 | 626 put_rac(c, state+1+i, 1); //1..10 |
2138 | 627 } |
2335 | 628 put_rac(c, state+1+i, 0); |
2138 | 629 |
630 for(i=e-1; i>=0; i--){ | |
2335 | 631 put_rac(c, state+22+i, (a>>i)&1); //22..31 |
2138 | 632 } |
633 | |
634 if(is_signed) | |
2335 | 635 put_rac(c, state+11 + e, v < 0); //11..21 |
2138 | 636 }else{ |
637 for(i=0; i<e; i++){ | |
2335 | 638 put_rac(c, state+1+FFMIN(i,9), 1); //1..10 |
2138 | 639 } |
2335 | 640 put_rac(c, state+1+FFMIN(i,9), 0); |
2138 | 641 |
642 for(i=e-1; i>=0; i--){ | |
2335 | 643 put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31 |
2138 | 644 } |
645 | |
646 if(is_signed) | |
2335 | 647 put_rac(c, state+11 + FFMIN(e,10), v < 0); //11..21 |
2138 | 648 } |
5910 | 649 #endif /* 1 */ |
2138 | 650 }else{ |
2335 | 651 put_rac(c, state+0, 1); |
2138 | 652 } |
653 } | |
654 | |
2335 | 655 static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){ |
656 if(get_rac(c, state+0)) | |
2138 | 657 return 0; |
658 else{ | |
2240
c46fed9b7575
simplify getsymbol patch by (Loren Merritt <lorenm at u dot washington dot edu>)
michael
parents:
2232
diff
changeset
|
659 int i, e, a; |
c46fed9b7575
simplify getsymbol patch by (Loren Merritt <lorenm at u dot washington dot edu>)
michael
parents:
2232
diff
changeset
|
660 e= 0; |
2335 | 661 while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10 |
2240
c46fed9b7575
simplify getsymbol patch by (Loren Merritt <lorenm at u dot washington dot edu>)
michael
parents:
2232
diff
changeset
|
662 e++; |
2138 | 663 } |
664 | |
2240
c46fed9b7575
simplify getsymbol patch by (Loren Merritt <lorenm at u dot washington dot edu>)
michael
parents:
2232
diff
changeset
|
665 a= 1; |
c46fed9b7575
simplify getsymbol patch by (Loren Merritt <lorenm at u dot washington dot edu>)
michael
parents:
2232
diff
changeset
|
666 for(i=e-1; i>=0; i--){ |
2335 | 667 a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31 |
2240
c46fed9b7575
simplify getsymbol patch by (Loren Merritt <lorenm at u dot washington dot edu>)
michael
parents:
2232
diff
changeset
|
668 } |
c46fed9b7575
simplify getsymbol patch by (Loren Merritt <lorenm at u dot washington dot edu>)
michael
parents:
2232
diff
changeset
|
669 |
2335 | 670 if(is_signed && get_rac(c, state+11 + FFMIN(e,10))) //11..21 |
2138 | 671 return -a; |
672 else | |
673 return a; | |
674 } | |
675 } | |
676 | |
2335 | 677 static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){ |
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
678 int i; |
2159
7f42295c1517
improved magnitude coding, 0.2% lower bitrate (foreman@352x288 qscale 1 and 8)
michael
parents:
2156
diff
changeset
|
679 int r= log2>=0 ? 1<<log2 : 1; |
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
680 |
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
681 assert(v>=0); |
2159
7f42295c1517
improved magnitude coding, 0.2% lower bitrate (foreman@352x288 qscale 1 and 8)
michael
parents:
2156
diff
changeset
|
682 assert(log2>=-4); |
7f42295c1517
improved magnitude coding, 0.2% lower bitrate (foreman@352x288 qscale 1 and 8)
michael
parents:
2156
diff
changeset
|
683 |
7f42295c1517
improved magnitude coding, 0.2% lower bitrate (foreman@352x288 qscale 1 and 8)
michael
parents:
2156
diff
changeset
|
684 while(v >= r){ |
2335 | 685 put_rac(c, state+4+log2, 1); |
2159
7f42295c1517
improved magnitude coding, 0.2% lower bitrate (foreman@352x288 qscale 1 and 8)
michael
parents:
2156
diff
changeset
|
686 v -= r; |
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
687 log2++; |
2159
7f42295c1517
improved magnitude coding, 0.2% lower bitrate (foreman@352x288 qscale 1 and 8)
michael
parents:
2156
diff
changeset
|
688 if(log2>0) r+=r; |
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
689 } |
2335 | 690 put_rac(c, state+4+log2, 0); |
2967 | 691 |
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
692 for(i=log2-1; i>=0; i--){ |
2335 | 693 put_rac(c, state+31-i, (v>>i)&1); |
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
694 } |
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
695 } |
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
696 |
2335 | 697 static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){ |
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
698 int i; |
2159
7f42295c1517
improved magnitude coding, 0.2% lower bitrate (foreman@352x288 qscale 1 and 8)
michael
parents:
2156
diff
changeset
|
699 int r= log2>=0 ? 1<<log2 : 1; |
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
700 int v=0; |
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
701 |
2159
7f42295c1517
improved magnitude coding, 0.2% lower bitrate (foreman@352x288 qscale 1 and 8)
michael
parents:
2156
diff
changeset
|
702 assert(log2>=-4); |
7f42295c1517
improved magnitude coding, 0.2% lower bitrate (foreman@352x288 qscale 1 and 8)
michael
parents:
2156
diff
changeset
|
703 |
2335 | 704 while(get_rac(c, state+4+log2)){ |
2159
7f42295c1517
improved magnitude coding, 0.2% lower bitrate (foreman@352x288 qscale 1 and 8)
michael
parents:
2156
diff
changeset
|
705 v+= r; |
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
706 log2++; |
2159
7f42295c1517
improved magnitude coding, 0.2% lower bitrate (foreman@352x288 qscale 1 and 8)
michael
parents:
2156
diff
changeset
|
707 if(log2>0) r+=r; |
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
708 } |
2967 | 709 |
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
710 for(i=log2-1; i>=0; i--){ |
2335 | 711 v+= get_rac(c, state+31-i)<<i; |
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
712 } |
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
713 |
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
714 return v; |
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
715 } |
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
716 |
5627 | 717 static av_always_inline void |
718 lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, | |
719 int dst_step, int src_step, int ref_step, | |
720 int width, int mul, int add, int shift, | |
721 int highpass, int inverse){ | |
2138 | 722 const int mirror_left= !highpass; |
723 const int mirror_right= (width&1) ^ highpass; | |
724 const int w= (width>>1) - 1 + (highpass & width); | |
725 int i; | |
726 | |
727 #define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref))) | |
728 if(mirror_left){ | |
729 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse); | |
730 dst += dst_step; | |
731 src += src_step; | |
732 } | |
2967 | 733 |
2138 | 734 for(i=0; i<w; i++){ |
5627 | 735 dst[i*dst_step] = |
736 LIFT(src[i*src_step], | |
737 ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift), | |
738 inverse); | |
2138 | 739 } |
2967 | 740 |
2138 | 741 if(mirror_right){ |
5627 | 742 dst[w*dst_step] = |
743 LIFT(src[w*src_step], | |
744 ((mul*2*ref[w*ref_step]+add)>>shift), | |
745 inverse); | |
2138 | 746 } |
747 } | |
748 | |
5627 | 749 static av_always_inline void |
750 inv_lift(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref, | |
751 int dst_step, int src_step, int ref_step, | |
752 int width, int mul, int add, int shift, | |
753 int highpass, int inverse){ | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
754 const int mirror_left= !highpass; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
755 const int mirror_right= (width&1) ^ highpass; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
756 const int w= (width>>1) - 1 + (highpass & width); |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
757 int i; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
758 |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
759 #define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref))) |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
760 if(mirror_left){ |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
761 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse); |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
762 dst += dst_step; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
763 src += src_step; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
764 } |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
765 |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
766 for(i=0; i<w; i++){ |
5627 | 767 dst[i*dst_step] = |
768 LIFT(src[i*src_step], | |
769 ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift), | |
770 inverse); | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
771 } |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
772 |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
773 if(mirror_right){ |
5627 | 774 dst[w*dst_step] = |
775 LIFT(src[w*src_step], | |
776 ((mul*2*ref[w*ref_step]+add)>>shift), | |
777 inverse); | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
778 } |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
779 } |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
780 |
3198
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
781 #ifndef liftS |
5627 | 782 static av_always_inline void |
783 liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, | |
784 int dst_step, int src_step, int ref_step, | |
785 int width, int mul, int add, int shift, | |
786 int highpass, int inverse){ | |
2602
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
787 const int mirror_left= !highpass; |
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
788 const int mirror_right= (width&1) ^ highpass; |
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
789 const int w= (width>>1) - 1 + (highpass & width); |
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
790 int i; |
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
791 |
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
792 assert(shift == 4); |
5627 | 793 #define LIFTS(src, ref, inv) \ |
794 ((inv) ? \ | |
795 (src) + (((ref) + 4*(src))>>shift): \ | |
796 -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23))) | |
2602
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
797 if(mirror_left){ |
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
798 dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse); |
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
799 dst += dst_step; |
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
800 src += src_step; |
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
801 } |
2967 | 802 |
2602
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
803 for(i=0; i<w; i++){ |
5627 | 804 dst[i*dst_step] = |
805 LIFTS(src[i*src_step], | |
806 mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add, | |
807 inverse); | |
2602
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
808 } |
2967 | 809 |
2602
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
810 if(mirror_right){ |
5627 | 811 dst[w*dst_step] = |
812 LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse); | |
2602
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
813 } |
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
814 } |
5627 | 815 static av_always_inline void |
816 inv_liftS(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref, | |
817 int dst_step, int src_step, int ref_step, | |
818 int width, int mul, int add, int shift, | |
819 int highpass, int inverse){ | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
820 const int mirror_left= !highpass; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
821 const int mirror_right= (width&1) ^ highpass; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
822 const int w= (width>>1) - 1 + (highpass & width); |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
823 int i; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
824 |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
825 assert(shift == 4); |
5627 | 826 #define LIFTS(src, ref, inv) \ |
827 ((inv) ? \ | |
828 (src) + (((ref) + 4*(src))>>shift): \ | |
829 -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23))) | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
830 if(mirror_left){ |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
831 dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse); |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
832 dst += dst_step; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
833 src += src_step; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
834 } |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
835 |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
836 for(i=0; i<w; i++){ |
5627 | 837 dst[i*dst_step] = |
838 LIFTS(src[i*src_step], | |
839 mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add, | |
840 inverse); | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
841 } |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
842 |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
843 if(mirror_right){ |
5627 | 844 dst[w*dst_step] = |
845 LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse); | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
846 } |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
847 } |
6413 | 848 #endif /* ! liftS */ |
2602
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
849 |
2241
c26038875ebc
consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>)
michael
parents:
2240
diff
changeset
|
850 static void horizontal_decompose53i(DWTELEM *b, int width){ |
c26038875ebc
consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>)
michael
parents:
2240
diff
changeset
|
851 DWTELEM temp[width]; |
2138 | 852 const int width2= width>>1; |
2893 | 853 int x; |
2138 | 854 const int w2= (width+1)>>1; |
855 | |
856 for(x=0; x<width2; x++){ | |
857 temp[x ]= b[2*x ]; | |
858 temp[x+w2]= b[2*x + 1]; | |
859 } | |
860 if(width&1) | |
861 temp[x ]= b[2*x ]; | |
862 #if 0 | |
2893 | 863 { |
864 int A1,A2,A3,A4; | |
2138 | 865 A2= temp[1 ]; |
866 A4= temp[0 ]; | |
867 A1= temp[0+width2]; | |
868 A1 -= (A2 + A4)>>1; | |
869 A4 += (A1 + 1)>>1; | |
870 b[0+width2] = A1; | |
871 b[0 ] = A4; | |
872 for(x=1; x+1<width2; x+=2){ | |
873 A3= temp[x+width2]; | |
874 A4= temp[x+1 ]; | |
875 A3 -= (A2 + A4)>>1; | |
876 A2 += (A1 + A3 + 2)>>2; | |
877 b[x+width2] = A3; | |
878 b[x ] = A2; | |
879 | |
880 A1= temp[x+1+width2]; | |
881 A2= temp[x+2 ]; | |
882 A1 -= (A2 + A4)>>1; | |
883 A4 += (A1 + A3 + 2)>>2; | |
884 b[x+1+width2] = A1; | |
885 b[x+1 ] = A4; | |
886 } | |
887 A3= temp[width-1]; | |
888 A3 -= A2; | |
889 A2 += (A1 + A3 + 2)>>2; | |
890 b[width -1] = A3; | |
891 b[width2-1] = A2; | |
2893 | 892 } |
2967 | 893 #else |
2138 | 894 lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0); |
895 lift(b , temp , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 0); | |
6413 | 896 #endif /* 0 */ |
2138 | 897 } |
898 | |
2241
c26038875ebc
consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>)
michael
parents:
2240
diff
changeset
|
899 static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ |
2138 | 900 int i; |
2967 | 901 |
2138 | 902 for(i=0; i<width; i++){ |
903 b1[i] -= (b0[i] + b2[i])>>1; | |
904 } | |
905 } | |
906 | |
2241
c26038875ebc
consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>)
michael
parents:
2240
diff
changeset
|
907 static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ |
2138 | 908 int i; |
2967 | 909 |
2138 | 910 for(i=0; i<width; i++){ |
911 b1[i] += (b0[i] + b2[i] + 2)>>2; | |
912 } | |
913 } | |
914 | |
2241
c26038875ebc
consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>)
michael
parents:
2240
diff
changeset
|
915 static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){ |
2198 | 916 int y; |
2138 | 917 DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride; |
918 DWTELEM *b1= buffer + mirror(-2 , height-1)*stride; | |
2967 | 919 |
2138 | 920 for(y=-2; y<height; y+=2){ |
921 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride; | |
922 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride; | |
923 | |
2998 | 924 if(y+1<(unsigned)height) horizontal_decompose53i(b2, width); |
925 if(y+2<(unsigned)height) horizontal_decompose53i(b3, width); | |
6405 | 926 |
2998 | 927 if(y+1<(unsigned)height) vertical_decompose53iH0(b1, b2, b3, width); |
928 if(y+0<(unsigned)height) vertical_decompose53iL0(b0, b1, b2, width); | |
2967 | 929 |
2138 | 930 b0=b2; |
931 b1=b3; | |
932 } | |
933 } | |
934 | |
2241
c26038875ebc
consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>)
michael
parents:
2240
diff
changeset
|
935 static void horizontal_decompose97i(DWTELEM *b, int width){ |
c26038875ebc
consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>)
michael
parents:
2240
diff
changeset
|
936 DWTELEM temp[width]; |
2138 | 937 const int w2= (width+1)>>1; |
938 | |
5565
93082c591c8b
Change rounding of the horizontal DWT to match the vertical one.
michael
parents:
5551
diff
changeset
|
939 lift (temp+w2, b +1, b , 1, 2, 2, width, W_AM, W_AO, W_AS, 1, 1); |
93082c591c8b
Change rounding of the horizontal DWT to match the vertical one.
michael
parents:
5551
diff
changeset
|
940 liftS(temp , b , temp+w2, 1, 2, 1, width, W_BM, W_BO, W_BS, 0, 0); |
5589
946c2db0a093
cleanup (remove some old experimentation related code)
michael
parents:
5588
diff
changeset
|
941 lift (b +w2, temp+w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 0); |
2138 | 942 lift (b , temp , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 0); |
943 } | |
944 | |
945 | |
2241
c26038875ebc
consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>)
michael
parents:
2240
diff
changeset
|
946 static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ |
2138 | 947 int i; |
2967 | 948 |
2138 | 949 for(i=0; i<width; i++){ |
950 b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS; | |
951 } | |
952 } | |
953 | |
2241
c26038875ebc
consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>)
michael
parents:
2240
diff
changeset
|
954 static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ |
2138 | 955 int i; |
2967 | 956 |
2138 | 957 for(i=0; i<width; i++){ |
958 b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS; | |
959 } | |
960 } | |
961 | |
2241
c26038875ebc
consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>)
michael
parents:
2240
diff
changeset
|
962 static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ |
2138 | 963 int i; |
2967 | 964 |
2138 | 965 for(i=0; i<width; i++){ |
2602
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
966 #ifdef liftS |
2138 | 967 b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS; |
2602
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
968 #else |
5565
93082c591c8b
Change rounding of the horizontal DWT to match the vertical one.
michael
parents:
5551
diff
changeset
|
969 b1[i] = (16*4*b1[i] - 4*(b0[i] + b2[i]) + W_BO*5 + (5<<27)) / (5*16) - (1<<23); |
2602
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
970 #endif |
2138 | 971 } |
972 } | |
973 | |
2241
c26038875ebc
consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>)
michael
parents:
2240
diff
changeset
|
974 static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ |
2138 | 975 int i; |
2967 | 976 |
2138 | 977 for(i=0; i<width; i++){ |
978 b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS; | |
979 } | |
980 } | |
981 | |
2241
c26038875ebc
consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>)
michael
parents:
2240
diff
changeset
|
982 static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){ |
2198 | 983 int y; |
2138 | 984 DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride; |
985 DWTELEM *b1= buffer + mirror(-4 , height-1)*stride; | |
986 DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride; | |
987 DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride; | |
2967 | 988 |
2138 | 989 for(y=-4; y<height; y+=2){ |
990 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride; | |
991 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride; | |
992 | |
2998 | 993 if(y+3<(unsigned)height) horizontal_decompose97i(b4, width); |
994 if(y+4<(unsigned)height) horizontal_decompose97i(b5, width); | |
6405 | 995 |
2998 | 996 if(y+3<(unsigned)height) vertical_decompose97iH0(b3, b4, b5, width); |
997 if(y+2<(unsigned)height) vertical_decompose97iL0(b2, b3, b4, width); | |
998 if(y+1<(unsigned)height) vertical_decompose97iH1(b1, b2, b3, width); | |
999 if(y+0<(unsigned)height) vertical_decompose97iL1(b0, b1, b2, width); | |
2138 | 1000 |
1001 b0=b2; | |
1002 b1=b3; | |
1003 b2=b4; | |
1004 b3=b5; | |
1005 } | |
1006 } | |
1007 | |
2241
c26038875ebc
consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>)
michael
parents:
2240
diff
changeset
|
1008 void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){ |
2138 | 1009 int level; |
2967 | 1010 |
2164 | 1011 for(level=0; level<decomposition_count; level++){ |
1012 switch(type){ | |
3326 | 1013 case DWT_97: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break; |
1014 case DWT_53: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break; | |
2138 | 1015 } |
1016 } | |
1017 } | |
1018 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1019 static void horizontal_compose53i(IDWTELEM *b, int width){ |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1020 IDWTELEM temp[width]; |
2138 | 1021 const int width2= width>>1; |
1022 const int w2= (width+1)>>1; | |
2893 | 1023 int x; |
2138 | 1024 |
1025 #if 0 | |
2893 | 1026 int A1,A2,A3,A4; |
2138 | 1027 A2= temp[1 ]; |
1028 A4= temp[0 ]; | |
1029 A1= temp[0+width2]; | |
1030 A1 -= (A2 + A4)>>1; | |
1031 A4 += (A1 + 1)>>1; | |
1032 b[0+width2] = A1; | |
1033 b[0 ] = A4; | |
1034 for(x=1; x+1<width2; x+=2){ | |
1035 A3= temp[x+width2]; | |
1036 A4= temp[x+1 ]; | |
1037 A3 -= (A2 + A4)>>1; | |
1038 A2 += (A1 + A3 + 2)>>2; | |
1039 b[x+width2] = A3; | |
1040 b[x ] = A2; | |
1041 | |
1042 A1= temp[x+1+width2]; | |
1043 A2= temp[x+2 ]; | |
1044 A1 -= (A2 + A4)>>1; | |
1045 A4 += (A1 + A3 + 2)>>2; | |
1046 b[x+1+width2] = A1; | |
1047 b[x+1 ] = A4; | |
1048 } | |
1049 A3= temp[width-1]; | |
1050 A3 -= A2; | |
1051 A2 += (A1 + A3 + 2)>>2; | |
1052 b[width -1] = A3; | |
1053 b[width2-1] = A2; | |
2967 | 1054 #else |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1055 inv_lift(temp , b , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 1); |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1056 inv_lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1); |
5910 | 1057 #endif /* 0 */ |
2138 | 1058 for(x=0; x<width2; x++){ |
1059 b[2*x ]= temp[x ]; | |
1060 b[2*x + 1]= temp[x+w2]; | |
1061 } | |
1062 if(width&1) | |
1063 b[2*x ]= temp[x ]; | |
1064 } | |
1065 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1066 static void vertical_compose53iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){ |
2138 | 1067 int i; |
2967 | 1068 |
2138 | 1069 for(i=0; i<width; i++){ |
1070 b1[i] += (b0[i] + b2[i])>>1; | |
1071 } | |
1072 } | |
1073 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1074 static void vertical_compose53iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){ |
2138 | 1075 int i; |
2967 | 1076 |
2138 | 1077 for(i=0; i<width; i++){ |
1078 b1[i] -= (b0[i] + b2[i] + 2)>>2; | |
1079 } | |
1080 } | |
1081 | |
8308 | 1082 static void spatial_compose53i_buffered_init(DWTCompose *cs, slice_buffer * sb, int height, int stride_line){ |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1083 cs->b0 = slice_buffer_get_line(sb, mirror(-1-1, height-1) * stride_line); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1084 cs->b1 = slice_buffer_get_line(sb, mirror(-1 , height-1) * stride_line); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1085 cs->y = -1; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1086 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1087 |
8308 | 1088 static void spatial_compose53i_init(DWTCompose *cs, IDWTELEM *buffer, int height, int stride){ |
2562 | 1089 cs->b0 = buffer + mirror(-1-1, height-1)*stride; |
1090 cs->b1 = buffer + mirror(-1 , height-1)*stride; | |
1091 cs->y = -1; | |
1092 } | |
1093 | |
8308 | 1094 static void spatial_compose53i_dy_buffered(DWTCompose *cs, slice_buffer * sb, int width, int height, int stride_line){ |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1095 int y= cs->y; |
2967 | 1096 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1097 IDWTELEM *b0= cs->b0; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1098 IDWTELEM *b1= cs->b1; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1099 IDWTELEM *b2= slice_buffer_get_line(sb, mirror(y+1, height-1) * stride_line); |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1100 IDWTELEM *b3= slice_buffer_get_line(sb, mirror(y+2, height-1) * stride_line); |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1101 |
2998 | 1102 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width); |
1103 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width); | |
6405 | 1104 |
2998 | 1105 if(y-1<(unsigned)height) horizontal_compose53i(b0, width); |
1106 if(y+0<(unsigned)height) horizontal_compose53i(b1, width); | |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1107 |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1108 cs->b0 = b2; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1109 cs->b1 = b3; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1110 cs->y += 2; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1111 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1112 |
8308 | 1113 static void spatial_compose53i_dy(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride){ |
2562 | 1114 int y= cs->y; |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1115 IDWTELEM *b0= cs->b0; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1116 IDWTELEM *b1= cs->b1; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1117 IDWTELEM *b2= buffer + mirror(y+1, height-1)*stride; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1118 IDWTELEM *b3= buffer + mirror(y+2, height-1)*stride; |
2138 | 1119 |
2998 | 1120 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width); |
1121 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width); | |
6405 | 1122 |
2998 | 1123 if(y-1<(unsigned)height) horizontal_compose53i(b0, width); |
1124 if(y+0<(unsigned)height) horizontal_compose53i(b1, width); | |
2138 | 1125 |
2562 | 1126 cs->b0 = b2; |
1127 cs->b1 = b3; | |
1128 cs->y += 2; | |
1129 } | |
1130 | |
5989
382aa8b243c5
prevent warnings about functions being possibly unused
michael
parents:
5988
diff
changeset
|
1131 static void av_unused spatial_compose53i(IDWTELEM *buffer, int width, int height, int stride){ |
8308 | 1132 DWTCompose cs; |
5988
919193c63fb8
thank you for your efforts to keep my code clean, but i would like to
michael
parents:
5934
diff
changeset
|
1133 spatial_compose53i_init(&cs, buffer, height, stride); |
919193c63fb8
thank you for your efforts to keep my code clean, but i would like to
michael
parents:
5934
diff
changeset
|
1134 while(cs.y <= height) |
919193c63fb8
thank you for your efforts to keep my code clean, but i would like to
michael
parents:
5934
diff
changeset
|
1135 spatial_compose53i_dy(&cs, buffer, width, height, stride); |
919193c63fb8
thank you for your efforts to keep my code clean, but i would like to
michael
parents:
5934
diff
changeset
|
1136 } |
919193c63fb8
thank you for your efforts to keep my code clean, but i would like to
michael
parents:
5934
diff
changeset
|
1137 |
2967 | 1138 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1139 void ff_snow_horizontal_compose97i(IDWTELEM *b, int width){ |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1140 IDWTELEM temp[width]; |
2138 | 1141 const int w2= (width+1)>>1; |
1142 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1143 inv_lift (temp , b , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 1); |
5589
946c2db0a093
cleanup (remove some old experimentation related code)
michael
parents:
5588
diff
changeset
|
1144 inv_lift (temp+w2, b +w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 1); |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1145 inv_liftS(b , temp , temp+w2, 2, 1, 1, width, W_BM, W_BO, W_BS, 0, 1); |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1146 inv_lift (b+1 , temp+w2, b , 2, 1, 2, width, W_AM, W_AO, W_AS, 1, 0); |
2138 | 1147 } |
1148 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1149 static void vertical_compose97iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){ |
2138 | 1150 int i; |
2967 | 1151 |
2138 | 1152 for(i=0; i<width; i++){ |
1153 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS; | |
1154 } | |
1155 } | |
1156 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1157 static void vertical_compose97iH1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){ |
2138 | 1158 int i; |
2967 | 1159 |
2138 | 1160 for(i=0; i<width; i++){ |
1161 b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS; | |
1162 } | |
1163 } | |
1164 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1165 static void vertical_compose97iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){ |
2138 | 1166 int i; |
2967 | 1167 |
2138 | 1168 for(i=0; i<width; i++){ |
2602
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
1169 #ifdef liftS |
2138 | 1170 b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS; |
2602
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
1171 #else |
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
1172 b1[i] += (W_BM*(b0[i] + b2[i])+4*b1[i]+W_BO)>>W_BS; |
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
1173 #endif |
2138 | 1174 } |
1175 } | |
1176 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1177 static void vertical_compose97iL1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){ |
2138 | 1178 int i; |
2967 | 1179 |
2138 | 1180 for(i=0; i<width; i++){ |
1181 b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS; | |
1182 } | |
1183 } | |
1184 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1185 void ff_snow_vertical_compose97i(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width){ |
2592 | 1186 int i; |
2967 | 1187 |
2592 | 1188 for(i=0; i<width; i++){ |
1189 b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS; | |
1190 b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS; | |
2602
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
1191 #ifdef liftS |
2592 | 1192 b2[i] += (W_BM*(b1[i] + b3[i])+W_BO)>>W_BS; |
2602
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
1193 #else |
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
1194 b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS; |
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
1195 #endif |
2592 | 1196 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS; |
1197 } | |
1198 } | |
1199 | |
8308 | 1200 static void spatial_compose97i_buffered_init(DWTCompose *cs, slice_buffer * sb, int height, int stride_line){ |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1201 cs->b0 = slice_buffer_get_line(sb, mirror(-3-1, height-1) * stride_line); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1202 cs->b1 = slice_buffer_get_line(sb, mirror(-3 , height-1) * stride_line); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1203 cs->b2 = slice_buffer_get_line(sb, mirror(-3+1, height-1) * stride_line); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1204 cs->b3 = slice_buffer_get_line(sb, mirror(-3+2, height-1) * stride_line); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1205 cs->y = -3; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1206 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1207 |
8308 | 1208 static void spatial_compose97i_init(DWTCompose *cs, IDWTELEM *buffer, int height, int stride){ |
2562 | 1209 cs->b0 = buffer + mirror(-3-1, height-1)*stride; |
1210 cs->b1 = buffer + mirror(-3 , height-1)*stride; | |
1211 cs->b2 = buffer + mirror(-3+1, height-1)*stride; | |
1212 cs->b3 = buffer + mirror(-3+2, height-1)*stride; | |
1213 cs->y = -3; | |
1214 } | |
2138 | 1215 |
8308 | 1216 static void spatial_compose97i_dy_buffered(DSPContext *dsp, DWTCompose *cs, slice_buffer * sb, int width, int height, int stride_line){ |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1217 int y = cs->y; |
2967 | 1218 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1219 IDWTELEM *b0= cs->b0; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1220 IDWTELEM *b1= cs->b1; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1221 IDWTELEM *b2= cs->b2; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1222 IDWTELEM *b3= cs->b3; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1223 IDWTELEM *b4= slice_buffer_get_line(sb, mirror(y + 3, height - 1) * stride_line); |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1224 IDWTELEM *b5= slice_buffer_get_line(sb, mirror(y + 4, height - 1) * stride_line); |
2967 | 1225 |
2592 | 1226 if(y>0 && y+4<height){ |
3198
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
1227 dsp->vertical_compose97i(b0, b1, b2, b3, b4, b5, width); |
2592 | 1228 }else{ |
2998 | 1229 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width); |
1230 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width); | |
1231 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width); | |
1232 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width); | |
2592 | 1233 } |
6405 | 1234 |
3198
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
1235 if(y-1<(unsigned)height) dsp->horizontal_compose97i(b0, width); |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
1236 if(y+0<(unsigned)height) dsp->horizontal_compose97i(b1, width); |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1237 |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1238 cs->b0=b2; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1239 cs->b1=b3; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1240 cs->b2=b4; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1241 cs->b3=b5; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1242 cs->y += 2; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1243 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1244 |
8308 | 1245 static void spatial_compose97i_dy(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride){ |
2562 | 1246 int y = cs->y; |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1247 IDWTELEM *b0= cs->b0; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1248 IDWTELEM *b1= cs->b1; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1249 IDWTELEM *b2= cs->b2; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1250 IDWTELEM *b3= cs->b3; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1251 IDWTELEM *b4= buffer + mirror(y+3, height-1)*stride; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1252 IDWTELEM *b5= buffer + mirror(y+4, height-1)*stride; |
2138 | 1253 |
2998 | 1254 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width); |
1255 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width); | |
1256 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width); | |
1257 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width); | |
6405 | 1258 |
3198
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
1259 if(y-1<(unsigned)height) ff_snow_horizontal_compose97i(b0, width); |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
1260 if(y+0<(unsigned)height) ff_snow_horizontal_compose97i(b1, width); |
2562 | 1261 |
1262 cs->b0=b2; | |
1263 cs->b1=b3; | |
1264 cs->b2=b4; | |
1265 cs->b3=b5; | |
1266 cs->y += 2; | |
1267 } | |
1268 | |
5989
382aa8b243c5
prevent warnings about functions being possibly unused
michael
parents:
5988
diff
changeset
|
1269 static void av_unused spatial_compose97i(IDWTELEM *buffer, int width, int height, int stride){ |
8308 | 1270 DWTCompose cs; |
5988
919193c63fb8
thank you for your efforts to keep my code clean, but i would like to
michael
parents:
5934
diff
changeset
|
1271 spatial_compose97i_init(&cs, buffer, height, stride); |
919193c63fb8
thank you for your efforts to keep my code clean, but i would like to
michael
parents:
5934
diff
changeset
|
1272 while(cs.y <= height) |
919193c63fb8
thank you for your efforts to keep my code clean, but i would like to
michael
parents:
5934
diff
changeset
|
1273 spatial_compose97i_dy(&cs, buffer, width, height, stride); |
919193c63fb8
thank you for your efforts to keep my code clean, but i would like to
michael
parents:
5934
diff
changeset
|
1274 } |
919193c63fb8
thank you for your efforts to keep my code clean, but i would like to
michael
parents:
5934
diff
changeset
|
1275 |
8308 | 1276 static void ff_spatial_idwt_buffered_init(DWTCompose *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count){ |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1277 int level; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1278 for(level=decomposition_count-1; level>=0; level--){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1279 switch(type){ |
3326 | 1280 case DWT_97: spatial_compose97i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break; |
1281 case DWT_53: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break; | |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1282 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1283 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1284 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1285 |
8308 | 1286 static void ff_spatial_idwt_init(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){ |
2562 | 1287 int level; |
1288 for(level=decomposition_count-1; level>=0; level--){ | |
1289 switch(type){ | |
3326 | 1290 case DWT_97: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break; |
1291 case DWT_53: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break; | |
2562 | 1292 } |
1293 } | |
1294 } | |
1295 | |
8308 | 1296 static void ff_spatial_idwt_slice(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){ |
2562 | 1297 const int support = type==1 ? 3 : 5; |
1298 int level; | |
1299 if(type==2) return; | |
1300 | |
1301 for(level=decomposition_count-1; level>=0; level--){ | |
1302 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){ | |
1303 switch(type){ | |
3326 | 1304 case DWT_97: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level); |
6414 | 1305 break; |
3326 | 1306 case DWT_53: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level); |
6414 | 1307 break; |
2562 | 1308 } |
1309 } | |
2138 | 1310 } |
1311 } | |
1312 | |
8308 | 1313 static void ff_spatial_idwt_buffered_slice(DSPContext *dsp, DWTCompose *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){ |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1314 const int support = type==1 ? 3 : 5; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1315 int level; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1316 if(type==2) return; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1317 |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1318 for(level=decomposition_count-1; level>=0; level--){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1319 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1320 switch(type){ |
3326 | 1321 case DWT_97: spatial_compose97i_dy_buffered(dsp, cs+level, slice_buf, width>>level, height>>level, stride_line<<level); |
6414 | 1322 break; |
3326 | 1323 case DWT_53: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level); |
6414 | 1324 break; |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1325 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1326 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1327 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1328 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1329 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1330 static void ff_spatial_idwt(IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){ |
8308 | 1331 DWTCompose cs[MAX_DECOMPOSITIONS]; |
2562 | 1332 int y; |
1333 ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count); | |
1334 for(y=0; y<height; y+=4) | |
1335 ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y); | |
2138 | 1336 } |
1337 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1338 static int encode_subband_c0run(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){ |
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
1339 const int w= b->width; |
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
1340 const int h= b->height; |
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
1341 int x, y; |
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
1342 |
2138 | 1343 if(1){ |
1344 int run=0; | |
2149 | 1345 int runs[w*h]; |
2138 | 1346 int run_index=0; |
2609
0f74a379a890
store the number of runs to avoid storing the last run value
michael
parents:
2608
diff
changeset
|
1347 int max_index; |
2967 | 1348 |
2138 | 1349 for(y=0; y<h; y++){ |
1350 for(x=0; x<w; x++){ | |
2148
678be5a8f282
use parent sample to predict significance & magnitude
michael
parents:
2146
diff
changeset
|
1351 int v, p=0; |
2144 | 1352 int /*ll=0, */l=0, lt=0, t=0, rt=0; |
2149 | 1353 v= src[x + y*stride]; |
2138 | 1354 |
1355 if(y){ | |
2149 | 1356 t= src[x + (y-1)*stride]; |
2138 | 1357 if(x){ |
2149 | 1358 lt= src[x - 1 + (y-1)*stride]; |
2138 | 1359 } |
1360 if(x + 1 < w){ | |
2149 | 1361 rt= src[x + 1 + (y-1)*stride]; |
2138 | 1362 } |
1363 } | |
1364 if(x){ | |
2149 | 1365 l= src[x - 1 + y*stride]; |
2144 | 1366 /*if(x > 1){ |
1367 if(orientation==1) ll= src[y + (x-2)*stride]; | |
1368 else ll= src[x - 2 + y*stride]; | |
2138 | 1369 }*/ |
1370 } | |
2148
678be5a8f282
use parent sample to predict significance & magnitude
michael
parents:
2146
diff
changeset
|
1371 if(parent){ |
2149 | 1372 int px= x>>1; |
1373 int py= y>>1; | |
2967 | 1374 if(px<b->parent->width && py<b->parent->height) |
2148
678be5a8f282
use parent sample to predict significance & magnitude
michael
parents:
2146
diff
changeset
|
1375 p= parent[px + py*2*stride]; |
678be5a8f282
use parent sample to predict significance & magnitude
michael
parents:
2146
diff
changeset
|
1376 } |
678be5a8f282
use parent sample to predict significance & magnitude
michael
parents:
2146
diff
changeset
|
1377 if(!(/*ll|*/l|lt|t|rt|p)){ |
2138 | 1378 if(v){ |
1379 runs[run_index++]= run; | |
1380 run=0; | |
1381 }else{ | |
1382 run++; | |
1383 } | |
1384 } | |
1385 } | |
1386 } | |
2609
0f74a379a890
store the number of runs to avoid storing the last run value
michael
parents:
2608
diff
changeset
|
1387 max_index= run_index; |
2138 | 1388 runs[run_index++]= run; |
1389 run_index=0; | |
1390 run= runs[run_index++]; | |
1391 | |
2609
0f74a379a890
store the number of runs to avoid storing the last run value
michael
parents:
2608
diff
changeset
|
1392 put_symbol2(&s->c, b->state[30], max_index, 0); |
0f74a379a890
store the number of runs to avoid storing the last run value
michael
parents:
2608
diff
changeset
|
1393 if(run_index <= max_index) |
0f74a379a890
store the number of runs to avoid storing the last run value
michael
parents:
2608
diff
changeset
|
1394 put_symbol2(&s->c, b->state[1], run, 3); |
2967 | 1395 |
2138 | 1396 for(y=0; y<h; y++){ |
2435
c89ac0e70c66
10l patch by (matthieu castet <castet.matthieu free fr>)
michael
parents:
2422
diff
changeset
|
1397 if(s->c.bytestream_end - s->c.bytestream < w*40){ |
2422 | 1398 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n"); |
1399 return -1; | |
1400 } | |
2138 | 1401 for(x=0; x<w; x++){ |
2148
678be5a8f282
use parent sample to predict significance & magnitude
michael
parents:
2146
diff
changeset
|
1402 int v, p=0; |
2144 | 1403 int /*ll=0, */l=0, lt=0, t=0, rt=0; |
2149 | 1404 v= src[x + y*stride]; |
2138 | 1405 |
1406 if(y){ | |
2149 | 1407 t= src[x + (y-1)*stride]; |
2138 | 1408 if(x){ |
2149 | 1409 lt= src[x - 1 + (y-1)*stride]; |
2138 | 1410 } |
1411 if(x + 1 < w){ | |
2149 | 1412 rt= src[x + 1 + (y-1)*stride]; |
2138 | 1413 } |
1414 } | |
1415 if(x){ | |
2149 | 1416 l= src[x - 1 + y*stride]; |
2144 | 1417 /*if(x > 1){ |
1418 if(orientation==1) ll= src[y + (x-2)*stride]; | |
1419 else ll= src[x - 2 + y*stride]; | |
2138 | 1420 }*/ |
1421 } | |
2148
678be5a8f282
use parent sample to predict significance & magnitude
michael
parents:
2146
diff
changeset
|
1422 if(parent){ |
2149 | 1423 int px= x>>1; |
1424 int py= y>>1; | |
2967 | 1425 if(px<b->parent->width && py<b->parent->height) |
2148
678be5a8f282
use parent sample to predict significance & magnitude
michael
parents:
2146
diff
changeset
|
1426 p= parent[px + py*2*stride]; |
678be5a8f282
use parent sample to predict significance & magnitude
michael
parents:
2146
diff
changeset
|
1427 } |
678be5a8f282
use parent sample to predict significance & magnitude
michael
parents:
2146
diff
changeset
|
1428 if(/*ll|*/l|lt|t|rt|p){ |
4001 | 1429 int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p)); |
2144 | 1430 |
2335 | 1431 put_rac(&s->c, &b->state[0][context], !!v); |
2138 | 1432 }else{ |
1433 if(!run){ | |
1434 run= runs[run_index++]; | |
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
1435 |
2609
0f74a379a890
store the number of runs to avoid storing the last run value
michael
parents:
2608
diff
changeset
|
1436 if(run_index <= max_index) |
0f74a379a890
store the number of runs to avoid storing the last run value
michael
parents:
2608
diff
changeset
|
1437 put_symbol2(&s->c, b->state[1], run, 3); |
2138 | 1438 assert(v); |
1439 }else{ | |
1440 run--; | |
1441 assert(!v); | |
1442 } | |
1443 } | |
1444 if(v){ | |
4001 | 1445 int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p)); |
1446 int l2= 2*FFABS(l) + (l<0); | |
1447 int t2= 2*FFABS(t) + (t<0); | |
1448 | |
1449 put_symbol2(&s->c, b->state[context + 2], FFABS(v)-1, context-4); | |
2596 | 1450 put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0); |
2138 | 1451 } |
1452 } | |
1453 } | |
1454 } | |
2422 | 1455 return 0; |
2138 | 1456 } |
1457 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1458 static int encode_subband(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){ |
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
1459 // encode_subband_qtree(s, b, src, parent, stride, orientation); |
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
1460 // encode_subband_z0run(s, b, src, parent, stride, orientation); |
2422 | 1461 return encode_subband_c0run(s, b, src, parent, stride, orientation); |
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
1462 // encode_subband_dzr(s, b, src, parent, stride, orientation); |
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
1463 } |
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
1464 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1465 static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){ |
2138 | 1466 const int w= b->width; |
1467 const int h= b->height; | |
1468 int x,y; | |
2967 | 1469 |
2138 | 1470 if(1){ |
2609
0f74a379a890
store the number of runs to avoid storing the last run value
michael
parents:
2608
diff
changeset
|
1471 int run, runs; |
2607
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1472 x_and_coeff *xc= b->x_coeff; |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1473 x_and_coeff *prev_xc= NULL; |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1474 x_and_coeff *prev2_xc= xc; |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1475 x_and_coeff *parent_xc= parent ? parent->x_coeff : NULL; |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1476 x_and_coeff *prev_parent_xc= parent_xc; |
2138 | 1477 |
2609
0f74a379a890
store the number of runs to avoid storing the last run value
michael
parents:
2608
diff
changeset
|
1478 runs= get_symbol2(&s->c, b->state[30], 0); |
0f74a379a890
store the number of runs to avoid storing the last run value
michael
parents:
2608
diff
changeset
|
1479 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3); |
0f74a379a890
store the number of runs to avoid storing the last run value
michael
parents:
2608
diff
changeset
|
1480 else run= INT_MAX; |
0f74a379a890
store the number of runs to avoid storing the last run value
michael
parents:
2608
diff
changeset
|
1481 |
2138 | 1482 for(y=0; y<h; y++){ |
2193 | 1483 int v=0; |
1484 int lt=0, t=0, rt=0; | |
1485 | |
2607
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1486 if(y && prev_xc->x == 0){ |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1487 rt= prev_xc->coeff; |
2193 | 1488 } |
2138 | 1489 for(x=0; x<w; x++){ |
2193 | 1490 int p=0; |
1491 const int l= v; | |
2967 | 1492 |
2193 | 1493 lt= t; t= rt; |
1494 | |
2194 | 1495 if(y){ |
2607
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1496 if(prev_xc->x <= x) |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1497 prev_xc++; |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1498 if(prev_xc->x == x + 1) |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1499 rt= prev_xc->coeff; |
2194 | 1500 else |
1501 rt=0; | |
1502 } | |
2607
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1503 if(parent_xc){ |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1504 if(x>>1 > parent_xc->x){ |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1505 parent_xc++; |
2192 | 1506 } |
2607
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1507 if(x>>1 == parent_xc->x){ |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1508 p= parent_xc->coeff; |
2194 | 1509 } |
2148
678be5a8f282
use parent sample to predict significance & magnitude
michael
parents:
2146
diff
changeset
|
1510 } |
678be5a8f282
use parent sample to predict significance & magnitude
michael
parents:
2146
diff
changeset
|
1511 if(/*ll|*/l|lt|t|rt|p){ |
4001 | 1512 int context= av_log2(/*FFABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1)); |
2144 | 1513 |
2335 | 1514 v=get_rac(&s->c, &b->state[0][context]); |
2605 | 1515 if(v){ |
1516 v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1); | |
1517 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]); | |
2967 | 1518 |
2607
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1519 xc->x=x; |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1520 (xc++)->coeff= v; |
2605 | 1521 } |
2138 | 1522 }else{ |
1523 if(!run){ | |
2609
0f74a379a890
store the number of runs to avoid storing the last run value
michael
parents:
2608
diff
changeset
|
1524 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3); |
0f74a379a890
store the number of runs to avoid storing the last run value
michael
parents:
2608
diff
changeset
|
1525 else run= INT_MAX; |
2605 | 1526 v= 2*(get_symbol2(&s->c, b->state[0 + 2], 0-4) + 1); |
1527 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3]); | |
2967 | 1528 |
2607
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1529 xc->x=x; |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1530 (xc++)->coeff= v; |
2138 | 1531 }else{ |
2606 | 1532 int max_run; |
2138 | 1533 run--; |
1534 v=0; | |
2191 | 1535 |
2607
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1536 if(y) max_run= FFMIN(run, prev_xc->x - x - 2); |
2606 | 1537 else max_run= FFMIN(run, w-x-1); |
2607
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1538 if(parent_xc) |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1539 max_run= FFMIN(max_run, 2*parent_xc->x - x - 1); |
2606 | 1540 x+= max_run; |
1541 run-= max_run; | |
2138 | 1542 } |
1543 } | |
2192 | 1544 } |
2607
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1545 (xc++)->x= w+1; //end marker |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1546 prev_xc= prev2_xc; |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1547 prev2_xc= xc; |
2967 | 1548 |
2607
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1549 if(parent_xc){ |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1550 if(y&1){ |
2607
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1551 while(parent_xc->x != parent->width+1) |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1552 parent_xc++; |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1553 parent_xc++; |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1554 prev_parent_xc= parent_xc; |
2192 | 1555 }else{ |
2607
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1556 parent_xc= prev_parent_xc; |
2138 | 1557 } |
1558 } | |
1559 } | |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1560 |
2607
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1561 (xc++)->x= w+1; //end marker |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1562 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1563 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1564 |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1565 static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1566 const int w= b->width; |
2893 | 1567 int y; |
4594 | 1568 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16); |
2600 | 1569 int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT); |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1570 int qadd= (s->qbias*qmul)>>QBIAS_SHIFT; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1571 int new_index = 0; |
2967 | 1572 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1573 if(b->ibuf == s->spatial_idwt_buffer || s->qlog == LOSSLESS_QLOG){ |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1574 qadd= 0; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1575 qmul= 1<<QEXPSHIFT; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1576 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1577 |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1578 /* If we are on the second or later slice, restore our index. */ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1579 if (start_y != 0) |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1580 new_index = save_state[0]; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1581 |
2967 | 1582 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1583 for(y=start_y; y<h; y++){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1584 int x = 0; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1585 int v; |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1586 IDWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1587 memset(line, 0, b->width*sizeof(IDWTELEM)); |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1588 v = b->x_coeff[new_index].coeff; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1589 x = b->x_coeff[new_index++].x; |
6474
e6995f3fbf7f
cosmetics: Normalize {} placement after for, while, if.
diego
parents:
6437
diff
changeset
|
1590 while(x < w){ |
2596 | 1591 register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT; |
1592 register int u= -(v&1); | |
1593 line[x] = (t^u) - u; | |
1594 | |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1595 v = b->x_coeff[new_index].coeff; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1596 x = b->x_coeff[new_index++].x; |
2138 | 1597 } |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1598 } |
2967 | 1599 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1600 /* Save our variables for the next slice. */ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1601 save_state[0] = new_index; |
2967 | 1602 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1603 return; |
2138 | 1604 } |
1605 | |
4331
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
1606 static void reset_contexts(SnowContext *s){ //FIXME better initial contexts |
2138 | 1607 int plane_index, level, orientation; |
1608 | |
2199 | 1609 for(plane_index=0; plane_index<3; plane_index++){ |
5665
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
1610 for(level=0; level<MAX_DECOMPOSITIONS; level++){ |
2138 | 1611 for(orientation=level ? 1:0; orientation<4; orientation++){ |
2335 | 1612 memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state)); |
2138 | 1613 } |
1614 } | |
1615 } | |
2335 | 1616 memset(s->header_state, MID_STATE, sizeof(s->header_state)); |
1617 memset(s->block_state, MID_STATE, sizeof(s->block_state)); | |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1618 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1619 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1620 static int alloc_blocks(SnowContext *s){ |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1621 int w= -((-s->avctx->width )>>LOG2_MB_SIZE); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1622 int h= -((-s->avctx->height)>>LOG2_MB_SIZE); |
2967 | 1623 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1624 s->b_width = w; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1625 s->b_height= h; |
2967 | 1626 |
9370 | 1627 av_free(s->block); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1628 s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2)); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1629 return 0; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1630 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1631 |
2335 | 1632 static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){ |
1633 uint8_t *bytestream= d->bytestream; | |
1634 uint8_t *bytestream_start= d->bytestream_start; | |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1635 *d= *s; |
2335 | 1636 d->bytestream= bytestream; |
1637 d->bytestream_start= bytestream_start; | |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1638 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1639 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1640 //near copy & paste from dsputil, FIXME |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1641 static int pix_sum(uint8_t * pix, int line_size, int w) |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1642 { |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1643 int s, i, j; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1644 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1645 s = 0; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1646 for (i = 0; i < w; i++) { |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1647 for (j = 0; j < w; j++) { |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1648 s += pix[0]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1649 pix ++; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1650 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1651 pix += line_size - w; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1652 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1653 return s; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1654 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1655 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1656 //near copy & paste from dsputil, FIXME |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1657 static int pix_norm1(uint8_t * pix, int line_size, int w) |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1658 { |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1659 int s, i, j; |
4179 | 1660 uint32_t *sq = ff_squareTbl + 256; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1661 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1662 s = 0; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1663 for (i = 0; i < w; i++) { |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1664 for (j = 0; j < w; j ++) { |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1665 s += sq[pix[0]]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1666 pix ++; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1667 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1668 pix += line_size - w; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1669 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1670 return s; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1671 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1672 |
3314 | 1673 static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int ref, int type){ |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1674 const int w= s->b_width << s->block_max_depth; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1675 const int rem_depth= s->block_max_depth - level; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1676 const int index= (x + y*w) << rem_depth; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1677 const int block_w= 1<<rem_depth; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1678 BlockNode block; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1679 int i,j; |
2967 | 1680 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1681 block.color[0]= l; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1682 block.color[1]= cb; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1683 block.color[2]= cr; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1684 block.mx= mx; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1685 block.my= my; |
3314 | 1686 block.ref= ref; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1687 block.type= type; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1688 block.level= level; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1689 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1690 for(j=0; j<block_w; j++){ |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1691 for(i=0; i<block_w; i++){ |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1692 s->block[index + i + j*w]= block; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1693 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1694 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1695 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1696 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1697 static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){ |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1698 const int offset[3]= { |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1699 y*c-> stride + x, |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1700 ((y*c->uvstride + x)>>1), |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1701 ((y*c->uvstride + x)>>1), |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1702 }; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1703 int i; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1704 for(i=0; i<3; i++){ |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1705 c->src[0][i]= src [i]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1706 c->ref[0][i]= ref [i] + offset[i]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1707 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1708 assert(!ref_index); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1709 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1710 |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
1711 static inline void pred_mv(SnowContext *s, int *mx, int *my, int ref, |
4408
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
1712 const BlockNode *left, const BlockNode *top, const BlockNode *tr){ |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
1713 if(s->ref_frames == 1){ |
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
1714 *mx = mid_pred(left->mx, top->mx, tr->mx); |
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
1715 *my = mid_pred(left->my, top->my, tr->my); |
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
1716 }else{ |
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
1717 const int *scale = scale_mv_ref[ref]; |
4407
15688bdfe1b5
Brings down the number of snow.c warnings from 33 to 27 by putting parentheses
takis
parents:
4360
diff
changeset
|
1718 *mx = mid_pred((left->mx * scale[left->ref] + 128) >>8, |
15688bdfe1b5
Brings down the number of snow.c warnings from 33 to 27 by putting parentheses
takis
parents:
4360
diff
changeset
|
1719 (top ->mx * scale[top ->ref] + 128) >>8, |
15688bdfe1b5
Brings down the number of snow.c warnings from 33 to 27 by putting parentheses
takis
parents:
4360
diff
changeset
|
1720 (tr ->mx * scale[tr ->ref] + 128) >>8); |
15688bdfe1b5
Brings down the number of snow.c warnings from 33 to 27 by putting parentheses
takis
parents:
4360
diff
changeset
|
1721 *my = mid_pred((left->my * scale[left->ref] + 128) >>8, |
15688bdfe1b5
Brings down the number of snow.c warnings from 33 to 27 by putting parentheses
takis
parents:
4360
diff
changeset
|
1722 (top ->my * scale[top ->ref] + 128) >>8, |
15688bdfe1b5
Brings down the number of snow.c warnings from 33 to 27 by putting parentheses
takis
parents:
4360
diff
changeset
|
1723 (tr ->my * scale[tr ->ref] + 128) >>8); |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
1724 } |
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
1725 } |
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
1726 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1727 //FIXME copy&paste |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1728 #define P_LEFT P[1] |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1729 #define P_TOP P[2] |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1730 #define P_TOPRIGHT P[3] |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1731 #define P_MEDIAN P[4] |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1732 #define P_MV1 P[9] |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1733 #define FLAG_QPEL 1 //must be 1 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1734 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1735 static int encode_q_branch(SnowContext *s, int level, int x, int y){ |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1736 uint8_t p_buffer[1024]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1737 uint8_t i_buffer[1024]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1738 uint8_t p_state[sizeof(s->block_state)]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1739 uint8_t i_state[sizeof(s->block_state)]; |
2335 | 1740 RangeCoder pc, ic; |
1741 uint8_t *pbbak= s->c.bytestream; | |
1742 uint8_t *pbbak_start= s->c.bytestream_start; | |
5082 | 1743 int score, score2, iscore, i_len, p_len, block_s, sum, base_bits; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1744 const int w= s->b_width << s->block_max_depth; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1745 const int h= s->b_height << s->block_max_depth; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1746 const int rem_depth= s->block_max_depth - level; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1747 const int index= (x + y*w) << rem_depth; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1748 const int block_w= 1<<(LOG2_MB_SIZE - level); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1749 int trx= (x+1)<<rem_depth; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1750 int try= (y+1)<<rem_depth; |
4408
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
1751 const BlockNode *left = x ? &s->block[index-1] : &null_block; |
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
1752 const BlockNode *top = y ? &s->block[index-w] : &null_block; |
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
1753 const BlockNode *right = trx<w ? &s->block[index+1] : &null_block; |
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
1754 const BlockNode *bottom= try<h ? &s->block[index+w] : &null_block; |
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
1755 const BlockNode *tl = y && x ? &s->block[index-w-1] : left; |
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
1756 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1757 int pl = left->color[0]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1758 int pcb= left->color[1]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1759 int pcr= left->color[2]; |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
1760 int pmx, pmy; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1761 int mx=0, my=0; |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1762 int l,cr,cb; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1763 const int stride= s->current_picture.linesize[0]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1764 const int uvstride= s->current_picture.linesize[1]; |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1765 uint8_t *current_data[3]= { s->input_picture.data[0] + (x + y* stride)*block_w, |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1766 s->input_picture.data[1] + (x + y*uvstride)*block_w/2, |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1767 s->input_picture.data[2] + (x + y*uvstride)*block_w/2}; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1768 int P[10][2]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1769 int16_t last_mv[3][2]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1770 int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1771 const int shift= 1+qpel; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1772 MotionEstContext *c= &s->m.me; |
3314 | 1773 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref); |
4001 | 1774 int mx_context= av_log2(2*FFABS(left->mx - top->mx)); |
1775 int my_context= av_log2(2*FFABS(left->my - top->my)); | |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1776 int s_context= 2*left->level + 2*top->level + tl->level + tr->level; |
3314 | 1777 int ref, best_ref, ref_score, ref_mx, ref_my; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1778 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1779 assert(sizeof(s->block_state) >= 256); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1780 if(s->keyframe){ |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
1781 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1782 return 0; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1783 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1784 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1785 // clip predictors / edge ? |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1786 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1787 P_LEFT[0]= left->mx; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1788 P_LEFT[1]= left->my; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1789 P_TOP [0]= top->mx; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1790 P_TOP [1]= top->my; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1791 P_TOPRIGHT[0]= tr->mx; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1792 P_TOPRIGHT[1]= tr->my; |
2967 | 1793 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1794 last_mv[0][0]= s->block[index].mx; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1795 last_mv[0][1]= s->block[index].my; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1796 last_mv[1][0]= right->mx; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1797 last_mv[1][1]= right->my; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1798 last_mv[2][0]= bottom->mx; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1799 last_mv[2][1]= bottom->my; |
2967 | 1800 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1801 s->m.mb_stride=2; |
2967 | 1802 s->m.mb_x= |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1803 s->m.mb_y= 0; |
4360
5f887a3c7281
Change the Snow encoder to always use the available MotionEstContext pointer.
takis
parents:
4332
diff
changeset
|
1804 c->skip= 0; |
5f887a3c7281
Change the Snow encoder to always use the available MotionEstContext pointer.
takis
parents:
4332
diff
changeset
|
1805 |
5f887a3c7281
Change the Snow encoder to always use the available MotionEstContext pointer.
takis
parents:
4332
diff
changeset
|
1806 assert(c-> stride == stride); |
5f887a3c7281
Change the Snow encoder to always use the available MotionEstContext pointer.
takis
parents:
4332
diff
changeset
|
1807 assert(c->uvstride == uvstride); |
2967 | 1808 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1809 c->penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1810 c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1811 c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1812 c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV; |
2967 | 1813 |
2206 | 1814 c->xmin = - x*block_w - 16+2; |
1815 c->ymin = - y*block_w - 16+2; | |
1816 c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2; | |
1817 c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2; | |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1818 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1819 if(P_LEFT[0] > (c->xmax<<shift)) P_LEFT[0] = (c->xmax<<shift); |
2967 | 1820 if(P_LEFT[1] > (c->ymax<<shift)) P_LEFT[1] = (c->ymax<<shift); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1821 if(P_TOP[0] > (c->xmax<<shift)) P_TOP[0] = (c->xmax<<shift); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1822 if(P_TOP[1] > (c->ymax<<shift)) P_TOP[1] = (c->ymax<<shift); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1823 if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1824 if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1825 if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1826 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1827 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1828 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1829 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1830 if (!y) { |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1831 c->pred_x= P_LEFT[0]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1832 c->pred_y= P_LEFT[1]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1833 } else { |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1834 c->pred_x = P_MEDIAN[0]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1835 c->pred_y = P_MEDIAN[1]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1836 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1837 |
3314 | 1838 score= INT_MAX; |
1839 best_ref= 0; | |
1840 for(ref=0; ref<s->ref_frames; ref++){ | |
1841 init_ref(c, current_data, s->last_picture[ref].data, NULL, block_w*x, block_w*y, 0); | |
1842 | |
1843 ref_score= ff_epzs_motion_search(&s->m, &ref_mx, &ref_my, P, 0, /*ref_index*/ 0, last_mv, | |
1844 (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w); | |
1845 | |
1846 assert(ref_mx >= c->xmin); | |
1847 assert(ref_mx <= c->xmax); | |
1848 assert(ref_my >= c->ymin); | |
1849 assert(ref_my <= c->ymax); | |
1850 | |
4360
5f887a3c7281
Change the Snow encoder to always use the available MotionEstContext pointer.
takis
parents:
4332
diff
changeset
|
1851 ref_score= c->sub_motion_search(&s->m, &ref_mx, &ref_my, ref_score, 0, 0, level-LOG2_MB_SIZE+4, block_w); |
3314 | 1852 ref_score= ff_get_mb_score(&s->m, ref_mx, ref_my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0); |
1853 ref_score+= 2*av_log2(2*ref)*c->penalty_factor; | |
1854 if(s->ref_mvs[ref]){ | |
1855 s->ref_mvs[ref][index][0]= ref_mx; | |
1856 s->ref_mvs[ref][index][1]= ref_my; | |
1857 s->ref_scores[ref][index]= ref_score; | |
1858 } | |
1859 if(score > ref_score){ | |
1860 score= ref_score; | |
1861 best_ref= ref; | |
1862 mx= ref_mx; | |
1863 my= ref_my; | |
1864 } | |
1865 } | |
5127 | 1866 //FIXME if mb_cmp != SSE then intra cannot be compared currently and mb_penalty vs. lambda2 |
2967 | 1867 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1868 // subpel search |
5085 | 1869 base_bits= get_rac_count(&s->c) - 8*(s->c.bytestream - s->c.bytestream_start); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1870 pc= s->c; |
2335 | 1871 pc.bytestream_start= |
1872 pc.bytestream= p_buffer; //FIXME end/start? and at the other stoo | |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1873 memcpy(p_state, s->block_state, sizeof(s->block_state)); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1874 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1875 if(level!=s->block_max_depth) |
2335 | 1876 put_rac(&pc, &p_state[4 + s_context], 1); |
1877 put_rac(&pc, &p_state[1 + left->type + top->type], 0); | |
3314 | 1878 if(s->ref_frames > 1) |
1879 put_symbol(&pc, &p_state[128 + 1024 + 32*ref_context], best_ref, 0); | |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
1880 pred_mv(s, &pmx, &pmy, best_ref, left, top, tr); |
3314 | 1881 put_symbol(&pc, &p_state[128 + 32*(mx_context + 16*!!best_ref)], mx - pmx, 1); |
1882 put_symbol(&pc, &p_state[128 + 32*(my_context + 16*!!best_ref)], my - pmy, 1); | |
2335 | 1883 p_len= pc.bytestream - pc.bytestream_start; |
5082 | 1884 score += (s->lambda2*(get_rac_count(&pc)-base_bits))>>FF_LAMBDA_SHIFT; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1885 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1886 block_s= block_w*block_w; |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1887 sum = pix_sum(current_data[0], stride, block_w); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1888 l= (sum + block_s/2)/block_s; |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1889 iscore = pix_norm1(current_data[0], stride, block_w) - 2*l*sum + l*l*block_s; |
2967 | 1890 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1891 block_s= block_w*block_w>>2; |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1892 sum = pix_sum(current_data[1], uvstride, block_w>>1); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1893 cb= (sum + block_s/2)/block_s; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1894 // iscore += pix_norm1(¤t_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s; |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1895 sum = pix_sum(current_data[2], uvstride, block_w>>1); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1896 cr= (sum + block_s/2)/block_s; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1897 // iscore += pix_norm1(¤t_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1898 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1899 ic= s->c; |
2335 | 1900 ic.bytestream_start= |
1901 ic.bytestream= i_buffer; //FIXME end/start? and at the other stoo | |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1902 memcpy(i_state, s->block_state, sizeof(s->block_state)); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1903 if(level!=s->block_max_depth) |
2335 | 1904 put_rac(&ic, &i_state[4 + s_context], 1); |
1905 put_rac(&ic, &i_state[1 + left->type + top->type], 1); | |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1906 put_symbol(&ic, &i_state[32], l-pl , 1); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1907 put_symbol(&ic, &i_state[64], cb-pcb, 1); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1908 put_symbol(&ic, &i_state[96], cr-pcr, 1); |
2335 | 1909 i_len= ic.bytestream - ic.bytestream_start; |
5082 | 1910 iscore += (s->lambda2*(get_rac_count(&ic)-base_bits))>>FF_LAMBDA_SHIFT; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1911 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1912 // assert(score==256*256*256*64-1); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1913 assert(iscore < 255*255*256 + s->lambda2*10); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1914 assert(iscore >= 0); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1915 assert(l>=0 && l<=255); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1916 assert(pl>=0 && pl<=255); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1917 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1918 if(level==0){ |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1919 int varc= iscore >> 8; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1920 int vard= score >> 8; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1921 if (vard <= 64 || vard < varc) |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1922 c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1923 else |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1924 c->scene_change_score+= s->m.qscale; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1925 } |
2967 | 1926 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1927 if(level!=s->block_max_depth){ |
2335 | 1928 put_rac(&s->c, &s->block_state[4 + s_context], 0); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1929 score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1930 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1931 score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1932 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1933 score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead |
2967 | 1934 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1935 if(score2 < score && score2 < iscore) |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1936 return score2; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1937 } |
2967 | 1938 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1939 if(iscore < score){ |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
1940 pred_mv(s, &pmx, &pmy, 0, left, top, tr); |
2335 | 1941 memcpy(pbbak, i_buffer, i_len); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1942 s->c= ic; |
2335 | 1943 s->c.bytestream_start= pbbak_start; |
1944 s->c.bytestream= pbbak + i_len; | |
3314 | 1945 set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, 0, BLOCK_INTRA); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1946 memcpy(s->block_state, i_state, sizeof(s->block_state)); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1947 return iscore; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1948 }else{ |
2335 | 1949 memcpy(pbbak, p_buffer, p_len); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1950 s->c= pc; |
2335 | 1951 s->c.bytestream_start= pbbak_start; |
1952 s->c.bytestream= pbbak + p_len; | |
3314 | 1953 set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, best_ref, 0); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1954 memcpy(s->block_state, p_state, sizeof(s->block_state)); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1955 return score; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1956 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1957 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1958 |
4283
d6f83e2f8804
rename always_inline to av_always_inline and move to common.h
mru
parents:
4197
diff
changeset
|
1959 static av_always_inline int same_block(BlockNode *a, BlockNode *b){ |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1960 if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){ |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1961 return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2])); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1962 }else{ |
3314 | 1963 return !((a->mx - b->mx) | (a->my - b->my) | (a->ref - b->ref) | ((a->type ^ b->type)&BLOCK_INTRA)); |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1964 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1965 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1966 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1967 static void encode_q_branch2(SnowContext *s, int level, int x, int y){ |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1968 const int w= s->b_width << s->block_max_depth; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1969 const int rem_depth= s->block_max_depth - level; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1970 const int index= (x + y*w) << rem_depth; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1971 int trx= (x+1)<<rem_depth; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1972 BlockNode *b= &s->block[index]; |
4408
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
1973 const BlockNode *left = x ? &s->block[index-1] : &null_block; |
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
1974 const BlockNode *top = y ? &s->block[index-w] : &null_block; |
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
1975 const BlockNode *tl = y && x ? &s->block[index-w-1] : left; |
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
1976 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1977 int pl = left->color[0]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1978 int pcb= left->color[1]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1979 int pcr= left->color[2]; |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
1980 int pmx, pmy; |
3314 | 1981 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref); |
4001 | 1982 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 16*!!b->ref; |
1983 int my_context= av_log2(2*FFABS(left->my - top->my)) + 16*!!b->ref; | |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1984 int s_context= 2*left->level + 2*top->level + tl->level + tr->level; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1985 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1986 if(s->keyframe){ |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
1987 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA); |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1988 return; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1989 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1990 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1991 if(level!=s->block_max_depth){ |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1992 if(same_block(b,b+1) && same_block(b,b+w) && same_block(b,b+w+1)){ |
2995
dfc271b90fe6
4mv + iter ME 10l fix (still not bugfree but better)
michael
parents:
2994
diff
changeset
|
1993 put_rac(&s->c, &s->block_state[4 + s_context], 1); |
dfc271b90fe6
4mv + iter ME 10l fix (still not bugfree but better)
michael
parents:
2994
diff
changeset
|
1994 }else{ |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1995 put_rac(&s->c, &s->block_state[4 + s_context], 0); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1996 encode_q_branch2(s, level+1, 2*x+0, 2*y+0); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1997 encode_q_branch2(s, level+1, 2*x+1, 2*y+0); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1998 encode_q_branch2(s, level+1, 2*x+0, 2*y+1); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1999 encode_q_branch2(s, level+1, 2*x+1, 2*y+1); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2000 return; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2001 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2002 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2003 if(b->type & BLOCK_INTRA){ |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
2004 pred_mv(s, &pmx, &pmy, 0, left, top, tr); |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2005 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 1); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2006 put_symbol(&s->c, &s->block_state[32], b->color[0]-pl , 1); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2007 put_symbol(&s->c, &s->block_state[64], b->color[1]-pcb, 1); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2008 put_symbol(&s->c, &s->block_state[96], b->color[2]-pcr, 1); |
3314 | 2009 set_blocks(s, level, x, y, b->color[0], b->color[1], b->color[2], pmx, pmy, 0, BLOCK_INTRA); |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2010 }else{ |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
2011 pred_mv(s, &pmx, &pmy, b->ref, left, top, tr); |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2012 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 0); |
3314 | 2013 if(s->ref_frames > 1) |
2014 put_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], b->ref, 0); | |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2015 put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2016 put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1); |
3314 | 2017 set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, b->ref, 0); |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2018 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2019 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2020 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2021 static void decode_q_branch(SnowContext *s, int level, int x, int y){ |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2022 const int w= s->b_width << s->block_max_depth; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2023 const int rem_depth= s->block_max_depth - level; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2024 const int index= (x + y*w) << rem_depth; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2025 int trx= (x+1)<<rem_depth; |
4408
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
2026 const BlockNode *left = x ? &s->block[index-1] : &null_block; |
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
2027 const BlockNode *top = y ? &s->block[index-w] : &null_block; |
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
2028 const BlockNode *tl = y && x ? &s->block[index-w-1] : left; |
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
2029 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2030 int s_context= 2*left->level + 2*top->level + tl->level + tr->level; |
2967 | 2031 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2032 if(s->keyframe){ |
3314 | 2033 set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, null_block.ref, BLOCK_INTRA); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2034 return; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2035 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2036 |
2335 | 2037 if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){ |
4332 | 2038 int type, mx, my; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2039 int l = left->color[0]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2040 int cb= left->color[1]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2041 int cr= left->color[2]; |
3314 | 2042 int ref = 0; |
2043 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref); | |
4001 | 2044 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 0*av_log2(2*FFABS(tr->mx - top->mx)); |
2045 int my_context= av_log2(2*FFABS(left->my - top->my)) + 0*av_log2(2*FFABS(tr->my - top->my)); | |
2967 | 2046 |
2335 | 2047 type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2048 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2049 if(type){ |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
2050 pred_mv(s, &mx, &my, 0, left, top, tr); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2051 l += get_symbol(&s->c, &s->block_state[32], 1); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2052 cb+= get_symbol(&s->c, &s->block_state[64], 1); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2053 cr+= get_symbol(&s->c, &s->block_state[96], 1); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2054 }else{ |
3314 | 2055 if(s->ref_frames > 1) |
2056 ref= get_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], 0); | |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
2057 pred_mv(s, &mx, &my, ref, left, top, tr); |
3314 | 2058 mx+= get_symbol(&s->c, &s->block_state[128 + 32*(mx_context + 16*!!ref)], 1); |
2059 my+= get_symbol(&s->c, &s->block_state[128 + 32*(my_context + 16*!!ref)], 1); | |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2060 } |
3314 | 2061 set_blocks(s, level, x, y, l, cb, cr, mx, my, ref, type); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2062 }else{ |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2063 decode_q_branch(s, level+1, 2*x+0, 2*y+0); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2064 decode_q_branch(s, level+1, 2*x+1, 2*y+0); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2065 decode_q_branch(s, level+1, 2*x+0, 2*y+1); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2066 decode_q_branch(s, level+1, 2*x+1, 2*y+1); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2067 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2068 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2069 |
3661
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
2070 static void encode_blocks(SnowContext *s, int search){ |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2071 int x, y; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2072 int w= s->b_width; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2073 int h= s->b_height; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2074 |
3661
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
2075 if(s->avctx->me_method == ME_ITER && !s->keyframe && search) |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2076 iterative_me(s); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2077 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2078 for(y=0; y<h; y++){ |
2435
c89ac0e70c66
10l patch by (matthieu castet <castet.matthieu free fr>)
michael
parents:
2422
diff
changeset
|
2079 if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit |
2422 | 2080 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n"); |
2081 return; | |
2082 } | |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2083 for(x=0; x<w; x++){ |
3661
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
2084 if(s->avctx->me_method == ME_ITER || !search) |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2085 encode_q_branch2(s, 0, x, y); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2086 else |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2087 encode_q_branch (s, 0, x, y); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2088 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2089 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2090 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2091 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2092 static void decode_blocks(SnowContext *s){ |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2093 int x, y; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2094 int w= s->b_width; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2095 int h= s->b_height; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2096 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2097 for(y=0; y<h; y++){ |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2098 for(x=0; x<w; x++){ |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2099 decode_q_branch(s, 0, x, y); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2100 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2101 } |
2138 | 2102 } |
2103 | |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2104 static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){ |
7622 | 2105 static const uint8_t weight[64]={ |
5648 | 2106 8,7,6,5,4,3,2,1, |
2107 7,7,0,0,0,0,0,1, | |
2108 6,0,6,0,0,0,2,0, | |
2109 5,0,0,5,0,3,0,0, | |
2110 4,0,0,0,4,0,0,0, | |
2111 3,0,0,5,0,3,0,0, | |
2112 2,0,6,0,0,0,2,0, | |
2113 1,7,0,0,0,0,0,1, | |
2114 }; | |
2115 | |
7622 | 2116 static const uint8_t brane[256]={ |
5648 | 2117 0x00,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x11,0x12,0x12,0x12,0x12,0x12,0x12,0x12, |
2118 0x04,0x05,0xcc,0xcc,0xcc,0xcc,0xcc,0x41,0x15,0x16,0xcc,0xcc,0xcc,0xcc,0xcc,0x52, | |
2119 0x04,0xcc,0x05,0xcc,0xcc,0xcc,0x41,0xcc,0x15,0xcc,0x16,0xcc,0xcc,0xcc,0x52,0xcc, | |
2120 0x04,0xcc,0xcc,0x05,0xcc,0x41,0xcc,0xcc,0x15,0xcc,0xcc,0x16,0xcc,0x52,0xcc,0xcc, | |
2121 0x04,0xcc,0xcc,0xcc,0x41,0xcc,0xcc,0xcc,0x15,0xcc,0xcc,0xcc,0x16,0xcc,0xcc,0xcc, | |
2122 0x04,0xcc,0xcc,0x41,0xcc,0x05,0xcc,0xcc,0x15,0xcc,0xcc,0x52,0xcc,0x16,0xcc,0xcc, | |
2123 0x04,0xcc,0x41,0xcc,0xcc,0xcc,0x05,0xcc,0x15,0xcc,0x52,0xcc,0xcc,0xcc,0x16,0xcc, | |
2124 0x04,0x41,0xcc,0xcc,0xcc,0xcc,0xcc,0x05,0x15,0x52,0xcc,0xcc,0xcc,0xcc,0xcc,0x16, | |
2125 0x44,0x45,0x45,0x45,0x45,0x45,0x45,0x45,0x55,0x56,0x56,0x56,0x56,0x56,0x56,0x56, | |
2126 0x48,0x49,0xcc,0xcc,0xcc,0xcc,0xcc,0x85,0x59,0x5A,0xcc,0xcc,0xcc,0xcc,0xcc,0x96, | |
2127 0x48,0xcc,0x49,0xcc,0xcc,0xcc,0x85,0xcc,0x59,0xcc,0x5A,0xcc,0xcc,0xcc,0x96,0xcc, | |
2128 0x48,0xcc,0xcc,0x49,0xcc,0x85,0xcc,0xcc,0x59,0xcc,0xcc,0x5A,0xcc,0x96,0xcc,0xcc, | |
2129 0x48,0xcc,0xcc,0xcc,0x49,0xcc,0xcc,0xcc,0x59,0xcc,0xcc,0xcc,0x96,0xcc,0xcc,0xcc, | |
2130 0x48,0xcc,0xcc,0x85,0xcc,0x49,0xcc,0xcc,0x59,0xcc,0xcc,0x96,0xcc,0x5A,0xcc,0xcc, | |
2131 0x48,0xcc,0x85,0xcc,0xcc,0xcc,0x49,0xcc,0x59,0xcc,0x96,0xcc,0xcc,0xcc,0x5A,0xcc, | |
2132 0x48,0x85,0xcc,0xcc,0xcc,0xcc,0xcc,0x49,0x59,0x96,0xcc,0xcc,0xcc,0xcc,0xcc,0x5A, | |
2133 }; | |
2134 | |
7622 | 2135 static const uint8_t needs[16]={ |
5648 | 2136 0,1,0,0, |
2137 2,4,2,0, | |
2138 0,1,0,0, | |
2139 15 | |
2140 }; | |
2141 | |
2142 int x, y, b, r, l; | |
5652
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2143 int16_t tmpIt [64*(32+HTAPS_MAX)]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2144 uint8_t tmp2t[3][stride*(32+HTAPS_MAX)]; |
5648 | 2145 int16_t *tmpI= tmpIt; |
2146 uint8_t *tmp2= tmp2t[0]; | |
6205 | 2147 const uint8_t *hpel[11]; |
5648 | 2148 assert(dx<16 && dy<16); |
2149 r= brane[dx + 16*dy]&15; | |
2150 l= brane[dx + 16*dy]>>4; | |
2151 | |
2152 b= needs[l] | needs[r]; | |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2153 if(p && !p->diag_mc) |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2154 b= 15; |
5648 | 2155 |
2156 if(b&5){ | |
5652
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2157 for(y=0; y < b_h+HTAPS_MAX-1; y++){ |
5649 | 2158 for(x=0; x < b_w; x++){ |
5652
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2159 int a_1=src[x + HTAPS_MAX/2-4]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2160 int a0= src[x + HTAPS_MAX/2-3]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2161 int a1= src[x + HTAPS_MAX/2-2]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2162 int a2= src[x + HTAPS_MAX/2-1]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2163 int a3= src[x + HTAPS_MAX/2+0]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2164 int a4= src[x + HTAPS_MAX/2+1]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2165 int a5= src[x + HTAPS_MAX/2+2]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2166 int a6= src[x + HTAPS_MAX/2+3]; |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2167 int am=0; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2168 if(!p || p->fast_mc){ |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2169 am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5); |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2170 tmpI[x]= am; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2171 am= (am+16)>>5; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2172 }else{ |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2173 am= p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6); |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2174 tmpI[x]= am; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2175 am= (am+32)>>6; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2176 } |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2177 |
5649 | 2178 if(am&(~255)) am= ~(am>>31); |
2179 tmp2[x]= am; | |
2180 } | |
2181 tmpI+= 64; | |
2182 tmp2+= stride; | |
2183 src += stride; | |
2138 | 2184 } |
5649 | 2185 src -= stride*y; |
5648 | 2186 } |
5652
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2187 src += HTAPS_MAX/2 - 1; |
5648 | 2188 tmp2= tmp2t[1]; |
2189 | |
2190 if(b&2){ | |
5649 | 2191 for(y=0; y < b_h; y++){ |
2192 for(x=0; x < b_w+1; x++){ | |
5652
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2193 int a_1=src[x + (HTAPS_MAX/2-4)*stride]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2194 int a0= src[x + (HTAPS_MAX/2-3)*stride]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2195 int a1= src[x + (HTAPS_MAX/2-2)*stride]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2196 int a2= src[x + (HTAPS_MAX/2-1)*stride]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2197 int a3= src[x + (HTAPS_MAX/2+0)*stride]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2198 int a4= src[x + (HTAPS_MAX/2+1)*stride]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2199 int a5= src[x + (HTAPS_MAX/2+2)*stride]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2200 int a6= src[x + (HTAPS_MAX/2+3)*stride]; |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2201 int am=0; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2202 if(!p || p->fast_mc) |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2203 am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 16)>>5; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2204 else |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2205 am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 32)>>6; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2206 |
5649 | 2207 if(am&(~255)) am= ~(am>>31); |
2208 tmp2[x]= am; | |
2209 } | |
2210 src += stride; | |
2211 tmp2+= stride; | |
5648 | 2212 } |
5649 | 2213 src -= stride*y; |
5648 | 2214 } |
5652
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2215 src += stride*(HTAPS_MAX/2 - 1); |
5648 | 2216 tmp2= tmp2t[2]; |
2217 tmpI= tmpIt; | |
2218 if(b&4){ | |
2219 for(y=0; y < b_h; y++){ | |
2220 for(x=0; x < b_w; x++){ | |
5652
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2221 int a_1=tmpI[x + (HTAPS_MAX/2-4)*64]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2222 int a0= tmpI[x + (HTAPS_MAX/2-3)*64]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2223 int a1= tmpI[x + (HTAPS_MAX/2-2)*64]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2224 int a2= tmpI[x + (HTAPS_MAX/2-1)*64]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2225 int a3= tmpI[x + (HTAPS_MAX/2+0)*64]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2226 int a4= tmpI[x + (HTAPS_MAX/2+1)*64]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2227 int a5= tmpI[x + (HTAPS_MAX/2+2)*64]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2228 int a6= tmpI[x + (HTAPS_MAX/2+3)*64]; |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2229 int am=0; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2230 if(!p || p->fast_mc) |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2231 am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 512)>>10; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2232 else |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2233 am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 2048)>>12; |
5648 | 2234 if(am&(~255)) am= ~(am>>31); |
2235 tmp2[x]= am; | |
2236 } | |
2237 tmpI+= 64; | |
2238 tmp2+= stride; | |
2138 | 2239 } |
5648 | 2240 } |
2241 | |
2242 hpel[ 0]= src; | |
5652
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2243 hpel[ 1]= tmp2t[0] + stride*(HTAPS_MAX/2-1); |
5648 | 2244 hpel[ 2]= src + 1; |
2245 | |
2246 hpel[ 4]= tmp2t[1]; | |
2247 hpel[ 5]= tmp2t[2]; | |
2248 hpel[ 6]= tmp2t[1] + 1; | |
2249 | |
2250 hpel[ 8]= src + stride; | |
2251 hpel[ 9]= hpel[1] + stride; | |
2252 hpel[10]= hpel[8] + 1; | |
2253 | |
2254 if(b==15){ | |
6205 | 2255 const uint8_t *src1= hpel[dx/8 + dy/8*4 ]; |
2256 const uint8_t *src2= hpel[dx/8 + dy/8*4+1]; | |
2257 const uint8_t *src3= hpel[dx/8 + dy/8*4+4]; | |
2258 const uint8_t *src4= hpel[dx/8 + dy/8*4+5]; | |
5648 | 2259 dx&=7; |
2260 dy&=7; | |
2261 for(y=0; y < b_h; y++){ | |
2262 for(x=0; x < b_w; x++){ | |
2263 dst[x]= ((8-dx)*(8-dy)*src1[x] + dx*(8-dy)*src2[x]+ | |
2264 (8-dx)* dy *src3[x] + dx* dy *src4[x]+32)>>6; | |
2265 } | |
2266 src1+=stride; | |
2267 src2+=stride; | |
2268 src3+=stride; | |
2269 src4+=stride; | |
2270 dst +=stride; | |
2271 } | |
2272 }else{ | |
6205 | 2273 const uint8_t *src1= hpel[l]; |
2274 const uint8_t *src2= hpel[r]; | |
5648 | 2275 int a= weight[((dx&7) + (8*(dy&7)))]; |
2276 int b= 8-a; | |
2277 for(y=0; y < b_h; y++){ | |
2278 for(x=0; x < b_w; x++){ | |
2279 dst[x]= (a*src1[x] + b*src2[x] + 4)>>3; | |
2280 } | |
2281 src1+=stride; | |
2282 src2+=stride; | |
2283 dst +=stride; | |
2284 } | |
2138 | 2285 } |
2286 } | |
2287 | |
2288 #define mca(dx,dy,b_w)\ | |
5254 | 2289 static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, int stride, int h){\ |
5652
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2290 uint8_t tmp[stride*(b_w+HTAPS_MAX-1)];\ |
2138 | 2291 assert(h==b_w);\ |
5652
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2292 mc_block(NULL, dst, src-(HTAPS_MAX/2-1)-(HTAPS_MAX/2-1)*stride, tmp, stride, b_w, b_w, dx, dy);\ |
2138 | 2293 } |
2294 | |
2295 mca( 0, 0,16) | |
2296 mca( 8, 0,16) | |
2297 mca( 0, 8,16) | |
2298 mca( 8, 8,16) | |
2224
11d54cb7ac4e
100l (forgot to set the 8x8 mc functions, so ME did use the mpeg ones ...)
michael
parents:
2223
diff
changeset
|
2299 mca( 0, 0,8) |
11d54cb7ac4e
100l (forgot to set the 8x8 mc functions, so ME did use the mpeg ones ...)
michael
parents:
2223
diff
changeset
|
2300 mca( 8, 0,8) |
11d54cb7ac4e
100l (forgot to set the 8x8 mc functions, so ME did use the mpeg ones ...)
michael
parents:
2223
diff
changeset
|
2301 mca( 0, 8,8) |
11d54cb7ac4e
100l (forgot to set the 8x8 mc functions, so ME did use the mpeg ones ...)
michael
parents:
2223
diff
changeset
|
2302 mca( 8, 8,8) |
2138 | 2303 |
3314 | 2304 static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){ |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2305 if(block->type & BLOCK_INTRA){ |
2206 | 2306 int x, y; |
3018 | 2307 const int color = block->color[plane_index]; |
2308 const int color4= color*0x01010101; | |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2309 if(b_w==32){ |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2310 for(y=0; y < b_h; y++){ |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2311 *(uint32_t*)&dst[0 + y*stride]= color4; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2312 *(uint32_t*)&dst[4 + y*stride]= color4; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2313 *(uint32_t*)&dst[8 + y*stride]= color4; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2314 *(uint32_t*)&dst[12+ y*stride]= color4; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2315 *(uint32_t*)&dst[16+ y*stride]= color4; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2316 *(uint32_t*)&dst[20+ y*stride]= color4; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2317 *(uint32_t*)&dst[24+ y*stride]= color4; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2318 *(uint32_t*)&dst[28+ y*stride]= color4; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2319 } |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2320 }else if(b_w==16){ |
3018 | 2321 for(y=0; y < b_h; y++){ |
2322 *(uint32_t*)&dst[0 + y*stride]= color4; | |
2323 *(uint32_t*)&dst[4 + y*stride]= color4; | |
2324 *(uint32_t*)&dst[8 + y*stride]= color4; | |
2325 *(uint32_t*)&dst[12+ y*stride]= color4; | |
2326 } | |
2327 }else if(b_w==8){ | |
2328 for(y=0; y < b_h; y++){ | |
2329 *(uint32_t*)&dst[0 + y*stride]= color4; | |
2330 *(uint32_t*)&dst[4 + y*stride]= color4; | |
2331 } | |
2332 }else if(b_w==4){ | |
2333 for(y=0; y < b_h; y++){ | |
2334 *(uint32_t*)&dst[0 + y*stride]= color4; | |
2335 } | |
2336 }else{ | |
2337 for(y=0; y < b_h; y++){ | |
2338 for(x=0; x < b_w; x++){ | |
2339 dst[x + y*stride]= color; | |
2340 } | |
2138 | 2341 } |
2342 } | |
2343 }else{ | |
3314 | 2344 uint8_t *src= s->last_picture[block->ref].data[plane_index]; |
2206 | 2345 const int scale= plane_index ? s->mv_scale : 2*s->mv_scale; |
2346 int mx= block->mx*scale; | |
2347 int my= block->my*scale; | |
2223 | 2348 const int dx= mx&15; |
2349 const int dy= my&15; | |
3020
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3018
diff
changeset
|
2350 const int tab_index= 3 - (b_w>>2) + (b_w>>4); |
5652
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2351 sx += (mx>>4) - (HTAPS_MAX/2-1); |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2352 sy += (my>>4) - (HTAPS_MAX/2-1); |
2206 | 2353 src += sx + sy*stride; |
5652
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2354 if( (unsigned)sx >= w - b_w - (HTAPS_MAX-2) |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2355 || (unsigned)sy >= h - b_h - (HTAPS_MAX-2)){ |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2356 ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+HTAPS_MAX-1, b_h+HTAPS_MAX-1, sx, sy, w, h); |
2206 | 2357 src= tmp + MB_SIZE; |
2138 | 2358 } |
3189 | 2359 // assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h); |
2360 // assert(!(b_w&(b_w-1))); | |
3018 | 2361 assert(b_w>1 && b_h>1); |
6206
ea591bc7522a
add parenthesis, fix warning: snow.c:2413: warning: suggest parentheses around && within ||
bcoudurier
parents:
6205
diff
changeset
|
2362 assert((tab_index>=0 && tab_index<4) || b_w==32); |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2363 if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)) || !s->plane[plane_index].fast_mc ) |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2364 mc_block(&s->plane[plane_index], dst, src, tmp, stride, b_w, b_h, dx, dy); |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2365 else if(b_w==32){ |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2366 int y; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2367 for(y=0; y<b_h; y+=16){ |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2368 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 3 + (y+3)*stride,stride); |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2369 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 19 + (y+3)*stride,stride); |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2370 } |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2371 }else if(b_w==b_h) |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2372 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst,src + 3 + 3*stride,stride); |
3018 | 2373 else if(b_w==2*b_h){ |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2374 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst ,src + 3 + 3*stride,stride); |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2375 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 3 + b_h + 3*stride,stride); |
3018 | 2376 }else{ |
2377 assert(2*b_w==b_h); | |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2378 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst ,src + 3 + 3*stride ,stride); |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2379 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst+b_w*stride,src + 3 + 3*stride+b_w*stride,stride); |
3018 | 2380 } |
2138 | 2381 } |
2382 } | |
2383 | |
4436
d3e389536b0a
Add the const specifier as needed to reduce the number of warnings.
takis
parents:
4409
diff
changeset
|
2384 void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, |
3198
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2385 int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){ |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2386 int y, x; |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2387 IDWTELEM * dst; |
3198
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2388 for(y=0; y<b_h; y++){ |
5409 | 2389 //FIXME ugly misuse of obmc_stride |
4436
d3e389536b0a
Add the const specifier as needed to reduce the number of warnings.
takis
parents:
4409
diff
changeset
|
2390 const uint8_t *obmc1= obmc + y*obmc_stride; |
d3e389536b0a
Add the const specifier as needed to reduce the number of warnings.
takis
parents:
4409
diff
changeset
|
2391 const uint8_t *obmc2= obmc1+ (obmc_stride>>1); |
d3e389536b0a
Add the const specifier as needed to reduce the number of warnings.
takis
parents:
4409
diff
changeset
|
2392 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1); |
d3e389536b0a
Add the const specifier as needed to reduce the number of warnings.
takis
parents:
4409
diff
changeset
|
2393 const uint8_t *obmc4= obmc3+ (obmc_stride>>1); |
3198
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2394 dst = slice_buffer_get_line(sb, src_y + y); |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2395 for(x=0; x<b_w; x++){ |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2396 int v= obmc1[x] * block[3][x + y*src_stride] |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2397 +obmc2[x] * block[2][x + y*src_stride] |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2398 +obmc3[x] * block[1][x + y*src_stride] |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2399 +obmc4[x] * block[0][x + y*src_stride]; |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2400 |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2401 v <<= 8 - LOG2_OBMC_MAX; |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2402 if(FRAC_BITS != 8){ |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2403 v >>= 8 - FRAC_BITS; |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2404 } |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2405 if(add){ |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2406 v += dst[x + src_x]; |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2407 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS; |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2408 if(v&(~255)) v= ~(v>>31); |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2409 dst8[x + y*src_stride] = v; |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2410 }else{ |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2411 dst[x + src_x] -= v; |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2412 } |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2413 } |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2414 } |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2415 } |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2416 |
6412 | 2417 //FIXME name cleanup (b_w, block_w, b_width stuff) |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2418 static av_always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer *sb, IDWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){ |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2419 const int b_width = s->b_width << s->block_max_depth; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2420 const int b_height= s->b_height << s->block_max_depth; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2421 const int b_stride= b_width; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2422 BlockNode *lt= &s->block[b_x + b_y*b_stride]; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2423 BlockNode *rt= lt+1; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2424 BlockNode *lb= lt+b_stride; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2425 BlockNode *rb= lb+1; |
2967 | 2426 uint8_t *block[4]; |
2842 | 2427 int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride; |
8214
c5276ad92ff8
snow: move scratch buffer from stack to malloced buffer in context
mru
parents:
7622
diff
changeset
|
2428 uint8_t *tmp = s->scratchbuf; |
2842 | 2429 uint8_t *ptmp; |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2430 int x,y; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2431 |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2432 if(b_x<0){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2433 lt= rt; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2434 lb= rb; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2435 }else if(b_x + 1 >= b_width){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2436 rt= lt; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2437 rb= lb; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2438 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2439 if(b_y<0){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2440 lt= lb; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2441 rt= rb; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2442 }else if(b_y + 1 >= b_height){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2443 lb= lt; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2444 rb= rt; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2445 } |
2967 | 2446 |
6412 | 2447 if(src_x<0){ //FIXME merge with prev & always round internal width up to *16 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2448 obmc -= src_x; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2449 b_w += src_x; |
3662
fc714e9a5419
snow cosmetics: merge the sliced and non-sliced versions of add_yblock
lorenm
parents:
3661
diff
changeset
|
2450 if(!sliced && !offset_dst) |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2451 dst -= src_x; |
2206 | 2452 src_x=0; |
2453 }else if(src_x + b_w > w){ | |
2454 b_w = w - src_x; | |
2455 } | |
2456 if(src_y<0){ | |
2457 obmc -= src_y*obmc_stride; | |
2458 b_h += src_y; | |
3662
fc714e9a5419
snow cosmetics: merge the sliced and non-sliced versions of add_yblock
lorenm
parents:
3661
diff
changeset
|
2459 if(!sliced && !offset_dst) |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2460 dst -= src_y*dst_stride; |
2206 | 2461 src_y=0; |
2462 }else if(src_y + b_h> h){ | |
2463 b_h = h - src_y; | |
2464 } | |
2967 | 2465 |
2206 | 2466 if(b_w<=0 || b_h<=0) return; |
2467 | |
6414 | 2468 assert(src_stride > 2*MB_SIZE + 5); |
2469 | |
3662
fc714e9a5419
snow cosmetics: merge the sliced and non-sliced versions of add_yblock
lorenm
parents:
3661
diff
changeset
|
2470 if(!sliced && offset_dst) |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2471 dst += src_x + src_y*dst_stride; |
2249
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2472 dst8+= src_x + src_y*src_stride; |
2206 | 2473 // src += src_x + src_y*src_stride; |
2474 | |
2842 | 2475 ptmp= tmp + 3*tmp_step; |
2476 block[0]= ptmp; | |
2477 ptmp+=tmp_step; | |
3314 | 2478 pred_block(s, block[0], tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h); |
2206 | 2479 |
2480 if(same_block(lt, rt)){ | |
2481 block[1]= block[0]; | |
2482 }else{ | |
2842 | 2483 block[1]= ptmp; |
2484 ptmp+=tmp_step; | |
3314 | 2485 pred_block(s, block[1], tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h); |
2206 | 2486 } |
2967 | 2487 |
2206 | 2488 if(same_block(lt, lb)){ |
2489 block[2]= block[0]; | |
2490 }else if(same_block(rt, lb)){ | |
2491 block[2]= block[1]; | |
2492 }else{ | |
2842 | 2493 block[2]= ptmp; |
2494 ptmp+=tmp_step; | |
3314 | 2495 pred_block(s, block[2], tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h); |
2206 | 2496 } |
2497 | |
2498 if(same_block(lt, rb) ){ | |
2499 block[3]= block[0]; | |
2500 }else if(same_block(rt, rb)){ | |
2501 block[3]= block[1]; | |
2502 }else if(same_block(lb, rb)){ | |
2503 block[3]= block[2]; | |
2504 }else{ | |
2842 | 2505 block[3]= ptmp; |
3314 | 2506 pred_block(s, block[3], tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h); |
2206 | 2507 } |
2508 #if 0 | |
2509 for(y=0; y<b_h; y++){ | |
2510 for(x=0; x<b_w; x++){ | |
2511 int v= obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX); | |
2512 if(add) dst[x + y*dst_stride] += v; | |
2513 else dst[x + y*dst_stride] -= v; | |
2514 } | |
2515 } | |
2516 for(y=0; y<b_h; y++){ | |
2517 uint8_t *obmc2= obmc + (obmc_stride>>1); | |
2518 for(x=0; x<b_w; x++){ | |
2519 int v= obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX); | |
2520 if(add) dst[x + y*dst_stride] += v; | |
2521 else dst[x + y*dst_stride] -= v; | |
2522 } | |
2523 } | |
2524 for(y=0; y<b_h; y++){ | |
2525 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1); | |
2526 for(x=0; x<b_w; x++){ | |
2527 int v= obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX); | |
2528 if(add) dst[x + y*dst_stride] += v; | |
2529 else dst[x + y*dst_stride] -= v; | |
2530 } | |
2531 } | |
2532 for(y=0; y<b_h; y++){ | |
2533 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1); | |
2534 uint8_t *obmc4= obmc3+ (obmc_stride>>1); | |
2535 for(x=0; x<b_w; x++){ | |
2536 int v= obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX); | |
2537 if(add) dst[x + y*dst_stride] += v; | |
2538 else dst[x + y*dst_stride] -= v; | |
2539 } | |
2540 } | |
2541 #else | |
3662
fc714e9a5419
snow cosmetics: merge the sliced and non-sliced versions of add_yblock
lorenm
parents:
3661
diff
changeset
|
2542 if(sliced){ |
fc714e9a5419
snow cosmetics: merge the sliced and non-sliced versions of add_yblock
lorenm
parents:
3661
diff
changeset
|
2543 s->dsp.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8); |
6475 | 2544 }else{ |
2545 for(y=0; y<b_h; y++){ | |
2546 //FIXME ugly misuse of obmc_stride | |
2547 const uint8_t *obmc1= obmc + y*obmc_stride; | |
2548 const uint8_t *obmc2= obmc1+ (obmc_stride>>1); | |
2549 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1); | |
2550 const uint8_t *obmc4= obmc3+ (obmc_stride>>1); | |
2551 for(x=0; x<b_w; x++){ | |
2552 int v= obmc1[x] * block[3][x + y*src_stride] | |
2553 +obmc2[x] * block[2][x + y*src_stride] | |
2554 +obmc3[x] * block[1][x + y*src_stride] | |
2555 +obmc4[x] * block[0][x + y*src_stride]; | |
2556 | |
2557 v <<= 8 - LOG2_OBMC_MAX; | |
2558 if(FRAC_BITS != 8){ | |
2559 v >>= 8 - FRAC_BITS; | |
2560 } | |
2561 if(add){ | |
2562 v += dst[x + y*dst_stride]; | |
2563 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS; | |
2564 if(v&(~255)) v= ~(v>>31); | |
2565 dst8[x + y*src_stride] = v; | |
2566 }else{ | |
2567 dst[x + y*dst_stride] -= v; | |
2568 } | |
2249
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2569 } |
2206 | 2570 } |
2571 } | |
5910 | 2572 #endif /* 0 */ |
2206 | 2573 } |
2574 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2575 static av_always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, IDWTELEM * old_buffer, int plane_index, int add, int mb_y){ |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2576 Plane *p= &s->plane[plane_index]; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2577 const int mb_w= s->b_width << s->block_max_depth; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2578 const int mb_h= s->b_height << s->block_max_depth; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2579 int x, y, mb_x; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2580 int block_size = MB_SIZE >> s->block_max_depth; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2581 int block_w = plane_index ? block_size/2 : block_size; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2582 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth]; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2583 int obmc_stride= plane_index ? block_size : 2*block_size; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2584 int ref_stride= s->current_picture.linesize[plane_index]; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2585 uint8_t *dst8= s->current_picture.data[plane_index]; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2586 int w= p->width; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2587 int h= p->height; |
2967 | 2588 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2589 if(s->keyframe || (s->avctx->debug&512)){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2590 if(mb_y==mb_h) |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2591 return; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2592 |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2593 if(add){ |
6474
e6995f3fbf7f
cosmetics: Normalize {} placement after for, while, if.
diego
parents:
6437
diff
changeset
|
2594 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){ |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2595 // DWTELEM * line = slice_buffer_get_line(sb, y); |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2596 IDWTELEM * line = sb->line[y]; |
6474
e6995f3fbf7f
cosmetics: Normalize {} placement after for, while, if.
diego
parents:
6437
diff
changeset
|
2597 for(x=0; x<w; x++){ |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2598 // int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1)); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2599 int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1)); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2600 v >>= FRAC_BITS; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2601 if(v&(~255)) v= ~(v>>31); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2602 dst8[x + y*ref_stride]= v; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2603 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2604 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2605 }else{ |
6474
e6995f3fbf7f
cosmetics: Normalize {} placement after for, while, if.
diego
parents:
6437
diff
changeset
|
2606 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){ |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2607 // DWTELEM * line = slice_buffer_get_line(sb, y); |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2608 IDWTELEM * line = sb->line[y]; |
6474
e6995f3fbf7f
cosmetics: Normalize {} placement after for, while, if.
diego
parents:
6437
diff
changeset
|
2609 for(x=0; x<w; x++){ |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2610 line[x] -= 128 << FRAC_BITS; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2611 // buf[x + y*w]-= 128<<FRAC_BITS; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2612 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2613 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2614 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2615 |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2616 return; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2617 } |
2967 | 2618 |
6475 | 2619 for(mb_x=0; mb_x<=mb_w; mb_x++){ |
2620 add_yblock(s, 1, sb, old_buffer, dst8, obmc, | |
2621 block_w*mb_x - block_w/2, | |
2622 block_w*mb_y - block_w/2, | |
2623 block_w, block_w, | |
2624 w, h, | |
2625 w, ref_stride, obmc_stride, | |
2626 mb_x - 1, mb_y - 1, | |
2627 add, 0, plane_index); | |
2628 } | |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2629 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2630 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2631 static av_always_inline void predict_slice(SnowContext *s, IDWTELEM *buf, int plane_index, int add, int mb_y){ |
2138 | 2632 Plane *p= &s->plane[plane_index]; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2633 const int mb_w= s->b_width << s->block_max_depth; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2634 const int mb_h= s->b_height << s->block_max_depth; |
2562 | 2635 int x, y, mb_x; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2636 int block_size = MB_SIZE >> s->block_max_depth; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2637 int block_w = plane_index ? block_size/2 : block_size; |
2206 | 2638 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth]; |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2639 const int obmc_stride= plane_index ? block_size : 2*block_size; |
2249
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2640 int ref_stride= s->current_picture.linesize[plane_index]; |
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2641 uint8_t *dst8= s->current_picture.data[plane_index]; |
2138 | 2642 int w= p->width; |
2643 int h= p->height; | |
2967 | 2644 |
2206 | 2645 if(s->keyframe || (s->avctx->debug&512)){ |
2562 | 2646 if(mb_y==mb_h) |
2647 return; | |
2648 | |
2249
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2649 if(add){ |
2604
b7e6c3d31c65
Snow segfault bug in revision 1.43 patch by (Yartrebo /yartrebo earthlink net/)
michael
parents:
2602
diff
changeset
|
2650 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){ |
2249
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2651 for(x=0; x<w; x++){ |
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2652 int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1)); |
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2653 v >>= FRAC_BITS; |
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2654 if(v&(~255)) v= ~(v>>31); |
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2655 dst8[x + y*ref_stride]= v; |
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2656 } |
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2657 } |
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2658 }else{ |
2604
b7e6c3d31c65
Snow segfault bug in revision 1.43 patch by (Yartrebo /yartrebo earthlink net/)
michael
parents:
2602
diff
changeset
|
2659 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){ |
2249
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2660 for(x=0; x<w; x++){ |
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2661 buf[x + y*w]-= 128<<FRAC_BITS; |
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2662 } |
2206 | 2663 } |
2138 | 2664 } |
2206 | 2665 |
2666 return; | |
2138 | 2667 } |
2967 | 2668 |
6414 | 2669 for(mb_x=0; mb_x<=mb_w; mb_x++){ |
2670 add_yblock(s, 0, NULL, buf, dst8, obmc, | |
2671 block_w*mb_x - block_w/2, | |
2672 block_w*mb_y - block_w/2, | |
2673 block_w, block_w, | |
2674 w, h, | |
2675 w, ref_stride, obmc_stride, | |
2676 mb_x - 1, mb_y - 1, | |
2677 add, 1, plane_index); | |
2678 } | |
2562 | 2679 } |
2680 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2681 static av_always_inline void predict_plane(SnowContext *s, IDWTELEM *buf, int plane_index, int add){ |
2562 | 2682 const int mb_h= s->b_height << s->block_max_depth; |
2683 int mb_y; | |
2684 for(mb_y=0; mb_y<=mb_h; mb_y++) | |
2685 predict_slice(s, buf, plane_index, add, mb_y); | |
2138 | 2686 } |
2687 | |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2688 static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){ |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2689 int i, x2, y2; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2690 Plane *p= &s->plane[plane_index]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2691 const int block_size = MB_SIZE >> s->block_max_depth; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2692 const int block_w = plane_index ? block_size/2 : block_size; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2693 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2694 const int obmc_stride= plane_index ? block_size : 2*block_size; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2695 const int ref_stride= s->current_picture.linesize[plane_index]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2696 uint8_t *src= s-> input_picture.data[plane_index]; |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2697 IDWTELEM *dst= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4; //FIXME change to unsigned |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2698 const int b_stride = s->b_width << s->block_max_depth; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2699 const int w= p->width; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2700 const int h= p->height; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2701 int index= mb_x + mb_y*b_stride; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2702 BlockNode *b= &s->block[index]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2703 BlockNode backup= *b; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2704 int ab=0; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2705 int aa=0; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2706 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2707 b->type|= BLOCK_INTRA; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2708 b->color[plane_index]= 0; |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2709 memset(dst, 0, obmc_stride*obmc_stride*sizeof(IDWTELEM)); |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2710 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2711 for(i=0; i<4; i++){ |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2712 int mb_x2= mb_x + (i &1) - 1; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2713 int mb_y2= mb_y + (i>>1) - 1; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2714 int x= block_w*mb_x2 + block_w/2; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2715 int y= block_w*mb_y2 + block_w/2; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2716 |
3662
fc714e9a5419
snow cosmetics: merge the sliced and non-sliced versions of add_yblock
lorenm
parents:
3661
diff
changeset
|
2717 add_yblock(s, 0, NULL, dst + ((i&1)+(i>>1)*obmc_stride)*block_w, NULL, obmc, |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2718 x, y, block_w, block_w, w, h, obmc_stride, ref_stride, obmc_stride, mb_x2, mb_y2, 0, 0, plane_index); |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2719 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2720 for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_w); y2++){ |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2721 for(x2= FFMAX(x, 0); x2<FFMIN(w, x+block_w); x2++){ |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2722 int index= x2-(block_w*mb_x - block_w/2) + (y2-(block_w*mb_y - block_w/2))*obmc_stride; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2723 int obmc_v= obmc[index]; |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2724 int d; |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2725 if(y<0) obmc_v += obmc[index + block_w*obmc_stride]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2726 if(x<0) obmc_v += obmc[index + block_w]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2727 if(y+block_w>h) obmc_v += obmc[index - block_w*obmc_stride]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2728 if(x+block_w>w) obmc_v += obmc[index - block_w]; |
6412 | 2729 //FIXME precalculate this or simplify it somehow else |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2730 |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2731 d = -dst[index] + (1<<(FRAC_BITS-1)); |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2732 dst[index] = d; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2733 ab += (src[x2 + y2*ref_stride] - (d>>FRAC_BITS)) * obmc_v; |
6412 | 2734 aa += obmc_v * obmc_v; //FIXME precalculate this |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2735 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2736 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2737 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2738 *b= backup; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2739 |
5127 | 2740 return av_clip(((ab<<LOG2_OBMC_MAX) + aa/2)/aa, 0, 255); //FIXME we should not need clipping |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2741 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2742 |
3051 | 2743 static inline int get_block_bits(SnowContext *s, int x, int y, int w){ |
2744 const int b_stride = s->b_width << s->block_max_depth; | |
2745 const int b_height = s->b_height<< s->block_max_depth; | |
2746 int index= x + y*b_stride; | |
4408
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
2747 const BlockNode *b = &s->block[index]; |
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
2748 const BlockNode *left = x ? &s->block[index-1] : &null_block; |
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
2749 const BlockNode *top = y ? &s->block[index-b_stride] : &null_block; |
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
2750 const BlockNode *tl = y && x ? &s->block[index-b_stride-1] : left; |
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
2751 const BlockNode *tr = y && x+w<b_stride ? &s->block[index-b_stride+w] : tl; |
3051 | 2752 int dmx, dmy; |
4001 | 2753 // int mx_context= av_log2(2*FFABS(left->mx - top->mx)); |
2754 // int my_context= av_log2(2*FFABS(left->my - top->my)); | |
3051 | 2755 |
2756 if(x<0 || x>=b_stride || y>=b_height) | |
2757 return 0; | |
2758 /* | |
2759 1 0 0 | |
2760 01X 1-2 1 | |
2761 001XX 3-6 2-3 | |
2762 0001XXX 7-14 4-7 | |
2763 00001XXXX 15-30 8-15 | |
2764 */ | |
2765 //FIXME try accurate rate | |
6412 | 2766 //FIXME intra and inter predictors if surrounding blocks are not the same type |
3051 | 2767 if(b->type & BLOCK_INTRA){ |
4001 | 2768 return 3+2*( av_log2(2*FFABS(left->color[0] - b->color[0])) |
2769 + av_log2(2*FFABS(left->color[1] - b->color[1])) | |
2770 + av_log2(2*FFABS(left->color[2] - b->color[2]))); | |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
2771 }else{ |
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
2772 pred_mv(s, &dmx, &dmy, b->ref, left, top, tr); |
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
2773 dmx-= b->mx; |
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
2774 dmy-= b->my; |
4001 | 2775 return 2*(1 + av_log2(2*FFABS(dmx)) //FIXME kill the 2* can be merged in lambda |
2776 + av_log2(2*FFABS(dmy)) | |
3314 | 2777 + av_log2(2*b->ref)); |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
2778 } |
3051 | 2779 } |
2780 | |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2781 static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, const uint8_t *obmc_edged){ |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2782 Plane *p= &s->plane[plane_index]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2783 const int block_size = MB_SIZE >> s->block_max_depth; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2784 const int block_w = plane_index ? block_size/2 : block_size; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2785 const int obmc_stride= plane_index ? block_size : 2*block_size; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2786 const int ref_stride= s->current_picture.linesize[plane_index]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2787 uint8_t *dst= s->current_picture.data[plane_index]; |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2788 uint8_t *src= s-> input_picture.data[plane_index]; |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2789 IDWTELEM *pred= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4; |
8214
c5276ad92ff8
snow: move scratch buffer from stack to malloced buffer in context
mru
parents:
7622
diff
changeset
|
2790 uint8_t *cur = s->scratchbuf; |
5652
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2791 uint8_t tmp[ref_stride*(2*MB_SIZE+HTAPS_MAX-1)]; |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2792 const int b_stride = s->b_width << s->block_max_depth; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2793 const int b_height = s->b_height<< s->block_max_depth; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2794 const int w= p->width; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2795 const int h= p->height; |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2796 int distortion; |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2797 int rate= 0; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2798 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp); |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2799 int sx= block_w*mb_x - block_w/2; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2800 int sy= block_w*mb_y - block_w/2; |
3206 | 2801 int x0= FFMAX(0,-sx); |
2802 int y0= FFMAX(0,-sy); | |
2803 int x1= FFMIN(block_w*2, w-sx); | |
2804 int y1= FFMIN(block_w*2, h-sy); | |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2805 int i,x,y; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2806 |
3314 | 2807 pred_block(s, cur, tmp, ref_stride, sx, sy, block_w*2, block_w*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h); |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2808 |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2809 for(y=y0; y<y1; y++){ |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2810 const uint8_t *obmc1= obmc_edged + y*obmc_stride; |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2811 const IDWTELEM *pred1 = pred + y*obmc_stride; |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2812 uint8_t *cur1 = cur + y*ref_stride; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2813 uint8_t *dst1 = dst + sx + (sy+y)*ref_stride; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2814 for(x=x0; x<x1; x++){ |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2815 #if FRAC_BITS >= LOG2_OBMC_MAX |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2816 int v = (cur1[x] * obmc1[x]) << (FRAC_BITS - LOG2_OBMC_MAX); |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2817 #else |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2818 int v = (cur1[x] * obmc1[x] + (1<<(LOG2_OBMC_MAX - FRAC_BITS-1))) >> (LOG2_OBMC_MAX - FRAC_BITS); |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2819 #endif |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2820 v = (v + pred1[x]) >> FRAC_BITS; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2821 if(v&(~255)) v= ~(v>>31); |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2822 dst1[x] = v; |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2823 } |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2824 } |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2825 |
3206 | 2826 /* copy the regions where obmc[] = (uint8_t)256 */ |
2827 if(LOG2_OBMC_MAX == 8 | |
2828 && (mb_x == 0 || mb_x == b_stride-1) | |
2829 && (mb_y == 0 || mb_y == b_height-1)){ | |
2830 if(mb_x == 0) | |
2831 x1 = block_w; | |
2832 else | |
2833 x0 = block_w; | |
2834 if(mb_y == 0) | |
2835 y1 = block_w; | |
2836 else | |
2837 y0 = block_w; | |
2838 for(y=y0; y<y1; y++) | |
2839 memcpy(dst + sx+x0 + (sy+y)*ref_stride, cur + x0 + y*ref_stride, x1-x0); | |
2840 } | |
2841 | |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2842 if(block_w==16){ |
3323
87c54a3f8d19
Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation.
lorenm
parents:
3322
diff
changeset
|
2843 /* FIXME rearrange dsputil to fit 32x32 cmp functions */ |
87c54a3f8d19
Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation.
lorenm
parents:
3322
diff
changeset
|
2844 /* FIXME check alignment of the cmp wavelet vs the encoding wavelet */ |
6412 | 2845 /* FIXME cmps overlap but do not cover the wavelet's whole support. |
2846 * So improving the score of one block is not strictly guaranteed | |
2847 * to improve the score of the whole frame, thus iterative motion | |
2848 * estimation does not always converge. */ | |
3323
87c54a3f8d19
Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation.
lorenm
parents:
3322
diff
changeset
|
2849 if(s->avctx->me_cmp == FF_CMP_W97) |
4197 | 2850 distortion = w97_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32); |
3323
87c54a3f8d19
Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation.
lorenm
parents:
3322
diff
changeset
|
2851 else if(s->avctx->me_cmp == FF_CMP_W53) |
4197 | 2852 distortion = w53_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32); |
3323
87c54a3f8d19
Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation.
lorenm
parents:
3322
diff
changeset
|
2853 else{ |
87c54a3f8d19
Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation.
lorenm
parents:
3322
diff
changeset
|
2854 distortion = 0; |
87c54a3f8d19
Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation.
lorenm
parents:
3322
diff
changeset
|
2855 for(i=0; i<4; i++){ |
87c54a3f8d19
Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation.
lorenm
parents:
3322
diff
changeset
|
2856 int off = sx+16*(i&1) + (sy+16*(i>>1))*ref_stride; |
87c54a3f8d19
Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation.
lorenm
parents:
3322
diff
changeset
|
2857 distortion += s->dsp.me_cmp[0](&s->m, src + off, dst + off, ref_stride, 16); |
87c54a3f8d19
Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation.
lorenm
parents:
3322
diff
changeset
|
2858 } |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2859 } |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2860 }else{ |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2861 assert(block_w==8); |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2862 distortion = s->dsp.me_cmp[0](&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, block_w*2); |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2863 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2864 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2865 if(plane_index==0){ |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2866 for(i=0; i<4; i++){ |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2867 /* ..RRr |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2868 * .RXx. |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2869 * rxx.. |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2870 */ |
3051 | 2871 rate += get_block_bits(s, mb_x + (i&1) - (i>>1), mb_y + (i>>1), 1); |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2872 } |
3057 | 2873 if(mb_x == b_stride-2) |
2874 rate += get_block_bits(s, mb_x + 1, mb_y + 1, 1); | |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2875 } |
3051 | 2876 return distortion + rate*penalty_factor; |
2877 } | |
2878 | |
2879 static int get_4block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){ | |
2880 int i, y2; | |
2881 Plane *p= &s->plane[plane_index]; | |
2882 const int block_size = MB_SIZE >> s->block_max_depth; | |
2883 const int block_w = plane_index ? block_size/2 : block_size; | |
2884 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth]; | |
2885 const int obmc_stride= plane_index ? block_size : 2*block_size; | |
2886 const int ref_stride= s->current_picture.linesize[plane_index]; | |
2887 uint8_t *dst= s->current_picture.data[plane_index]; | |
2888 uint8_t *src= s-> input_picture.data[plane_index]; | |
6426 | 2889 //FIXME zero_dst is const but add_yblock changes dst if add is 0 (this is never the case for dst=zero_dst |
2890 // const has only been removed from zero_dst to suppress a warning | |
2891 static IDWTELEM zero_dst[4096]; //FIXME | |
3051 | 2892 const int b_stride = s->b_width << s->block_max_depth; |
2893 const int w= p->width; | |
2894 const int h= p->height; | |
2895 int distortion= 0; | |
2896 int rate= 0; | |
2897 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp); | |
2898 | |
2899 for(i=0; i<9; i++){ | |
2900 int mb_x2= mb_x + (i%3) - 1; | |
2901 int mb_y2= mb_y + (i/3) - 1; | |
2902 int x= block_w*mb_x2 + block_w/2; | |
2903 int y= block_w*mb_y2 + block_w/2; | |
2904 | |
3662
fc714e9a5419
snow cosmetics: merge the sliced and non-sliced versions of add_yblock
lorenm
parents:
3661
diff
changeset
|
2905 add_yblock(s, 0, NULL, zero_dst, dst, obmc, |
3051 | 2906 x, y, block_w, block_w, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, 1, plane_index); |
2907 | |
2908 //FIXME find a cleaner/simpler way to skip the outside stuff | |
2909 for(y2= y; y2<0; y2++) | |
2910 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w); | |
2911 for(y2= h; y2<y+block_w; y2++) | |
2912 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w); | |
2913 if(x<0){ | |
2914 for(y2= y; y2<y+block_w; y2++) | |
2915 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, -x); | |
2916 } | |
2917 if(x+block_w > w){ | |
2918 for(y2= y; y2<y+block_w; y2++) | |
2919 memcpy(dst + w + y2*ref_stride, src + w + y2*ref_stride, x+block_w - w); | |
2920 } | |
2921 | |
2922 assert(block_w== 8 || block_w==16); | |
2923 distortion += s->dsp.me_cmp[block_w==8](&s->m, src + x + y*ref_stride, dst + x + y*ref_stride, ref_stride, block_w); | |
2924 } | |
2925 | |
2926 if(plane_index==0){ | |
2927 BlockNode *b= &s->block[mb_x+mb_y*b_stride]; | |
2928 int merged= same_block(b,b+1) && same_block(b,b+b_stride) && same_block(b,b+b_stride+1); | |
2929 | |
2930 /* ..RRRr | |
2931 * .RXXx. | |
2932 * .RXXx. | |
2933 * rxxx. | |
2934 */ | |
2935 if(merged) | |
2936 rate = get_block_bits(s, mb_x, mb_y, 2); | |
2937 for(i=merged?4:0; i<9; i++){ | |
2938 static const int dxy[9][2] = {{0,0},{1,0},{0,1},{1,1},{2,0},{2,1},{-1,2},{0,2},{1,2}}; | |
2939 rate += get_block_bits(s, mb_x + dxy[i][0], mb_y + dxy[i][1], 1); | |
2940 } | |
2941 } | |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2942 return distortion + rate*penalty_factor; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2943 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2944 |
4283
d6f83e2f8804
rename always_inline to av_always_inline and move to common.h
mru
parents:
4197
diff
changeset
|
2945 static av_always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, const uint8_t *obmc_edged, int *best_rd){ |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2946 const int b_stride= s->b_width << s->block_max_depth; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2947 BlockNode *block= &s->block[mb_x + mb_y * b_stride]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2948 BlockNode backup= *block; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2949 int rd, index, value; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2950 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2951 assert(mb_x>=0 && mb_y>=0); |
2994 | 2952 assert(mb_x<b_stride); |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2953 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2954 if(intra){ |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2955 block->color[0] = p[0]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2956 block->color[1] = p[1]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2957 block->color[2] = p[2]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2958 block->type |= BLOCK_INTRA; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2959 }else{ |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2960 index= (p[0] + 31*p[1]) & (ME_CACHE_SIZE-1); |
3314 | 2961 value= s->me_cache_generation + (p[0]>>10) + (p[1]<<6) + (block->ref<<12); |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2962 if(s->me_cache[index] == value) |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2963 return 0; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2964 s->me_cache[index]= value; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2965 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2966 block->mx= p[0]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2967 block->my= p[1]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2968 block->type &= ~BLOCK_INTRA; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2969 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2970 |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2971 rd= get_block_rd(s, mb_x, mb_y, 0, obmc_edged); |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2972 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2973 //FIXME chroma |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2974 if(rd < *best_rd){ |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2975 *best_rd= rd; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2976 return 1; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2977 }else{ |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2978 *block= backup; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2979 return 0; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2980 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2981 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2982 |
6412 | 2983 /* special case for int[2] args we discard afterwards, |
2984 * fixes compilation problem with gcc 2.95 */ | |
4283
d6f83e2f8804
rename always_inline to av_always_inline and move to common.h
mru
parents:
4197
diff
changeset
|
2985 static av_always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, const uint8_t *obmc_edged, int *best_rd){ |
2999
bc83e7a080a5
this fixes compilation with gcc 2.95 which seems to not like array casting... special case inline of check_block which makes the array and passes it
mmu_man
parents:
2998
diff
changeset
|
2986 int p[2] = {p0, p1}; |
3197 | 2987 return check_block(s, mb_x, mb_y, p, 0, obmc_edged, best_rd); |
2999
bc83e7a080a5
this fixes compilation with gcc 2.95 which seems to not like array casting... special case inline of check_block which makes the array and passes it
mmu_man
parents:
2998
diff
changeset
|
2988 } |
bc83e7a080a5
this fixes compilation with gcc 2.95 which seems to not like array casting... special case inline of check_block which makes the array and passes it
mmu_man
parents:
2998
diff
changeset
|
2989 |
4283
d6f83e2f8804
rename always_inline to av_always_inline and move to common.h
mru
parents:
4197
diff
changeset
|
2990 static av_always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int ref, int *best_rd){ |
3051 | 2991 const int b_stride= s->b_width << s->block_max_depth; |
2992 BlockNode *block= &s->block[mb_x + mb_y * b_stride]; | |
2993 BlockNode backup[4]= {block[0], block[1], block[b_stride], block[b_stride+1]}; | |
2994 int rd, index, value; | |
2995 | |
2996 assert(mb_x>=0 && mb_y>=0); | |
2997 assert(mb_x<b_stride); | |
2998 assert(((mb_x|mb_y)&1) == 0); | |
2999 | |
3000 index= (p0 + 31*p1) & (ME_CACHE_SIZE-1); | |
3314 | 3001 value= s->me_cache_generation + (p0>>10) + (p1<<6) + (block->ref<<12); |
3051 | 3002 if(s->me_cache[index] == value) |
3003 return 0; | |
3004 s->me_cache[index]= value; | |
3005 | |
3006 block->mx= p0; | |
3007 block->my= p1; | |
3314 | 3008 block->ref= ref; |
3051 | 3009 block->type &= ~BLOCK_INTRA; |
3010 block[1]= block[b_stride]= block[b_stride+1]= *block; | |
3011 | |
3012 rd= get_4block_rd(s, mb_x, mb_y, 0); | |
3013 | |
3014 //FIXME chroma | |
3015 if(rd < *best_rd){ | |
3016 *best_rd= rd; | |
3017 return 1; | |
3018 }else{ | |
3019 block[0]= backup[0]; | |
3020 block[1]= backup[1]; | |
3021 block[b_stride]= backup[2]; | |
3022 block[b_stride+1]= backup[3]; | |
3023 return 0; | |
3024 } | |
3025 } | |
3026 | |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3027 static void iterative_me(SnowContext *s){ |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3028 int pass, mb_x, mb_y; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3029 const int b_width = s->b_width << s->block_max_depth; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3030 const int b_height= s->b_height << s->block_max_depth; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3031 const int b_stride= b_width; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3032 int color[3]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3033 |
3194
c30e9bcbb716
seed iterative_me with mvs from conventional search.
lorenm
parents:
3193
diff
changeset
|
3034 { |
c30e9bcbb716
seed iterative_me with mvs from conventional search.
lorenm
parents:
3193
diff
changeset
|
3035 RangeCoder r = s->c; |
c30e9bcbb716
seed iterative_me with mvs from conventional search.
lorenm
parents:
3193
diff
changeset
|
3036 uint8_t state[sizeof(s->block_state)]; |
c30e9bcbb716
seed iterative_me with mvs from conventional search.
lorenm
parents:
3193
diff
changeset
|
3037 memcpy(state, s->block_state, sizeof(s->block_state)); |
c30e9bcbb716
seed iterative_me with mvs from conventional search.
lorenm
parents:
3193
diff
changeset
|
3038 for(mb_y= 0; mb_y<s->b_height; mb_y++) |
c30e9bcbb716
seed iterative_me with mvs from conventional search.
lorenm
parents:
3193
diff
changeset
|
3039 for(mb_x= 0; mb_x<s->b_width; mb_x++) |
c30e9bcbb716
seed iterative_me with mvs from conventional search.
lorenm
parents:
3193
diff
changeset
|
3040 encode_q_branch(s, 0, mb_x, mb_y); |
c30e9bcbb716
seed iterative_me with mvs from conventional search.
lorenm
parents:
3193
diff
changeset
|
3041 s->c = r; |
c30e9bcbb716
seed iterative_me with mvs from conventional search.
lorenm
parents:
3193
diff
changeset
|
3042 memcpy(s->block_state, state, sizeof(s->block_state)); |
c30e9bcbb716
seed iterative_me with mvs from conventional search.
lorenm
parents:
3193
diff
changeset
|
3043 } |
c30e9bcbb716
seed iterative_me with mvs from conventional search.
lorenm
parents:
3193
diff
changeset
|
3044 |
3323
87c54a3f8d19
Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation.
lorenm
parents:
3322
diff
changeset
|
3045 for(pass=0; pass<25; pass++){ |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3046 int change= 0; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3047 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3048 for(mb_y= 0; mb_y<b_height; mb_y++){ |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3049 for(mb_x= 0; mb_x<b_width; mb_x++){ |
3314 | 3050 int dia_change, i, j, ref; |
3051 int best_rd= INT_MAX, ref_rd; | |
3052 BlockNode backup, ref_b; | |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3053 const int index= mb_x + mb_y * b_stride; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3054 BlockNode *block= &s->block[index]; |
3324 | 3055 BlockNode *tb = mb_y ? &s->block[index-b_stride ] : NULL; |
3056 BlockNode *lb = mb_x ? &s->block[index -1] : NULL; | |
3057 BlockNode *rb = mb_x+1<b_width ? &s->block[index +1] : NULL; | |
3058 BlockNode *bb = mb_y+1<b_height ? &s->block[index+b_stride ] : NULL; | |
3059 BlockNode *tlb= mb_x && mb_y ? &s->block[index-b_stride-1] : NULL; | |
3060 BlockNode *trb= mb_x+1<b_width && mb_y ? &s->block[index-b_stride+1] : NULL; | |
3061 BlockNode *blb= mb_x && mb_y+1<b_height ? &s->block[index+b_stride-1] : NULL; | |
3062 BlockNode *brb= mb_x+1<b_width && mb_y+1<b_height ? &s->block[index+b_stride+1] : NULL; | |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3063 const int b_w= (MB_SIZE >> s->block_max_depth); |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3064 uint8_t obmc_edged[b_w*2][b_w*2]; |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3065 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3066 if(pass && (block->type & BLOCK_OPT)) |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3067 continue; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3068 block->type |= BLOCK_OPT; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3069 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3070 backup= *block; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3071 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3072 if(!s->me_cache_generation) |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3073 memset(s->me_cache, 0, sizeof(s->me_cache)); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3074 s->me_cache_generation += 1<<22; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3075 |
6412 | 3076 //FIXME precalculate |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3077 { |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3078 int x, y; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3079 memcpy(obmc_edged, obmc_tab[s->block_max_depth], b_w*b_w*4); |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3080 if(mb_x==0) |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3081 for(y=0; y<b_w*2; y++) |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3082 memset(obmc_edged[y], obmc_edged[y][0] + obmc_edged[y][b_w-1], b_w); |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3083 if(mb_x==b_stride-1) |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3084 for(y=0; y<b_w*2; y++) |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3085 memset(obmc_edged[y]+b_w, obmc_edged[y][b_w] + obmc_edged[y][b_w*2-1], b_w); |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3086 if(mb_y==0){ |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3087 for(x=0; x<b_w*2; x++) |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3088 obmc_edged[0][x] += obmc_edged[b_w-1][x]; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3089 for(y=1; y<b_w; y++) |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3090 memcpy(obmc_edged[y], obmc_edged[0], b_w*2); |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3091 } |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3092 if(mb_y==b_height-1){ |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3093 for(x=0; x<b_w*2; x++) |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3094 obmc_edged[b_w*2-1][x] += obmc_edged[b_w][x]; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3095 for(y=b_w; y<b_w*2-1; y++) |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3096 memcpy(obmc_edged[y], obmc_edged[b_w*2-1], b_w*2); |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3097 } |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3098 } |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3099 |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3100 //skip stuff outside the picture |
6474
e6995f3fbf7f
cosmetics: Normalize {} placement after for, while, if.
diego
parents:
6437
diff
changeset
|
3101 if(mb_x==0 || mb_y==0 || mb_x==b_width-1 || mb_y==b_height-1){ |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3102 uint8_t *src= s-> input_picture.data[0]; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3103 uint8_t *dst= s->current_picture.data[0]; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3104 const int stride= s->current_picture.linesize[0]; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3105 const int block_w= MB_SIZE >> s->block_max_depth; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3106 const int sx= block_w*mb_x - block_w/2; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3107 const int sy= block_w*mb_y - block_w/2; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3108 const int w= s->plane[0].width; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3109 const int h= s->plane[0].height; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3110 int y; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3111 |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3112 for(y=sy; y<0; y++) |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3113 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2); |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3114 for(y=h; y<sy+block_w*2; y++) |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3115 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2); |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3116 if(sx<0){ |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3117 for(y=sy; y<sy+block_w*2; y++) |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3118 memcpy(dst + sx + y*stride, src + sx + y*stride, -sx); |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3119 } |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3120 if(sx+block_w*2 > w){ |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3121 for(y=sy; y<sy+block_w*2; y++) |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3122 memcpy(dst + w + y*stride, src + w + y*stride, sx+block_w*2 - w); |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3123 } |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3124 } |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3125 |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3126 // intra(black) = neighbors' contribution to the current block |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3127 for(i=0; i<3; i++) |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3128 color[i]= get_dc(s, mb_x, mb_y, i); |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3129 |
5127 | 3130 // get previous score (cannot be cached due to OBMC) |
3057 | 3131 if(pass > 0 && (block->type&BLOCK_INTRA)){ |
3132 int color0[3]= {block->color[0], block->color[1], block->color[2]}; | |
3133 check_block(s, mb_x, mb_y, color0, 1, *obmc_edged, &best_rd); | |
3134 }else | |
3197 | 3135 check_block_inter(s, mb_x, mb_y, block->mx, block->my, *obmc_edged, &best_rd); |
3136 | |
3314 | 3137 ref_b= *block; |
3138 ref_rd= best_rd; | |
3139 for(ref=0; ref < s->ref_frames; ref++){ | |
3140 int16_t (*mvr)[2]= &s->ref_mvs[ref][index]; | |
3141 if(s->ref_scores[ref][index] > s->ref_scores[ref_b.ref][index]*3/2) //FIXME tune threshold | |
3142 continue; | |
3143 block->ref= ref; | |
3144 best_rd= INT_MAX; | |
3145 | |
3146 check_block_inter(s, mb_x, mb_y, mvr[0][0], mvr[0][1], *obmc_edged, &best_rd); | |
3147 check_block_inter(s, mb_x, mb_y, 0, 0, *obmc_edged, &best_rd); | |
3324 | 3148 if(tb) |
3314 | 3149 check_block_inter(s, mb_x, mb_y, mvr[-b_stride][0], mvr[-b_stride][1], *obmc_edged, &best_rd); |
3324 | 3150 if(lb) |
3314 | 3151 check_block_inter(s, mb_x, mb_y, mvr[-1][0], mvr[-1][1], *obmc_edged, &best_rd); |
3324 | 3152 if(rb) |
3314 | 3153 check_block_inter(s, mb_x, mb_y, mvr[1][0], mvr[1][1], *obmc_edged, &best_rd); |
3324 | 3154 if(bb) |
3314 | 3155 check_block_inter(s, mb_x, mb_y, mvr[b_stride][0], mvr[b_stride][1], *obmc_edged, &best_rd); |
3156 | |
3157 /* fullpel ME */ | |
6412 | 3158 //FIXME avoid subpel interpolation / round to nearest integer |
3314 | 3159 do{ |
3160 dia_change=0; | |
3161 for(i=0; i<FFMAX(s->avctx->dia_size, 1); i++){ | |
3162 for(j=0; j<i; j++){ | |
3163 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my+(4*j), *obmc_edged, &best_rd); | |
3164 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my-(4*j), *obmc_edged, &best_rd); | |
3165 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my-(4*j), *obmc_edged, &best_rd); | |
3166 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my+(4*j), *obmc_edged, &best_rd); | |
3167 } | |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3168 } |
3314 | 3169 }while(dia_change); |
3170 /* subpel ME */ | |
3171 do{ | |
3172 static const int square[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},}; | |
3173 dia_change=0; | |
3174 for(i=0; i<8; i++) | |
3175 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+square[i][0], block->my+square[i][1], *obmc_edged, &best_rd); | |
3176 }while(dia_change); | |
3177 //FIXME or try the standard 2 pass qpel or similar | |
3178 | |
3179 mvr[0][0]= block->mx; | |
3180 mvr[0][1]= block->my; | |
3181 if(ref_rd > best_rd){ | |
3182 ref_rd= best_rd; | |
3183 ref_b= *block; | |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3184 } |
3314 | 3185 } |
3186 best_rd= ref_rd; | |
3187 *block= ref_b; | |
2998 | 3188 #if 1 |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3189 check_block(s, mb_x, mb_y, color, 1, *obmc_edged, &best_rd); |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3190 //FIXME RD style color selection |
2998 | 3191 #endif |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3192 if(!same_block(block, &backup)){ |
3324 | 3193 if(tb ) tb ->type &= ~BLOCK_OPT; |
3194 if(lb ) lb ->type &= ~BLOCK_OPT; | |
3195 if(rb ) rb ->type &= ~BLOCK_OPT; | |
3196 if(bb ) bb ->type &= ~BLOCK_OPT; | |
3197 if(tlb) tlb->type &= ~BLOCK_OPT; | |
3198 if(trb) trb->type &= ~BLOCK_OPT; | |
3199 if(blb) blb->type &= ~BLOCK_OPT; | |
3200 if(brb) brb->type &= ~BLOCK_OPT; | |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3201 change ++; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3202 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3203 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3204 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3205 av_log(NULL, AV_LOG_ERROR, "pass:%d changed:%d\n", pass, change); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3206 if(!change) |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3207 break; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3208 } |
3051 | 3209 |
3210 if(s->block_max_depth == 1){ | |
3211 int change= 0; | |
3212 for(mb_y= 0; mb_y<b_height; mb_y+=2){ | |
3213 for(mb_x= 0; mb_x<b_width; mb_x+=2){ | |
3324 | 3214 int i; |
3051 | 3215 int best_rd, init_rd; |
3216 const int index= mb_x + mb_y * b_stride; | |
3217 BlockNode *b[4]; | |
3218 | |
3219 b[0]= &s->block[index]; | |
3220 b[1]= b[0]+1; | |
3221 b[2]= b[0]+b_stride; | |
3222 b[3]= b[2]+1; | |
3223 if(same_block(b[0], b[1]) && | |
3224 same_block(b[0], b[2]) && | |
3225 same_block(b[0], b[3])) | |
3226 continue; | |
3227 | |
3228 if(!s->me_cache_generation) | |
3229 memset(s->me_cache, 0, sizeof(s->me_cache)); | |
3230 s->me_cache_generation += 1<<22; | |
3231 | |
3232 init_rd= best_rd= get_4block_rd(s, mb_x, mb_y, 0); | |
3233 | |
3314 | 3234 //FIXME more multiref search? |
3051 | 3235 check_4block_inter(s, mb_x, mb_y, |
3236 (b[0]->mx + b[1]->mx + b[2]->mx + b[3]->mx + 2) >> 2, | |
3314 | 3237 (b[0]->my + b[1]->my + b[2]->my + b[3]->my + 2) >> 2, 0, &best_rd); |
3051 | 3238 |
3239 for(i=0; i<4; i++) | |
3240 if(!(b[i]->type&BLOCK_INTRA)) | |
3314 | 3241 check_4block_inter(s, mb_x, mb_y, b[i]->mx, b[i]->my, b[i]->ref, &best_rd); |
3051 | 3242 |
3243 if(init_rd != best_rd) | |
3244 change++; | |
3245 } | |
3246 } | |
3247 av_log(NULL, AV_LOG_ERROR, "pass:4mv changed:%d\n", change*4); | |
3248 } | |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3249 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3250 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3251 static void quantize(SnowContext *s, SubBand *b, IDWTELEM *dst, DWTELEM *src, int stride, int bias){ |
2138 | 3252 const int w= b->width; |
3253 const int h= b->height; | |
4594 | 3254 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16); |
5575 | 3255 const int qmul= qexp[qlog&(QROOT-1)]<<((qlog>>QSHIFT) + ENCODER_EXTRA_BITS); |
2150 | 3256 int x,y, thres1, thres2; |
2138 | 3257 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3258 if(s->qlog == LOSSLESS_QLOG){ |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3259 for(y=0; y<h; y++) |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3260 for(x=0; x<w; x++) |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3261 dst[x + y*stride]= src[x + y*stride]; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3262 return; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3263 } |
2967 | 3264 |
2138 | 3265 bias= bias ? 0 : (3*qmul)>>3; |
2150 | 3266 thres1= ((qmul - bias)>>QEXPSHIFT) - 1; |
3267 thres2= 2*thres1; | |
2967 | 3268 |
2138 | 3269 if(!bias){ |
3270 for(y=0; y<h; y++){ | |
3271 for(x=0; x<w; x++){ | |
2150 | 3272 int i= src[x + y*stride]; |
2967 | 3273 |
2150 | 3274 if((unsigned)(i+thres1) > thres2){ |
3275 if(i>=0){ | |
3276 i<<= QEXPSHIFT; | |
3277 i/= qmul; //FIXME optimize | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3278 dst[x + y*stride]= i; |
2150 | 3279 }else{ |
3280 i= -i; | |
3281 i<<= QEXPSHIFT; | |
3282 i/= qmul; //FIXME optimize | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3283 dst[x + y*stride]= -i; |
2150 | 3284 } |
3285 }else | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3286 dst[x + y*stride]= 0; |
2138 | 3287 } |
3288 } | |
3289 }else{ | |
3290 for(y=0; y<h; y++){ | |
3291 for(x=0; x<w; x++){ | |
2967 | 3292 int i= src[x + y*stride]; |
3293 | |
2150 | 3294 if((unsigned)(i+thres1) > thres2){ |
3295 if(i>=0){ | |
3296 i<<= QEXPSHIFT; | |
3297 i= (i + bias) / qmul; //FIXME optimize | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3298 dst[x + y*stride]= i; |
2150 | 3299 }else{ |
3300 i= -i; | |
3301 i<<= QEXPSHIFT; | |
3302 i= (i + bias) / qmul; //FIXME optimize | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3303 dst[x + y*stride]= -i; |
2150 | 3304 } |
3305 }else | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3306 dst[x + y*stride]= 0; |
2138 | 3307 } |
3308 } | |
3309 } | |
3310 } | |
3311 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3312 static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int start_y, int end_y){ |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3313 const int w= b->width; |
4594 | 3314 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16); |
2600 | 3315 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT); |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3316 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3317 int x,y; |
2967 | 3318 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3319 if(s->qlog == LOSSLESS_QLOG) return; |
2967 | 3320 |
2634
337217ecbb3e
tighter snow slicing patch by (Yartrebo || yartrebo earthlink net)
michael
parents:
2628
diff
changeset
|
3321 for(y=start_y; y<end_y; y++){ |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3322 // DWTELEM * line = slice_buffer_get_line_from_address(sb, src + (y * stride)); |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3323 IDWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset; |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3324 for(x=0; x<w; x++){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3325 int i= line[x]; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3326 if(i<0){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3327 line[x]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3328 }else if(i>0){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3329 line[x]= (( i*qmul + qadd)>>(QEXPSHIFT)); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3330 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3331 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3332 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3333 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3334 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3335 static void dequantize(SnowContext *s, SubBand *b, IDWTELEM *src, int stride){ |
2138 | 3336 const int w= b->width; |
3337 const int h= b->height; | |
4594 | 3338 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16); |
2600 | 3339 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT); |
2138 | 3340 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT; |
3341 int x,y; | |
2967 | 3342 |
2161 | 3343 if(s->qlog == LOSSLESS_QLOG) return; |
2967 | 3344 |
2138 | 3345 for(y=0; y<h; y++){ |
3346 for(x=0; x<w; x++){ | |
3347 int i= src[x + y*stride]; | |
3348 if(i<0){ | |
3349 src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias | |
3350 }else if(i>0){ | |
3351 src[x + y*stride]= (( i*qmul + qadd)>>(QEXPSHIFT)); | |
3352 } | |
3353 } | |
3354 } | |
3355 } | |
3356 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3357 static void decorrelate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){ |
2138 | 3358 const int w= b->width; |
3359 const int h= b->height; | |
3360 int x,y; | |
2967 | 3361 |
2138 | 3362 for(y=h-1; y>=0; y--){ |
3363 for(x=w-1; x>=0; x--){ | |
3364 int i= x + y*stride; | |
2967 | 3365 |
2138 | 3366 if(x){ |
3367 if(use_median){ | |
3368 if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]); | |
3369 else src[i] -= src[i - 1]; | |
3370 }else{ | |
3371 if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]); | |
3372 else src[i] -= src[i - 1]; | |
3373 } | |
3374 }else{ | |
3375 if(y) src[i] -= src[i - stride]; | |
3376 } | |
3377 } | |
3378 } | |
3379 } | |
3380 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3381 static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median, int start_y, int end_y){ |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3382 const int w= b->width; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3383 int x,y; |
2967 | 3384 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3385 IDWTELEM * line=0; // silence silly "could be used without having been initialized" warning |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3386 IDWTELEM * prev; |
2967 | 3387 |
2634
337217ecbb3e
tighter snow slicing patch by (Yartrebo || yartrebo earthlink net)
michael
parents:
2628
diff
changeset
|
3388 if (start_y != 0) |
337217ecbb3e
tighter snow slicing patch by (Yartrebo || yartrebo earthlink net)
michael
parents:
2628
diff
changeset
|
3389 line = slice_buffer_get_line(sb, ((start_y - 1) * b->stride_line) + b->buf_y_offset) + b->buf_x_offset; |
2967 | 3390 |
2634
337217ecbb3e
tighter snow slicing patch by (Yartrebo || yartrebo earthlink net)
michael
parents:
2628
diff
changeset
|
3391 for(y=start_y; y<end_y; y++){ |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3392 prev = line; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3393 // line = slice_buffer_get_line_from_address(sb, src + (y * stride)); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3394 line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3395 for(x=0; x<w; x++){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3396 if(x){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3397 if(use_median){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3398 if(y && x+1<w) line[x] += mid_pred(line[x - 1], prev[x], prev[x + 1]); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3399 else line[x] += line[x - 1]; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3400 }else{ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3401 if(y) line[x] += mid_pred(line[x - 1], prev[x], line[x - 1] + prev[x] - prev[x - 1]); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3402 else line[x] += line[x - 1]; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3403 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3404 }else{ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3405 if(y) line[x] += prev[x]; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3406 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3407 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3408 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3409 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3410 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3411 static void correlate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){ |
2138 | 3412 const int w= b->width; |
3413 const int h= b->height; | |
3414 int x,y; | |
2967 | 3415 |
2138 | 3416 for(y=0; y<h; y++){ |
3417 for(x=0; x<w; x++){ | |
3418 int i= x + y*stride; | |
2967 | 3419 |
2138 | 3420 if(x){ |
3421 if(use_median){ | |
3422 if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]); | |
3423 else src[i] += src[i - 1]; | |
3424 }else{ | |
3425 if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]); | |
3426 else src[i] += src[i - 1]; | |
3427 } | |
3428 }else{ | |
3429 if(y) src[i] += src[i - stride]; | |
3430 } | |
3431 } | |
3432 } | |
3433 } | |
3434 | |
5666
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3435 static void encode_qlogs(SnowContext *s){ |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3436 int plane_index, level, orientation; |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3437 |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3438 for(plane_index=0; plane_index<2; plane_index++){ |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3439 for(level=0; level<s->spatial_decomposition_count; level++){ |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3440 for(orientation=level ? 1:0; orientation<4; orientation++){ |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3441 if(orientation==2) continue; |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3442 put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1); |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3443 } |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3444 } |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3445 } |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3446 } |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3447 |
2138 | 3448 static void encode_header(SnowContext *s){ |
5666
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3449 int plane_index, i; |
2967 | 3450 uint8_t kstate[32]; |
3451 | |
3452 memset(kstate, MID_STATE, sizeof(kstate)); | |
2138 | 3453 |
2335 | 3454 put_rac(&s->c, kstate, s->keyframe); |
4331
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3455 if(s->keyframe || s->always_reset){ |
2199 | 3456 reset_contexts(s); |
4331
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3457 s->last_spatial_decomposition_type= |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3458 s->last_qlog= |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3459 s->last_qbias= |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3460 s->last_mv_scale= |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3461 s->last_block_max_depth= 0; |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3462 for(plane_index=0; plane_index<2; plane_index++){ |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3463 Plane *p= &s->plane[plane_index]; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3464 p->last_htaps=0; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3465 p->last_diag_mc=0; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3466 memset(p->last_hcoeff, 0, sizeof(p->last_hcoeff)); |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3467 } |
4331
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3468 } |
2138 | 3469 if(s->keyframe){ |
3470 put_symbol(&s->c, s->header_state, s->version, 0); | |
2335 | 3471 put_rac(&s->c, s->header_state, s->always_reset); |
2138 | 3472 put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0); |
3473 put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0); | |
3474 put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0); | |
3475 put_symbol(&s->c, s->header_state, s->colorspace_type, 0); | |
3476 put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0); | |
3477 put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0); | |
2335 | 3478 put_rac(&s->c, s->header_state, s->spatial_scalability); |
3479 // put_rac(&s->c, s->header_state, s->rate_scalability); | |
3314 | 3480 put_symbol(&s->c, s->header_state, s->max_ref_frames-1, 0); |
2138 | 3481 |
5666
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3482 encode_qlogs(s); |
2138 | 3483 } |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3484 |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3485 if(!s->keyframe){ |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3486 int update_mc=0; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3487 for(plane_index=0; plane_index<2; plane_index++){ |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3488 Plane *p= &s->plane[plane_index]; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3489 update_mc |= p->last_htaps != p->htaps; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3490 update_mc |= p->last_diag_mc != p->diag_mc; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3491 update_mc |= !!memcmp(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff)); |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3492 } |
5667
9242e125395f
do not force the halfpel filter coeffs to be retransmitted on every frame
michael
parents:
5666
diff
changeset
|
3493 put_rac(&s->c, s->header_state, update_mc); |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3494 if(update_mc){ |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3495 for(plane_index=0; plane_index<2; plane_index++){ |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3496 Plane *p= &s->plane[plane_index]; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3497 put_rac(&s->c, s->header_state, p->diag_mc); |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3498 put_symbol(&s->c, s->header_state, p->htaps/2-1, 0); |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3499 for(i= p->htaps/2; i; i--) |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3500 put_symbol(&s->c, s->header_state, FFABS(p->hcoeff[i]), 0); |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3501 } |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3502 } |
5670
2f3431b34f0f
make changing spatial_decomposition_count per frame work
michael
parents:
5668
diff
changeset
|
3503 if(s->last_spatial_decomposition_count != s->spatial_decomposition_count){ |
2f3431b34f0f
make changing spatial_decomposition_count per frame work
michael
parents:
5668
diff
changeset
|
3504 put_rac(&s->c, s->header_state, 1); |
5666
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3505 put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0); |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3506 encode_qlogs(s); |
5670
2f3431b34f0f
make changing spatial_decomposition_count per frame work
michael
parents:
5668
diff
changeset
|
3507 }else |
2f3431b34f0f
make changing spatial_decomposition_count per frame work
michael
parents:
5668
diff
changeset
|
3508 put_rac(&s->c, s->header_state, 0); |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3509 } |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3510 |
4331
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3511 put_symbol(&s->c, s->header_state, s->spatial_decomposition_type - s->last_spatial_decomposition_type, 1); |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3512 put_symbol(&s->c, s->header_state, s->qlog - s->last_qlog , 1); |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3513 put_symbol(&s->c, s->header_state, s->mv_scale - s->last_mv_scale, 1); |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3514 put_symbol(&s->c, s->header_state, s->qbias - s->last_qbias , 1); |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3515 put_symbol(&s->c, s->header_state, s->block_max_depth - s->last_block_max_depth, 1); |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3516 |
6144 | 3517 } |
3518 | |
3519 static void update_last_header_values(SnowContext *s){ | |
3520 int plane_index; | |
3521 | |
3522 if(!s->keyframe){ | |
3523 for(plane_index=0; plane_index<2; plane_index++){ | |
3524 Plane *p= &s->plane[plane_index]; | |
3525 p->last_diag_mc= p->diag_mc; | |
3526 p->last_htaps = p->htaps; | |
3527 memcpy(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff)); | |
3528 } | |
3529 } | |
3530 | |
6414 | 3531 s->last_spatial_decomposition_type = s->spatial_decomposition_type; |
3532 s->last_qlog = s->qlog; | |
3533 s->last_qbias = s->qbias; | |
3534 s->last_mv_scale = s->mv_scale; | |
3535 s->last_block_max_depth = s->block_max_depth; | |
3536 s->last_spatial_decomposition_count = s->spatial_decomposition_count; | |
2138 | 3537 } |
3538 | |
5666
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3539 static void decode_qlogs(SnowContext *s){ |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3540 int plane_index, level, orientation; |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3541 |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3542 for(plane_index=0; plane_index<3; plane_index++){ |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3543 for(level=0; level<s->spatial_decomposition_count; level++){ |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3544 for(orientation=level ? 1:0; orientation<4; orientation++){ |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3545 int q; |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3546 if (plane_index==2) q= s->plane[1].band[level][orientation].qlog; |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3547 else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog; |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3548 else q= get_symbol(&s->c, s->header_state, 1); |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3549 s->plane[plane_index].band[level][orientation].qlog= q; |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3550 } |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3551 } |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3552 } |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3553 } |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3554 |
9367 | 3555 #define GET_S(dst, check) \ |
3556 tmp= get_symbol(&s->c, s->header_state, 0);\ | |
3557 if(!(check)){\ | |
3558 av_log(s->avctx, AV_LOG_ERROR, "Error " #dst " is %d\n", tmp);\ | |
3559 return -1;\ | |
3560 }\ | |
3561 dst= tmp; | |
3562 | |
2138 | 3563 static int decode_header(SnowContext *s){ |
9365 | 3564 int plane_index, tmp; |
2335 | 3565 uint8_t kstate[32]; |
2138 | 3566 |
2967 | 3567 memset(kstate, MID_STATE, sizeof(kstate)); |
2335 | 3568 |
3569 s->keyframe= get_rac(&s->c, kstate); | |
4331
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3570 if(s->keyframe || s->always_reset){ |
2199 | 3571 reset_contexts(s); |
4331
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3572 s->spatial_decomposition_type= |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3573 s->qlog= |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3574 s->qbias= |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3575 s->mv_scale= |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3576 s->block_max_depth= 0; |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3577 } |
2138 | 3578 if(s->keyframe){ |
9367 | 3579 GET_S(s->version, tmp <= 0U) |
2335 | 3580 s->always_reset= get_rac(&s->c, s->header_state); |
2138 | 3581 s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0); |
3582 s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0); | |
9371 | 3583 GET_S(s->spatial_decomposition_count, 0 < tmp && tmp <= MAX_DECOMPOSITIONS) |
2138 | 3584 s->colorspace_type= get_symbol(&s->c, s->header_state, 0); |
3585 s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0); | |
3586 s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0); | |
2335 | 3587 s->spatial_scalability= get_rac(&s->c, s->header_state); |
3588 // s->rate_scalability= get_rac(&s->c, s->header_state); | |
9367 | 3589 GET_S(s->max_ref_frames, tmp < (unsigned)MAX_REF_FRAMES) |
3590 s->max_ref_frames++; | |
2138 | 3591 |
5666
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3592 decode_qlogs(s); |
2138 | 3593 } |
2967 | 3594 |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3595 if(!s->keyframe){ |
5667
9242e125395f
do not force the halfpel filter coeffs to be retransmitted on every frame
michael
parents:
5666
diff
changeset
|
3596 if(get_rac(&s->c, s->header_state)){ |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3597 for(plane_index=0; plane_index<2; plane_index++){ |
5807 | 3598 int htaps, i, sum=0; |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3599 Plane *p= &s->plane[plane_index]; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3600 p->diag_mc= get_rac(&s->c, s->header_state); |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3601 htaps= get_symbol(&s->c, s->header_state, 0)*2 + 2; |
5652
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
3602 if((unsigned)htaps > HTAPS_MAX || htaps==0) |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3603 return -1; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3604 p->htaps= htaps; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3605 for(i= htaps/2; i; i--){ |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3606 p->hcoeff[i]= get_symbol(&s->c, s->header_state, 0) * (1-2*(i&1)); |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3607 sum += p->hcoeff[i]; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3608 } |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3609 p->hcoeff[0]= 32-sum; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3610 } |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3611 s->plane[2].diag_mc= s->plane[1].diag_mc; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3612 s->plane[2].htaps = s->plane[1].htaps; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3613 memcpy(s->plane[2].hcoeff, s->plane[1].hcoeff, sizeof(s->plane[1].hcoeff)); |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3614 } |
5666
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3615 if(get_rac(&s->c, s->header_state)){ |
9371 | 3616 GET_S(s->spatial_decomposition_count, 0 < tmp && tmp <= MAX_DECOMPOSITIONS) |
5666
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3617 decode_qlogs(s); |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3618 } |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3619 } |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3620 |
4331
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3621 s->spatial_decomposition_type+= get_symbol(&s->c, s->header_state, 1); |
9366 | 3622 if(s->spatial_decomposition_type > 1U){ |
2138 | 3623 av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type); |
3624 return -1; | |
3625 } | |
9373
78c2ddc32bed
Make sure spatial_decomposition_count is not too large for picture size.
michael
parents:
9372
diff
changeset
|
3626 if(FFMIN(s->avctx-> width>>s->chroma_h_shift, |
78c2ddc32bed
Make sure spatial_decomposition_count is not too large for picture size.
michael
parents:
9372
diff
changeset
|
3627 s->avctx->height>>s->chroma_v_shift) >> (s->spatial_decomposition_count-1) <= 0){ |
78c2ddc32bed
Make sure spatial_decomposition_count is not too large for picture size.
michael
parents:
9372
diff
changeset
|
3628 av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_count %d too large for size", s->spatial_decomposition_count); |
78c2ddc32bed
Make sure spatial_decomposition_count is not too large for picture size.
michael
parents:
9372
diff
changeset
|
3629 return -1; |
78c2ddc32bed
Make sure spatial_decomposition_count is not too large for picture size.
michael
parents:
9372
diff
changeset
|
3630 } |
2967 | 3631 |
4331
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3632 s->qlog += get_symbol(&s->c, s->header_state, 1); |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3633 s->mv_scale += get_symbol(&s->c, s->header_state, 1); |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3634 s->qbias += get_symbol(&s->c, s->header_state, 1); |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3635 s->block_max_depth+= get_symbol(&s->c, s->header_state, 1); |
3303
68721b62a528
sanity checks, some might have been exploitable ...
michael
parents:
3206
diff
changeset
|
3636 if(s->block_max_depth > 1 || s->block_max_depth < 0){ |
2952 | 3637 av_log(s->avctx, AV_LOG_ERROR, "block_max_depth= %d is too large", s->block_max_depth); |
3638 s->block_max_depth= 0; | |
3639 return -1; | |
3640 } | |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
3641 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
3642 return 0; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
3643 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
3644 |
3075 | 3645 static void init_qexp(void){ |
2600 | 3646 int i; |
3647 double v=128; | |
3648 | |
3649 for(i=0; i<QROOT; i++){ | |
3650 qexp[i]= lrintf(v); | |
2967 | 3651 v *= pow(2, 1.0 / QROOT); |
2600 | 3652 } |
3653 } | |
3654 | |
6517
48759bfbd073
Apply 'cold' attribute to init/uninit functions in libavcodec
zuxy
parents:
6481
diff
changeset
|
3655 static av_cold int common_init(AVCodecContext *avctx){ |
2138 | 3656 SnowContext *s = avctx->priv_data; |
3657 int width, height; | |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
3658 int i, j; |
2138 | 3659 |
3660 s->avctx= avctx; | |
9365 | 3661 s->max_ref_frames=1; //just make sure its not an invalid value in case of no initial keyframe |
2967 | 3662 |
2138 | 3663 dsputil_init(&s->dsp, avctx); |
3664 | |
3665 #define mcf(dx,dy)\ | |
3666 s->dsp.put_qpel_pixels_tab [0][dy+dx/4]=\ | |
3667 s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\ | |
2224
11d54cb7ac4e
100l (forgot to set the 8x8 mc functions, so ME did use the mpeg ones ...)
michael
parents:
2223
diff
changeset
|
3668 s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\ |
11d54cb7ac4e
100l (forgot to set the 8x8 mc functions, so ME did use the mpeg ones ...)
michael
parents:
2223
diff
changeset
|
3669 s->dsp.put_qpel_pixels_tab [1][dy+dx/4]=\ |
11d54cb7ac4e
100l (forgot to set the 8x8 mc functions, so ME did use the mpeg ones ...)
michael
parents:
2223
diff
changeset
|
3670 s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\ |
11d54cb7ac4e
100l (forgot to set the 8x8 mc functions, so ME did use the mpeg ones ...)
michael
parents:
2223
diff
changeset
|
3671 s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4]; |
2138 | 3672 |
3673 mcf( 0, 0) | |
3674 mcf( 4, 0) | |
3675 mcf( 8, 0) | |
3676 mcf(12, 0) | |
3677 mcf( 0, 4) | |
3678 mcf( 4, 4) | |
3679 mcf( 8, 4) | |
3680 mcf(12, 4) | |
3681 mcf( 0, 8) | |
3682 mcf( 4, 8) | |
3683 mcf( 8, 8) | |
3684 mcf(12, 8) | |
3685 mcf( 0,12) | |
3686 mcf( 4,12) | |
3687 mcf( 8,12) | |
3688 mcf(12,12) | |
3689 | |
3690 #define mcfh(dx,dy)\ | |
3691 s->dsp.put_pixels_tab [0][dy/4+dx/8]=\ | |
3692 s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\ | |
2224
11d54cb7ac4e
100l (forgot to set the 8x8 mc functions, so ME did use the mpeg ones ...)
michael
parents:
2223
diff
changeset
|
3693 mc_block_hpel ## dx ## dy ## 16;\ |
11d54cb7ac4e
100l (forgot to set the 8x8 mc functions, so ME did use the mpeg ones ...)
michael
parents:
2223
diff
changeset
|
3694 s->dsp.put_pixels_tab [1][dy/4+dx/8]=\ |
11d54cb7ac4e
100l (forgot to set the 8x8 mc functions, so ME did use the mpeg ones ...)
michael
parents:
2223
diff
changeset
|
3695 s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\ |
11d54cb7ac4e
100l (forgot to set the 8x8 mc functions, so ME did use the mpeg ones ...)
michael
parents:
2223
diff
changeset
|
3696 mc_block_hpel ## dx ## dy ## 8; |
2138 | 3697 |
3698 mcfh(0, 0) | |
3699 mcfh(8, 0) | |
3700 mcfh(0, 8) | |
3701 mcfh(8, 8) | |
2600 | 3702 |
3703 if(!qexp[0]) | |
3704 init_qexp(); | |
3705 | |
2138 | 3706 // dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift); |
2967 | 3707 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
3708 width= s->avctx->width; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
3709 height= s->avctx->height; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
3710 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3711 s->spatial_idwt_buffer= av_mallocz(width*height*sizeof(IDWTELEM)); |
6412 | 3712 s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM)); //FIXME this does not belong here |
2967 | 3713 |
5665
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3714 for(i=0; i<MAX_REF_FRAMES; i++) |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3715 for(j=0; j<MAX_REF_FRAMES; j++) |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3716 scale_mv_ref[i][j] = 256*(i+1)/(j+1); |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3717 |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3718 s->avctx->get_buffer(s->avctx, &s->mconly_picture); |
8214
c5276ad92ff8
snow: move scratch buffer from stack to malloced buffer in context
mru
parents:
7622
diff
changeset
|
3719 s->scratchbuf = av_malloc(s->mconly_picture.linesize[0]*7*MB_SIZE); |
5665
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3720 |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3721 return 0; |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3722 } |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3723 |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3724 static int common_init_after_header(AVCodecContext *avctx){ |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3725 SnowContext *s = avctx->priv_data; |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3726 int plane_index, level, orientation; |
2967 | 3727 |
3728 for(plane_index=0; plane_index<3; plane_index++){ | |
2138 | 3729 int w= s->avctx->width; |
3730 int h= s->avctx->height; | |
3731 | |
3732 if(plane_index){ | |
3733 w>>= s->chroma_h_shift; | |
3734 h>>= s->chroma_v_shift; | |
3735 } | |
3736 s->plane[plane_index].width = w; | |
3737 s->plane[plane_index].height= h; | |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3738 |
2138 | 3739 for(level=s->spatial_decomposition_count-1; level>=0; level--){ |
3740 for(orientation=level ? 1 : 0; orientation<4; orientation++){ | |
3741 SubBand *b= &s->plane[plane_index].band[level][orientation]; | |
2967 | 3742 |
2138 | 3743 b->buf= s->spatial_dwt_buffer; |
3744 b->level= level; | |
3745 b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level); | |
3746 b->width = (w + !(orientation&1))>>1; | |
3747 b->height= (h + !(orientation>1))>>1; | |
2967 | 3748 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3749 b->stride_line = 1 << (s->spatial_decomposition_count - level); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3750 b->buf_x_offset = 0; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3751 b->buf_y_offset = 0; |
2967 | 3752 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3753 if(orientation&1){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3754 b->buf += (w+1)>>1; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3755 b->buf_x_offset = (w+1)>>1; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3756 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3757 if(orientation>1){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3758 b->buf += b->stride>>1; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3759 b->buf_y_offset = b->stride_line >> 1; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3760 } |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3761 b->ibuf= s->spatial_idwt_buffer + (b->buf - s->spatial_dwt_buffer); |
2967 | 3762 |
2138 | 3763 if(level) |
3764 b->parent= &s->plane[plane_index].band[level-1][orientation]; | |
5665
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3765 //FIXME avoid this realloc |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3766 av_freep(&b->x_coeff); |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3767 b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff)); |
2138 | 3768 } |
3769 w= (w+1)>>1; | |
3770 h= (h+1)>>1; | |
3771 } | |
3772 } | |
2967 | 3773 |
2138 | 3774 return 0; |
3775 } | |
3776 | |
3322
0b4f548dfb44
Snow: 10l. 1pass ratecontrol failed to set snow's internal quant.
lorenm
parents:
3314
diff
changeset
|
3777 static int qscale2qlog(int qscale){ |
0b4f548dfb44
Snow: 10l. 1pass ratecontrol failed to set snow's internal quant.
lorenm
parents:
3314
diff
changeset
|
3778 return rint(QROOT*log(qscale / (float)FF_QP2LAMBDA)/log(2)) |
0b4f548dfb44
Snow: 10l. 1pass ratecontrol failed to set snow's internal quant.
lorenm
parents:
3314
diff
changeset
|
3779 + 61*QROOT/8; //<64 >60 |
0b4f548dfb44
Snow: 10l. 1pass ratecontrol failed to set snow's internal quant.
lorenm
parents:
3314
diff
changeset
|
3780 } |
0b4f548dfb44
Snow: 10l. 1pass ratecontrol failed to set snow's internal quant.
lorenm
parents:
3314
diff
changeset
|
3781 |
3661
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
3782 static int ratecontrol_1pass(SnowContext *s, AVFrame *pict) |
3313 | 3783 { |
6412 | 3784 /* Estimate the frame's complexity as a sum of weighted dwt coefficients. |
3313 | 3785 * FIXME we know exact mv bits at this point, |
3786 * but ratecontrol isn't set up to include them. */ | |
3787 uint32_t coef_sum= 0; | |
3661
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
3788 int level, orientation, delta_qlog; |
3313 | 3789 |
3790 for(level=0; level<s->spatial_decomposition_count; level++){ | |
3791 for(orientation=level ? 1 : 0; orientation<4; orientation++){ | |
3792 SubBand *b= &s->plane[0].band[level][orientation]; | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3793 IDWTELEM *buf= b->ibuf; |
3313 | 3794 const int w= b->width; |
3795 const int h= b->height; | |
3796 const int stride= b->stride; | |
4594 | 3797 const int qlog= av_clip(2*QROOT + b->qlog, 0, QROOT*16); |
3313 | 3798 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT); |
3799 const int qdiv= (1<<16)/qmul; | |
3800 int x, y; | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3801 //FIXME this is ugly |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3802 for(y=0; y<h; y++) |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3803 for(x=0; x<w; x++) |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3804 buf[x+y*stride]= b->buf[x+y*stride]; |
3313 | 3805 if(orientation==0) |
3806 decorrelate(s, b, buf, stride, 1, 0); | |
3807 for(y=0; y<h; y++) | |
3808 for(x=0; x<w; x++) | |
3809 coef_sum+= abs(buf[x+y*stride]) * qdiv >> 16; | |
3810 } | |
3811 } | |
3812 | |
3813 /* ugly, ratecontrol just takes a sqrt again */ | |
3814 coef_sum = (uint64_t)coef_sum * coef_sum >> 16; | |
3815 assert(coef_sum < INT_MAX); | |
3816 | |
6481 | 3817 if(pict->pict_type == FF_I_TYPE){ |
3313 | 3818 s->m.current_picture.mb_var_sum= coef_sum; |
3819 s->m.current_picture.mc_mb_var_sum= 0; | |
3820 }else{ | |
3821 s->m.current_picture.mc_mb_var_sum= coef_sum; | |
3822 s->m.current_picture.mb_var_sum= 0; | |
3823 } | |
3824 | |
3825 pict->quality= ff_rate_estimate_qscale(&s->m, 1); | |
3766 | 3826 if (pict->quality < 0) |
4011
5bce97c30a69
-1 is a valid return value in ratecontrol_1pass() -> 100l for takis
michael
parents:
4001
diff
changeset
|
3827 return INT_MIN; |
3313 | 3828 s->lambda= pict->quality * 3/2; |
3661
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
3829 delta_qlog= qscale2qlog(pict->quality) - s->qlog; |
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
3830 s->qlog+= delta_qlog; |
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
3831 return delta_qlog; |
3313 | 3832 } |
2138 | 3833 |
5909 | 3834 static void calculate_visual_weight(SnowContext *s, Plane *p){ |
2138 | 3835 int width = p->width; |
3836 int height= p->height; | |
2198 | 3837 int level, orientation, x, y; |
2138 | 3838 |
3839 for(level=0; level<s->spatial_decomposition_count; level++){ | |
3840 for(orientation=level ? 1 : 0; orientation<4; orientation++){ | |
3841 SubBand *b= &p->band[level][orientation]; | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3842 IDWTELEM *ibuf= b->ibuf; |
2138 | 3843 int64_t error=0; |
2967 | 3844 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3845 memset(s->spatial_idwt_buffer, 0, sizeof(*s->spatial_idwt_buffer)*width*height); |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3846 ibuf[b->width/2 + b->height/2*b->stride]= 256*16; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3847 ff_spatial_idwt(s->spatial_idwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count); |
2138 | 3848 for(y=0; y<height; y++){ |
3849 for(x=0; x<width; x++){ | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3850 int64_t d= s->spatial_idwt_buffer[x + y*width]*16; |
2138 | 3851 error += d*d; |
3852 } | |
3853 } | |
3854 | |
3855 b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5); | |
3856 } | |
3857 } | |
3858 } | |
3859 | |
5702 | 3860 #define QUANTIZE2 0 |
3861 | |
3862 #if QUANTIZE2==1 | |
3863 #define Q2_STEP 8 | |
3864 | |
3865 static void find_sse(SnowContext *s, Plane *p, int *score, int score_stride, IDWTELEM *r0, IDWTELEM *r1, int level, int orientation){ | |
3866 SubBand *b= &p->band[level][orientation]; | |
3867 int x, y; | |
3868 int xo=0; | |
3869 int yo=0; | |
3870 int step= 1 << (s->spatial_decomposition_count - level); | |
3871 | |
3872 if(orientation&1) | |
3873 xo= step>>1; | |
3874 if(orientation&2) | |
3875 yo= step>>1; | |
3876 | |
6412 | 3877 //FIXME bias for nonzero ? |
5702 | 3878 //FIXME optimize |
3879 memset(score, 0, sizeof(*score)*score_stride*((p->height + Q2_STEP-1)/Q2_STEP)); | |
3880 for(y=0; y<p->height; y++){ | |
3881 for(x=0; x<p->width; x++){ | |
3882 int sx= (x-xo + step/2) / step / Q2_STEP; | |
3883 int sy= (y-yo + step/2) / step / Q2_STEP; | |
3884 int v= r0[x + y*p->width] - r1[x + y*p->width]; | |
3885 assert(sx>=0 && sy>=0 && sx < score_stride); | |
3886 v= ((v+8)>>4)<<4; | |
3887 score[sx + sy*score_stride] += v*v; | |
3888 assert(score[sx + sy*score_stride] >= 0); | |
3889 } | |
3890 } | |
3891 } | |
3892 | |
3893 static void dequantize_all(SnowContext *s, Plane *p, IDWTELEM *buffer, int width, int height){ | |
3894 int level, orientation; | |
3895 | |
3896 for(level=0; level<s->spatial_decomposition_count; level++){ | |
3897 for(orientation=level ? 1 : 0; orientation<4; orientation++){ | |
3898 SubBand *b= &p->band[level][orientation]; | |
3899 IDWTELEM *dst= buffer + (b->ibuf - s->spatial_idwt_buffer); | |
3900 | |
3901 dequantize(s, b, dst, b->stride); | |
3902 } | |
3903 } | |
3904 } | |
3905 | |
3906 static void dwt_quantize(SnowContext *s, Plane *p, DWTELEM *buffer, int width, int height, int stride, int type){ | |
3907 int level, orientation, ys, xs, x, y, pass; | |
3908 IDWTELEM best_dequant[height * stride]; | |
3909 IDWTELEM idwt2_buffer[height * stride]; | |
3910 const int score_stride= (width + 10)/Q2_STEP; | |
3911 int best_score[(width + 10)/Q2_STEP * (height + 10)/Q2_STEP]; //FIXME size | |
3912 int score[(width + 10)/Q2_STEP * (height + 10)/Q2_STEP]; //FIXME size | |
3913 int threshold= (s->m.lambda * s->m.lambda) >> 6; | |
3914 | |
3915 //FIXME pass the copy cleanly ? | |
3916 | |
3917 // memcpy(dwt_buffer, buffer, height * stride * sizeof(DWTELEM)); | |
3918 ff_spatial_dwt(buffer, width, height, stride, type, s->spatial_decomposition_count); | |
3919 | |
3920 for(level=0; level<s->spatial_decomposition_count; level++){ | |
3921 for(orientation=level ? 1 : 0; orientation<4; orientation++){ | |
3922 SubBand *b= &p->band[level][orientation]; | |
3923 IDWTELEM *dst= best_dequant + (b->ibuf - s->spatial_idwt_buffer); | |
3924 DWTELEM *src= buffer + (b-> buf - s->spatial_dwt_buffer); | |
6412 | 3925 assert(src == b->buf); // code does not depend on this but it is true currently |
5702 | 3926 |
3927 quantize(s, b, dst, src, b->stride, s->qbias); | |
3928 } | |
3929 } | |
3930 for(pass=0; pass<1; pass++){ | |
3931 if(s->qbias == 0) //keyframe | |
3932 continue; | |
3933 for(level=0; level<s->spatial_decomposition_count; level++){ | |
3934 for(orientation=level ? 1 : 0; orientation<4; orientation++){ | |
3935 SubBand *b= &p->band[level][orientation]; | |
3936 IDWTELEM *dst= idwt2_buffer + (b->ibuf - s->spatial_idwt_buffer); | |
3937 IDWTELEM *best_dst= best_dequant + (b->ibuf - s->spatial_idwt_buffer); | |
3938 | |
3939 for(ys= 0; ys<Q2_STEP; ys++){ | |
3940 for(xs= 0; xs<Q2_STEP; xs++){ | |
3941 memcpy(idwt2_buffer, best_dequant, height * stride * sizeof(IDWTELEM)); | |
3942 dequantize_all(s, p, idwt2_buffer, width, height); | |
3943 ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count); | |
3944 find_sse(s, p, best_score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation); | |
3945 memcpy(idwt2_buffer, best_dequant, height * stride * sizeof(IDWTELEM)); | |
3946 for(y=ys; y<b->height; y+= Q2_STEP){ | |
3947 for(x=xs; x<b->width; x+= Q2_STEP){ | |
3948 if(dst[x + y*b->stride]<0) dst[x + y*b->stride]++; | |
3949 if(dst[x + y*b->stride]>0) dst[x + y*b->stride]--; | |
6412 | 3950 //FIXME try more than just -- |
5702 | 3951 } |
3952 } | |
3953 dequantize_all(s, p, idwt2_buffer, width, height); | |
3954 ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count); | |
3955 find_sse(s, p, score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation); | |
3956 for(y=ys; y<b->height; y+= Q2_STEP){ | |
3957 for(x=xs; x<b->width; x+= Q2_STEP){ | |
3958 int score_idx= x/Q2_STEP + (y/Q2_STEP)*score_stride; | |
3959 if(score[score_idx] <= best_score[score_idx] + threshold){ | |
3960 best_score[score_idx]= score[score_idx]; | |
3961 if(best_dst[x + y*b->stride]<0) best_dst[x + y*b->stride]++; | |
3962 if(best_dst[x + y*b->stride]>0) best_dst[x + y*b->stride]--; | |
3963 //FIXME copy instead | |
3964 } | |
3965 } | |
3966 } | |
3967 } | |
3968 } | |
3969 } | |
3970 } | |
3971 } | |
6412 | 3972 memcpy(s->spatial_idwt_buffer, best_dequant, height * stride * sizeof(IDWTELEM)); //FIXME work with that directly instead of copy at the end |
5702 | 3973 } |
3974 | |
5910 | 3975 #endif /* QUANTIZE2==1 */ |
5702 | 3976 |
6517
48759bfbd073
Apply 'cold' attribute to init/uninit functions in libavcodec
zuxy
parents:
6481
diff
changeset
|
3977 static av_cold int encode_init(AVCodecContext *avctx) |
2138 | 3978 { |
3979 SnowContext *s = avctx->priv_data; | |
2198 | 3980 int plane_index; |
2138 | 3981 |
2658
d1609cfeb1d0
#defines for strict_std_compliance and split between inofficial extensions and non standarized things
michael
parents:
2635
diff
changeset
|
3982 if(avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL){ |
6412 | 3983 av_log(avctx, AV_LOG_ERROR, "This codec is under development, files encoded with it may not be decodable with future versions!!!\n" |
3984 "Use vstrict=-2 / -strict -2 to use it anyway.\n"); | |
2151 | 3985 return -1; |
3986 } | |
2967 | 3987 |
3327
955096780e7c
Snow: don't try to encode lossless with 9/7 wavelet, because it isn't lossless.
lorenm
parents:
3326
diff
changeset
|
3988 if(avctx->prediction_method == DWT_97 |
955096780e7c
Snow: don't try to encode lossless with 9/7 wavelet, because it isn't lossless.
lorenm
parents:
3326
diff
changeset
|
3989 && (avctx->flags & CODEC_FLAG_QSCALE) |
955096780e7c
Snow: don't try to encode lossless with 9/7 wavelet, because it isn't lossless.
lorenm
parents:
3326
diff
changeset
|
3990 && avctx->global_quality == 0){ |
6412 | 3991 av_log(avctx, AV_LOG_ERROR, "The 9/7 wavelet is incompatible with lossless mode.\n"); |
3327
955096780e7c
Snow: don't try to encode lossless with 9/7 wavelet, because it isn't lossless.
lorenm
parents:
3326
diff
changeset
|
3992 return -1; |
955096780e7c
Snow: don't try to encode lossless with 9/7 wavelet, because it isn't lossless.
lorenm
parents:
3326
diff
changeset
|
3993 } |
955096780e7c
Snow: don't try to encode lossless with 9/7 wavelet, because it isn't lossless.
lorenm
parents:
3326
diff
changeset
|
3994 |
5665
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3995 s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3996 |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3997 s->chroma_h_shift= 1; //FIXME XXX |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3998 s->chroma_v_shift= 1; |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3999 |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
4000 s->mv_scale = (avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4; |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
4001 s->block_max_depth= (avctx->flags & CODEC_FLAG_4MV ) ? 1 : 0; |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
4002 |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
4003 for(plane_index=0; plane_index<3; plane_index++){ |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
4004 s->plane[plane_index].diag_mc= 1; |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
4005 s->plane[plane_index].htaps= 6; |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
4006 s->plane[plane_index].hcoeff[0]= 40; |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
4007 s->plane[plane_index].hcoeff[1]= -10; |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
4008 s->plane[plane_index].hcoeff[2]= 2; |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
4009 s->plane[plane_index].fast_mc= 1; |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
4010 } |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
4011 |
2138 | 4012 common_init(avctx); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
4013 alloc_blocks(s); |
2967 | 4014 |
2138 | 4015 s->version=0; |
2967 | 4016 |
2608
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4017 s->m.avctx = avctx; |
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4018 s->m.flags = avctx->flags; |
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4019 s->m.bit_rate= avctx->bit_rate; |
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4020 |
8239
dcfdb3352dde
Fix regression test failure with pthreads on multiprocessor systems.
michael
parents:
8214
diff
changeset
|
4021 s->m.me.temp = |
2138 | 4022 s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t)); |
4023 s->m.me.map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t)); | |
4024 s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t)); | |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
4025 s->m.obmc_scratchpad= av_mallocz(MB_SIZE*MB_SIZE*12*sizeof(uint32_t)); |
2138 | 4026 h263_encode_init(&s->m); //mv_penalty |
4027 | |
3314 | 4028 s->max_ref_frames = FFMAX(FFMIN(avctx->refs, MAX_REF_FRAMES), 1); |
4029 | |
2608
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4030 if(avctx->flags&CODEC_FLAG_PASS1){ |
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4031 if(!avctx->stats_out) |
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4032 avctx->stats_out = av_mallocz(256); |
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4033 } |
3322
0b4f548dfb44
Snow: 10l. 1pass ratecontrol failed to set snow's internal quant.
lorenm
parents:
3314
diff
changeset
|
4034 if((avctx->flags&CODEC_FLAG_PASS2) || !(avctx->flags&CODEC_FLAG_QSCALE)){ |
2608
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4035 if(ff_rate_control_init(&s->m) < 0) |
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4036 return -1; |
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4037 } |
3313 | 4038 s->pass1_rc= !(avctx->flags & (CODEC_FLAG_QSCALE|CODEC_FLAG_PASS2)); |
2608
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4039 |
2138 | 4040 avctx->coded_frame= &s->current_picture; |
4041 switch(avctx->pix_fmt){ | |
4042 // case PIX_FMT_YUV444P: | |
4043 // case PIX_FMT_YUV422P: | |
4044 case PIX_FMT_YUV420P: | |
4045 case PIX_FMT_GRAY8: | |
4046 // case PIX_FMT_YUV411P: | |
4047 // case PIX_FMT_YUV410P: | |
4048 s->colorspace_type= 0; | |
4049 break; | |
4494
ce643a22f049
Replace deprecated PIX_FMT names by the newer variants.
diego
parents:
4436
diff
changeset
|
4050 /* case PIX_FMT_RGB32: |
2138 | 4051 s->colorspace= 1; |
4052 break;*/ | |
4053 default: | |
5908 | 4054 av_log(avctx, AV_LOG_ERROR, "pixel format not supported\n"); |
2138 | 4055 return -1; |
4056 } | |
4057 // avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift); | |
4058 s->chroma_h_shift= 1; | |
4059 s->chroma_v_shift= 1; | |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
4060 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
4061 ff_set_cmp(&s->dsp, s->dsp.me_cmp, s->avctx->me_cmp); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
4062 ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, s->avctx->me_sub_cmp); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
4063 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
4064 s->avctx->get_buffer(s->avctx, &s->input_picture); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
4065 |
3314 | 4066 if(s->avctx->me_method == ME_ITER){ |
4067 int i; | |
4068 int size= s->b_width * s->b_height << 2*s->block_max_depth; | |
4069 for(i=0; i<s->max_ref_frames; i++){ | |
4070 s->ref_mvs[i]= av_mallocz(size*sizeof(int16_t[2])); | |
4071 s->ref_scores[i]= av_mallocz(size*sizeof(uint32_t)); | |
4072 } | |
4073 } | |
4074 | |
2138 | 4075 return 0; |
4076 } | |
4077 | |
5924 | 4078 #define USE_HALFPEL_PLANE 0 |
4079 | |
5633
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4080 static void halfpel_interpol(SnowContext *s, uint8_t *halfpel[4][4], AVFrame *frame){ |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4081 int p,x,y; |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4082 |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4083 assert(!(s->avctx->flags & CODEC_FLAG_EMU_EDGE)); |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4084 |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4085 for(p=0; p<3; p++){ |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4086 int is_chroma= !!p; |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4087 int w= s->avctx->width >>is_chroma; |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4088 int h= s->avctx->height >>is_chroma; |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4089 int ls= frame->linesize[p]; |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4090 uint8_t *src= frame->data[p]; |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4091 |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4092 halfpel[1][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls); |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4093 halfpel[2][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls); |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4094 halfpel[3][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls); |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4095 |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4096 halfpel[0][p]= src; |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4097 for(y=0; y<h; y++){ |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4098 for(x=0; x<w; x++){ |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4099 int i= y*ls + x; |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4100 |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4101 halfpel[1][p][i]= (20*(src[i] + src[i+1]) - 5*(src[i-1] + src[i+2]) + (src[i-2] + src[i+3]) + 16 )>>5; |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4102 } |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4103 } |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4104 for(y=0; y<h; y++){ |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4105 for(x=0; x<w; x++){ |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4106 int i= y*ls + x; |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4107 |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4108 halfpel[2][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5; |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4109 } |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4110 } |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4111 src= halfpel[1][p]; |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4112 for(y=0; y<h; y++){ |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4113 for(x=0; x<w; x++){ |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4114 int i= y*ls + x; |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4115 |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4116 halfpel[3][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5; |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4117 } |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4118 } |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4119 |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4120 //FIXME border! |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4121 } |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4122 } |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4123 |
9375
2f04b571d69b
Move release_buffer() up so it is prior to a future call to it.
michael
parents:
9374
diff
changeset
|
4124 static void release_buffer(AVCodecContext *avctx){ |
2f04b571d69b
Move release_buffer() up so it is prior to a future call to it.
michael
parents:
9374
diff
changeset
|
4125 SnowContext *s = avctx->priv_data; |
2f04b571d69b
Move release_buffer() up so it is prior to a future call to it.
michael
parents:
9374
diff
changeset
|
4126 int i; |
2f04b571d69b
Move release_buffer() up so it is prior to a future call to it.
michael
parents:
9374
diff
changeset
|
4127 |
2f04b571d69b
Move release_buffer() up so it is prior to a future call to it.
michael
parents:
9374
diff
changeset
|
4128 if(s->last_picture[s->max_ref_frames-1].data[0]){ |
2f04b571d69b
Move release_buffer() up so it is prior to a future call to it.
michael
parents:
9374
diff
changeset
|
4129 avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]); |
2f04b571d69b
Move release_buffer() up so it is prior to a future call to it.
michael
parents:
9374
diff
changeset
|
4130 for(i=0; i<9; i++) |
2f04b571d69b
Move release_buffer() up so it is prior to a future call to it.
michael
parents:
9374
diff
changeset
|
4131 if(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3]) |
2f04b571d69b
Move release_buffer() up so it is prior to a future call to it.
michael
parents:
9374
diff
changeset
|
4132 av_free(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3] - EDGE_WIDTH*(1+s->current_picture.linesize[i%3])); |
2f04b571d69b
Move release_buffer() up so it is prior to a future call to it.
michael
parents:
9374
diff
changeset
|
4133 } |
2f04b571d69b
Move release_buffer() up so it is prior to a future call to it.
michael
parents:
9374
diff
changeset
|
4134 } |
2f04b571d69b
Move release_buffer() up so it is prior to a future call to it.
michael
parents:
9374
diff
changeset
|
4135 |
2138 | 4136 static int frame_start(SnowContext *s){ |
4137 AVFrame tmp; | |
2187 | 4138 int w= s->avctx->width; //FIXME round up to x16 ? |
4139 int h= s->avctx->height; | |
2138 | 4140 |
2187 | 4141 if(s->current_picture.data[0]){ |
6437 | 4142 s->dsp.draw_edges(s->current_picture.data[0], s->current_picture.linesize[0], w , h , EDGE_WIDTH ); |
4143 s->dsp.draw_edges(s->current_picture.data[1], s->current_picture.linesize[1], w>>1, h>>1, EDGE_WIDTH/2); | |
4144 s->dsp.draw_edges(s->current_picture.data[2], s->current_picture.linesize[2], w>>1, h>>1, EDGE_WIDTH/2); | |
2187 | 4145 } |
4146 | |
9376
e376facae584
Make sure the next used frame is released so get_buffer() wont fail.
michael
parents:
9375
diff
changeset
|
4147 release_buffer(s->avctx); |
e376facae584
Make sure the next used frame is released so get_buffer() wont fail.
michael
parents:
9375
diff
changeset
|
4148 |
3314 | 4149 tmp= s->last_picture[s->max_ref_frames-1]; |
4150 memmove(s->last_picture+1, s->last_picture, (s->max_ref_frames-1)*sizeof(AVFrame)); | |
5633
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4151 memmove(s->halfpel_plane+1, s->halfpel_plane, (s->max_ref_frames-1)*sizeof(void*)*4*4); |
5924 | 4152 if(USE_HALFPEL_PLANE && s->current_picture.data[0]) |
5633
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4153 halfpel_interpol(s, s->halfpel_plane[0], &s->current_picture); |
3314 | 4154 s->last_picture[0]= s->current_picture; |
2138 | 4155 s->current_picture= tmp; |
2967 | 4156 |
3314 | 4157 if(s->keyframe){ |
4158 s->ref_frames= 0; | |
4159 }else{ | |
4160 int i; | |
4161 for(i=0; i<s->max_ref_frames && s->last_picture[i].data[0]; i++) | |
4162 if(i && s->last_picture[i-1].key_frame) | |
4163 break; | |
4164 s->ref_frames= i; | |
9372
9bb96825fc10
Skip non intra frames that have no reference frames.
michael
parents:
9371
diff
changeset
|
4165 if(s->ref_frames==0){ |
9bb96825fc10
Skip non intra frames that have no reference frames.
michael
parents:
9371
diff
changeset
|
4166 av_log(s->avctx,AV_LOG_ERROR, "No reference frames\n"); |
9bb96825fc10
Skip non intra frames that have no reference frames.
michael
parents:
9371
diff
changeset
|
4167 return -1; |
9bb96825fc10
Skip non intra frames that have no reference frames.
michael
parents:
9371
diff
changeset
|
4168 } |
3314 | 4169 } |
4170 | |
2138 | 4171 s->current_picture.reference= 1; |
4172 if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){ | |
4173 av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n"); | |
4174 return -1; | |
4175 } | |
2967 | 4176 |
3314 | 4177 s->current_picture.key_frame= s->keyframe; |
4178 | |
2138 | 4179 return 0; |
4180 } | |
4181 | |
4182 static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){ | |
4183 SnowContext *s = avctx->priv_data; | |
2335 | 4184 RangeCoder * const c= &s->c; |
2138 | 4185 AVFrame *pict = data; |
4186 const int width= s->avctx->width; | |
4187 const int height= s->avctx->height; | |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
4188 int level, orientation, plane_index, i, y; |
3661
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
4189 uint8_t rc_header_bak[sizeof(s->header_state)]; |
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
4190 uint8_t rc_block_bak[sizeof(s->block_state)]; |
2138 | 4191 |
2335 | 4192 ff_init_range_encoder(c, buf, buf_size); |
4193 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8); | |
2967 | 4194 |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
4195 for(i=0; i<3; i++){ |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
4196 int shift= !!i; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
4197 for(y=0; y<(height>>shift); y++) |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
4198 memcpy(&s->input_picture.data[i][y * s->input_picture.linesize[i]], |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
4199 &pict->data[i][y * pict->linesize[i]], |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
4200 width>>shift); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
4201 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
4202 s->new_picture = *pict; |
2138 | 4203 |
3313 | 4204 s->m.picture_number= avctx->frame_number; |
2608
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4205 if(avctx->flags&CODEC_FLAG_PASS2){ |
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4206 s->m.pict_type = |
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4207 pict->pict_type= s->m.rc_context.entry[avctx->frame_number].new_pict_type; |
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4208 s->keyframe= pict->pict_type==FF_I_TYPE; |
3766 | 4209 if(!(avctx->flags&CODEC_FLAG_QSCALE)) { |
3193 | 4210 pict->quality= ff_rate_estimate_qscale(&s->m, 0); |
3766 | 4211 if (pict->quality < 0) |
4212 return -1; | |
4213 } | |
2608
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4214 }else{ |
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4215 s->keyframe= avctx->gop_size==0 || avctx->frame_number % avctx->gop_size == 0; |
3313 | 4216 s->m.pict_type= |
2608
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4217 pict->pict_type= s->keyframe ? FF_I_TYPE : FF_P_TYPE; |
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4218 } |
2967 | 4219 |
3313 | 4220 if(s->pass1_rc && avctx->frame_number == 0) |
4221 pict->quality= 2*FF_QP2LAMBDA; | |
2161 | 4222 if(pict->quality){ |
3322
0b4f548dfb44
Snow: 10l. 1pass ratecontrol failed to set snow's internal quant.
lorenm
parents:
3314
diff
changeset
|
4223 s->qlog= qscale2qlog(pict->quality); |
3313 | 4224 s->lambda = pict->quality * 3/2; |
4225 } | |
4226 if(s->qlog < 0 || (!pict->quality && (avctx->flags & CODEC_FLAG_QSCALE))){ | |
2161 | 4227 s->qlog= LOSSLESS_QLOG; |
3313 | 4228 s->lambda = 0; |
6412 | 4229 }//else keep previous frame's qlog until after motion estimation |
2138 | 4230 |
4231 frame_start(s); | |
4232 | |
2608
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4233 s->m.current_picture_ptr= &s->m.current_picture; |
6481 | 4234 if(pict->pict_type == FF_P_TYPE){ |
2138 | 4235 int block_width = (width +15)>>4; |
4236 int block_height= (height+15)>>4; | |
4237 int stride= s->current_picture.linesize[0]; | |
2967 | 4238 |
2138 | 4239 assert(s->current_picture.data[0]); |
3314 | 4240 assert(s->last_picture[0].data[0]); |
2967 | 4241 |
2138 | 4242 s->m.avctx= s->avctx; |
4243 s->m.current_picture.data[0]= s->current_picture.data[0]; | |
3314 | 4244 s->m. last_picture.data[0]= s->last_picture[0].data[0]; |
2138 | 4245 s->m. new_picture.data[0]= s-> input_picture.data[0]; |
4246 s->m. last_picture_ptr= &s->m. last_picture; | |
4247 s->m.linesize= | |
4248 s->m. last_picture.linesize[0]= | |
4249 s->m. new_picture.linesize[0]= | |
4250 s->m.current_picture.linesize[0]= stride; | |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
4251 s->m.uvlinesize= s->current_picture.linesize[1]; |
2138 | 4252 s->m.width = width; |
4253 s->m.height= height; | |
4254 s->m.mb_width = block_width; | |
4255 s->m.mb_height= block_height; | |
4256 s->m.mb_stride= s->m.mb_width+1; | |
4257 s->m.b8_stride= 2*s->m.mb_width+1; | |
4258 s->m.f_code=1; | |
4259 s->m.pict_type= pict->pict_type; | |
4260 s->m.me_method= s->avctx->me_method; | |
4261 s->m.me.scene_change_score=0; | |
4262 s->m.flags= s->avctx->flags; | |
4263 s->m.quarter_sample= (s->avctx->flags & CODEC_FLAG_QPEL)!=0; | |
4264 s->m.out_format= FMT_H263; | |
4265 s->m.unrestricted_mv= 1; | |
4266 | |
3313 | 4267 s->m.lambda = s->lambda; |
2138 | 4268 s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
4269 s->lambda2= s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
4270 |
2138 | 4271 s->m.dsp= s->dsp; //move |
4272 ff_init_me(&s->m); | |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
4273 s->dsp= s->m.dsp; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
4274 } |
2967 | 4275 |
3661
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
4276 if(s->pass1_rc){ |
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
4277 memcpy(rc_header_bak, s->header_state, sizeof(s->header_state)); |
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
4278 memcpy(rc_block_bak, s->block_state, sizeof(s->block_state)); |
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
4279 } |
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
4280 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
4281 redo_frame: |
2967 | 4282 |
6481 | 4283 if(pict->pict_type == FF_I_TYPE) |
5670
2f3431b34f0f
make changing spatial_decomposition_count per frame work
michael
parents:
5668
diff
changeset
|
4284 s->spatial_decomposition_count= 5; |
2f3431b34f0f
make changing spatial_decomposition_count per frame work
michael
parents:
5668
diff
changeset
|
4285 else |
2f3431b34f0f
make changing spatial_decomposition_count per frame work
michael
parents:
5668
diff
changeset
|
4286 s->spatial_decomposition_count= 5; |
2f3431b34f0f
make changing spatial_decomposition_count per frame work
michael
parents:
5668
diff
changeset
|
4287 |
3313 | 4288 s->m.pict_type = pict->pict_type; |
6481 | 4289 s->qbias= pict->pict_type == FF_P_TYPE ? 2 : 0; |
2138 | 4290 |
5670
2f3431b34f0f
make changing spatial_decomposition_count per frame work
michael
parents:
5668
diff
changeset
|
4291 common_init_after_header(avctx); |
2f3431b34f0f
make changing spatial_decomposition_count per frame work
michael
parents:
5668
diff
changeset
|
4292 |
2f3431b34f0f
make changing spatial_decomposition_count per frame work
michael
parents:
5668
diff
changeset
|
4293 if(s->last_spatial_decomposition_count != s->spatial_decomposition_count){ |
2f3431b34f0f
make changing spatial_decomposition_count per frame work
michael
parents:
5668
diff
changeset
|
4294 for(plane_index=0; plane_index<3; plane_index++){ |
5909 | 4295 calculate_visual_weight(s, &s->plane[plane_index]); |
5670
2f3431b34f0f
make changing spatial_decomposition_count per frame work
michael
parents:
5668
diff
changeset
|
4296 } |
2f3431b34f0f
make changing spatial_decomposition_count per frame work
michael
parents:
5668
diff
changeset
|
4297 } |
2f3431b34f0f
make changing spatial_decomposition_count per frame work
michael
parents:
5668
diff
changeset
|
4298 |
2138 | 4299 encode_header(s); |
2608
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4300 s->m.misc_bits = 8*(s->c.bytestream - s->c.bytestream_start); |
3661
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
4301 encode_blocks(s, 1); |
2608
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4302 s->m.mv_bits = 8*(s->c.bytestream - s->c.bytestream_start) - s->m.misc_bits; |
2967 | 4303 |
2138 | 4304 for(plane_index=0; plane_index<3; plane_index++){ |
4305 Plane *p= &s->plane[plane_index]; | |
4306 int w= p->width; | |
4307 int h= p->height; | |
4308 int x, y; | |
2198 | 4309 // int bits= put_bits_count(&s->c.pb); |
2138 | 4310 |
6414 | 4311 if(!(avctx->flags2 & CODEC_FLAG2_MEMC_ONLY)){ |
4312 //FIXME optimize | |
4313 if(pict->data[plane_index]) //FIXME gray hack | |
4314 for(y=0; y<h; y++){ | |
4315 for(x=0; x<w; x++){ | |
4316 s->spatial_idwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<FRAC_BITS; | |
4317 } | |
4318 } | |
4319 predict_plane(s, s->spatial_idwt_buffer, plane_index, 0); | |
4320 | |
4321 if( plane_index==0 | |
6481 | 4322 && pict->pict_type == FF_P_TYPE |
6414 | 4323 && !(avctx->flags&CODEC_FLAG_PASS2) |
4324 && s->m.me.scene_change_score > s->avctx->scenechange_threshold){ | |
4325 ff_init_range_encoder(c, buf, buf_size); | |
4326 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8); | |
4327 pict->pict_type= FF_I_TYPE; | |
4328 s->keyframe=1; | |
4329 s->current_picture.key_frame=1; | |
4330 goto redo_frame; | |
2138 | 4331 } |
6414 | 4332 |
4333 if(s->qlog == LOSSLESS_QLOG){ | |
4334 for(y=0; y<h; y++){ | |
4335 for(x=0; x<w; x++){ | |
4336 s->spatial_dwt_buffer[y*w + x]= (s->spatial_idwt_buffer[y*w + x] + (1<<(FRAC_BITS-1))-1)>>FRAC_BITS; | |
4337 } | |
2161 | 4338 } |
6414 | 4339 }else{ |
4340 for(y=0; y<h; y++){ | |
4341 for(x=0; x<w; x++){ | |
4342 s->spatial_dwt_buffer[y*w + x]=s->spatial_idwt_buffer[y*w + x]<<ENCODER_EXTRA_BITS; | |
4343 } | |
5575 | 4344 } |
4345 } | |
6414 | 4346 |
4347 /* if(QUANTIZE2) | |
4348 dwt_quantize(s, p, s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type); | |
4349 else*/ | |
4350 ff_spatial_dwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count); | |
4351 | |
4352 if(s->pass1_rc && plane_index==0){ | |
4353 int delta_qlog = ratecontrol_1pass(s, pict); | |
4354 if (delta_qlog <= INT_MIN) | |
4355 return -1; | |
4356 if(delta_qlog){ | |
4357 //reordering qlog in the bitstream would eliminate this reset | |
4358 ff_init_range_encoder(c, buf, buf_size); | |
4359 memcpy(s->header_state, rc_header_bak, sizeof(s->header_state)); | |
4360 memcpy(s->block_state, rc_block_bak, sizeof(s->block_state)); | |
4361 encode_header(s); | |
4362 encode_blocks(s, 0); | |
2161 | 4363 } |
4364 } | |
6414 | 4365 |
4366 for(level=0; level<s->spatial_decomposition_count; level++){ | |
4367 for(orientation=level ? 1 : 0; orientation<4; orientation++){ | |
4368 SubBand *b= &p->band[level][orientation]; | |
4369 | |
4370 if(!QUANTIZE2) | |
4371 quantize(s, b, b->ibuf, b->buf, b->stride, s->qbias); | |
4372 if(orientation==0) | |
6481 | 4373 decorrelate(s, b, b->ibuf, b->stride, pict->pict_type == FF_P_TYPE, 0); |
6414 | 4374 encode_subband(s, b, b->ibuf, b->parent ? b->parent->ibuf : NULL, b->stride, orientation); |
4375 assert(b->parent==NULL || b->parent->stride == b->stride*2); | |
4376 if(orientation==0) | |
4377 correlate(s, b, b->ibuf, b->stride, 1, 0); | |
4378 } | |
4379 } | |
4380 | |
4381 for(level=0; level<s->spatial_decomposition_count; level++){ | |
4382 for(orientation=level ? 1 : 0; orientation<4; orientation++){ | |
4383 SubBand *b= &p->band[level][orientation]; | |
4384 | |
4385 dequantize(s, b, b->ibuf, b->stride); | |
4386 } | |
4387 } | |
4388 | |
4389 ff_spatial_idwt(s->spatial_idwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count); | |
4390 if(s->qlog == LOSSLESS_QLOG){ | |
4391 for(y=0; y<h; y++){ | |
4392 for(x=0; x<w; x++){ | |
4393 s->spatial_idwt_buffer[y*w + x]<<=FRAC_BITS; | |
4394 } | |
4395 } | |
4396 } | |
4397 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1); | |
4398 }else{ | |
3338
937f14bb0f23
support doing motion estimation and compensation without any residual transform or coding
michael
parents:
3327
diff
changeset
|
4399 //ME/MC only |
6481 | 4400 if(pict->pict_type == FF_I_TYPE){ |
3338
937f14bb0f23
support doing motion estimation and compensation without any residual transform or coding
michael
parents:
3327
diff
changeset
|
4401 for(y=0; y<h; y++){ |
937f14bb0f23
support doing motion estimation and compensation without any residual transform or coding
michael
parents:
3327
diff
changeset
|
4402 for(x=0; x<w; x++){ |
937f14bb0f23
support doing motion estimation and compensation without any residual transform or coding
michael
parents:
3327
diff
changeset
|
4403 s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]= |
937f14bb0f23
support doing motion estimation and compensation without any residual transform or coding
michael
parents:
3327
diff
changeset
|
4404 pict->data[plane_index][y*pict->linesize[plane_index] + x]; |
937f14bb0f23
support doing motion estimation and compensation without any residual transform or coding
michael
parents:
3327
diff
changeset
|
4405 } |
937f14bb0f23
support doing motion estimation and compensation without any residual transform or coding
michael
parents:
3327
diff
changeset
|
4406 } |
937f14bb0f23
support doing motion estimation and compensation without any residual transform or coding
michael
parents:
3327
diff
changeset
|
4407 }else{ |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
4408 memset(s->spatial_idwt_buffer, 0, sizeof(IDWTELEM)*w*h); |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
4409 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1); |
3338
937f14bb0f23
support doing motion estimation and compensation without any residual transform or coding
michael
parents:
3327
diff
changeset
|
4410 } |
6414 | 4411 } |
2138 | 4412 if(s->avctx->flags&CODEC_FLAG_PSNR){ |
4413 int64_t error= 0; | |
2967 | 4414 |
6414 | 4415 if(pict->data[plane_index]) //FIXME gray hack |
4416 for(y=0; y<h; y++){ | |
4417 for(x=0; x<w; x++){ | |
4418 int d= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x] - pict->data[plane_index][y*pict->linesize[plane_index] + x]; | |
4419 error += d*d; | |
4420 } | |
2138 | 4421 } |
4422 s->avctx->error[plane_index] += error; | |
2232 | 4423 s->current_picture.error[plane_index] = error; |
2138 | 4424 } |
6414 | 4425 |
2138 | 4426 } |
4427 | |
6144 | 4428 update_last_header_values(s); |
4429 | |
9374 | 4430 release_buffer(avctx); |
2138 | 4431 |
2608
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4432 s->current_picture.coded_picture_number = avctx->frame_number; |
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4433 s->current_picture.pict_type = pict->pict_type; |
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4434 s->current_picture.quality = pict->quality; |
3313 | 4435 s->m.frame_bits = 8*(s->c.bytestream - s->c.bytestream_start); |
4436 s->m.p_tex_bits = s->m.frame_bits - s->m.misc_bits - s->m.mv_bits; | |
4437 s->m.current_picture.display_picture_number = | |
4438 s->m.current_picture.coded_picture_number = avctx->frame_number; | |
4439 s->m.current_picture.quality = pict->quality; | |
4440 s->m.total_bits += 8*(s->c.bytestream - s->c.bytestream_start); | |
4441 if(s->pass1_rc) | |
3766 | 4442 if (ff_rate_estimate_qscale(&s->m, 0) < 0) |
4443 return -1; | |
3313 | 4444 if(avctx->flags&CODEC_FLAG_PASS1) |
2608
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4445 ff_write_pass1_stats(&s->m); |
3313 | 4446 s->m.last_pict_type = s->m.pict_type; |
4123
5c86acb39889
outputs bit spent on various encoding functions (motion vectors, overhead, etc)
gpoirier
parents:
4122
diff
changeset
|
4447 avctx->frame_bits = s->m.frame_bits; |
5c86acb39889
outputs bit spent on various encoding functions (motion vectors, overhead, etc)
gpoirier
parents:
4122
diff
changeset
|
4448 avctx->mv_bits = s->m.mv_bits; |
5c86acb39889
outputs bit spent on various encoding functions (motion vectors, overhead, etc)
gpoirier
parents:
4122
diff
changeset
|
4449 avctx->misc_bits = s->m.misc_bits; |
5c86acb39889
outputs bit spent on various encoding functions (motion vectors, overhead, etc)
gpoirier
parents:
4122
diff
changeset
|
4450 avctx->p_tex_bits = s->m.p_tex_bits; |
2608
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4451 |
2138 | 4452 emms_c(); |
2967 | 4453 |
2335 | 4454 return ff_rac_terminate(c); |
2138 | 4455 } |
4456 | |
6517
48759bfbd073
Apply 'cold' attribute to init/uninit functions in libavcodec
zuxy
parents:
6481
diff
changeset
|
4457 static av_cold void common_end(SnowContext *s){ |
3314 | 4458 int plane_index, level, orientation, i; |
2192 | 4459 |
2138 | 4460 av_freep(&s->spatial_dwt_buffer); |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
4461 av_freep(&s->spatial_idwt_buffer); |
2138 | 4462 |
8239
dcfdb3352dde
Fix regression test failure with pthreads on multiprocessor systems.
michael
parents:
8214
diff
changeset
|
4463 s->m.me.temp= NULL; |
2967 | 4464 av_freep(&s->m.me.scratchpad); |
2138 | 4465 av_freep(&s->m.me.map); |
4466 av_freep(&s->m.me.score_map); | |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
4467 av_freep(&s->m.obmc_scratchpad); |
2967 | 4468 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
4469 av_freep(&s->block); |
8214
c5276ad92ff8
snow: move scratch buffer from stack to malloced buffer in context
mru
parents:
7622
diff
changeset
|
4470 av_freep(&s->scratchbuf); |
2192 | 4471 |
3314 | 4472 for(i=0; i<MAX_REF_FRAMES; i++){ |
4473 av_freep(&s->ref_mvs[i]); | |
4474 av_freep(&s->ref_scores[i]); | |
4475 if(s->last_picture[i].data[0]) | |
4476 s->avctx->release_buffer(s->avctx, &s->last_picture[i]); | |
4477 } | |
4478 | |
2967 | 4479 for(plane_index=0; plane_index<3; plane_index++){ |
2192 | 4480 for(level=s->spatial_decomposition_count-1; level>=0; level--){ |
4481 for(orientation=level ? 1 : 0; orientation<4; orientation++){ | |
4482 SubBand *b= &s->plane[plane_index].band[level][orientation]; | |
2967 | 4483 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4484 av_freep(&b->x_coeff); |
2192 | 4485 } |
4486 } | |
4487 } | |
2138 | 4488 } |
4489 | |
6517
48759bfbd073
Apply 'cold' attribute to init/uninit functions in libavcodec
zuxy
parents:
6481
diff
changeset
|
4490 static av_cold int encode_end(AVCodecContext *avctx) |
2138 | 4491 { |
4492 SnowContext *s = avctx->priv_data; | |
4493 | |
4494 common_end(s); | |
2608
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4495 av_free(avctx->stats_out); |
2138 | 4496 |
4497 return 0; | |
4498 } | |
4499 | |
6517
48759bfbd073
Apply 'cold' attribute to init/uninit functions in libavcodec
zuxy
parents:
6481
diff
changeset
|
4500 static av_cold int decode_init(AVCodecContext *avctx) |
2138 | 4501 { |
2635 | 4502 avctx->pix_fmt= PIX_FMT_YUV420P; |
2138 | 4503 |
4504 common_init(avctx); | |
2967 | 4505 |
2138 | 4506 return 0; |
4507 } | |
4508 | |
9355
54bc8a2727b0
Implement avcodec_decode_video2(), _audio3() and _subtitle2() which takes an
rbultje
parents:
9291
diff
changeset
|
4509 static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, AVPacket *avpkt){ |
54bc8a2727b0
Implement avcodec_decode_video2(), _audio3() and _subtitle2() which takes an
rbultje
parents:
9291
diff
changeset
|
4510 const uint8_t *buf = avpkt->data; |
54bc8a2727b0
Implement avcodec_decode_video2(), _audio3() and _subtitle2() which takes an
rbultje
parents:
9291
diff
changeset
|
4511 int buf_size = avpkt->size; |
2138 | 4512 SnowContext *s = avctx->priv_data; |
2335 | 4513 RangeCoder * const c= &s->c; |
2138 | 4514 int bytes_read; |
4515 AVFrame *picture = data; | |
9374 | 4516 int level, orientation, plane_index; |
2138 | 4517 |
2335 | 4518 ff_init_range_decoder(c, buf, buf_size); |
4519 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8); | |
2138 | 4520 |
4521 s->current_picture.pict_type= FF_I_TYPE; //FIXME I vs. P | |
5668 | 4522 if(decode_header(s)<0) |
4523 return -1; | |
5665
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
4524 common_init_after_header(avctx); |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
4525 |
5662
77297fe0311c
allocate slice buffer after parsing the header containing needed parameters
michael
parents:
5661
diff
changeset
|
4526 // realloc slice buffer for the case that spatial_decomposition_count changed |
77297fe0311c
allocate slice buffer after parsing the header containing needed parameters
michael
parents:
5661
diff
changeset
|
4527 slice_buffer_destroy(&s->sb); |
77297fe0311c
allocate slice buffer after parsing the header containing needed parameters
michael
parents:
5661
diff
changeset
|
4528 slice_buffer_init(&s->sb, s->plane[0].height, (MB_SIZE >> s->block_max_depth) + s->spatial_decomposition_count * 8 + 1, s->plane[0].width, s->spatial_idwt_buffer); |
77297fe0311c
allocate slice buffer after parsing the header containing needed parameters
michael
parents:
5661
diff
changeset
|
4529 |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
4530 for(plane_index=0; plane_index<3; plane_index++){ |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
4531 Plane *p= &s->plane[plane_index]; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
4532 p->fast_mc= p->diag_mc && p->htaps==6 && p->hcoeff[0]==40 |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
4533 && p->hcoeff[1]==-10 |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
4534 && p->hcoeff[2]==2; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
4535 } |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
4536 |
9370 | 4537 alloc_blocks(s); |
2138 | 4538 |
9372
9bb96825fc10
Skip non intra frames that have no reference frames.
michael
parents:
9371
diff
changeset
|
4539 if(frame_start(s) < 0) |
9bb96825fc10
Skip non intra frames that have no reference frames.
michael
parents:
9371
diff
changeset
|
4540 return -1; |
6412 | 4541 //keyframe flag duplication mess FIXME |
2138 | 4542 if(avctx->debug&FF_DEBUG_PICT_INFO) |
4543 av_log(avctx, AV_LOG_ERROR, "keyframe:%d qlog:%d\n", s->keyframe, s->qlog); | |
2967 | 4544 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
4545 decode_blocks(s); |
2138 | 4546 |
4547 for(plane_index=0; plane_index<3; plane_index++){ | |
4548 Plane *p= &s->plane[plane_index]; | |
4549 int w= p->width; | |
4550 int h= p->height; | |
4551 int x, y; | |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4552 int decode_state[MAX_DECOMPOSITIONS][4][1]; /* Stored state info for unpack_coeffs. 1 variable per instance. */ |
2967 | 4553 |
6414 | 4554 if(s->avctx->debug&2048){ |
4555 memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h); | |
4556 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1); | |
4557 | |
4558 for(y=0; y<h; y++){ | |
4559 for(x=0; x<w; x++){ | |
4560 int v= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]; | |
4561 s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v; | |
4562 } | |
2138 | 4563 } |
4564 } | |
6414 | 4565 |
4566 { | |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4567 for(level=0; level<s->spatial_decomposition_count; level++){ |
2634
337217ecbb3e
tighter snow slicing patch by (Yartrebo || yartrebo earthlink net)
michael
parents:
2628
diff
changeset
|
4568 for(orientation=level ? 1 : 0; orientation<4; orientation++){ |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4569 SubBand *b= &p->band[level][orientation]; |
6414 | 4570 unpack_coeffs(s, b, b->parent, orientation); |
4571 } | |
4572 } | |
4573 } | |
4574 | |
4575 { | |
4576 const int mb_h= s->b_height << s->block_max_depth; | |
4577 const int block_size = MB_SIZE >> s->block_max_depth; | |
4578 const int block_w = plane_index ? block_size/2 : block_size; | |
4579 int mb_y; | |
8308 | 4580 DWTCompose cs[MAX_DECOMPOSITIONS]; |
6414 | 4581 int yd=0, yq=0; |
4582 int y; | |
4583 int end_y; | |
4584 | |
4585 ff_spatial_idwt_buffered_init(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count); | |
4586 for(mb_y=0; mb_y<=mb_h; mb_y++){ | |
4587 | |
4588 int slice_starty = block_w*mb_y; | |
4589 int slice_h = block_w*(mb_y+1); | |
4590 if (!(s->keyframe || s->avctx->debug&512)){ | |
4591 slice_starty = FFMAX(0, slice_starty - (block_w >> 1)); | |
4592 slice_h -= (block_w >> 1); | |
4593 } | |
4594 | |
4595 for(level=0; level<s->spatial_decomposition_count; level++){ | |
4596 for(orientation=level ? 1 : 0; orientation<4; orientation++){ | |
4597 SubBand *b= &p->band[level][orientation]; | |
4598 int start_y; | |
4599 int end_y; | |
4600 int our_mb_start = mb_y; | |
4601 int our_mb_end = (mb_y + 1); | |
4602 const int extra= 3; | |
4603 start_y = (mb_y ? ((block_w * our_mb_start) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra: 0); | |
4604 end_y = (((block_w * our_mb_end) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra); | |
4605 if (!(s->keyframe || s->avctx->debug&512)){ | |
4606 start_y = FFMAX(0, start_y - (block_w >> (1+s->spatial_decomposition_count - level))); | |
4607 end_y = FFMAX(0, end_y - (block_w >> (1+s->spatial_decomposition_count - level))); | |
2634
337217ecbb3e
tighter snow slicing patch by (Yartrebo || yartrebo earthlink net)
michael
parents:
2628
diff
changeset
|
4608 } |
6414 | 4609 start_y = FFMIN(b->height, start_y); |
4610 end_y = FFMIN(b->height, end_y); | |
4611 | |
4612 if (start_y != end_y){ | |
4613 if (orientation == 0){ | |
4614 SubBand * correlate_band = &p->band[0][0]; | |
4615 int correlate_end_y = FFMIN(b->height, end_y + 1); | |
4616 int correlate_start_y = FFMIN(b->height, (start_y ? start_y + 1 : 0)); | |
4617 decode_subband_slice_buffered(s, correlate_band, &s->sb, correlate_start_y, correlate_end_y, decode_state[0][0]); | |
4618 correlate_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, 1, 0, correlate_start_y, correlate_end_y); | |
4619 dequantize_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, start_y, end_y); | |
4620 } | |
4621 else | |
4622 decode_subband_slice_buffered(s, b, &s->sb, start_y, end_y, decode_state[level][orientation]); | |
4623 } | |
2634
337217ecbb3e
tighter snow slicing patch by (Yartrebo || yartrebo earthlink net)
michael
parents:
2628
diff
changeset
|
4624 } |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4625 } |
6414 | 4626 |
4627 for(; yd<slice_h; yd+=4){ | |
4628 ff_spatial_idwt_buffered_slice(&s->dsp, cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd); | |
4629 } | |
4630 | |
4631 if(s->qlog == LOSSLESS_QLOG){ | |
4632 for(; yq<slice_h && yq<h; yq++){ | |
4633 IDWTELEM * line = slice_buffer_get_line(&s->sb, yq); | |
4634 for(x=0; x<w; x++){ | |
4635 line[x] <<= FRAC_BITS; | |
4636 } | |
2161 | 4637 } |
4638 } | |
6414 | 4639 |
4640 predict_slice_buffered(s, &s->sb, s->spatial_idwt_buffer, plane_index, 1, mb_y); | |
4641 | |
4642 y = FFMIN(p->height, slice_starty); | |
4643 end_y = FFMIN(p->height, slice_h); | |
4644 while(y < end_y) | |
4645 slice_buffer_release(&s->sb, y++); | |
2161 | 4646 } |
2562 | 4647 |
6414 | 4648 slice_buffer_flush(&s->sb); |
4649 } | |
4650 | |
2138 | 4651 } |
2967 | 4652 |
2138 | 4653 emms_c(); |
4654 | |
9374 | 4655 release_buffer(avctx); |
2138 | 4656 |
6414 | 4657 if(!(s->avctx->debug&2048)) |
4658 *picture= s->current_picture; | |
4659 else | |
4660 *picture= s->mconly_picture; | |
2967 | 4661 |
2138 | 4662 *data_size = sizeof(AVFrame); |
2967 | 4663 |
2335 | 4664 bytes_read= c->bytestream - c->bytestream_start; |
4665 if(bytes_read ==0) av_log(s->avctx, AV_LOG_ERROR, "error at end of frame\n"); //FIXME | |
2138 | 4666 |
4667 return bytes_read; | |
4668 } | |
4669 | |
6517
48759bfbd073
Apply 'cold' attribute to init/uninit functions in libavcodec
zuxy
parents:
6481
diff
changeset
|
4670 static av_cold int decode_end(AVCodecContext *avctx) |
2138 | 4671 { |
4672 SnowContext *s = avctx->priv_data; | |
4673 | |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4674 slice_buffer_destroy(&s->sb); |
2967 | 4675 |
2138 | 4676 common_end(s); |
4677 | |
4678 return 0; | |
4679 } | |
4680 | |
4681 AVCodec snow_decoder = { | |
4682 "snow", | |
4683 CODEC_TYPE_VIDEO, | |
4684 CODEC_ID_SNOW, | |
4685 sizeof(SnowContext), | |
4686 decode_init, | |
4687 NULL, | |
4688 decode_end, | |
4689 decode_frame, | |
4690 0 /*CODEC_CAP_DR1*/ /*| CODEC_CAP_DRAW_HORIZ_BAND*/, | |
6712 | 4691 NULL, |
7040
e943e1409077
Make AVCodec long_names definition conditional depending on CONFIG_SMALL.
stefano
parents:
6712
diff
changeset
|
4692 .long_name = NULL_IF_CONFIG_SMALL("Snow"), |
2138 | 4693 }; |
4694 | |
8590 | 4695 #if CONFIG_SNOW_ENCODER |
2138 | 4696 AVCodec snow_encoder = { |
4697 "snow", | |
4698 CODEC_TYPE_VIDEO, | |
4699 CODEC_ID_SNOW, | |
4700 sizeof(SnowContext), | |
4701 encode_init, | |
4702 encode_frame, | |
4703 encode_end, | |
7040
e943e1409077
Make AVCodec long_names definition conditional depending on CONFIG_SMALL.
stefano
parents:
6712
diff
changeset
|
4704 .long_name = NULL_IF_CONFIG_SMALL("Snow"), |
2138 | 4705 }; |
2408
a6e4da1c28ee
Disable encoders patch by (Gianluigi Tiesi <mplayer netfarm it>)
michael
parents:
2368
diff
changeset
|
4706 #endif |
2138 | 4707 |
4708 | |
6164
ecaf5226e9b0
Consistently use TEST as the preprocessor condition to enable test code.
diego
parents:
6144
diff
changeset
|
4709 #ifdef TEST |
2138 | 4710 #undef malloc |
4711 #undef free | |
4712 #undef printf | |
9199
ea0e5e9a520f
Replace random() usage in test programs by av_lfg_*().
diego
parents:
9197
diff
changeset
|
4713 |
ea0e5e9a520f
Replace random() usage in test programs by av_lfg_*().
diego
parents:
9197
diff
changeset
|
4714 #include "libavutil/lfg.h" |
2138 | 4715 |
5934 | 4716 int main(void){ |
2138 | 4717 int width=256; |
4718 int height=256; | |
4719 int buffer[2][width*height]; | |
4720 SnowContext s; | |
4721 int i; | |
9388
2313bf51945b
cosmetics: Rename prn variable to prng (Pseudo Random Number Generator).
diego
parents:
9376
diff
changeset
|
4722 AVLFG prng; |
2138 | 4723 s.spatial_decomposition_count=6; |
4724 s.spatial_decomposition_type=1; | |
9199
ea0e5e9a520f
Replace random() usage in test programs by av_lfg_*().
diego
parents:
9197
diff
changeset
|
4725 |
9388
2313bf51945b
cosmetics: Rename prn variable to prng (Pseudo Random Number Generator).
diego
parents:
9376
diff
changeset
|
4726 av_lfg_init(&prng, 1); |
2967 | 4727 |
2138 | 4728 printf("testing 5/3 DWT\n"); |
4729 for(i=0; i<width*height; i++) | |
9388
2313bf51945b
cosmetics: Rename prn variable to prng (Pseudo Random Number Generator).
diego
parents:
9376
diff
changeset
|
4730 buffer[0][i] = buffer[1][i] = av_lfg_get(&prng) % 54321 - 12345; |
2967 | 4731 |
2951 | 4732 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count); |
4733 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count); | |
2967 | 4734 |
2138 | 4735 for(i=0; i<width*height; i++) |
9197 | 4736 if(buffer[0][i]!= buffer[1][i]) printf("fsck: %6d %12d %7d\n",i, buffer[0][i], buffer[1][i]); |
2138 | 4737 |
4738 printf("testing 9/7 DWT\n"); | |
4739 s.spatial_decomposition_type=0; | |
4740 for(i=0; i<width*height; i++) | |
9388
2313bf51945b
cosmetics: Rename prn variable to prng (Pseudo Random Number Generator).
diego
parents:
9376
diff
changeset
|
4741 buffer[0][i] = buffer[1][i] = av_lfg_get(&prng) % 54321 - 12345; |
2967 | 4742 |
2951 | 4743 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count); |
4744 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count); | |
2967 | 4745 |
2138 | 4746 for(i=0; i<width*height; i++) |
9197 | 4747 if(FFABS(buffer[0][i] - buffer[1][i])>20) printf("fsck: %6d %12d %7d\n",i, buffer[0][i], buffer[1][i]); |
2967 | 4748 |
2951 | 4749 #if 0 |
2138 | 4750 printf("testing AC coder\n"); |
4751 memset(s.header_state, 0, sizeof(s.header_state)); | |
2335 | 4752 ff_init_range_encoder(&s.c, buffer[0], 256*256); |
2138 | 4753 ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64); |
2967 | 4754 |
2138 | 4755 for(i=-256; i<256; i++){ |
4001 | 4756 put_symbol(&s.c, s.header_state, i*i*i/3*FFABS(i), 1); |
2138 | 4757 } |
2335 | 4758 ff_rac_terminate(&s.c); |
2138 | 4759 |
4760 memset(s.header_state, 0, sizeof(s.header_state)); | |
2335 | 4761 ff_init_range_decoder(&s.c, buffer[0], 256*256); |
2138 | 4762 ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64); |
2967 | 4763 |
2138 | 4764 for(i=-256; i<256; i++){ |
4765 int j; | |
4766 j= get_symbol(&s.c, s.header_state, 1); | |
4001 | 4767 if(j!=i*i*i/3*FFABS(i)) printf("fsck: %d != %d\n", i, j); |
2138 | 4768 } |
2951 | 4769 #endif |
6414 | 4770 { |
4771 int level, orientation, x, y; | |
4772 int64_t errors[8][4]; | |
4773 int64_t g=0; | |
4774 | |
4775 memset(errors, 0, sizeof(errors)); | |
4776 s.spatial_decomposition_count=3; | |
4777 s.spatial_decomposition_type=0; | |
4778 for(level=0; level<s.spatial_decomposition_count; level++){ | |
4779 for(orientation=level ? 1 : 0; orientation<4; orientation++){ | |
4780 int w= width >> (s.spatial_decomposition_count-level); | |
4781 int h= height >> (s.spatial_decomposition_count-level); | |
4782 int stride= width << (s.spatial_decomposition_count-level); | |
4783 DWTELEM *buf= buffer[0]; | |
4784 int64_t error=0; | |
4785 | |
4786 if(orientation&1) buf+=w; | |
4787 if(orientation>1) buf+=stride>>1; | |
4788 | |
4789 memset(buffer[0], 0, sizeof(int)*width*height); | |
4790 buf[w/2 + h/2*stride]= 256*256; | |
4791 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count); | |
4792 for(y=0; y<height; y++){ | |
4793 for(x=0; x<width; x++){ | |
4794 int64_t d= buffer[0][x + y*width]; | |
4795 error += d*d; | |
4796 if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9 && level==2) printf("%8"PRId64" ", d); | |
4797 } | |
4798 if(FFABS(height/2-y)<9 && level==2) printf("\n"); | |
2138 | 4799 } |
6414 | 4800 error= (int)(sqrt(error)+0.5); |
4801 errors[level][orientation]= error; | |
8611 | 4802 if(g) g=av_gcd(g, error); |
6414 | 4803 else g= error; |
2138 | 4804 } |
4805 } | |
6414 | 4806 printf("static int const visual_weight[][4]={\n"); |
4807 for(level=0; level<s.spatial_decomposition_count; level++){ | |
4808 printf(" {"); | |
4809 for(orientation=0; orientation<4; orientation++){ | |
4810 printf("%8"PRId64",", errors[level][orientation]/g); | |
4811 } | |
4812 printf("},\n"); | |
2138 | 4813 } |
6414 | 4814 printf("};\n"); |
4815 { | |
2138 | 4816 int level=2; |
4817 int w= width >> (s.spatial_decomposition_count-level); | |
6168 | 4818 //int h= height >> (s.spatial_decomposition_count-level); |
2138 | 4819 int stride= width << (s.spatial_decomposition_count-level); |
4820 DWTELEM *buf= buffer[0]; | |
4821 int64_t error=0; | |
4822 | |
4823 buf+=w; | |
4824 buf+=stride>>1; | |
2967 | 4825 |
2138 | 4826 memset(buffer[0], 0, sizeof(int)*width*height); |
4827 #if 1 | |
4828 for(y=0; y<height; y++){ | |
4829 for(x=0; x<width; x++){ | |
4830 int tab[4]={0,2,3,1}; | |
4831 buffer[0][x+width*y]= 256*256*tab[(x&1) + 2*(y&1)]; | |
4832 } | |
4833 } | |
2951 | 4834 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count); |
2138 | 4835 #else |
4836 for(y=0; y<h; y++){ | |
4837 for(x=0; x<w; x++){ | |
4838 buf[x + y*stride ]=169; | |
4839 buf[x + y*stride-w]=64; | |
4840 } | |
4841 } | |
2951 | 4842 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count); |
2138 | 4843 #endif |
4844 for(y=0; y<height; y++){ | |
4845 for(x=0; x<width; x++){ | |
4846 int64_t d= buffer[0][x + y*width]; | |
4847 error += d*d; | |
4122
daae66c03857
Replace most of the %lld and %llx by their (cleaner) PRI*64 counterparts.
diego
parents:
4011
diff
changeset
|
4848 if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9) printf("%8"PRId64" ", d); |
2138 | 4849 } |
4001 | 4850 if(FFABS(height/2-y)<9) printf("\n"); |
2138 | 4851 } |
6414 | 4852 } |
4853 | |
2138 | 4854 } |
4855 return 0; | |
4856 } | |
6164
ecaf5226e9b0
Consistently use TEST as the preprocessor condition to enable test code.
diego
parents:
6144
diff
changeset
|
4857 #endif /* TEST */ |