Mercurial > libavcodec.hg
annotate snow.c @ 6434:442f96044f19 libavcodec
remove invalid free() forgotten in r12300
author | mru |
---|---|
date | Mon, 03 Mar 2008 21:54:18 +0000 |
parents | fd74b42a19f4 |
children | 5154ab444372 |
rev | line source |
---|---|
2138 | 1 /* |
2 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at> | |
3 * | |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3920
diff
changeset
|
4 * This file is part of FFmpeg. |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3920
diff
changeset
|
5 * |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3920
diff
changeset
|
6 * FFmpeg is free software; you can redistribute it and/or |
2138 | 7 * modify it under the terms of the GNU Lesser General Public |
8 * License as published by the Free Software Foundation; either | |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3920
diff
changeset
|
9 * version 2.1 of the License, or (at your option) any later version. |
2138 | 10 * |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3920
diff
changeset
|
11 * FFmpeg is distributed in the hope that it will be useful, |
2138 | 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 * Lesser General Public License for more details. | |
15 * | |
16 * You should have received a copy of the GNU Lesser General Public | |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3920
diff
changeset
|
17 * License along with FFmpeg; if not, write to the Free Software |
3036
0b546eab515d
Update licensing information: The FSF changed postal address.
diego
parents:
3035
diff
changeset
|
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
2138 | 19 */ |
20 | |
21 #include "avcodec.h" | |
22 #include "dsputil.h" | |
3198
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
23 #include "snow.h" |
2335 | 24 |
25 #include "rangecoder.h" | |
2138 | 26 |
27 #include "mpegvideo.h" | |
28 | |
29 #undef NDEBUG | |
30 #include <assert.h> | |
31 | |
32 static const int8_t quant3[256]={ | |
33 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
34 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
35 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
36 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
37 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
38 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
39 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
40 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
41 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
42 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
43 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
44 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
45 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
46 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
47 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
48 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0, | |
49 }; | |
50 static const int8_t quant3b[256]={ | |
51 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
52 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
53 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
54 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
55 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
56 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
57 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
58 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
59 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
60 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
61 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
62 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
63 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
64 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
65 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
66 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, | |
67 }; | |
2596 | 68 static const int8_t quant3bA[256]={ |
69 0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
70 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
71 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
72 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
73 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
74 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
75 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
76 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
77 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
78 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
79 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
80 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
81 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
82 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
83 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
84 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, | |
85 }; | |
2138 | 86 static const int8_t quant5[256]={ |
87 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
88 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
89 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
90 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
91 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
92 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
93 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
94 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
95 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, | |
96 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, | |
97 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, | |
98 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, | |
99 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, | |
100 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, | |
101 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, | |
102 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1, | |
103 }; | |
104 static const int8_t quant7[256]={ | |
105 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
106 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
107 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, | |
108 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, | |
109 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, | |
110 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, | |
111 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, | |
112 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, | |
113 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3, | |
114 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3, | |
115 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3, | |
116 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3, | |
117 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3, | |
118 -3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2, | |
119 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, | |
120 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1, | |
121 }; | |
122 static const int8_t quant9[256]={ | |
123 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, | |
124 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, | |
125 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, | |
126 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, | |
127 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, | |
128 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, | |
129 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, | |
130 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, | |
131 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, | |
132 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, | |
133 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, | |
134 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, | |
135 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, | |
136 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, | |
137 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3, | |
138 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1, | |
139 }; | |
140 static const int8_t quant11[256]={ | |
141 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, | |
142 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, | |
143 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, | |
144 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, | |
145 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, | |
146 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, | |
147 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, | |
148 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, | |
149 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5, | |
150 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5, | |
151 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5, | |
152 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5, | |
153 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5, | |
154 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4, | |
155 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, | |
156 -4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1, | |
157 }; | |
158 static const int8_t quant13[256]={ | |
159 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, | |
160 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, | |
161 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, | |
162 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, | |
163 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, | |
164 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, | |
165 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, | |
166 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, | |
167 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6, | |
168 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6, | |
169 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6, | |
170 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6, | |
171 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5, | |
172 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5, | |
173 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5, | |
174 -4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1, | |
175 }; | |
176 | |
177 #if 0 //64*cubic | |
178 static const uint8_t obmc32[1024]={ | |
5659
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
179 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
180 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
181 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 16, 16, 16, 16, 16, 16, 16, 16, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
182 0, 0, 4, 4, 8, 8, 12, 16, 16, 20, 24, 24, 28, 28, 32, 32, 32, 32, 28, 28, 24, 24, 20, 16, 16, 12, 8, 8, 4, 4, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
183 0, 0, 4, 8, 8, 12, 16, 24, 28, 32, 36, 40, 44, 48, 48, 48, 48, 48, 48, 44, 40, 36, 32, 28, 24, 16, 12, 8, 8, 4, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
184 0, 4, 4, 8, 12, 20, 24, 32, 40, 44, 52, 56, 60, 64, 68, 72, 72, 68, 64, 60, 56, 52, 44, 40, 32, 24, 20, 12, 8, 4, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
185 0, 4, 4, 12, 16, 24, 32, 40, 52, 60, 68, 76, 80, 88, 88, 92, 92, 88, 88, 80, 76, 68, 60, 52, 40, 32, 24, 16, 12, 4, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
186 0, 4, 8, 16, 24, 32, 40, 52, 64, 76, 84, 92,100,108,112,116,116,112,108,100, 92, 84, 76, 64, 52, 40, 32, 24, 16, 8, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
187 0, 4, 8, 16, 28, 40, 52, 64, 76, 88,100,112,124,132,136,140,140,136,132,124,112,100, 88, 76, 64, 52, 40, 28, 16, 8, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
188 0, 4, 12, 20, 32, 44, 60, 76, 88,104,120,132,144,152,160,164,164,160,152,144,132,120,104, 88, 76, 60, 44, 32, 20, 12, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
189 0, 4, 12, 24, 36, 48, 68, 84,100,120,136,152,164,176,180,184,184,180,176,164,152,136,120,100, 84, 68, 48, 36, 24, 12, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
190 0, 4, 12, 24, 40, 56, 76, 92,112,132,152,168,180,192,204,208,208,204,192,180,168,152,132,112, 92, 76, 56, 40, 24, 12, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
191 0, 4, 16, 28, 44, 60, 80,100,124,144,164,180,196,208,220,224,224,220,208,196,180,164,144,124,100, 80, 60, 44, 28, 16, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
192 0, 8, 16, 28, 48, 64, 88,108,132,152,176,192,208,224,232,240,240,232,224,208,192,176,152,132,108, 88, 64, 48, 28, 16, 8, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
193 0, 4, 16, 32, 48, 68, 88,112,136,160,180,204,220,232,244,248,248,244,232,220,204,180,160,136,112, 88, 68, 48, 32, 16, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
194 1, 8, 16, 32, 48, 72, 92,116,140,164,184,208,224,240,248,255,255,248,240,224,208,184,164,140,116, 92, 72, 48, 32, 16, 8, 1, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
195 1, 8, 16, 32, 48, 72, 92,116,140,164,184,208,224,240,248,255,255,248,240,224,208,184,164,140,116, 92, 72, 48, 32, 16, 8, 1, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
196 0, 4, 16, 32, 48, 68, 88,112,136,160,180,204,220,232,244,248,248,244,232,220,204,180,160,136,112, 88, 68, 48, 32, 16, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
197 0, 8, 16, 28, 48, 64, 88,108,132,152,176,192,208,224,232,240,240,232,224,208,192,176,152,132,108, 88, 64, 48, 28, 16, 8, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
198 0, 4, 16, 28, 44, 60, 80,100,124,144,164,180,196,208,220,224,224,220,208,196,180,164,144,124,100, 80, 60, 44, 28, 16, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
199 0, 4, 12, 24, 40, 56, 76, 92,112,132,152,168,180,192,204,208,208,204,192,180,168,152,132,112, 92, 76, 56, 40, 24, 12, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
200 0, 4, 12, 24, 36, 48, 68, 84,100,120,136,152,164,176,180,184,184,180,176,164,152,136,120,100, 84, 68, 48, 36, 24, 12, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
201 0, 4, 12, 20, 32, 44, 60, 76, 88,104,120,132,144,152,160,164,164,160,152,144,132,120,104, 88, 76, 60, 44, 32, 20, 12, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
202 0, 4, 8, 16, 28, 40, 52, 64, 76, 88,100,112,124,132,136,140,140,136,132,124,112,100, 88, 76, 64, 52, 40, 28, 16, 8, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
203 0, 4, 8, 16, 24, 32, 40, 52, 64, 76, 84, 92,100,108,112,116,116,112,108,100, 92, 84, 76, 64, 52, 40, 32, 24, 16, 8, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
204 0, 4, 4, 12, 16, 24, 32, 40, 52, 60, 68, 76, 80, 88, 88, 92, 92, 88, 88, 80, 76, 68, 60, 52, 40, 32, 24, 16, 12, 4, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
205 0, 4, 4, 8, 12, 20, 24, 32, 40, 44, 52, 56, 60, 64, 68, 72, 72, 68, 64, 60, 56, 52, 44, 40, 32, 24, 20, 12, 8, 4, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
206 0, 0, 4, 8, 8, 12, 16, 24, 28, 32, 36, 40, 44, 48, 48, 48, 48, 48, 48, 44, 40, 36, 32, 28, 24, 16, 12, 8, 8, 4, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
207 0, 0, 4, 4, 8, 8, 12, 16, 16, 20, 24, 24, 28, 28, 32, 32, 32, 32, 28, 28, 24, 24, 20, 16, 16, 12, 8, 8, 4, 4, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
208 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 16, 16, 16, 16, 16, 16, 16, 16, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
209 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
210 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
2138 | 211 //error:0.000022 |
212 }; | |
213 static const uint8_t obmc16[256]={ | |
5659
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
214 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
215 0, 4, 4, 8, 16, 20, 20, 24, 24, 20, 20, 16, 8, 4, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
216 0, 4, 16, 24, 36, 44, 52, 60, 60, 52, 44, 36, 24, 16, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
217 0, 8, 24, 44, 60, 80, 96,104,104, 96, 80, 60, 44, 24, 8, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
218 0, 16, 36, 60, 92,116,136,152,152,136,116, 92, 60, 36, 16, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
219 0, 20, 44, 80,116,152,180,196,196,180,152,116, 80, 44, 20, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
220 4, 20, 52, 96,136,180,212,228,228,212,180,136, 96, 52, 20, 4, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
221 4, 24, 60,104,152,196,228,248,248,228,196,152,104, 60, 24, 4, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
222 4, 24, 60,104,152,196,228,248,248,228,196,152,104, 60, 24, 4, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
223 4, 20, 52, 96,136,180,212,228,228,212,180,136, 96, 52, 20, 4, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
224 0, 20, 44, 80,116,152,180,196,196,180,152,116, 80, 44, 20, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
225 0, 16, 36, 60, 92,116,136,152,152,136,116, 92, 60, 36, 16, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
226 0, 8, 24, 44, 60, 80, 96,104,104, 96, 80, 60, 44, 24, 8, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
227 0, 4, 16, 24, 36, 44, 52, 60, 60, 52, 44, 36, 24, 16, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
228 0, 4, 4, 8, 16, 20, 20, 24, 24, 20, 20, 16, 8, 4, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
229 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, |
2138 | 230 //error:0.000033 |
231 }; | |
232 #elif 1 // 64*linear | |
233 static const uint8_t obmc32[1024]={ | |
3206 | 234 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, |
235 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0, | |
236 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0, | |
237 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0, | |
238 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4, | |
239 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4, | |
240 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4, | |
241 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4, | |
242 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4, | |
243 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4, | |
244 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4, | |
245 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4, | |
246 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8, | |
247 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8, | |
248 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8, | |
249 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8, | |
250 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8, | |
251 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8, | |
252 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8, | |
253 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8, | |
254 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4, | |
255 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4, | |
256 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4, | |
257 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4, | |
258 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4, | |
259 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4, | |
260 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4, | |
261 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4, | |
262 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0, | |
263 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0, | |
264 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0, | |
265 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, | |
2138 | 266 //error:0.000020 |
267 }; | |
268 static const uint8_t obmc16[256]={ | |
3206 | 269 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0, |
270 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4, | |
271 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4, | |
272 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8, | |
273 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8, | |
274 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12, | |
275 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12, | |
276 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16, | |
277 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16, | |
278 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12, | |
279 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12, | |
280 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8, | |
281 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8, | |
282 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4, | |
283 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4, | |
284 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0, | |
2138 | 285 //error:0.000015 |
286 }; | |
287 #else //64*cos | |
288 static const uint8_t obmc32[1024]={ | |
5659
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
289 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
290 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 4, 4, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
291 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 12, 16, 16, 16, 16, 16, 16, 12, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
292 0, 0, 4, 4, 4, 8, 8, 12, 16, 20, 20, 24, 28, 28, 28, 28, 28, 28, 28, 28, 24, 20, 20, 16, 12, 8, 8, 4, 4, 4, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
293 0, 0, 4, 4, 8, 12, 16, 20, 24, 28, 36, 40, 44, 44, 48, 48, 48, 48, 44, 44, 40, 36, 28, 24, 20, 16, 12, 8, 4, 4, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
294 0, 0, 4, 8, 12, 20, 24, 32, 36, 44, 48, 56, 60, 64, 68, 68, 68, 68, 64, 60, 56, 48, 44, 36, 32, 24, 20, 12, 8, 4, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
295 0, 4, 4, 8, 16, 24, 32, 40, 48, 60, 68, 76, 80, 84, 88, 92, 92, 88, 84, 80, 76, 68, 60, 48, 40, 32, 24, 16, 8, 4, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
296 0, 4, 8, 12, 20, 32, 40, 52, 64, 76, 84, 96,104,108,112,116,116,112,108,104, 96, 84, 76, 64, 52, 40, 32, 20, 12, 8, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
297 0, 4, 8, 16, 24, 36, 48, 64, 76, 92,104,116,124,132,136,140,140,136,132,124,116,104, 92, 76, 64, 48, 36, 24, 16, 8, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
298 0, 4, 12, 20, 28, 44, 60, 76, 92,104,120,136,148,156,160,164,164,160,156,148,136,120,104, 92, 76, 60, 44, 28, 20, 12, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
299 0, 4, 12, 20, 36, 48, 68, 84,104,120,140,152,168,176,184,188,188,184,176,168,152,140,120,104, 84, 68, 48, 36, 20, 12, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
300 0, 4, 12, 24, 36, 56, 76, 96,116,136,152,172,184,196,204,208,208,204,196,184,172,152,136,116, 96, 76, 56, 36, 24, 12, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
301 0, 4, 12, 24, 44, 60, 80,104,124,148,168,184,200,212,224,228,228,224,212,200,184,168,148,124,104, 80, 60, 44, 24, 12, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
302 0, 4, 12, 28, 44, 64, 84,108,132,156,176,196,212,228,236,240,240,236,228,212,196,176,156,132,108, 84, 64, 44, 28, 12, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
303 0, 4, 16, 28, 48, 68, 88,112,136,160,184,204,224,236,244,252,252,244,236,224,204,184,160,136,112, 88, 68, 48, 28, 16, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
304 1, 4, 16, 28, 48, 68, 92,116,140,164,188,208,228,240,252,255,255,252,240,228,208,188,164,140,116, 92, 68, 48, 28, 16, 4, 1, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
305 1, 4, 16, 28, 48, 68, 92,116,140,164,188,208,228,240,252,255,255,252,240,228,208,188,164,140,116, 92, 68, 48, 28, 16, 4, 1, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
306 0, 4, 16, 28, 48, 68, 88,112,136,160,184,204,224,236,244,252,252,244,236,224,204,184,160,136,112, 88, 68, 48, 28, 16, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
307 0, 4, 12, 28, 44, 64, 84,108,132,156,176,196,212,228,236,240,240,236,228,212,196,176,156,132,108, 84, 64, 44, 28, 12, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
308 0, 4, 12, 24, 44, 60, 80,104,124,148,168,184,200,212,224,228,228,224,212,200,184,168,148,124,104, 80, 60, 44, 24, 12, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
309 0, 4, 12, 24, 36, 56, 76, 96,116,136,152,172,184,196,204,208,208,204,196,184,172,152,136,116, 96, 76, 56, 36, 24, 12, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
310 0, 4, 12, 20, 36, 48, 68, 84,104,120,140,152,168,176,184,188,188,184,176,168,152,140,120,104, 84, 68, 48, 36, 20, 12, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
311 0, 4, 12, 20, 28, 44, 60, 76, 92,104,120,136,148,156,160,164,164,160,156,148,136,120,104, 92, 76, 60, 44, 28, 20, 12, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
312 0, 4, 8, 16, 24, 36, 48, 64, 76, 92,104,116,124,132,136,140,140,136,132,124,116,104, 92, 76, 64, 48, 36, 24, 16, 8, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
313 0, 4, 8, 12, 20, 32, 40, 52, 64, 76, 84, 96,104,108,112,116,116,112,108,104, 96, 84, 76, 64, 52, 40, 32, 20, 12, 8, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
314 0, 4, 4, 8, 16, 24, 32, 40, 48, 60, 68, 76, 80, 84, 88, 92, 92, 88, 84, 80, 76, 68, 60, 48, 40, 32, 24, 16, 8, 4, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
315 0, 0, 4, 8, 12, 20, 24, 32, 36, 44, 48, 56, 60, 64, 68, 68, 68, 68, 64, 60, 56, 48, 44, 36, 32, 24, 20, 12, 8, 4, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
316 0, 0, 4, 4, 8, 12, 16, 20, 24, 28, 36, 40, 44, 44, 48, 48, 48, 48, 44, 44, 40, 36, 28, 24, 20, 16, 12, 8, 4, 4, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
317 0, 0, 4, 4, 4, 8, 8, 12, 16, 20, 20, 24, 28, 28, 28, 28, 28, 28, 28, 28, 24, 20, 20, 16, 12, 8, 8, 4, 4, 4, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
318 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 12, 16, 16, 16, 16, 16, 16, 12, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
319 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 4, 4, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
320 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
2138 | 321 //error:0.000022 |
322 }; | |
323 static const uint8_t obmc16[256]={ | |
5659
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
324 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
325 0, 0, 4, 8, 12, 16, 20, 20, 20, 20, 16, 12, 8, 4, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
326 0, 4, 12, 24, 32, 44, 52, 56, 56, 52, 44, 32, 24, 12, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
327 0, 8, 24, 40, 60, 80, 96,104,104, 96, 80, 60, 40, 24, 8, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
328 0, 12, 32, 64, 92,120,140,152,152,140,120, 92, 64, 32, 12, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
329 4, 16, 44, 80,120,156,184,196,196,184,156,120, 80, 44, 16, 4, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
330 4, 20, 52, 96,140,184,216,232,232,216,184,140, 96, 52, 20, 4, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
331 0, 20, 56,104,152,196,232,252,252,232,196,152,104, 56, 20, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
332 0, 20, 56,104,152,196,232,252,252,232,196,152,104, 56, 20, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
333 4, 20, 52, 96,140,184,216,232,232,216,184,140, 96, 52, 20, 4, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
334 4, 16, 44, 80,120,156,184,196,196,184,156,120, 80, 44, 16, 4, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
335 0, 12, 32, 64, 92,120,140,152,152,140,120, 92, 64, 32, 12, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
336 0, 8, 24, 40, 60, 80, 96,104,104, 96, 80, 60, 40, 24, 8, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
337 0, 4, 12, 24, 32, 44, 52, 56, 56, 52, 44, 32, 24, 12, 4, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
338 0, 0, 4, 8, 12, 16, 20, 20, 20, 20, 16, 12, 8, 4, 0, 0, |
ff44e77914ca
scaling obmc tables under #if 0 also by 4 (yes they where forgotten apparently)
michael
parents:
5652
diff
changeset
|
339 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, |
2138 | 340 //error:0.000022 |
341 }; | |
5910 | 342 #endif /* 0 */ |
2138 | 343 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
344 //linear *64 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
345 static const uint8_t obmc8[64]={ |
3206 | 346 4, 12, 20, 28, 28, 20, 12, 4, |
347 12, 36, 60, 84, 84, 60, 36, 12, | |
348 20, 60,100,140,140,100, 60, 20, | |
349 28, 84,140,196,196,140, 84, 28, | |
350 28, 84,140,196,196,140, 84, 28, | |
351 20, 60,100,140,140,100, 60, 20, | |
352 12, 36, 60, 84, 84, 60, 36, 12, | |
353 4, 12, 20, 28, 28, 20, 12, 4, | |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
354 //error:0.000000 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
355 }; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
356 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
357 //linear *64 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
358 static const uint8_t obmc4[16]={ |
3206 | 359 16, 48, 48, 16, |
360 48,144,144, 48, | |
361 48,144,144, 48, | |
362 16, 48, 48, 16, | |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
363 //error:0.000000 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
364 }; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
365 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
366 static const uint8_t *obmc_tab[4]={ |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
367 obmc32, obmc16, obmc8, obmc4 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
368 }; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
369 |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
370 static int scale_mv_ref[MAX_REF_FRAMES][MAX_REF_FRAMES]; |
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
371 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
372 typedef struct BlockNode{ |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
373 int16_t mx; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
374 int16_t my; |
3314 | 375 uint8_t ref; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
376 uint8_t color[3]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
377 uint8_t type; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
378 //#define TYPE_SPLIT 1 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
379 #define BLOCK_INTRA 1 |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
380 #define BLOCK_OPT 2 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
381 //#define TYPE_NOCOLOR 4 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
382 uint8_t level; //FIXME merge into type? |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
383 }BlockNode; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
384 |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
385 static const BlockNode null_block= { //FIXME add border maybe |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
386 .color= {128,128,128}, |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
387 .mx= 0, |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
388 .my= 0, |
3314 | 389 .ref= 0, |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
390 .type= 0, |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
391 .level= 0, |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
392 }; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
393 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
394 #define LOG2_MB_SIZE 4 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
395 #define MB_SIZE (1<<LOG2_MB_SIZE) |
5575 | 396 #define ENCODER_EXTRA_BITS 4 |
5652
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
397 #define HTAPS_MAX 8 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
398 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
399 typedef struct x_and_coeff{ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
400 int16_t x; |
2596 | 401 uint16_t coeff; |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
402 } x_and_coeff; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
403 |
2138 | 404 typedef struct SubBand{ |
405 int level; | |
406 int stride; | |
407 int width; | |
408 int height; | |
6412 | 409 int qlog; ///< log(qscale)/log[2^(1/6)] |
2138 | 410 DWTELEM *buf; |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
411 IDWTELEM *ibuf; |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
412 int buf_x_offset; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
413 int buf_y_offset; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
414 int stride_line; ///< Stride measured in lines, not pixels. |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
415 x_and_coeff * x_coeff; |
2138 | 416 struct SubBand *parent; |
417 uint8_t state[/*7*2*/ 7 + 512][32]; | |
418 }SubBand; | |
419 | |
420 typedef struct Plane{ | |
421 int width; | |
422 int height; | |
423 SubBand band[MAX_DECOMPOSITIONS][4]; | |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
424 |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
425 int htaps; |
5652
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
426 int8_t hcoeff[HTAPS_MAX/2]; |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
427 int diag_mc; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
428 int fast_mc; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
429 |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
430 int last_htaps; |
5652
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
431 int8_t last_hcoeff[HTAPS_MAX/2]; |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
432 int last_diag_mc; |
2138 | 433 }Plane; |
434 | |
435 typedef struct SnowContext{ | |
6412 | 436 // MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to eventually make the motion estimation independent of MpegEncContext, so this will be removed then (FIXME/XXX) |
2138 | 437 |
438 AVCodecContext *avctx; | |
2335 | 439 RangeCoder c; |
2138 | 440 DSPContext dsp; |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
441 AVFrame new_picture; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
442 AVFrame input_picture; ///< new_picture with the internal linesizes |
2138 | 443 AVFrame current_picture; |
3314 | 444 AVFrame last_picture[MAX_REF_FRAMES]; |
5633
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
445 uint8_t *halfpel_plane[MAX_REF_FRAMES][4][4]; |
2138 | 446 AVFrame mconly_picture; |
447 // uint8_t q_context[16]; | |
448 uint8_t header_state[32]; | |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
449 uint8_t block_state[128 + 32*128]; |
2138 | 450 int keyframe; |
2199 | 451 int always_reset; |
2138 | 452 int version; |
453 int spatial_decomposition_type; | |
4331
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
454 int last_spatial_decomposition_type; |
2138 | 455 int temporal_decomposition_type; |
456 int spatial_decomposition_count; | |
5670
2f3431b34f0f
make changing spatial_decomposition_count per frame work
michael
parents:
5668
diff
changeset
|
457 int last_spatial_decomposition_count; |
2138 | 458 int temporal_decomposition_count; |
3314 | 459 int max_ref_frames; |
460 int ref_frames; | |
461 int16_t (*ref_mvs[MAX_REF_FRAMES])[2]; | |
462 uint32_t *ref_scores[MAX_REF_FRAMES]; | |
2138 | 463 DWTELEM *spatial_dwt_buffer; |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
464 IDWTELEM *spatial_idwt_buffer; |
2138 | 465 int colorspace_type; |
466 int chroma_h_shift; | |
467 int chroma_v_shift; | |
468 int spatial_scalability; | |
469 int qlog; | |
4331
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
470 int last_qlog; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
471 int lambda; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
472 int lambda2; |
3313 | 473 int pass1_rc; |
2138 | 474 int mv_scale; |
4331
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
475 int last_mv_scale; |
2138 | 476 int qbias; |
4331
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
477 int last_qbias; |
2138 | 478 #define QBIAS_SHIFT 3 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
479 int b_width; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
480 int b_height; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
481 int block_max_depth; |
4331
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
482 int last_block_max_depth; |
2138 | 483 Plane plane[MAX_PLANES]; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
484 BlockNode *block; |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
485 #define ME_CACHE_SIZE 1024 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
486 int me_cache[ME_CACHE_SIZE]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
487 int me_cache_generation; |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
488 slice_buffer sb; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
489 |
6412 | 490 MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to eventually make the motion estimation independent of MpegEncContext, so this will be removed then (FIXME/XXX) |
2138 | 491 }SnowContext; |
492 | |
2562 | 493 typedef struct { |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
494 IDWTELEM *b0; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
495 IDWTELEM *b1; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
496 IDWTELEM *b2; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
497 IDWTELEM *b3; |
2562 | 498 int y; |
499 } dwt_compose_t; | |
500 | |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
501 #define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num))) |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
502 //#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num))) |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
503 |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
504 static void iterative_me(SnowContext *s); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
505 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
506 static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, IDWTELEM * base_buffer) |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
507 { |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
508 int i; |
2967 | 509 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
510 buf->base_buffer = base_buffer; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
511 buf->line_count = line_count; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
512 buf->line_width = line_width; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
513 buf->data_count = max_allocated_lines; |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
514 buf->line = av_mallocz (sizeof(IDWTELEM *) * line_count); |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
515 buf->data_stack = av_malloc (sizeof(IDWTELEM *) * max_allocated_lines); |
2967 | 516 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
517 for (i = 0; i < max_allocated_lines; i++) |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
518 { |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
519 buf->data_stack[i] = av_malloc (sizeof(IDWTELEM) * line_width); |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
520 } |
2967 | 521 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
522 buf->data_stack_top = max_allocated_lines - 1; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
523 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
524 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
525 static IDWTELEM * slice_buffer_load_line(slice_buffer * buf, int line) |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
526 { |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
527 int offset; |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
528 IDWTELEM * buffer; |
2967 | 529 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
530 assert(buf->data_stack_top >= 0); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
531 // assert(!buf->line[line]); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
532 if (buf->line[line]) |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
533 return buf->line[line]; |
2967 | 534 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
535 offset = buf->line_width * line; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
536 buffer = buf->data_stack[buf->data_stack_top]; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
537 buf->data_stack_top--; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
538 buf->line[line] = buffer; |
2967 | 539 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
540 return buffer; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
541 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
542 |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
543 static void slice_buffer_release(slice_buffer * buf, int line) |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
544 { |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
545 int offset; |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
546 IDWTELEM * buffer; |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
547 |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
548 assert(line >= 0 && line < buf->line_count); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
549 assert(buf->line[line]); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
550 |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
551 offset = buf->line_width * line; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
552 buffer = buf->line[line]; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
553 buf->data_stack_top++; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
554 buf->data_stack[buf->data_stack_top] = buffer; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
555 buf->line[line] = NULL; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
556 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
557 |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
558 static void slice_buffer_flush(slice_buffer * buf) |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
559 { |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
560 int i; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
561 for (i = 0; i < buf->line_count; i++) |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
562 { |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
563 if (buf->line[i]) |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
564 slice_buffer_release(buf, i); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
565 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
566 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
567 |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
568 static void slice_buffer_destroy(slice_buffer * buf) |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
569 { |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
570 int i; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
571 slice_buffer_flush(buf); |
2967 | 572 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
573 for (i = buf->data_count - 1; i >= 0; i--) |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
574 { |
3190 | 575 av_freep(&buf->data_stack[i]); |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
576 } |
3190 | 577 av_freep(&buf->data_stack); |
578 av_freep(&buf->line); | |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
579 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
580 |
2979 | 581 #ifdef __sgi |
2368
a7ac68734a91
fix for build on IRIX by (Michel Bardiaux {mbardiaux peaktime be})
michael
parents:
2335
diff
changeset
|
582 // Avoid a name clash on SGI IRIX |
2979 | 583 #undef qexp |
2368
a7ac68734a91
fix for build on IRIX by (Michel Bardiaux {mbardiaux peaktime be})
michael
parents:
2335
diff
changeset
|
584 #endif |
2246 | 585 #define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0 |
2600 | 586 static uint8_t qexp[QROOT]; |
2138 | 587 |
588 static inline int mirror(int v, int m){ | |
2998 | 589 while((unsigned)v > (unsigned)m){ |
590 v=-v; | |
591 if(v<0) v+= 2*m; | |
592 } | |
593 return v; | |
2138 | 594 } |
595 | |
2335 | 596 static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){ |
2138 | 597 int i; |
598 | |
599 if(v){ | |
4001 | 600 const int a= FFABS(v); |
2138 | 601 const int e= av_log2(a); |
602 #if 1 | |
2967 | 603 const int el= FFMIN(e, 10); |
2335 | 604 put_rac(c, state+0, 0); |
2138 | 605 |
606 for(i=0; i<el; i++){ | |
2335 | 607 put_rac(c, state+1+i, 1); //1..10 |
2138 | 608 } |
609 for(; i<e; i++){ | |
2335 | 610 put_rac(c, state+1+9, 1); //1..10 |
2138 | 611 } |
2335 | 612 put_rac(c, state+1+FFMIN(i,9), 0); |
2138 | 613 |
614 for(i=e-1; i>=el; i--){ | |
2335 | 615 put_rac(c, state+22+9, (a>>i)&1); //22..31 |
2138 | 616 } |
617 for(; i>=0; i--){ | |
2335 | 618 put_rac(c, state+22+i, (a>>i)&1); //22..31 |
2138 | 619 } |
620 | |
621 if(is_signed) | |
2335 | 622 put_rac(c, state+11 + el, v < 0); //11..21 |
2138 | 623 #else |
2967 | 624 |
2335 | 625 put_rac(c, state+0, 0); |
2138 | 626 if(e<=9){ |
627 for(i=0; i<e; i++){ | |
2335 | 628 put_rac(c, state+1+i, 1); //1..10 |
2138 | 629 } |
2335 | 630 put_rac(c, state+1+i, 0); |
2138 | 631 |
632 for(i=e-1; i>=0; i--){ | |
2335 | 633 put_rac(c, state+22+i, (a>>i)&1); //22..31 |
2138 | 634 } |
635 | |
636 if(is_signed) | |
2335 | 637 put_rac(c, state+11 + e, v < 0); //11..21 |
2138 | 638 }else{ |
639 for(i=0; i<e; i++){ | |
2335 | 640 put_rac(c, state+1+FFMIN(i,9), 1); //1..10 |
2138 | 641 } |
2335 | 642 put_rac(c, state+1+FFMIN(i,9), 0); |
2138 | 643 |
644 for(i=e-1; i>=0; i--){ | |
2335 | 645 put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31 |
2138 | 646 } |
647 | |
648 if(is_signed) | |
2335 | 649 put_rac(c, state+11 + FFMIN(e,10), v < 0); //11..21 |
2138 | 650 } |
5910 | 651 #endif /* 1 */ |
2138 | 652 }else{ |
2335 | 653 put_rac(c, state+0, 1); |
2138 | 654 } |
655 } | |
656 | |
2335 | 657 static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){ |
658 if(get_rac(c, state+0)) | |
2138 | 659 return 0; |
660 else{ | |
2240
c46fed9b7575
simplify getsymbol patch by (Loren Merritt <lorenm at u dot washington dot edu>)
michael
parents:
2232
diff
changeset
|
661 int i, e, a; |
c46fed9b7575
simplify getsymbol patch by (Loren Merritt <lorenm at u dot washington dot edu>)
michael
parents:
2232
diff
changeset
|
662 e= 0; |
2335 | 663 while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10 |
2240
c46fed9b7575
simplify getsymbol patch by (Loren Merritt <lorenm at u dot washington dot edu>)
michael
parents:
2232
diff
changeset
|
664 e++; |
2138 | 665 } |
666 | |
2240
c46fed9b7575
simplify getsymbol patch by (Loren Merritt <lorenm at u dot washington dot edu>)
michael
parents:
2232
diff
changeset
|
667 a= 1; |
c46fed9b7575
simplify getsymbol patch by (Loren Merritt <lorenm at u dot washington dot edu>)
michael
parents:
2232
diff
changeset
|
668 for(i=e-1; i>=0; i--){ |
2335 | 669 a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31 |
2240
c46fed9b7575
simplify getsymbol patch by (Loren Merritt <lorenm at u dot washington dot edu>)
michael
parents:
2232
diff
changeset
|
670 } |
c46fed9b7575
simplify getsymbol patch by (Loren Merritt <lorenm at u dot washington dot edu>)
michael
parents:
2232
diff
changeset
|
671 |
2335 | 672 if(is_signed && get_rac(c, state+11 + FFMIN(e,10))) //11..21 |
2138 | 673 return -a; |
674 else | |
675 return a; | |
676 } | |
677 } | |
678 | |
2335 | 679 static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){ |
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
680 int i; |
2159
7f42295c1517
improved magnitude coding, 0.2% lower bitrate (foreman@352x288 qscale 1 and 8)
michael
parents:
2156
diff
changeset
|
681 int r= log2>=0 ? 1<<log2 : 1; |
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
682 |
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
683 assert(v>=0); |
2159
7f42295c1517
improved magnitude coding, 0.2% lower bitrate (foreman@352x288 qscale 1 and 8)
michael
parents:
2156
diff
changeset
|
684 assert(log2>=-4); |
7f42295c1517
improved magnitude coding, 0.2% lower bitrate (foreman@352x288 qscale 1 and 8)
michael
parents:
2156
diff
changeset
|
685 |
7f42295c1517
improved magnitude coding, 0.2% lower bitrate (foreman@352x288 qscale 1 and 8)
michael
parents:
2156
diff
changeset
|
686 while(v >= r){ |
2335 | 687 put_rac(c, state+4+log2, 1); |
2159
7f42295c1517
improved magnitude coding, 0.2% lower bitrate (foreman@352x288 qscale 1 and 8)
michael
parents:
2156
diff
changeset
|
688 v -= r; |
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
689 log2++; |
2159
7f42295c1517
improved magnitude coding, 0.2% lower bitrate (foreman@352x288 qscale 1 and 8)
michael
parents:
2156
diff
changeset
|
690 if(log2>0) r+=r; |
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
691 } |
2335 | 692 put_rac(c, state+4+log2, 0); |
2967 | 693 |
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
694 for(i=log2-1; i>=0; i--){ |
2335 | 695 put_rac(c, state+31-i, (v>>i)&1); |
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
696 } |
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
697 } |
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
698 |
2335 | 699 static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){ |
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
700 int i; |
2159
7f42295c1517
improved magnitude coding, 0.2% lower bitrate (foreman@352x288 qscale 1 and 8)
michael
parents:
2156
diff
changeset
|
701 int r= log2>=0 ? 1<<log2 : 1; |
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
702 int v=0; |
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
703 |
2159
7f42295c1517
improved magnitude coding, 0.2% lower bitrate (foreman@352x288 qscale 1 and 8)
michael
parents:
2156
diff
changeset
|
704 assert(log2>=-4); |
7f42295c1517
improved magnitude coding, 0.2% lower bitrate (foreman@352x288 qscale 1 and 8)
michael
parents:
2156
diff
changeset
|
705 |
2335 | 706 while(get_rac(c, state+4+log2)){ |
2159
7f42295c1517
improved magnitude coding, 0.2% lower bitrate (foreman@352x288 qscale 1 and 8)
michael
parents:
2156
diff
changeset
|
707 v+= r; |
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
708 log2++; |
2159
7f42295c1517
improved magnitude coding, 0.2% lower bitrate (foreman@352x288 qscale 1 and 8)
michael
parents:
2156
diff
changeset
|
709 if(log2>0) r+=r; |
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
710 } |
2967 | 711 |
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
712 for(i=log2-1; i>=0; i--){ |
2335 | 713 v+= get_rac(c, state+31-i)<<i; |
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
714 } |
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
715 |
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
716 return v; |
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
717 } |
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
718 |
5627 | 719 static av_always_inline void |
720 lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, | |
721 int dst_step, int src_step, int ref_step, | |
722 int width, int mul, int add, int shift, | |
723 int highpass, int inverse){ | |
2138 | 724 const int mirror_left= !highpass; |
725 const int mirror_right= (width&1) ^ highpass; | |
726 const int w= (width>>1) - 1 + (highpass & width); | |
727 int i; | |
728 | |
729 #define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref))) | |
730 if(mirror_left){ | |
731 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse); | |
732 dst += dst_step; | |
733 src += src_step; | |
734 } | |
2967 | 735 |
2138 | 736 for(i=0; i<w; i++){ |
5627 | 737 dst[i*dst_step] = |
738 LIFT(src[i*src_step], | |
739 ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift), | |
740 inverse); | |
2138 | 741 } |
2967 | 742 |
2138 | 743 if(mirror_right){ |
5627 | 744 dst[w*dst_step] = |
745 LIFT(src[w*src_step], | |
746 ((mul*2*ref[w*ref_step]+add)>>shift), | |
747 inverse); | |
2138 | 748 } |
749 } | |
750 | |
5627 | 751 static av_always_inline void |
752 inv_lift(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref, | |
753 int dst_step, int src_step, int ref_step, | |
754 int width, int mul, int add, int shift, | |
755 int highpass, int inverse){ | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
756 const int mirror_left= !highpass; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
757 const int mirror_right= (width&1) ^ highpass; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
758 const int w= (width>>1) - 1 + (highpass & width); |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
759 int i; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
760 |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
761 #define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref))) |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
762 if(mirror_left){ |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
763 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse); |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
764 dst += dst_step; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
765 src += src_step; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
766 } |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
767 |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
768 for(i=0; i<w; i++){ |
5627 | 769 dst[i*dst_step] = |
770 LIFT(src[i*src_step], | |
771 ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift), | |
772 inverse); | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
773 } |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
774 |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
775 if(mirror_right){ |
5627 | 776 dst[w*dst_step] = |
777 LIFT(src[w*src_step], | |
778 ((mul*2*ref[w*ref_step]+add)>>shift), | |
779 inverse); | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
780 } |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
781 } |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
782 |
3198
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
783 #ifndef liftS |
5627 | 784 static av_always_inline void |
785 liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, | |
786 int dst_step, int src_step, int ref_step, | |
787 int width, int mul, int add, int shift, | |
788 int highpass, int inverse){ | |
2602
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
789 const int mirror_left= !highpass; |
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
790 const int mirror_right= (width&1) ^ highpass; |
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
791 const int w= (width>>1) - 1 + (highpass & width); |
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
792 int i; |
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
793 |
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
794 assert(shift == 4); |
5627 | 795 #define LIFTS(src, ref, inv) \ |
796 ((inv) ? \ | |
797 (src) + (((ref) + 4*(src))>>shift): \ | |
798 -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23))) | |
2602
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
799 if(mirror_left){ |
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
800 dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse); |
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
801 dst += dst_step; |
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
802 src += src_step; |
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
803 } |
2967 | 804 |
2602
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
805 for(i=0; i<w; i++){ |
5627 | 806 dst[i*dst_step] = |
807 LIFTS(src[i*src_step], | |
808 mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add, | |
809 inverse); | |
2602
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
810 } |
2967 | 811 |
2602
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
812 if(mirror_right){ |
5627 | 813 dst[w*dst_step] = |
814 LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse); | |
2602
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
815 } |
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
816 } |
5627 | 817 static av_always_inline void |
818 inv_liftS(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref, | |
819 int dst_step, int src_step, int ref_step, | |
820 int width, int mul, int add, int shift, | |
821 int highpass, int inverse){ | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
822 const int mirror_left= !highpass; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
823 const int mirror_right= (width&1) ^ highpass; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
824 const int w= (width>>1) - 1 + (highpass & width); |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
825 int i; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
826 |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
827 assert(shift == 4); |
5627 | 828 #define LIFTS(src, ref, inv) \ |
829 ((inv) ? \ | |
830 (src) + (((ref) + 4*(src))>>shift): \ | |
831 -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23))) | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
832 if(mirror_left){ |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
833 dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse); |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
834 dst += dst_step; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
835 src += src_step; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
836 } |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
837 |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
838 for(i=0; i<w; i++){ |
5627 | 839 dst[i*dst_step] = |
840 LIFTS(src[i*src_step], | |
841 mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add, | |
842 inverse); | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
843 } |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
844 |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
845 if(mirror_right){ |
5627 | 846 dst[w*dst_step] = |
847 LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse); | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
848 } |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
849 } |
6413 | 850 #endif /* ! liftS */ |
2602
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
851 |
2241
c26038875ebc
consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>)
michael
parents:
2240
diff
changeset
|
852 static void horizontal_decompose53i(DWTELEM *b, int width){ |
c26038875ebc
consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>)
michael
parents:
2240
diff
changeset
|
853 DWTELEM temp[width]; |
2138 | 854 const int width2= width>>1; |
2893 | 855 int x; |
2138 | 856 const int w2= (width+1)>>1; |
857 | |
858 for(x=0; x<width2; x++){ | |
859 temp[x ]= b[2*x ]; | |
860 temp[x+w2]= b[2*x + 1]; | |
861 } | |
862 if(width&1) | |
863 temp[x ]= b[2*x ]; | |
864 #if 0 | |
2893 | 865 { |
866 int A1,A2,A3,A4; | |
2138 | 867 A2= temp[1 ]; |
868 A4= temp[0 ]; | |
869 A1= temp[0+width2]; | |
870 A1 -= (A2 + A4)>>1; | |
871 A4 += (A1 + 1)>>1; | |
872 b[0+width2] = A1; | |
873 b[0 ] = A4; | |
874 for(x=1; x+1<width2; x+=2){ | |
875 A3= temp[x+width2]; | |
876 A4= temp[x+1 ]; | |
877 A3 -= (A2 + A4)>>1; | |
878 A2 += (A1 + A3 + 2)>>2; | |
879 b[x+width2] = A3; | |
880 b[x ] = A2; | |
881 | |
882 A1= temp[x+1+width2]; | |
883 A2= temp[x+2 ]; | |
884 A1 -= (A2 + A4)>>1; | |
885 A4 += (A1 + A3 + 2)>>2; | |
886 b[x+1+width2] = A1; | |
887 b[x+1 ] = A4; | |
888 } | |
889 A3= temp[width-1]; | |
890 A3 -= A2; | |
891 A2 += (A1 + A3 + 2)>>2; | |
892 b[width -1] = A3; | |
893 b[width2-1] = A2; | |
2893 | 894 } |
2967 | 895 #else |
2138 | 896 lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0); |
897 lift(b , temp , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 0); | |
6413 | 898 #endif /* 0 */ |
2138 | 899 } |
900 | |
2241
c26038875ebc
consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>)
michael
parents:
2240
diff
changeset
|
901 static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ |
2138 | 902 int i; |
2967 | 903 |
2138 | 904 for(i=0; i<width; i++){ |
905 b1[i] -= (b0[i] + b2[i])>>1; | |
906 } | |
907 } | |
908 | |
2241
c26038875ebc
consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>)
michael
parents:
2240
diff
changeset
|
909 static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ |
2138 | 910 int i; |
2967 | 911 |
2138 | 912 for(i=0; i<width; i++){ |
913 b1[i] += (b0[i] + b2[i] + 2)>>2; | |
914 } | |
915 } | |
916 | |
2241
c26038875ebc
consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>)
michael
parents:
2240
diff
changeset
|
917 static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){ |
2198 | 918 int y; |
2138 | 919 DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride; |
920 DWTELEM *b1= buffer + mirror(-2 , height-1)*stride; | |
2967 | 921 |
2138 | 922 for(y=-2; y<height; y+=2){ |
923 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride; | |
924 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride; | |
925 | |
2998 | 926 if(y+1<(unsigned)height) horizontal_decompose53i(b2, width); |
927 if(y+2<(unsigned)height) horizontal_decompose53i(b3, width); | |
6405 | 928 |
2998 | 929 if(y+1<(unsigned)height) vertical_decompose53iH0(b1, b2, b3, width); |
930 if(y+0<(unsigned)height) vertical_decompose53iL0(b0, b1, b2, width); | |
2967 | 931 |
2138 | 932 b0=b2; |
933 b1=b3; | |
934 } | |
935 } | |
936 | |
2241
c26038875ebc
consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>)
michael
parents:
2240
diff
changeset
|
937 static void horizontal_decompose97i(DWTELEM *b, int width){ |
c26038875ebc
consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>)
michael
parents:
2240
diff
changeset
|
938 DWTELEM temp[width]; |
2138 | 939 const int w2= (width+1)>>1; |
940 | |
5565
93082c591c8b
Change rounding of the horizontal DWT to match the vertical one.
michael
parents:
5551
diff
changeset
|
941 lift (temp+w2, b +1, b , 1, 2, 2, width, W_AM, W_AO, W_AS, 1, 1); |
93082c591c8b
Change rounding of the horizontal DWT to match the vertical one.
michael
parents:
5551
diff
changeset
|
942 liftS(temp , b , temp+w2, 1, 2, 1, width, W_BM, W_BO, W_BS, 0, 0); |
5589
946c2db0a093
cleanup (remove some old experimentation related code)
michael
parents:
5588
diff
changeset
|
943 lift (b +w2, temp+w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 0); |
2138 | 944 lift (b , temp , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 0); |
945 } | |
946 | |
947 | |
2241
c26038875ebc
consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>)
michael
parents:
2240
diff
changeset
|
948 static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ |
2138 | 949 int i; |
2967 | 950 |
2138 | 951 for(i=0; i<width; i++){ |
952 b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS; | |
953 } | |
954 } | |
955 | |
2241
c26038875ebc
consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>)
michael
parents:
2240
diff
changeset
|
956 static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ |
2138 | 957 int i; |
2967 | 958 |
2138 | 959 for(i=0; i<width; i++){ |
960 b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS; | |
961 } | |
962 } | |
963 | |
2241
c26038875ebc
consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>)
michael
parents:
2240
diff
changeset
|
964 static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ |
2138 | 965 int i; |
2967 | 966 |
2138 | 967 for(i=0; i<width; i++){ |
2602
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
968 #ifdef liftS |
2138 | 969 b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS; |
2602
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
970 #else |
5565
93082c591c8b
Change rounding of the horizontal DWT to match the vertical one.
michael
parents:
5551
diff
changeset
|
971 b1[i] = (16*4*b1[i] - 4*(b0[i] + b2[i]) + W_BO*5 + (5<<27)) / (5*16) - (1<<23); |
2602
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
972 #endif |
2138 | 973 } |
974 } | |
975 | |
2241
c26038875ebc
consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>)
michael
parents:
2240
diff
changeset
|
976 static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ |
2138 | 977 int i; |
2967 | 978 |
2138 | 979 for(i=0; i<width; i++){ |
980 b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS; | |
981 } | |
982 } | |
983 | |
2241
c26038875ebc
consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>)
michael
parents:
2240
diff
changeset
|
984 static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){ |
2198 | 985 int y; |
2138 | 986 DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride; |
987 DWTELEM *b1= buffer + mirror(-4 , height-1)*stride; | |
988 DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride; | |
989 DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride; | |
2967 | 990 |
2138 | 991 for(y=-4; y<height; y+=2){ |
992 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride; | |
993 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride; | |
994 | |
2998 | 995 if(y+3<(unsigned)height) horizontal_decompose97i(b4, width); |
996 if(y+4<(unsigned)height) horizontal_decompose97i(b5, width); | |
6405 | 997 |
2998 | 998 if(y+3<(unsigned)height) vertical_decompose97iH0(b3, b4, b5, width); |
999 if(y+2<(unsigned)height) vertical_decompose97iL0(b2, b3, b4, width); | |
1000 if(y+1<(unsigned)height) vertical_decompose97iH1(b1, b2, b3, width); | |
1001 if(y+0<(unsigned)height) vertical_decompose97iL1(b0, b1, b2, width); | |
2138 | 1002 |
1003 b0=b2; | |
1004 b1=b3; | |
1005 b2=b4; | |
1006 b3=b5; | |
1007 } | |
1008 } | |
1009 | |
2241
c26038875ebc
consistent use of types patch by (D Richard Felker III <dalias at aerifal dot cx>)
michael
parents:
2240
diff
changeset
|
1010 void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){ |
2138 | 1011 int level; |
2967 | 1012 |
2164 | 1013 for(level=0; level<decomposition_count; level++){ |
1014 switch(type){ | |
3326 | 1015 case DWT_97: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break; |
1016 case DWT_53: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break; | |
2138 | 1017 } |
1018 } | |
1019 } | |
1020 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1021 static void horizontal_compose53i(IDWTELEM *b, int width){ |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1022 IDWTELEM temp[width]; |
2138 | 1023 const int width2= width>>1; |
1024 const int w2= (width+1)>>1; | |
2893 | 1025 int x; |
2138 | 1026 |
1027 #if 0 | |
2893 | 1028 int A1,A2,A3,A4; |
2138 | 1029 A2= temp[1 ]; |
1030 A4= temp[0 ]; | |
1031 A1= temp[0+width2]; | |
1032 A1 -= (A2 + A4)>>1; | |
1033 A4 += (A1 + 1)>>1; | |
1034 b[0+width2] = A1; | |
1035 b[0 ] = A4; | |
1036 for(x=1; x+1<width2; x+=2){ | |
1037 A3= temp[x+width2]; | |
1038 A4= temp[x+1 ]; | |
1039 A3 -= (A2 + A4)>>1; | |
1040 A2 += (A1 + A3 + 2)>>2; | |
1041 b[x+width2] = A3; | |
1042 b[x ] = A2; | |
1043 | |
1044 A1= temp[x+1+width2]; | |
1045 A2= temp[x+2 ]; | |
1046 A1 -= (A2 + A4)>>1; | |
1047 A4 += (A1 + A3 + 2)>>2; | |
1048 b[x+1+width2] = A1; | |
1049 b[x+1 ] = A4; | |
1050 } | |
1051 A3= temp[width-1]; | |
1052 A3 -= A2; | |
1053 A2 += (A1 + A3 + 2)>>2; | |
1054 b[width -1] = A3; | |
1055 b[width2-1] = A2; | |
2967 | 1056 #else |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1057 inv_lift(temp , b , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 1); |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1058 inv_lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1); |
5910 | 1059 #endif /* 0 */ |
2138 | 1060 for(x=0; x<width2; x++){ |
1061 b[2*x ]= temp[x ]; | |
1062 b[2*x + 1]= temp[x+w2]; | |
1063 } | |
1064 if(width&1) | |
1065 b[2*x ]= temp[x ]; | |
1066 } | |
1067 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1068 static void vertical_compose53iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){ |
2138 | 1069 int i; |
2967 | 1070 |
2138 | 1071 for(i=0; i<width; i++){ |
1072 b1[i] += (b0[i] + b2[i])>>1; | |
1073 } | |
1074 } | |
1075 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1076 static void vertical_compose53iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){ |
2138 | 1077 int i; |
2967 | 1078 |
2138 | 1079 for(i=0; i<width; i++){ |
1080 b1[i] -= (b0[i] + b2[i] + 2)>>2; | |
1081 } | |
1082 } | |
1083 | |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1084 static void spatial_compose53i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1085 cs->b0 = slice_buffer_get_line(sb, mirror(-1-1, height-1) * stride_line); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1086 cs->b1 = slice_buffer_get_line(sb, mirror(-1 , height-1) * stride_line); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1087 cs->y = -1; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1088 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1089 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1090 static void spatial_compose53i_init(dwt_compose_t *cs, IDWTELEM *buffer, int height, int stride){ |
2562 | 1091 cs->b0 = buffer + mirror(-1-1, height-1)*stride; |
1092 cs->b1 = buffer + mirror(-1 , height-1)*stride; | |
1093 cs->y = -1; | |
1094 } | |
1095 | |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1096 static void spatial_compose53i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1097 int y= cs->y; |
2967 | 1098 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1099 IDWTELEM *b0= cs->b0; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1100 IDWTELEM *b1= cs->b1; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1101 IDWTELEM *b2= slice_buffer_get_line(sb, mirror(y+1, height-1) * stride_line); |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1102 IDWTELEM *b3= slice_buffer_get_line(sb, mirror(y+2, height-1) * stride_line); |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1103 |
2998 | 1104 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width); |
1105 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width); | |
6405 | 1106 |
2998 | 1107 if(y-1<(unsigned)height) horizontal_compose53i(b0, width); |
1108 if(y+0<(unsigned)height) horizontal_compose53i(b1, width); | |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1109 |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1110 cs->b0 = b2; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1111 cs->b1 = b3; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1112 cs->y += 2; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1113 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1114 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1115 static void spatial_compose53i_dy(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride){ |
2562 | 1116 int y= cs->y; |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1117 IDWTELEM *b0= cs->b0; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1118 IDWTELEM *b1= cs->b1; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1119 IDWTELEM *b2= buffer + mirror(y+1, height-1)*stride; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1120 IDWTELEM *b3= buffer + mirror(y+2, height-1)*stride; |
2138 | 1121 |
2998 | 1122 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width); |
1123 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width); | |
6405 | 1124 |
2998 | 1125 if(y-1<(unsigned)height) horizontal_compose53i(b0, width); |
1126 if(y+0<(unsigned)height) horizontal_compose53i(b1, width); | |
2138 | 1127 |
2562 | 1128 cs->b0 = b2; |
1129 cs->b1 = b3; | |
1130 cs->y += 2; | |
1131 } | |
1132 | |
5989
382aa8b243c5
prevent warnings about functions being possibly unused
michael
parents:
5988
diff
changeset
|
1133 static void av_unused spatial_compose53i(IDWTELEM *buffer, int width, int height, int stride){ |
5988
919193c63fb8
thank you for your efforts to keep my code clean, but i would like to
michael
parents:
5934
diff
changeset
|
1134 dwt_compose_t cs; |
919193c63fb8
thank you for your efforts to keep my code clean, but i would like to
michael
parents:
5934
diff
changeset
|
1135 spatial_compose53i_init(&cs, buffer, height, stride); |
919193c63fb8
thank you for your efforts to keep my code clean, but i would like to
michael
parents:
5934
diff
changeset
|
1136 while(cs.y <= height) |
919193c63fb8
thank you for your efforts to keep my code clean, but i would like to
michael
parents:
5934
diff
changeset
|
1137 spatial_compose53i_dy(&cs, buffer, width, height, stride); |
919193c63fb8
thank you for your efforts to keep my code clean, but i would like to
michael
parents:
5934
diff
changeset
|
1138 } |
919193c63fb8
thank you for your efforts to keep my code clean, but i would like to
michael
parents:
5934
diff
changeset
|
1139 |
2967 | 1140 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1141 void ff_snow_horizontal_compose97i(IDWTELEM *b, int width){ |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1142 IDWTELEM temp[width]; |
2138 | 1143 const int w2= (width+1)>>1; |
1144 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1145 inv_lift (temp , b , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 1); |
5589
946c2db0a093
cleanup (remove some old experimentation related code)
michael
parents:
5588
diff
changeset
|
1146 inv_lift (temp+w2, b +w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 1); |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1147 inv_liftS(b , temp , temp+w2, 2, 1, 1, width, W_BM, W_BO, W_BS, 0, 1); |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1148 inv_lift (b+1 , temp+w2, b , 2, 1, 2, width, W_AM, W_AO, W_AS, 1, 0); |
2138 | 1149 } |
1150 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1151 static void vertical_compose97iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){ |
2138 | 1152 int i; |
2967 | 1153 |
2138 | 1154 for(i=0; i<width; i++){ |
1155 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS; | |
1156 } | |
1157 } | |
1158 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1159 static void vertical_compose97iH1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){ |
2138 | 1160 int i; |
2967 | 1161 |
2138 | 1162 for(i=0; i<width; i++){ |
1163 b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS; | |
1164 } | |
1165 } | |
1166 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1167 static void vertical_compose97iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){ |
2138 | 1168 int i; |
2967 | 1169 |
2138 | 1170 for(i=0; i<width; i++){ |
2602
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
1171 #ifdef liftS |
2138 | 1172 b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS; |
2602
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
1173 #else |
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
1174 b1[i] += (W_BM*(b0[i] + b2[i])+4*b1[i]+W_BO)>>W_BS; |
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
1175 #endif |
2138 | 1176 } |
1177 } | |
1178 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1179 static void vertical_compose97iL1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){ |
2138 | 1180 int i; |
2967 | 1181 |
2138 | 1182 for(i=0; i<width; i++){ |
1183 b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS; | |
1184 } | |
1185 } | |
1186 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1187 void ff_snow_vertical_compose97i(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width){ |
2592 | 1188 int i; |
2967 | 1189 |
2592 | 1190 for(i=0; i<width; i++){ |
1191 b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS; | |
1192 b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS; | |
2602
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
1193 #ifdef liftS |
2592 | 1194 b2[i] += (W_BM*(b1[i] + b3[i])+W_BO)>>W_BS; |
2602
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
1195 #else |
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
1196 b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS; |
5ec55feb6fdd
rescale coefficients during IDWT, that way the lifting steps are much simpler and faster
michael
parents:
2601
diff
changeset
|
1197 #endif |
2592 | 1198 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS; |
1199 } | |
1200 } | |
1201 | |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1202 static void spatial_compose97i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1203 cs->b0 = slice_buffer_get_line(sb, mirror(-3-1, height-1) * stride_line); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1204 cs->b1 = slice_buffer_get_line(sb, mirror(-3 , height-1) * stride_line); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1205 cs->b2 = slice_buffer_get_line(sb, mirror(-3+1, height-1) * stride_line); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1206 cs->b3 = slice_buffer_get_line(sb, mirror(-3+2, height-1) * stride_line); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1207 cs->y = -3; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1208 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1209 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1210 static void spatial_compose97i_init(dwt_compose_t *cs, IDWTELEM *buffer, int height, int stride){ |
2562 | 1211 cs->b0 = buffer + mirror(-3-1, height-1)*stride; |
1212 cs->b1 = buffer + mirror(-3 , height-1)*stride; | |
1213 cs->b2 = buffer + mirror(-3+1, height-1)*stride; | |
1214 cs->b3 = buffer + mirror(-3+2, height-1)*stride; | |
1215 cs->y = -3; | |
1216 } | |
2138 | 1217 |
3198
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
1218 static void spatial_compose97i_dy_buffered(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){ |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1219 int y = cs->y; |
2967 | 1220 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1221 IDWTELEM *b0= cs->b0; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1222 IDWTELEM *b1= cs->b1; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1223 IDWTELEM *b2= cs->b2; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1224 IDWTELEM *b3= cs->b3; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1225 IDWTELEM *b4= slice_buffer_get_line(sb, mirror(y + 3, height - 1) * stride_line); |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1226 IDWTELEM *b5= slice_buffer_get_line(sb, mirror(y + 4, height - 1) * stride_line); |
2967 | 1227 |
2592 | 1228 if(y>0 && y+4<height){ |
3198
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
1229 dsp->vertical_compose97i(b0, b1, b2, b3, b4, b5, width); |
2592 | 1230 }else{ |
2998 | 1231 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width); |
1232 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width); | |
1233 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width); | |
1234 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width); | |
2592 | 1235 } |
6405 | 1236 |
3198
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
1237 if(y-1<(unsigned)height) dsp->horizontal_compose97i(b0, width); |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
1238 if(y+0<(unsigned)height) dsp->horizontal_compose97i(b1, width); |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1239 |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1240 cs->b0=b2; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1241 cs->b1=b3; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1242 cs->b2=b4; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1243 cs->b3=b5; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1244 cs->y += 2; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1245 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1246 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1247 static void spatial_compose97i_dy(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride){ |
2562 | 1248 int y = cs->y; |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1249 IDWTELEM *b0= cs->b0; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1250 IDWTELEM *b1= cs->b1; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1251 IDWTELEM *b2= cs->b2; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1252 IDWTELEM *b3= cs->b3; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1253 IDWTELEM *b4= buffer + mirror(y+3, height-1)*stride; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1254 IDWTELEM *b5= buffer + mirror(y+4, height-1)*stride; |
2138 | 1255 |
2998 | 1256 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width); |
1257 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width); | |
1258 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width); | |
1259 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width); | |
6405 | 1260 |
3198
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
1261 if(y-1<(unsigned)height) ff_snow_horizontal_compose97i(b0, width); |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
1262 if(y+0<(unsigned)height) ff_snow_horizontal_compose97i(b1, width); |
2562 | 1263 |
1264 cs->b0=b2; | |
1265 cs->b1=b3; | |
1266 cs->b2=b4; | |
1267 cs->b3=b5; | |
1268 cs->y += 2; | |
1269 } | |
1270 | |
5989
382aa8b243c5
prevent warnings about functions being possibly unused
michael
parents:
5988
diff
changeset
|
1271 static void av_unused spatial_compose97i(IDWTELEM *buffer, int width, int height, int stride){ |
5988
919193c63fb8
thank you for your efforts to keep my code clean, but i would like to
michael
parents:
5934
diff
changeset
|
1272 dwt_compose_t cs; |
919193c63fb8
thank you for your efforts to keep my code clean, but i would like to
michael
parents:
5934
diff
changeset
|
1273 spatial_compose97i_init(&cs, buffer, height, stride); |
919193c63fb8
thank you for your efforts to keep my code clean, but i would like to
michael
parents:
5934
diff
changeset
|
1274 while(cs.y <= height) |
919193c63fb8
thank you for your efforts to keep my code clean, but i would like to
michael
parents:
5934
diff
changeset
|
1275 spatial_compose97i_dy(&cs, buffer, width, height, stride); |
919193c63fb8
thank you for your efforts to keep my code clean, but i would like to
michael
parents:
5934
diff
changeset
|
1276 } |
919193c63fb8
thank you for your efforts to keep my code clean, but i would like to
michael
parents:
5934
diff
changeset
|
1277 |
3075 | 1278 static void ff_spatial_idwt_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count){ |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1279 int level; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1280 for(level=decomposition_count-1; level>=0; level--){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1281 switch(type){ |
3326 | 1282 case DWT_97: spatial_compose97i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break; |
1283 case DWT_53: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break; | |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1284 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1285 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1286 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1287 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1288 static void ff_spatial_idwt_init(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){ |
2562 | 1289 int level; |
1290 for(level=decomposition_count-1; level>=0; level--){ | |
1291 switch(type){ | |
3326 | 1292 case DWT_97: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break; |
1293 case DWT_53: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break; | |
2562 | 1294 } |
1295 } | |
1296 } | |
1297 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1298 static void ff_spatial_idwt_slice(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){ |
2562 | 1299 const int support = type==1 ? 3 : 5; |
1300 int level; | |
1301 if(type==2) return; | |
1302 | |
1303 for(level=decomposition_count-1; level>=0; level--){ | |
1304 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){ | |
1305 switch(type){ | |
3326 | 1306 case DWT_97: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level); |
6414 | 1307 break; |
3326 | 1308 case DWT_53: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level); |
6414 | 1309 break; |
2562 | 1310 } |
1311 } | |
2138 | 1312 } |
1313 } | |
1314 | |
3198
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
1315 static void ff_spatial_idwt_buffered_slice(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){ |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1316 const int support = type==1 ? 3 : 5; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1317 int level; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1318 if(type==2) return; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1319 |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1320 for(level=decomposition_count-1; level>=0; level--){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1321 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1322 switch(type){ |
3326 | 1323 case DWT_97: spatial_compose97i_dy_buffered(dsp, cs+level, slice_buf, width>>level, height>>level, stride_line<<level); |
6414 | 1324 break; |
3326 | 1325 case DWT_53: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level); |
6414 | 1326 break; |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1327 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1328 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1329 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1330 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1331 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1332 static void ff_spatial_idwt(IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){ |
2562 | 1333 dwt_compose_t cs[MAX_DECOMPOSITIONS]; |
1334 int y; | |
1335 ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count); | |
1336 for(y=0; y<height; y+=4) | |
1337 ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y); | |
2138 | 1338 } |
1339 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1340 static int encode_subband_c0run(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){ |
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
1341 const int w= b->width; |
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
1342 const int h= b->height; |
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
1343 int x, y; |
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
1344 |
2138 | 1345 if(1){ |
1346 int run=0; | |
2149 | 1347 int runs[w*h]; |
2138 | 1348 int run_index=0; |
2609
0f74a379a890
store the number of runs to avoid storing the last run value
michael
parents:
2608
diff
changeset
|
1349 int max_index; |
2967 | 1350 |
2138 | 1351 for(y=0; y<h; y++){ |
1352 for(x=0; x<w; x++){ | |
2148
678be5a8f282
use parent sample to predict significance & magnitude
michael
parents:
2146
diff
changeset
|
1353 int v, p=0; |
2144 | 1354 int /*ll=0, */l=0, lt=0, t=0, rt=0; |
2149 | 1355 v= src[x + y*stride]; |
2138 | 1356 |
1357 if(y){ | |
2149 | 1358 t= src[x + (y-1)*stride]; |
2138 | 1359 if(x){ |
2149 | 1360 lt= src[x - 1 + (y-1)*stride]; |
2138 | 1361 } |
1362 if(x + 1 < w){ | |
2149 | 1363 rt= src[x + 1 + (y-1)*stride]; |
2138 | 1364 } |
1365 } | |
1366 if(x){ | |
2149 | 1367 l= src[x - 1 + y*stride]; |
2144 | 1368 /*if(x > 1){ |
1369 if(orientation==1) ll= src[y + (x-2)*stride]; | |
1370 else ll= src[x - 2 + y*stride]; | |
2138 | 1371 }*/ |
1372 } | |
2148
678be5a8f282
use parent sample to predict significance & magnitude
michael
parents:
2146
diff
changeset
|
1373 if(parent){ |
2149 | 1374 int px= x>>1; |
1375 int py= y>>1; | |
2967 | 1376 if(px<b->parent->width && py<b->parent->height) |
2148
678be5a8f282
use parent sample to predict significance & magnitude
michael
parents:
2146
diff
changeset
|
1377 p= parent[px + py*2*stride]; |
678be5a8f282
use parent sample to predict significance & magnitude
michael
parents:
2146
diff
changeset
|
1378 } |
678be5a8f282
use parent sample to predict significance & magnitude
michael
parents:
2146
diff
changeset
|
1379 if(!(/*ll|*/l|lt|t|rt|p)){ |
2138 | 1380 if(v){ |
1381 runs[run_index++]= run; | |
1382 run=0; | |
1383 }else{ | |
1384 run++; | |
1385 } | |
1386 } | |
1387 } | |
1388 } | |
2609
0f74a379a890
store the number of runs to avoid storing the last run value
michael
parents:
2608
diff
changeset
|
1389 max_index= run_index; |
2138 | 1390 runs[run_index++]= run; |
1391 run_index=0; | |
1392 run= runs[run_index++]; | |
1393 | |
2609
0f74a379a890
store the number of runs to avoid storing the last run value
michael
parents:
2608
diff
changeset
|
1394 put_symbol2(&s->c, b->state[30], max_index, 0); |
0f74a379a890
store the number of runs to avoid storing the last run value
michael
parents:
2608
diff
changeset
|
1395 if(run_index <= max_index) |
0f74a379a890
store the number of runs to avoid storing the last run value
michael
parents:
2608
diff
changeset
|
1396 put_symbol2(&s->c, b->state[1], run, 3); |
2967 | 1397 |
2138 | 1398 for(y=0; y<h; y++){ |
2435
c89ac0e70c66
10l patch by (matthieu castet <castet.matthieu free fr>)
michael
parents:
2422
diff
changeset
|
1399 if(s->c.bytestream_end - s->c.bytestream < w*40){ |
2422 | 1400 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n"); |
1401 return -1; | |
1402 } | |
2138 | 1403 for(x=0; x<w; x++){ |
2148
678be5a8f282
use parent sample to predict significance & magnitude
michael
parents:
2146
diff
changeset
|
1404 int v, p=0; |
2144 | 1405 int /*ll=0, */l=0, lt=0, t=0, rt=0; |
2149 | 1406 v= src[x + y*stride]; |
2138 | 1407 |
1408 if(y){ | |
2149 | 1409 t= src[x + (y-1)*stride]; |
2138 | 1410 if(x){ |
2149 | 1411 lt= src[x - 1 + (y-1)*stride]; |
2138 | 1412 } |
1413 if(x + 1 < w){ | |
2149 | 1414 rt= src[x + 1 + (y-1)*stride]; |
2138 | 1415 } |
1416 } | |
1417 if(x){ | |
2149 | 1418 l= src[x - 1 + y*stride]; |
2144 | 1419 /*if(x > 1){ |
1420 if(orientation==1) ll= src[y + (x-2)*stride]; | |
1421 else ll= src[x - 2 + y*stride]; | |
2138 | 1422 }*/ |
1423 } | |
2148
678be5a8f282
use parent sample to predict significance & magnitude
michael
parents:
2146
diff
changeset
|
1424 if(parent){ |
2149 | 1425 int px= x>>1; |
1426 int py= y>>1; | |
2967 | 1427 if(px<b->parent->width && py<b->parent->height) |
2148
678be5a8f282
use parent sample to predict significance & magnitude
michael
parents:
2146
diff
changeset
|
1428 p= parent[px + py*2*stride]; |
678be5a8f282
use parent sample to predict significance & magnitude
michael
parents:
2146
diff
changeset
|
1429 } |
678be5a8f282
use parent sample to predict significance & magnitude
michael
parents:
2146
diff
changeset
|
1430 if(/*ll|*/l|lt|t|rt|p){ |
4001 | 1431 int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p)); |
2144 | 1432 |
2335 | 1433 put_rac(&s->c, &b->state[0][context], !!v); |
2138 | 1434 }else{ |
1435 if(!run){ | |
1436 run= runs[run_index++]; | |
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
1437 |
2609
0f74a379a890
store the number of runs to avoid storing the last run value
michael
parents:
2608
diff
changeset
|
1438 if(run_index <= max_index) |
0f74a379a890
store the number of runs to avoid storing the last run value
michael
parents:
2608
diff
changeset
|
1439 put_symbol2(&s->c, b->state[1], run, 3); |
2138 | 1440 assert(v); |
1441 }else{ | |
1442 run--; | |
1443 assert(!v); | |
1444 } | |
1445 } | |
1446 if(v){ | |
4001 | 1447 int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p)); |
1448 int l2= 2*FFABS(l) + (l<0); | |
1449 int t2= 2*FFABS(t) + (t<0); | |
1450 | |
1451 put_symbol2(&s->c, b->state[context + 2], FFABS(v)-1, context-4); | |
2596 | 1452 put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0); |
2138 | 1453 } |
1454 } | |
1455 } | |
1456 } | |
2422 | 1457 return 0; |
2138 | 1458 } |
1459 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1460 static int encode_subband(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){ |
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
1461 // encode_subband_qtree(s, b, src, parent, stride, orientation); |
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
1462 // encode_subband_z0run(s, b, src, parent, stride, orientation); |
2422 | 1463 return encode_subband_c0run(s, b, src, parent, stride, orientation); |
2155
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
1464 // encode_subband_dzr(s, b, src, parent, stride, orientation); |
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
1465 } |
274a01d80f4a
various subband encoders (all either worse or complicated so they are commented out)
michael
parents:
2152
diff
changeset
|
1466 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1467 static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){ |
2138 | 1468 const int w= b->width; |
1469 const int h= b->height; | |
1470 int x,y; | |
2967 | 1471 |
2138 | 1472 if(1){ |
2609
0f74a379a890
store the number of runs to avoid storing the last run value
michael
parents:
2608
diff
changeset
|
1473 int run, runs; |
2607
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1474 x_and_coeff *xc= b->x_coeff; |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1475 x_and_coeff *prev_xc= NULL; |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1476 x_and_coeff *prev2_xc= xc; |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1477 x_and_coeff *parent_xc= parent ? parent->x_coeff : NULL; |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1478 x_and_coeff *prev_parent_xc= parent_xc; |
2138 | 1479 |
2609
0f74a379a890
store the number of runs to avoid storing the last run value
michael
parents:
2608
diff
changeset
|
1480 runs= get_symbol2(&s->c, b->state[30], 0); |
0f74a379a890
store the number of runs to avoid storing the last run value
michael
parents:
2608
diff
changeset
|
1481 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3); |
0f74a379a890
store the number of runs to avoid storing the last run value
michael
parents:
2608
diff
changeset
|
1482 else run= INT_MAX; |
0f74a379a890
store the number of runs to avoid storing the last run value
michael
parents:
2608
diff
changeset
|
1483 |
2138 | 1484 for(y=0; y<h; y++){ |
2193 | 1485 int v=0; |
1486 int lt=0, t=0, rt=0; | |
1487 | |
2607
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1488 if(y && prev_xc->x == 0){ |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1489 rt= prev_xc->coeff; |
2193 | 1490 } |
2138 | 1491 for(x=0; x<w; x++){ |
2193 | 1492 int p=0; |
1493 const int l= v; | |
2967 | 1494 |
2193 | 1495 lt= t; t= rt; |
1496 | |
2194 | 1497 if(y){ |
2607
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1498 if(prev_xc->x <= x) |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1499 prev_xc++; |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1500 if(prev_xc->x == x + 1) |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1501 rt= prev_xc->coeff; |
2194 | 1502 else |
1503 rt=0; | |
1504 } | |
2607
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1505 if(parent_xc){ |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1506 if(x>>1 > parent_xc->x){ |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1507 parent_xc++; |
2192 | 1508 } |
2607
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1509 if(x>>1 == parent_xc->x){ |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1510 p= parent_xc->coeff; |
2194 | 1511 } |
2148
678be5a8f282
use parent sample to predict significance & magnitude
michael
parents:
2146
diff
changeset
|
1512 } |
678be5a8f282
use parent sample to predict significance & magnitude
michael
parents:
2146
diff
changeset
|
1513 if(/*ll|*/l|lt|t|rt|p){ |
4001 | 1514 int context= av_log2(/*FFABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1)); |
2144 | 1515 |
2335 | 1516 v=get_rac(&s->c, &b->state[0][context]); |
2605 | 1517 if(v){ |
1518 v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1); | |
1519 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]); | |
2967 | 1520 |
2607
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1521 xc->x=x; |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1522 (xc++)->coeff= v; |
2605 | 1523 } |
2138 | 1524 }else{ |
1525 if(!run){ | |
2609
0f74a379a890
store the number of runs to avoid storing the last run value
michael
parents:
2608
diff
changeset
|
1526 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3); |
0f74a379a890
store the number of runs to avoid storing the last run value
michael
parents:
2608
diff
changeset
|
1527 else run= INT_MAX; |
2605 | 1528 v= 2*(get_symbol2(&s->c, b->state[0 + 2], 0-4) + 1); |
1529 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3]); | |
2967 | 1530 |
2607
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1531 xc->x=x; |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1532 (xc++)->coeff= v; |
2138 | 1533 }else{ |
2606 | 1534 int max_run; |
2138 | 1535 run--; |
1536 v=0; | |
2191 | 1537 |
2607
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1538 if(y) max_run= FFMIN(run, prev_xc->x - x - 2); |
2606 | 1539 else max_run= FFMIN(run, w-x-1); |
2607
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1540 if(parent_xc) |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1541 max_run= FFMIN(max_run, 2*parent_xc->x - x - 1); |
2606 | 1542 x+= max_run; |
1543 run-= max_run; | |
2138 | 1544 } |
1545 } | |
2192 | 1546 } |
2607
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1547 (xc++)->x= w+1; //end marker |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1548 prev_xc= prev2_xc; |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1549 prev2_xc= xc; |
2967 | 1550 |
2607
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1551 if(parent_xc){ |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1552 if(y&1){ |
2607
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1553 while(parent_xc->x != parent->width+1) |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1554 parent_xc++; |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1555 parent_xc++; |
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1556 prev_parent_xc= parent_xc; |
2192 | 1557 }else{ |
2607
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1558 parent_xc= prev_parent_xc; |
2138 | 1559 } |
1560 } | |
1561 } | |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1562 |
2607
fde7b6fe2aaf
replace complicated pointer dereference + index stuff by pointers in unpack_coeffs()
michael
parents:
2606
diff
changeset
|
1563 (xc++)->x= w+1; //end marker |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1564 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1565 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1566 |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1567 static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1568 const int w= b->width; |
2893 | 1569 int y; |
4594 | 1570 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16); |
2600 | 1571 int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT); |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1572 int qadd= (s->qbias*qmul)>>QBIAS_SHIFT; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1573 int new_index = 0; |
2967 | 1574 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1575 if(b->ibuf == s->spatial_idwt_buffer || s->qlog == LOSSLESS_QLOG){ |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1576 qadd= 0; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1577 qmul= 1<<QEXPSHIFT; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1578 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1579 |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1580 /* If we are on the second or later slice, restore our index. */ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1581 if (start_y != 0) |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1582 new_index = save_state[0]; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1583 |
2967 | 1584 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1585 for(y=start_y; y<h; y++){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1586 int x = 0; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1587 int v; |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1588 IDWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
1589 memset(line, 0, b->width*sizeof(IDWTELEM)); |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1590 v = b->x_coeff[new_index].coeff; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1591 x = b->x_coeff[new_index++].x; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1592 while(x < w) |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1593 { |
2596 | 1594 register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT; |
1595 register int u= -(v&1); | |
1596 line[x] = (t^u) - u; | |
1597 | |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1598 v = b->x_coeff[new_index].coeff; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1599 x = b->x_coeff[new_index++].x; |
2138 | 1600 } |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1601 } |
2967 | 1602 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1603 /* Save our variables for the next slice. */ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1604 save_state[0] = new_index; |
2967 | 1605 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
1606 return; |
2138 | 1607 } |
1608 | |
4331
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
1609 static void reset_contexts(SnowContext *s){ //FIXME better initial contexts |
2138 | 1610 int plane_index, level, orientation; |
1611 | |
2199 | 1612 for(plane_index=0; plane_index<3; plane_index++){ |
5665
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
1613 for(level=0; level<MAX_DECOMPOSITIONS; level++){ |
2138 | 1614 for(orientation=level ? 1:0; orientation<4; orientation++){ |
2335 | 1615 memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state)); |
2138 | 1616 } |
1617 } | |
1618 } | |
2335 | 1619 memset(s->header_state, MID_STATE, sizeof(s->header_state)); |
1620 memset(s->block_state, MID_STATE, sizeof(s->block_state)); | |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1621 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1622 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1623 static int alloc_blocks(SnowContext *s){ |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1624 int w= -((-s->avctx->width )>>LOG2_MB_SIZE); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1625 int h= -((-s->avctx->height)>>LOG2_MB_SIZE); |
2967 | 1626 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1627 s->b_width = w; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1628 s->b_height= h; |
2967 | 1629 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1630 s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2)); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1631 return 0; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1632 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1633 |
2335 | 1634 static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){ |
1635 uint8_t *bytestream= d->bytestream; | |
1636 uint8_t *bytestream_start= d->bytestream_start; | |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1637 *d= *s; |
2335 | 1638 d->bytestream= bytestream; |
1639 d->bytestream_start= bytestream_start; | |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1640 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1641 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1642 //near copy & paste from dsputil, FIXME |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1643 static int pix_sum(uint8_t * pix, int line_size, int w) |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1644 { |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1645 int s, i, j; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1646 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1647 s = 0; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1648 for (i = 0; i < w; i++) { |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1649 for (j = 0; j < w; j++) { |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1650 s += pix[0]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1651 pix ++; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1652 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1653 pix += line_size - w; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1654 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1655 return s; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1656 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1657 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1658 //near copy & paste from dsputil, FIXME |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1659 static int pix_norm1(uint8_t * pix, int line_size, int w) |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1660 { |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1661 int s, i, j; |
4179 | 1662 uint32_t *sq = ff_squareTbl + 256; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1663 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1664 s = 0; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1665 for (i = 0; i < w; i++) { |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1666 for (j = 0; j < w; j ++) { |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1667 s += sq[pix[0]]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1668 pix ++; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1669 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1670 pix += line_size - w; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1671 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1672 return s; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1673 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1674 |
3314 | 1675 static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int ref, int type){ |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1676 const int w= s->b_width << s->block_max_depth; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1677 const int rem_depth= s->block_max_depth - level; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1678 const int index= (x + y*w) << rem_depth; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1679 const int block_w= 1<<rem_depth; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1680 BlockNode block; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1681 int i,j; |
2967 | 1682 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1683 block.color[0]= l; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1684 block.color[1]= cb; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1685 block.color[2]= cr; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1686 block.mx= mx; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1687 block.my= my; |
3314 | 1688 block.ref= ref; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1689 block.type= type; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1690 block.level= level; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1691 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1692 for(j=0; j<block_w; j++){ |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1693 for(i=0; i<block_w; i++){ |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1694 s->block[index + i + j*w]= block; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1695 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1696 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1697 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1698 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1699 static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){ |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1700 const int offset[3]= { |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1701 y*c-> stride + x, |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1702 ((y*c->uvstride + x)>>1), |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1703 ((y*c->uvstride + x)>>1), |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1704 }; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1705 int i; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1706 for(i=0; i<3; i++){ |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1707 c->src[0][i]= src [i]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1708 c->ref[0][i]= ref [i] + offset[i]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1709 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1710 assert(!ref_index); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1711 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1712 |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
1713 static inline void pred_mv(SnowContext *s, int *mx, int *my, int ref, |
4408
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
1714 const BlockNode *left, const BlockNode *top, const BlockNode *tr){ |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
1715 if(s->ref_frames == 1){ |
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
1716 *mx = mid_pred(left->mx, top->mx, tr->mx); |
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
1717 *my = mid_pred(left->my, top->my, tr->my); |
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
1718 }else{ |
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
1719 const int *scale = scale_mv_ref[ref]; |
4407
15688bdfe1b5
Brings down the number of snow.c warnings from 33 to 27 by putting parentheses
takis
parents:
4360
diff
changeset
|
1720 *mx = mid_pred((left->mx * scale[left->ref] + 128) >>8, |
15688bdfe1b5
Brings down the number of snow.c warnings from 33 to 27 by putting parentheses
takis
parents:
4360
diff
changeset
|
1721 (top ->mx * scale[top ->ref] + 128) >>8, |
15688bdfe1b5
Brings down the number of snow.c warnings from 33 to 27 by putting parentheses
takis
parents:
4360
diff
changeset
|
1722 (tr ->mx * scale[tr ->ref] + 128) >>8); |
15688bdfe1b5
Brings down the number of snow.c warnings from 33 to 27 by putting parentheses
takis
parents:
4360
diff
changeset
|
1723 *my = mid_pred((left->my * scale[left->ref] + 128) >>8, |
15688bdfe1b5
Brings down the number of snow.c warnings from 33 to 27 by putting parentheses
takis
parents:
4360
diff
changeset
|
1724 (top ->my * scale[top ->ref] + 128) >>8, |
15688bdfe1b5
Brings down the number of snow.c warnings from 33 to 27 by putting parentheses
takis
parents:
4360
diff
changeset
|
1725 (tr ->my * scale[tr ->ref] + 128) >>8); |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
1726 } |
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
1727 } |
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
1728 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1729 //FIXME copy&paste |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1730 #define P_LEFT P[1] |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1731 #define P_TOP P[2] |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1732 #define P_TOPRIGHT P[3] |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1733 #define P_MEDIAN P[4] |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1734 #define P_MV1 P[9] |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1735 #define FLAG_QPEL 1 //must be 1 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1736 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1737 static int encode_q_branch(SnowContext *s, int level, int x, int y){ |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1738 uint8_t p_buffer[1024]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1739 uint8_t i_buffer[1024]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1740 uint8_t p_state[sizeof(s->block_state)]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1741 uint8_t i_state[sizeof(s->block_state)]; |
2335 | 1742 RangeCoder pc, ic; |
1743 uint8_t *pbbak= s->c.bytestream; | |
1744 uint8_t *pbbak_start= s->c.bytestream_start; | |
5082 | 1745 int score, score2, iscore, i_len, p_len, block_s, sum, base_bits; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1746 const int w= s->b_width << s->block_max_depth; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1747 const int h= s->b_height << s->block_max_depth; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1748 const int rem_depth= s->block_max_depth - level; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1749 const int index= (x + y*w) << rem_depth; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1750 const int block_w= 1<<(LOG2_MB_SIZE - level); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1751 int trx= (x+1)<<rem_depth; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1752 int try= (y+1)<<rem_depth; |
4408
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
1753 const BlockNode *left = x ? &s->block[index-1] : &null_block; |
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
1754 const BlockNode *top = y ? &s->block[index-w] : &null_block; |
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
1755 const BlockNode *right = trx<w ? &s->block[index+1] : &null_block; |
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
1756 const BlockNode *bottom= try<h ? &s->block[index+w] : &null_block; |
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
1757 const BlockNode *tl = y && x ? &s->block[index-w-1] : left; |
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
1758 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1759 int pl = left->color[0]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1760 int pcb= left->color[1]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1761 int pcr= left->color[2]; |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
1762 int pmx, pmy; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1763 int mx=0, my=0; |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1764 int l,cr,cb; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1765 const int stride= s->current_picture.linesize[0]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1766 const int uvstride= s->current_picture.linesize[1]; |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1767 uint8_t *current_data[3]= { s->input_picture.data[0] + (x + y* stride)*block_w, |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1768 s->input_picture.data[1] + (x + y*uvstride)*block_w/2, |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1769 s->input_picture.data[2] + (x + y*uvstride)*block_w/2}; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1770 int P[10][2]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1771 int16_t last_mv[3][2]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1772 int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1773 const int shift= 1+qpel; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1774 MotionEstContext *c= &s->m.me; |
3314 | 1775 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref); |
4001 | 1776 int mx_context= av_log2(2*FFABS(left->mx - top->mx)); |
1777 int my_context= av_log2(2*FFABS(left->my - top->my)); | |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1778 int s_context= 2*left->level + 2*top->level + tl->level + tr->level; |
3314 | 1779 int ref, best_ref, ref_score, ref_mx, ref_my; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1780 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1781 assert(sizeof(s->block_state) >= 256); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1782 if(s->keyframe){ |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
1783 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1784 return 0; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1785 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1786 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1787 // clip predictors / edge ? |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1788 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1789 P_LEFT[0]= left->mx; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1790 P_LEFT[1]= left->my; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1791 P_TOP [0]= top->mx; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1792 P_TOP [1]= top->my; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1793 P_TOPRIGHT[0]= tr->mx; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1794 P_TOPRIGHT[1]= tr->my; |
2967 | 1795 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1796 last_mv[0][0]= s->block[index].mx; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1797 last_mv[0][1]= s->block[index].my; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1798 last_mv[1][0]= right->mx; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1799 last_mv[1][1]= right->my; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1800 last_mv[2][0]= bottom->mx; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1801 last_mv[2][1]= bottom->my; |
2967 | 1802 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1803 s->m.mb_stride=2; |
2967 | 1804 s->m.mb_x= |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1805 s->m.mb_y= 0; |
4360
5f887a3c7281
Change the Snow encoder to always use the available MotionEstContext pointer.
takis
parents:
4332
diff
changeset
|
1806 c->skip= 0; |
5f887a3c7281
Change the Snow encoder to always use the available MotionEstContext pointer.
takis
parents:
4332
diff
changeset
|
1807 |
5f887a3c7281
Change the Snow encoder to always use the available MotionEstContext pointer.
takis
parents:
4332
diff
changeset
|
1808 assert(c-> stride == stride); |
5f887a3c7281
Change the Snow encoder to always use the available MotionEstContext pointer.
takis
parents:
4332
diff
changeset
|
1809 assert(c->uvstride == uvstride); |
2967 | 1810 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1811 c->penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1812 c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1813 c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1814 c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV; |
2967 | 1815 |
2206 | 1816 c->xmin = - x*block_w - 16+2; |
1817 c->ymin = - y*block_w - 16+2; | |
1818 c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2; | |
1819 c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2; | |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1820 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1821 if(P_LEFT[0] > (c->xmax<<shift)) P_LEFT[0] = (c->xmax<<shift); |
2967 | 1822 if(P_LEFT[1] > (c->ymax<<shift)) P_LEFT[1] = (c->ymax<<shift); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1823 if(P_TOP[0] > (c->xmax<<shift)) P_TOP[0] = (c->xmax<<shift); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1824 if(P_TOP[1] > (c->ymax<<shift)) P_TOP[1] = (c->ymax<<shift); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1825 if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1826 if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1827 if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1828 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1829 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1830 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1831 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1832 if (!y) { |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1833 c->pred_x= P_LEFT[0]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1834 c->pred_y= P_LEFT[1]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1835 } else { |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1836 c->pred_x = P_MEDIAN[0]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1837 c->pred_y = P_MEDIAN[1]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1838 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1839 |
3314 | 1840 score= INT_MAX; |
1841 best_ref= 0; | |
1842 for(ref=0; ref<s->ref_frames; ref++){ | |
1843 init_ref(c, current_data, s->last_picture[ref].data, NULL, block_w*x, block_w*y, 0); | |
1844 | |
1845 ref_score= ff_epzs_motion_search(&s->m, &ref_mx, &ref_my, P, 0, /*ref_index*/ 0, last_mv, | |
1846 (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w); | |
1847 | |
1848 assert(ref_mx >= c->xmin); | |
1849 assert(ref_mx <= c->xmax); | |
1850 assert(ref_my >= c->ymin); | |
1851 assert(ref_my <= c->ymax); | |
1852 | |
4360
5f887a3c7281
Change the Snow encoder to always use the available MotionEstContext pointer.
takis
parents:
4332
diff
changeset
|
1853 ref_score= c->sub_motion_search(&s->m, &ref_mx, &ref_my, ref_score, 0, 0, level-LOG2_MB_SIZE+4, block_w); |
3314 | 1854 ref_score= ff_get_mb_score(&s->m, ref_mx, ref_my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0); |
1855 ref_score+= 2*av_log2(2*ref)*c->penalty_factor; | |
1856 if(s->ref_mvs[ref]){ | |
1857 s->ref_mvs[ref][index][0]= ref_mx; | |
1858 s->ref_mvs[ref][index][1]= ref_my; | |
1859 s->ref_scores[ref][index]= ref_score; | |
1860 } | |
1861 if(score > ref_score){ | |
1862 score= ref_score; | |
1863 best_ref= ref; | |
1864 mx= ref_mx; | |
1865 my= ref_my; | |
1866 } | |
1867 } | |
5127 | 1868 //FIXME if mb_cmp != SSE then intra cannot be compared currently and mb_penalty vs. lambda2 |
2967 | 1869 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1870 // subpel search |
5085 | 1871 base_bits= get_rac_count(&s->c) - 8*(s->c.bytestream - s->c.bytestream_start); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1872 pc= s->c; |
2335 | 1873 pc.bytestream_start= |
1874 pc.bytestream= p_buffer; //FIXME end/start? and at the other stoo | |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1875 memcpy(p_state, s->block_state, sizeof(s->block_state)); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1876 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1877 if(level!=s->block_max_depth) |
2335 | 1878 put_rac(&pc, &p_state[4 + s_context], 1); |
1879 put_rac(&pc, &p_state[1 + left->type + top->type], 0); | |
3314 | 1880 if(s->ref_frames > 1) |
1881 put_symbol(&pc, &p_state[128 + 1024 + 32*ref_context], best_ref, 0); | |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
1882 pred_mv(s, &pmx, &pmy, best_ref, left, top, tr); |
3314 | 1883 put_symbol(&pc, &p_state[128 + 32*(mx_context + 16*!!best_ref)], mx - pmx, 1); |
1884 put_symbol(&pc, &p_state[128 + 32*(my_context + 16*!!best_ref)], my - pmy, 1); | |
2335 | 1885 p_len= pc.bytestream - pc.bytestream_start; |
5082 | 1886 score += (s->lambda2*(get_rac_count(&pc)-base_bits))>>FF_LAMBDA_SHIFT; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1887 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1888 block_s= block_w*block_w; |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1889 sum = pix_sum(current_data[0], stride, block_w); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1890 l= (sum + block_s/2)/block_s; |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1891 iscore = pix_norm1(current_data[0], stride, block_w) - 2*l*sum + l*l*block_s; |
2967 | 1892 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1893 block_s= block_w*block_w>>2; |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1894 sum = pix_sum(current_data[1], uvstride, block_w>>1); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1895 cb= (sum + block_s/2)/block_s; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1896 // iscore += pix_norm1(¤t_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s; |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1897 sum = pix_sum(current_data[2], uvstride, block_w>>1); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1898 cr= (sum + block_s/2)/block_s; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1899 // iscore += pix_norm1(¤t_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1900 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1901 ic= s->c; |
2335 | 1902 ic.bytestream_start= |
1903 ic.bytestream= i_buffer; //FIXME end/start? and at the other stoo | |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1904 memcpy(i_state, s->block_state, sizeof(s->block_state)); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1905 if(level!=s->block_max_depth) |
2335 | 1906 put_rac(&ic, &i_state[4 + s_context], 1); |
1907 put_rac(&ic, &i_state[1 + left->type + top->type], 1); | |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1908 put_symbol(&ic, &i_state[32], l-pl , 1); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1909 put_symbol(&ic, &i_state[64], cb-pcb, 1); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1910 put_symbol(&ic, &i_state[96], cr-pcr, 1); |
2335 | 1911 i_len= ic.bytestream - ic.bytestream_start; |
5082 | 1912 iscore += (s->lambda2*(get_rac_count(&ic)-base_bits))>>FF_LAMBDA_SHIFT; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1913 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1914 // assert(score==256*256*256*64-1); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1915 assert(iscore < 255*255*256 + s->lambda2*10); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1916 assert(iscore >= 0); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1917 assert(l>=0 && l<=255); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1918 assert(pl>=0 && pl<=255); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1919 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1920 if(level==0){ |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1921 int varc= iscore >> 8; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1922 int vard= score >> 8; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1923 if (vard <= 64 || vard < varc) |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1924 c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1925 else |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1926 c->scene_change_score+= s->m.qscale; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1927 } |
2967 | 1928 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1929 if(level!=s->block_max_depth){ |
2335 | 1930 put_rac(&s->c, &s->block_state[4 + s_context], 0); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1931 score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1932 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1933 score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1934 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1935 score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead |
2967 | 1936 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1937 if(score2 < score && score2 < iscore) |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1938 return score2; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1939 } |
2967 | 1940 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1941 if(iscore < score){ |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
1942 pred_mv(s, &pmx, &pmy, 0, left, top, tr); |
2335 | 1943 memcpy(pbbak, i_buffer, i_len); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1944 s->c= ic; |
2335 | 1945 s->c.bytestream_start= pbbak_start; |
1946 s->c.bytestream= pbbak + i_len; | |
3314 | 1947 set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, 0, BLOCK_INTRA); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1948 memcpy(s->block_state, i_state, sizeof(s->block_state)); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1949 return iscore; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1950 }else{ |
2335 | 1951 memcpy(pbbak, p_buffer, p_len); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1952 s->c= pc; |
2335 | 1953 s->c.bytestream_start= pbbak_start; |
1954 s->c.bytestream= pbbak + p_len; | |
3314 | 1955 set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, best_ref, 0); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1956 memcpy(s->block_state, p_state, sizeof(s->block_state)); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1957 return score; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1958 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1959 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
1960 |
4283
d6f83e2f8804
rename always_inline to av_always_inline and move to common.h
mru
parents:
4197
diff
changeset
|
1961 static av_always_inline int same_block(BlockNode *a, BlockNode *b){ |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1962 if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){ |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1963 return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2])); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1964 }else{ |
3314 | 1965 return !((a->mx - b->mx) | (a->my - b->my) | (a->ref - b->ref) | ((a->type ^ b->type)&BLOCK_INTRA)); |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1966 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1967 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1968 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1969 static void encode_q_branch2(SnowContext *s, int level, int x, int y){ |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1970 const int w= s->b_width << s->block_max_depth; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1971 const int rem_depth= s->block_max_depth - level; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1972 const int index= (x + y*w) << rem_depth; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1973 int trx= (x+1)<<rem_depth; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1974 BlockNode *b= &s->block[index]; |
4408
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
1975 const BlockNode *left = x ? &s->block[index-1] : &null_block; |
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
1976 const BlockNode *top = y ? &s->block[index-w] : &null_block; |
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
1977 const BlockNode *tl = y && x ? &s->block[index-w-1] : left; |
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
1978 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1979 int pl = left->color[0]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1980 int pcb= left->color[1]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1981 int pcr= left->color[2]; |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
1982 int pmx, pmy; |
3314 | 1983 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref); |
4001 | 1984 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 16*!!b->ref; |
1985 int my_context= av_log2(2*FFABS(left->my - top->my)) + 16*!!b->ref; | |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1986 int s_context= 2*left->level + 2*top->level + tl->level + tr->level; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1987 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1988 if(s->keyframe){ |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
1989 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA); |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1990 return; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1991 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1992 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1993 if(level!=s->block_max_depth){ |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1994 if(same_block(b,b+1) && same_block(b,b+w) && same_block(b,b+w+1)){ |
2995
dfc271b90fe6
4mv + iter ME 10l fix (still not bugfree but better)
michael
parents:
2994
diff
changeset
|
1995 put_rac(&s->c, &s->block_state[4 + s_context], 1); |
dfc271b90fe6
4mv + iter ME 10l fix (still not bugfree but better)
michael
parents:
2994
diff
changeset
|
1996 }else{ |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1997 put_rac(&s->c, &s->block_state[4 + s_context], 0); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1998 encode_q_branch2(s, level+1, 2*x+0, 2*y+0); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
1999 encode_q_branch2(s, level+1, 2*x+1, 2*y+0); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2000 encode_q_branch2(s, level+1, 2*x+0, 2*y+1); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2001 encode_q_branch2(s, level+1, 2*x+1, 2*y+1); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2002 return; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2003 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2004 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2005 if(b->type & BLOCK_INTRA){ |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
2006 pred_mv(s, &pmx, &pmy, 0, left, top, tr); |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2007 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 1); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2008 put_symbol(&s->c, &s->block_state[32], b->color[0]-pl , 1); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2009 put_symbol(&s->c, &s->block_state[64], b->color[1]-pcb, 1); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2010 put_symbol(&s->c, &s->block_state[96], b->color[2]-pcr, 1); |
3314 | 2011 set_blocks(s, level, x, y, b->color[0], b->color[1], b->color[2], pmx, pmy, 0, BLOCK_INTRA); |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2012 }else{ |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
2013 pred_mv(s, &pmx, &pmy, b->ref, left, top, tr); |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2014 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 0); |
3314 | 2015 if(s->ref_frames > 1) |
2016 put_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], b->ref, 0); | |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2017 put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2018 put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1); |
3314 | 2019 set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, b->ref, 0); |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2020 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2021 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2022 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2023 static void decode_q_branch(SnowContext *s, int level, int x, int y){ |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2024 const int w= s->b_width << s->block_max_depth; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2025 const int rem_depth= s->block_max_depth - level; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2026 const int index= (x + y*w) << rem_depth; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2027 int trx= (x+1)<<rem_depth; |
4408
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
2028 const BlockNode *left = x ? &s->block[index-1] : &null_block; |
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
2029 const BlockNode *top = y ? &s->block[index-w] : &null_block; |
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
2030 const BlockNode *tl = y && x ? &s->block[index-w-1] : left; |
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
2031 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2032 int s_context= 2*left->level + 2*top->level + tl->level + tr->level; |
2967 | 2033 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2034 if(s->keyframe){ |
3314 | 2035 set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, null_block.ref, BLOCK_INTRA); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2036 return; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2037 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2038 |
2335 | 2039 if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){ |
4332 | 2040 int type, mx, my; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2041 int l = left->color[0]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2042 int cb= left->color[1]; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2043 int cr= left->color[2]; |
3314 | 2044 int ref = 0; |
2045 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref); | |
4001 | 2046 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 0*av_log2(2*FFABS(tr->mx - top->mx)); |
2047 int my_context= av_log2(2*FFABS(left->my - top->my)) + 0*av_log2(2*FFABS(tr->my - top->my)); | |
2967 | 2048 |
2335 | 2049 type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2050 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2051 if(type){ |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
2052 pred_mv(s, &mx, &my, 0, left, top, tr); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2053 l += get_symbol(&s->c, &s->block_state[32], 1); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2054 cb+= get_symbol(&s->c, &s->block_state[64], 1); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2055 cr+= get_symbol(&s->c, &s->block_state[96], 1); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2056 }else{ |
3314 | 2057 if(s->ref_frames > 1) |
2058 ref= get_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], 0); | |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
2059 pred_mv(s, &mx, &my, ref, left, top, tr); |
3314 | 2060 mx+= get_symbol(&s->c, &s->block_state[128 + 32*(mx_context + 16*!!ref)], 1); |
2061 my+= get_symbol(&s->c, &s->block_state[128 + 32*(my_context + 16*!!ref)], 1); | |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2062 } |
3314 | 2063 set_blocks(s, level, x, y, l, cb, cr, mx, my, ref, type); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2064 }else{ |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2065 decode_q_branch(s, level+1, 2*x+0, 2*y+0); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2066 decode_q_branch(s, level+1, 2*x+1, 2*y+0); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2067 decode_q_branch(s, level+1, 2*x+0, 2*y+1); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2068 decode_q_branch(s, level+1, 2*x+1, 2*y+1); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2069 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2070 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2071 |
3661
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
2072 static void encode_blocks(SnowContext *s, int search){ |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2073 int x, y; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2074 int w= s->b_width; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2075 int h= s->b_height; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2076 |
3661
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
2077 if(s->avctx->me_method == ME_ITER && !s->keyframe && search) |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2078 iterative_me(s); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2079 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2080 for(y=0; y<h; y++){ |
2435
c89ac0e70c66
10l patch by (matthieu castet <castet.matthieu free fr>)
michael
parents:
2422
diff
changeset
|
2081 if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit |
2422 | 2082 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n"); |
2083 return; | |
2084 } | |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2085 for(x=0; x<w; x++){ |
3661
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
2086 if(s->avctx->me_method == ME_ITER || !search) |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2087 encode_q_branch2(s, 0, x, y); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2088 else |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2089 encode_q_branch (s, 0, x, y); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2090 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2091 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2092 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2093 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2094 static void decode_blocks(SnowContext *s){ |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2095 int x, y; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2096 int w= s->b_width; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2097 int h= s->b_height; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2098 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2099 for(y=0; y<h; y++){ |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2100 for(x=0; x<w; x++){ |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2101 decode_q_branch(s, 0, x, y); |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2102 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2103 } |
2138 | 2104 } |
2105 | |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2106 static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){ |
5648 | 2107 const static uint8_t weight[64]={ |
2108 8,7,6,5,4,3,2,1, | |
2109 7,7,0,0,0,0,0,1, | |
2110 6,0,6,0,0,0,2,0, | |
2111 5,0,0,5,0,3,0,0, | |
2112 4,0,0,0,4,0,0,0, | |
2113 3,0,0,5,0,3,0,0, | |
2114 2,0,6,0,0,0,2,0, | |
2115 1,7,0,0,0,0,0,1, | |
2116 }; | |
2117 | |
2118 const static uint8_t brane[256]={ | |
2119 0x00,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x11,0x12,0x12,0x12,0x12,0x12,0x12,0x12, | |
2120 0x04,0x05,0xcc,0xcc,0xcc,0xcc,0xcc,0x41,0x15,0x16,0xcc,0xcc,0xcc,0xcc,0xcc,0x52, | |
2121 0x04,0xcc,0x05,0xcc,0xcc,0xcc,0x41,0xcc,0x15,0xcc,0x16,0xcc,0xcc,0xcc,0x52,0xcc, | |
2122 0x04,0xcc,0xcc,0x05,0xcc,0x41,0xcc,0xcc,0x15,0xcc,0xcc,0x16,0xcc,0x52,0xcc,0xcc, | |
2123 0x04,0xcc,0xcc,0xcc,0x41,0xcc,0xcc,0xcc,0x15,0xcc,0xcc,0xcc,0x16,0xcc,0xcc,0xcc, | |
2124 0x04,0xcc,0xcc,0x41,0xcc,0x05,0xcc,0xcc,0x15,0xcc,0xcc,0x52,0xcc,0x16,0xcc,0xcc, | |
2125 0x04,0xcc,0x41,0xcc,0xcc,0xcc,0x05,0xcc,0x15,0xcc,0x52,0xcc,0xcc,0xcc,0x16,0xcc, | |
2126 0x04,0x41,0xcc,0xcc,0xcc,0xcc,0xcc,0x05,0x15,0x52,0xcc,0xcc,0xcc,0xcc,0xcc,0x16, | |
2127 0x44,0x45,0x45,0x45,0x45,0x45,0x45,0x45,0x55,0x56,0x56,0x56,0x56,0x56,0x56,0x56, | |
2128 0x48,0x49,0xcc,0xcc,0xcc,0xcc,0xcc,0x85,0x59,0x5A,0xcc,0xcc,0xcc,0xcc,0xcc,0x96, | |
2129 0x48,0xcc,0x49,0xcc,0xcc,0xcc,0x85,0xcc,0x59,0xcc,0x5A,0xcc,0xcc,0xcc,0x96,0xcc, | |
2130 0x48,0xcc,0xcc,0x49,0xcc,0x85,0xcc,0xcc,0x59,0xcc,0xcc,0x5A,0xcc,0x96,0xcc,0xcc, | |
2131 0x48,0xcc,0xcc,0xcc,0x49,0xcc,0xcc,0xcc,0x59,0xcc,0xcc,0xcc,0x96,0xcc,0xcc,0xcc, | |
2132 0x48,0xcc,0xcc,0x85,0xcc,0x49,0xcc,0xcc,0x59,0xcc,0xcc,0x96,0xcc,0x5A,0xcc,0xcc, | |
2133 0x48,0xcc,0x85,0xcc,0xcc,0xcc,0x49,0xcc,0x59,0xcc,0x96,0xcc,0xcc,0xcc,0x5A,0xcc, | |
2134 0x48,0x85,0xcc,0xcc,0xcc,0xcc,0xcc,0x49,0x59,0x96,0xcc,0xcc,0xcc,0xcc,0xcc,0x5A, | |
2135 }; | |
2136 | |
2137 const static uint8_t needs[16]={ | |
2138 0,1,0,0, | |
2139 2,4,2,0, | |
2140 0,1,0,0, | |
2141 15 | |
2142 }; | |
2143 | |
2144 int x, y, b, r, l; | |
5652
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2145 int16_t tmpIt [64*(32+HTAPS_MAX)]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2146 uint8_t tmp2t[3][stride*(32+HTAPS_MAX)]; |
5648 | 2147 int16_t *tmpI= tmpIt; |
2148 uint8_t *tmp2= tmp2t[0]; | |
6205 | 2149 const uint8_t *hpel[11]; |
5648 | 2150 assert(dx<16 && dy<16); |
2151 r= brane[dx + 16*dy]&15; | |
2152 l= brane[dx + 16*dy]>>4; | |
2153 | |
2154 b= needs[l] | needs[r]; | |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2155 if(p && !p->diag_mc) |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2156 b= 15; |
5648 | 2157 |
2158 if(b&5){ | |
5652
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2159 for(y=0; y < b_h+HTAPS_MAX-1; y++){ |
5649 | 2160 for(x=0; x < b_w; x++){ |
5652
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2161 int a_1=src[x + HTAPS_MAX/2-4]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2162 int a0= src[x + HTAPS_MAX/2-3]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2163 int a1= src[x + HTAPS_MAX/2-2]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2164 int a2= src[x + HTAPS_MAX/2-1]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2165 int a3= src[x + HTAPS_MAX/2+0]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2166 int a4= src[x + HTAPS_MAX/2+1]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2167 int a5= src[x + HTAPS_MAX/2+2]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2168 int a6= src[x + HTAPS_MAX/2+3]; |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2169 int am=0; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2170 if(!p || p->fast_mc){ |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2171 am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5); |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2172 tmpI[x]= am; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2173 am= (am+16)>>5; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2174 }else{ |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2175 am= p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6); |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2176 tmpI[x]= am; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2177 am= (am+32)>>6; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2178 } |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2179 |
5649 | 2180 if(am&(~255)) am= ~(am>>31); |
2181 tmp2[x]= am; | |
2182 } | |
2183 tmpI+= 64; | |
2184 tmp2+= stride; | |
2185 src += stride; | |
2138 | 2186 } |
5649 | 2187 src -= stride*y; |
5648 | 2188 } |
5652
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2189 src += HTAPS_MAX/2 - 1; |
5648 | 2190 tmp2= tmp2t[1]; |
2191 | |
2192 if(b&2){ | |
5649 | 2193 for(y=0; y < b_h; y++){ |
2194 for(x=0; x < b_w+1; x++){ | |
5652
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2195 int a_1=src[x + (HTAPS_MAX/2-4)*stride]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2196 int a0= src[x + (HTAPS_MAX/2-3)*stride]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2197 int a1= src[x + (HTAPS_MAX/2-2)*stride]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2198 int a2= src[x + (HTAPS_MAX/2-1)*stride]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2199 int a3= src[x + (HTAPS_MAX/2+0)*stride]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2200 int a4= src[x + (HTAPS_MAX/2+1)*stride]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2201 int a5= src[x + (HTAPS_MAX/2+2)*stride]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2202 int a6= src[x + (HTAPS_MAX/2+3)*stride]; |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2203 int am=0; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2204 if(!p || p->fast_mc) |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2205 am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 16)>>5; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2206 else |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2207 am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 32)>>6; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2208 |
5649 | 2209 if(am&(~255)) am= ~(am>>31); |
2210 tmp2[x]= am; | |
2211 } | |
2212 src += stride; | |
2213 tmp2+= stride; | |
5648 | 2214 } |
5649 | 2215 src -= stride*y; |
5648 | 2216 } |
5652
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2217 src += stride*(HTAPS_MAX/2 - 1); |
5648 | 2218 tmp2= tmp2t[2]; |
2219 tmpI= tmpIt; | |
2220 if(b&4){ | |
2221 for(y=0; y < b_h; y++){ | |
2222 for(x=0; x < b_w; x++){ | |
5652
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2223 int a_1=tmpI[x + (HTAPS_MAX/2-4)*64]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2224 int a0= tmpI[x + (HTAPS_MAX/2-3)*64]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2225 int a1= tmpI[x + (HTAPS_MAX/2-2)*64]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2226 int a2= tmpI[x + (HTAPS_MAX/2-1)*64]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2227 int a3= tmpI[x + (HTAPS_MAX/2+0)*64]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2228 int a4= tmpI[x + (HTAPS_MAX/2+1)*64]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2229 int a5= tmpI[x + (HTAPS_MAX/2+2)*64]; |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2230 int a6= tmpI[x + (HTAPS_MAX/2+3)*64]; |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2231 int am=0; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2232 if(!p || p->fast_mc) |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2233 am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 512)>>10; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2234 else |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2235 am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 2048)>>12; |
5648 | 2236 if(am&(~255)) am= ~(am>>31); |
2237 tmp2[x]= am; | |
2238 } | |
2239 tmpI+= 64; | |
2240 tmp2+= stride; | |
2138 | 2241 } |
5648 | 2242 } |
2243 | |
2244 hpel[ 0]= src; | |
5652
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2245 hpel[ 1]= tmp2t[0] + stride*(HTAPS_MAX/2-1); |
5648 | 2246 hpel[ 2]= src + 1; |
2247 | |
2248 hpel[ 4]= tmp2t[1]; | |
2249 hpel[ 5]= tmp2t[2]; | |
2250 hpel[ 6]= tmp2t[1] + 1; | |
2251 | |
2252 hpel[ 8]= src + stride; | |
2253 hpel[ 9]= hpel[1] + stride; | |
2254 hpel[10]= hpel[8] + 1; | |
2255 | |
2256 if(b==15){ | |
6205 | 2257 const uint8_t *src1= hpel[dx/8 + dy/8*4 ]; |
2258 const uint8_t *src2= hpel[dx/8 + dy/8*4+1]; | |
2259 const uint8_t *src3= hpel[dx/8 + dy/8*4+4]; | |
2260 const uint8_t *src4= hpel[dx/8 + dy/8*4+5]; | |
5648 | 2261 dx&=7; |
2262 dy&=7; | |
2263 for(y=0; y < b_h; y++){ | |
2264 for(x=0; x < b_w; x++){ | |
2265 dst[x]= ((8-dx)*(8-dy)*src1[x] + dx*(8-dy)*src2[x]+ | |
2266 (8-dx)* dy *src3[x] + dx* dy *src4[x]+32)>>6; | |
2267 } | |
2268 src1+=stride; | |
2269 src2+=stride; | |
2270 src3+=stride; | |
2271 src4+=stride; | |
2272 dst +=stride; | |
2273 } | |
2274 }else{ | |
6205 | 2275 const uint8_t *src1= hpel[l]; |
2276 const uint8_t *src2= hpel[r]; | |
5648 | 2277 int a= weight[((dx&7) + (8*(dy&7)))]; |
2278 int b= 8-a; | |
2279 for(y=0; y < b_h; y++){ | |
2280 for(x=0; x < b_w; x++){ | |
2281 dst[x]= (a*src1[x] + b*src2[x] + 4)>>3; | |
2282 } | |
2283 src1+=stride; | |
2284 src2+=stride; | |
2285 dst +=stride; | |
2286 } | |
2138 | 2287 } |
2288 } | |
2289 | |
2290 #define mca(dx,dy,b_w)\ | |
5254 | 2291 static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, int stride, int h){\ |
5652
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2292 uint8_t tmp[stride*(b_w+HTAPS_MAX-1)];\ |
2138 | 2293 assert(h==b_w);\ |
5652
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2294 mc_block(NULL, dst, src-(HTAPS_MAX/2-1)-(HTAPS_MAX/2-1)*stride, tmp, stride, b_w, b_w, dx, dy);\ |
2138 | 2295 } |
2296 | |
2297 mca( 0, 0,16) | |
2298 mca( 8, 0,16) | |
2299 mca( 0, 8,16) | |
2300 mca( 8, 8,16) | |
2224
11d54cb7ac4e
100l (forgot to set the 8x8 mc functions, so ME did use the mpeg ones ...)
michael
parents:
2223
diff
changeset
|
2301 mca( 0, 0,8) |
11d54cb7ac4e
100l (forgot to set the 8x8 mc functions, so ME did use the mpeg ones ...)
michael
parents:
2223
diff
changeset
|
2302 mca( 8, 0,8) |
11d54cb7ac4e
100l (forgot to set the 8x8 mc functions, so ME did use the mpeg ones ...)
michael
parents:
2223
diff
changeset
|
2303 mca( 0, 8,8) |
11d54cb7ac4e
100l (forgot to set the 8x8 mc functions, so ME did use the mpeg ones ...)
michael
parents:
2223
diff
changeset
|
2304 mca( 8, 8,8) |
2138 | 2305 |
3314 | 2306 static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){ |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2307 if(block->type & BLOCK_INTRA){ |
2206 | 2308 int x, y; |
3018 | 2309 const int color = block->color[plane_index]; |
2310 const int color4= color*0x01010101; | |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2311 if(b_w==32){ |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2312 for(y=0; y < b_h; y++){ |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2313 *(uint32_t*)&dst[0 + y*stride]= color4; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2314 *(uint32_t*)&dst[4 + y*stride]= color4; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2315 *(uint32_t*)&dst[8 + y*stride]= color4; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2316 *(uint32_t*)&dst[12+ y*stride]= color4; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2317 *(uint32_t*)&dst[16+ y*stride]= color4; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2318 *(uint32_t*)&dst[20+ y*stride]= color4; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2319 *(uint32_t*)&dst[24+ y*stride]= color4; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2320 *(uint32_t*)&dst[28+ y*stride]= color4; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2321 } |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2322 }else if(b_w==16){ |
3018 | 2323 for(y=0; y < b_h; y++){ |
2324 *(uint32_t*)&dst[0 + y*stride]= color4; | |
2325 *(uint32_t*)&dst[4 + y*stride]= color4; | |
2326 *(uint32_t*)&dst[8 + y*stride]= color4; | |
2327 *(uint32_t*)&dst[12+ y*stride]= color4; | |
2328 } | |
2329 }else if(b_w==8){ | |
2330 for(y=0; y < b_h; y++){ | |
2331 *(uint32_t*)&dst[0 + y*stride]= color4; | |
2332 *(uint32_t*)&dst[4 + y*stride]= color4; | |
2333 } | |
2334 }else if(b_w==4){ | |
2335 for(y=0; y < b_h; y++){ | |
2336 *(uint32_t*)&dst[0 + y*stride]= color4; | |
2337 } | |
2338 }else{ | |
2339 for(y=0; y < b_h; y++){ | |
2340 for(x=0; x < b_w; x++){ | |
2341 dst[x + y*stride]= color; | |
2342 } | |
2138 | 2343 } |
2344 } | |
2345 }else{ | |
3314 | 2346 uint8_t *src= s->last_picture[block->ref].data[plane_index]; |
2206 | 2347 const int scale= plane_index ? s->mv_scale : 2*s->mv_scale; |
2348 int mx= block->mx*scale; | |
2349 int my= block->my*scale; | |
2223 | 2350 const int dx= mx&15; |
2351 const int dy= my&15; | |
3020
c75fb0747e74
use h264 MC functions for 2xX Xx2 blocks in snow too
michael
parents:
3018
diff
changeset
|
2352 const int tab_index= 3 - (b_w>>2) + (b_w>>4); |
5652
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2353 sx += (mx>>4) - (HTAPS_MAX/2-1); |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2354 sy += (my>>4) - (HTAPS_MAX/2-1); |
2206 | 2355 src += sx + sy*stride; |
5652
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2356 if( (unsigned)sx >= w - b_w - (HTAPS_MAX-2) |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2357 || (unsigned)sy >= h - b_h - (HTAPS_MAX-2)){ |
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2358 ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+HTAPS_MAX-1, b_h+HTAPS_MAX-1, sx, sy, w, h); |
2206 | 2359 src= tmp + MB_SIZE; |
2138 | 2360 } |
3189 | 2361 // assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h); |
2362 // assert(!(b_w&(b_w-1))); | |
3018 | 2363 assert(b_w>1 && b_h>1); |
6206
ea591bc7522a
add parenthesis, fix warning: snow.c:2413: warning: suggest parentheses around && within ||
bcoudurier
parents:
6205
diff
changeset
|
2364 assert((tab_index>=0 && tab_index<4) || b_w==32); |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2365 if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)) || !s->plane[plane_index].fast_mc ) |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2366 mc_block(&s->plane[plane_index], dst, src, tmp, stride, b_w, b_h, dx, dy); |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2367 else if(b_w==32){ |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2368 int y; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2369 for(y=0; y<b_h; y+=16){ |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2370 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 3 + (y+3)*stride,stride); |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2371 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 19 + (y+3)*stride,stride); |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2372 } |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2373 }else if(b_w==b_h) |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2374 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst,src + 3 + 3*stride,stride); |
3018 | 2375 else if(b_w==2*b_h){ |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2376 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst ,src + 3 + 3*stride,stride); |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2377 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 3 + b_h + 3*stride,stride); |
3018 | 2378 }else{ |
2379 assert(2*b_w==b_h); | |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2380 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst ,src + 3 + 3*stride ,stride); |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
2381 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst+b_w*stride,src + 3 + 3*stride+b_w*stride,stride); |
3018 | 2382 } |
2138 | 2383 } |
2384 } | |
2385 | |
4436
d3e389536b0a
Add the const specifier as needed to reduce the number of warnings.
takis
parents:
4409
diff
changeset
|
2386 void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, |
3198
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2387 int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){ |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2388 int y, x; |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2389 IDWTELEM * dst; |
3198
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2390 for(y=0; y<b_h; y++){ |
5409 | 2391 //FIXME ugly misuse of obmc_stride |
4436
d3e389536b0a
Add the const specifier as needed to reduce the number of warnings.
takis
parents:
4409
diff
changeset
|
2392 const uint8_t *obmc1= obmc + y*obmc_stride; |
d3e389536b0a
Add the const specifier as needed to reduce the number of warnings.
takis
parents:
4409
diff
changeset
|
2393 const uint8_t *obmc2= obmc1+ (obmc_stride>>1); |
d3e389536b0a
Add the const specifier as needed to reduce the number of warnings.
takis
parents:
4409
diff
changeset
|
2394 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1); |
d3e389536b0a
Add the const specifier as needed to reduce the number of warnings.
takis
parents:
4409
diff
changeset
|
2395 const uint8_t *obmc4= obmc3+ (obmc_stride>>1); |
3198
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2396 dst = slice_buffer_get_line(sb, src_y + y); |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2397 for(x=0; x<b_w; x++){ |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2398 int v= obmc1[x] * block[3][x + y*src_stride] |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2399 +obmc2[x] * block[2][x + y*src_stride] |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2400 +obmc3[x] * block[1][x + y*src_stride] |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2401 +obmc4[x] * block[0][x + y*src_stride]; |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2402 |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2403 v <<= 8 - LOG2_OBMC_MAX; |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2404 if(FRAC_BITS != 8){ |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2405 v >>= 8 - FRAC_BITS; |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2406 } |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2407 if(add){ |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2408 v += dst[x + src_x]; |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2409 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS; |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2410 if(v&(~255)) v= ~(v>>31); |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2411 dst8[x + y*src_stride] = v; |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2412 }else{ |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2413 dst[x + src_x] -= v; |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2414 } |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2415 } |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2416 } |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2417 } |
6b9f0c4fbdbe
First part of a series of speed-enchancing patches.
gpoirier
parents:
3197
diff
changeset
|
2418 |
6412 | 2419 //FIXME name cleanup (b_w, block_w, b_width stuff) |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2420 static av_always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer *sb, IDWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){ |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2421 const int b_width = s->b_width << s->block_max_depth; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2422 const int b_height= s->b_height << s->block_max_depth; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2423 const int b_stride= b_width; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2424 BlockNode *lt= &s->block[b_x + b_y*b_stride]; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2425 BlockNode *rt= lt+1; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2426 BlockNode *lb= lt+b_stride; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2427 BlockNode *rb= lb+1; |
2967 | 2428 uint8_t *block[4]; |
2842 | 2429 int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride; |
2430 uint8_t tmp[src_stride*7*MB_SIZE]; //FIXME align | |
2431 uint8_t *ptmp; | |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2432 int x,y; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2433 |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2434 if(b_x<0){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2435 lt= rt; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2436 lb= rb; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2437 }else if(b_x + 1 >= b_width){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2438 rt= lt; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2439 rb= lb; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2440 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2441 if(b_y<0){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2442 lt= lb; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2443 rt= rb; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2444 }else if(b_y + 1 >= b_height){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2445 lb= lt; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2446 rb= rt; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2447 } |
2967 | 2448 |
6412 | 2449 if(src_x<0){ //FIXME merge with prev & always round internal width up to *16 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2450 obmc -= src_x; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2451 b_w += src_x; |
3662
fc714e9a5419
snow cosmetics: merge the sliced and non-sliced versions of add_yblock
lorenm
parents:
3661
diff
changeset
|
2452 if(!sliced && !offset_dst) |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2453 dst -= src_x; |
2206 | 2454 src_x=0; |
2455 }else if(src_x + b_w > w){ | |
2456 b_w = w - src_x; | |
2457 } | |
2458 if(src_y<0){ | |
2459 obmc -= src_y*obmc_stride; | |
2460 b_h += src_y; | |
3662
fc714e9a5419
snow cosmetics: merge the sliced and non-sliced versions of add_yblock
lorenm
parents:
3661
diff
changeset
|
2461 if(!sliced && !offset_dst) |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2462 dst -= src_y*dst_stride; |
2206 | 2463 src_y=0; |
2464 }else if(src_y + b_h> h){ | |
2465 b_h = h - src_y; | |
2466 } | |
2967 | 2467 |
2206 | 2468 if(b_w<=0 || b_h<=0) return; |
2469 | |
6414 | 2470 assert(src_stride > 2*MB_SIZE + 5); |
2471 | |
3662
fc714e9a5419
snow cosmetics: merge the sliced and non-sliced versions of add_yblock
lorenm
parents:
3661
diff
changeset
|
2472 if(!sliced && offset_dst) |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2473 dst += src_x + src_y*dst_stride; |
2249
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2474 dst8+= src_x + src_y*src_stride; |
2206 | 2475 // src += src_x + src_y*src_stride; |
2476 | |
2842 | 2477 ptmp= tmp + 3*tmp_step; |
2478 block[0]= ptmp; | |
2479 ptmp+=tmp_step; | |
3314 | 2480 pred_block(s, block[0], tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h); |
2206 | 2481 |
2482 if(same_block(lt, rt)){ | |
2483 block[1]= block[0]; | |
2484 }else{ | |
2842 | 2485 block[1]= ptmp; |
2486 ptmp+=tmp_step; | |
3314 | 2487 pred_block(s, block[1], tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h); |
2206 | 2488 } |
2967 | 2489 |
2206 | 2490 if(same_block(lt, lb)){ |
2491 block[2]= block[0]; | |
2492 }else if(same_block(rt, lb)){ | |
2493 block[2]= block[1]; | |
2494 }else{ | |
2842 | 2495 block[2]= ptmp; |
2496 ptmp+=tmp_step; | |
3314 | 2497 pred_block(s, block[2], tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h); |
2206 | 2498 } |
2499 | |
2500 if(same_block(lt, rb) ){ | |
2501 block[3]= block[0]; | |
2502 }else if(same_block(rt, rb)){ | |
2503 block[3]= block[1]; | |
2504 }else if(same_block(lb, rb)){ | |
2505 block[3]= block[2]; | |
2506 }else{ | |
2842 | 2507 block[3]= ptmp; |
3314 | 2508 pred_block(s, block[3], tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h); |
2206 | 2509 } |
2510 #if 0 | |
2511 for(y=0; y<b_h; y++){ | |
2512 for(x=0; x<b_w; x++){ | |
2513 int v= obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX); | |
2514 if(add) dst[x + y*dst_stride] += v; | |
2515 else dst[x + y*dst_stride] -= v; | |
2516 } | |
2517 } | |
2518 for(y=0; y<b_h; y++){ | |
2519 uint8_t *obmc2= obmc + (obmc_stride>>1); | |
2520 for(x=0; x<b_w; x++){ | |
2521 int v= obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX); | |
2522 if(add) dst[x + y*dst_stride] += v; | |
2523 else dst[x + y*dst_stride] -= v; | |
2524 } | |
2525 } | |
2526 for(y=0; y<b_h; y++){ | |
2527 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1); | |
2528 for(x=0; x<b_w; x++){ | |
2529 int v= obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX); | |
2530 if(add) dst[x + y*dst_stride] += v; | |
2531 else dst[x + y*dst_stride] -= v; | |
2532 } | |
2533 } | |
2534 for(y=0; y<b_h; y++){ | |
2535 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1); | |
2536 uint8_t *obmc4= obmc3+ (obmc_stride>>1); | |
2537 for(x=0; x<b_w; x++){ | |
2538 int v= obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX); | |
2539 if(add) dst[x + y*dst_stride] += v; | |
2540 else dst[x + y*dst_stride] -= v; | |
2541 } | |
2542 } | |
2543 #else | |
3662
fc714e9a5419
snow cosmetics: merge the sliced and non-sliced versions of add_yblock
lorenm
parents:
3661
diff
changeset
|
2544 if(sliced){ |
fc714e9a5419
snow cosmetics: merge the sliced and non-sliced versions of add_yblock
lorenm
parents:
3661
diff
changeset
|
2545 s->dsp.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8); |
fc714e9a5419
snow cosmetics: merge the sliced and non-sliced versions of add_yblock
lorenm
parents:
3661
diff
changeset
|
2546 }else |
2206 | 2547 for(y=0; y<b_h; y++){ |
5409 | 2548 //FIXME ugly misuse of obmc_stride |
4436
d3e389536b0a
Add the const specifier as needed to reduce the number of warnings.
takis
parents:
4409
diff
changeset
|
2549 const uint8_t *obmc1= obmc + y*obmc_stride; |
d3e389536b0a
Add the const specifier as needed to reduce the number of warnings.
takis
parents:
4409
diff
changeset
|
2550 const uint8_t *obmc2= obmc1+ (obmc_stride>>1); |
d3e389536b0a
Add the const specifier as needed to reduce the number of warnings.
takis
parents:
4409
diff
changeset
|
2551 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1); |
d3e389536b0a
Add the const specifier as needed to reduce the number of warnings.
takis
parents:
4409
diff
changeset
|
2552 const uint8_t *obmc4= obmc3+ (obmc_stride>>1); |
2206 | 2553 for(x=0; x<b_w; x++){ |
2554 int v= obmc1[x] * block[3][x + y*src_stride] | |
2555 +obmc2[x] * block[2][x + y*src_stride] | |
2556 +obmc3[x] * block[1][x + y*src_stride] | |
2557 +obmc4[x] * block[0][x + y*src_stride]; | |
2967 | 2558 |
2249
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2559 v <<= 8 - LOG2_OBMC_MAX; |
2246 | 2560 if(FRAC_BITS != 8){ |
2561 v >>= 8 - FRAC_BITS; | |
2562 } | |
2249
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2563 if(add){ |
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2564 v += dst[x + y*dst_stride]; |
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2565 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS; |
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2566 if(v&(~255)) v= ~(v>>31); |
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2567 dst8[x + y*src_stride] = v; |
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2568 }else{ |
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2569 dst[x + y*dst_stride] -= v; |
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2570 } |
2206 | 2571 } |
2572 } | |
5910 | 2573 #endif /* 0 */ |
2206 | 2574 } |
2575 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2576 static av_always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, IDWTELEM * old_buffer, int plane_index, int add, int mb_y){ |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2577 Plane *p= &s->plane[plane_index]; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2578 const int mb_w= s->b_width << s->block_max_depth; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2579 const int mb_h= s->b_height << s->block_max_depth; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2580 int x, y, mb_x; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2581 int block_size = MB_SIZE >> s->block_max_depth; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2582 int block_w = plane_index ? block_size/2 : block_size; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2583 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth]; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2584 int obmc_stride= plane_index ? block_size : 2*block_size; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2585 int ref_stride= s->current_picture.linesize[plane_index]; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2586 uint8_t *dst8= s->current_picture.data[plane_index]; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2587 int w= p->width; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2588 int h= p->height; |
2967 | 2589 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2590 if(s->keyframe || (s->avctx->debug&512)){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2591 if(mb_y==mb_h) |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2592 return; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2593 |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2594 if(add){ |
2604
b7e6c3d31c65
Snow segfault bug in revision 1.43 patch by (Yartrebo /yartrebo earthlink net/)
michael
parents:
2602
diff
changeset
|
2595 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++) |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2596 { |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2597 // DWTELEM * line = slice_buffer_get_line(sb, y); |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2598 IDWTELEM * line = sb->line[y]; |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2599 for(x=0; x<w; x++) |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2600 { |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2601 // int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1)); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2602 int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1)); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2603 v >>= FRAC_BITS; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2604 if(v&(~255)) v= ~(v>>31); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2605 dst8[x + y*ref_stride]= v; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2606 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2607 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2608 }else{ |
2604
b7e6c3d31c65
Snow segfault bug in revision 1.43 patch by (Yartrebo /yartrebo earthlink net/)
michael
parents:
2602
diff
changeset
|
2609 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++) |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2610 { |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2611 // DWTELEM * line = slice_buffer_get_line(sb, y); |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2612 IDWTELEM * line = sb->line[y]; |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2613 for(x=0; x<w; x++) |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2614 { |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2615 line[x] -= 128 << FRAC_BITS; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2616 // buf[x + y*w]-= 128<<FRAC_BITS; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2617 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2618 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2619 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2620 |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2621 return; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2622 } |
2967 | 2623 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2624 for(mb_x=0; mb_x<=mb_w; mb_x++){ |
3662
fc714e9a5419
snow cosmetics: merge the sliced and non-sliced versions of add_yblock
lorenm
parents:
3661
diff
changeset
|
2625 add_yblock(s, 1, sb, old_buffer, dst8, obmc, |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2626 block_w*mb_x - block_w/2, |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2627 block_w*mb_y - block_w/2, |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2628 block_w, block_w, |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2629 w, h, |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2630 w, ref_stride, obmc_stride, |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2631 mb_x - 1, mb_y - 1, |
3662
fc714e9a5419
snow cosmetics: merge the sliced and non-sliced versions of add_yblock
lorenm
parents:
3661
diff
changeset
|
2632 add, 0, plane_index); |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2633 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2634 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
2635 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2636 static av_always_inline void predict_slice(SnowContext *s, IDWTELEM *buf, int plane_index, int add, int mb_y){ |
2138 | 2637 Plane *p= &s->plane[plane_index]; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2638 const int mb_w= s->b_width << s->block_max_depth; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2639 const int mb_h= s->b_height << s->block_max_depth; |
2562 | 2640 int x, y, mb_x; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2641 int block_size = MB_SIZE >> s->block_max_depth; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
2642 int block_w = plane_index ? block_size/2 : block_size; |
2206 | 2643 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth]; |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2644 const int obmc_stride= plane_index ? block_size : 2*block_size; |
2249
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2645 int ref_stride= s->current_picture.linesize[plane_index]; |
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2646 uint8_t *dst8= s->current_picture.data[plane_index]; |
2138 | 2647 int w= p->width; |
2648 int h= p->height; | |
2967 | 2649 |
2206 | 2650 if(s->keyframe || (s->avctx->debug&512)){ |
2562 | 2651 if(mb_y==mb_h) |
2652 return; | |
2653 | |
2249
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2654 if(add){ |
2604
b7e6c3d31c65
Snow segfault bug in revision 1.43 patch by (Yartrebo /yartrebo earthlink net/)
michael
parents:
2602
diff
changeset
|
2655 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){ |
2249
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2656 for(x=0; x<w; x++){ |
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2657 int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1)); |
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2658 v >>= FRAC_BITS; |
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2659 if(v&(~255)) v= ~(v>>31); |
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2660 dst8[x + y*ref_stride]= v; |
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2661 } |
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2662 } |
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2663 }else{ |
2604
b7e6c3d31c65
Snow segfault bug in revision 1.43 patch by (Yartrebo /yartrebo earthlink net/)
michael
parents:
2602
diff
changeset
|
2664 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){ |
2249
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2665 for(x=0; x<w; x++){ |
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2666 buf[x + y*w]-= 128<<FRAC_BITS; |
2b1a5e1fd449
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
michael
parents:
2246
diff
changeset
|
2667 } |
2206 | 2668 } |
2138 | 2669 } |
2206 | 2670 |
2671 return; | |
2138 | 2672 } |
2967 | 2673 |
6414 | 2674 for(mb_x=0; mb_x<=mb_w; mb_x++){ |
2675 add_yblock(s, 0, NULL, buf, dst8, obmc, | |
2676 block_w*mb_x - block_w/2, | |
2677 block_w*mb_y - block_w/2, | |
2678 block_w, block_w, | |
2679 w, h, | |
2680 w, ref_stride, obmc_stride, | |
2681 mb_x - 1, mb_y - 1, | |
2682 add, 1, plane_index); | |
2683 } | |
2562 | 2684 } |
2685 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2686 static av_always_inline void predict_plane(SnowContext *s, IDWTELEM *buf, int plane_index, int add){ |
2562 | 2687 const int mb_h= s->b_height << s->block_max_depth; |
2688 int mb_y; | |
2689 for(mb_y=0; mb_y<=mb_h; mb_y++) | |
2690 predict_slice(s, buf, plane_index, add, mb_y); | |
2138 | 2691 } |
2692 | |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2693 static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){ |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2694 int i, x2, y2; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2695 Plane *p= &s->plane[plane_index]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2696 const int block_size = MB_SIZE >> s->block_max_depth; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2697 const int block_w = plane_index ? block_size/2 : block_size; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2698 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2699 const int obmc_stride= plane_index ? block_size : 2*block_size; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2700 const int ref_stride= s->current_picture.linesize[plane_index]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2701 uint8_t *src= s-> input_picture.data[plane_index]; |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2702 IDWTELEM *dst= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4; //FIXME change to unsigned |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2703 const int b_stride = s->b_width << s->block_max_depth; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2704 const int w= p->width; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2705 const int h= p->height; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2706 int index= mb_x + mb_y*b_stride; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2707 BlockNode *b= &s->block[index]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2708 BlockNode backup= *b; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2709 int ab=0; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2710 int aa=0; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2711 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2712 b->type|= BLOCK_INTRA; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2713 b->color[plane_index]= 0; |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2714 memset(dst, 0, obmc_stride*obmc_stride*sizeof(IDWTELEM)); |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2715 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2716 for(i=0; i<4; i++){ |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2717 int mb_x2= mb_x + (i &1) - 1; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2718 int mb_y2= mb_y + (i>>1) - 1; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2719 int x= block_w*mb_x2 + block_w/2; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2720 int y= block_w*mb_y2 + block_w/2; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2721 |
3662
fc714e9a5419
snow cosmetics: merge the sliced and non-sliced versions of add_yblock
lorenm
parents:
3661
diff
changeset
|
2722 add_yblock(s, 0, NULL, dst + ((i&1)+(i>>1)*obmc_stride)*block_w, NULL, obmc, |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2723 x, y, block_w, block_w, w, h, obmc_stride, ref_stride, obmc_stride, mb_x2, mb_y2, 0, 0, plane_index); |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2724 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2725 for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_w); y2++){ |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2726 for(x2= FFMAX(x, 0); x2<FFMIN(w, x+block_w); x2++){ |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2727 int index= x2-(block_w*mb_x - block_w/2) + (y2-(block_w*mb_y - block_w/2))*obmc_stride; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2728 int obmc_v= obmc[index]; |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2729 int d; |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2730 if(y<0) obmc_v += obmc[index + block_w*obmc_stride]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2731 if(x<0) obmc_v += obmc[index + block_w]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2732 if(y+block_w>h) obmc_v += obmc[index - block_w*obmc_stride]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2733 if(x+block_w>w) obmc_v += obmc[index - block_w]; |
6412 | 2734 //FIXME precalculate this or simplify it somehow else |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2735 |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2736 d = -dst[index] + (1<<(FRAC_BITS-1)); |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2737 dst[index] = d; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2738 ab += (src[x2 + y2*ref_stride] - (d>>FRAC_BITS)) * obmc_v; |
6412 | 2739 aa += obmc_v * obmc_v; //FIXME precalculate this |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2740 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2741 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2742 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2743 *b= backup; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2744 |
5127 | 2745 return av_clip(((ab<<LOG2_OBMC_MAX) + aa/2)/aa, 0, 255); //FIXME we should not need clipping |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2746 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2747 |
3051 | 2748 static inline int get_block_bits(SnowContext *s, int x, int y, int w){ |
2749 const int b_stride = s->b_width << s->block_max_depth; | |
2750 const int b_height = s->b_height<< s->block_max_depth; | |
2751 int index= x + y*b_stride; | |
4408
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
2752 const BlockNode *b = &s->block[index]; |
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
2753 const BlockNode *left = x ? &s->block[index-1] : &null_block; |
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
2754 const BlockNode *top = y ? &s->block[index-b_stride] : &null_block; |
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
2755 const BlockNode *tl = y && x ? &s->block[index-b_stride-1] : left; |
c3be13eac2d6
Bring down the number of snow.c warnings from 27 to 17 by using the const
takis
parents:
4407
diff
changeset
|
2756 const BlockNode *tr = y && x+w<b_stride ? &s->block[index-b_stride+w] : tl; |
3051 | 2757 int dmx, dmy; |
4001 | 2758 // int mx_context= av_log2(2*FFABS(left->mx - top->mx)); |
2759 // int my_context= av_log2(2*FFABS(left->my - top->my)); | |
3051 | 2760 |
2761 if(x<0 || x>=b_stride || y>=b_height) | |
2762 return 0; | |
2763 /* | |
2764 1 0 0 | |
2765 01X 1-2 1 | |
2766 001XX 3-6 2-3 | |
2767 0001XXX 7-14 4-7 | |
2768 00001XXXX 15-30 8-15 | |
2769 */ | |
2770 //FIXME try accurate rate | |
6412 | 2771 //FIXME intra and inter predictors if surrounding blocks are not the same type |
3051 | 2772 if(b->type & BLOCK_INTRA){ |
4001 | 2773 return 3+2*( av_log2(2*FFABS(left->color[0] - b->color[0])) |
2774 + av_log2(2*FFABS(left->color[1] - b->color[1])) | |
2775 + av_log2(2*FFABS(left->color[2] - b->color[2]))); | |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
2776 }else{ |
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
2777 pred_mv(s, &dmx, &dmy, b->ref, left, top, tr); |
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
2778 dmx-= b->mx; |
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
2779 dmy-= b->my; |
4001 | 2780 return 2*(1 + av_log2(2*FFABS(dmx)) //FIXME kill the 2* can be merged in lambda |
2781 + av_log2(2*FFABS(dmy)) | |
3314 | 2782 + av_log2(2*b->ref)); |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
2783 } |
3051 | 2784 } |
2785 | |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2786 static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, const uint8_t *obmc_edged){ |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2787 Plane *p= &s->plane[plane_index]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2788 const int block_size = MB_SIZE >> s->block_max_depth; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2789 const int block_w = plane_index ? block_size/2 : block_size; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2790 const int obmc_stride= plane_index ? block_size : 2*block_size; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2791 const int ref_stride= s->current_picture.linesize[plane_index]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2792 uint8_t *dst= s->current_picture.data[plane_index]; |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2793 uint8_t *src= s-> input_picture.data[plane_index]; |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2794 IDWTELEM *pred= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4; |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2795 uint8_t cur[ref_stride*2*MB_SIZE]; //FIXME alignment |
5652
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
2796 uint8_t tmp[ref_stride*(2*MB_SIZE+HTAPS_MAX-1)]; |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2797 const int b_stride = s->b_width << s->block_max_depth; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2798 const int b_height = s->b_height<< s->block_max_depth; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2799 const int w= p->width; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2800 const int h= p->height; |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2801 int distortion; |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2802 int rate= 0; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2803 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp); |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2804 int sx= block_w*mb_x - block_w/2; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2805 int sy= block_w*mb_y - block_w/2; |
3206 | 2806 int x0= FFMAX(0,-sx); |
2807 int y0= FFMAX(0,-sy); | |
2808 int x1= FFMIN(block_w*2, w-sx); | |
2809 int y1= FFMIN(block_w*2, h-sy); | |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2810 int i,x,y; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2811 |
3314 | 2812 pred_block(s, cur, tmp, ref_stride, sx, sy, block_w*2, block_w*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h); |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2813 |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2814 for(y=y0; y<y1; y++){ |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2815 const uint8_t *obmc1= obmc_edged + y*obmc_stride; |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2816 const IDWTELEM *pred1 = pred + y*obmc_stride; |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2817 uint8_t *cur1 = cur + y*ref_stride; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2818 uint8_t *dst1 = dst + sx + (sy+y)*ref_stride; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2819 for(x=x0; x<x1; x++){ |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2820 #if FRAC_BITS >= LOG2_OBMC_MAX |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2821 int v = (cur1[x] * obmc1[x]) << (FRAC_BITS - LOG2_OBMC_MAX); |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2822 #else |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2823 int v = (cur1[x] * obmc1[x] + (1<<(LOG2_OBMC_MAX - FRAC_BITS-1))) >> (LOG2_OBMC_MAX - FRAC_BITS); |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
2824 #endif |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2825 v = (v + pred1[x]) >> FRAC_BITS; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2826 if(v&(~255)) v= ~(v>>31); |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2827 dst1[x] = v; |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2828 } |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2829 } |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2830 |
3206 | 2831 /* copy the regions where obmc[] = (uint8_t)256 */ |
2832 if(LOG2_OBMC_MAX == 8 | |
2833 && (mb_x == 0 || mb_x == b_stride-1) | |
2834 && (mb_y == 0 || mb_y == b_height-1)){ | |
2835 if(mb_x == 0) | |
2836 x1 = block_w; | |
2837 else | |
2838 x0 = block_w; | |
2839 if(mb_y == 0) | |
2840 y1 = block_w; | |
2841 else | |
2842 y0 = block_w; | |
2843 for(y=y0; y<y1; y++) | |
2844 memcpy(dst + sx+x0 + (sy+y)*ref_stride, cur + x0 + y*ref_stride, x1-x0); | |
2845 } | |
2846 | |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2847 if(block_w==16){ |
3323
87c54a3f8d19
Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation.
lorenm
parents:
3322
diff
changeset
|
2848 /* FIXME rearrange dsputil to fit 32x32 cmp functions */ |
87c54a3f8d19
Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation.
lorenm
parents:
3322
diff
changeset
|
2849 /* FIXME check alignment of the cmp wavelet vs the encoding wavelet */ |
6412 | 2850 /* FIXME cmps overlap but do not cover the wavelet's whole support. |
2851 * So improving the score of one block is not strictly guaranteed | |
2852 * to improve the score of the whole frame, thus iterative motion | |
2853 * estimation does not always converge. */ | |
3323
87c54a3f8d19
Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation.
lorenm
parents:
3322
diff
changeset
|
2854 if(s->avctx->me_cmp == FF_CMP_W97) |
4197 | 2855 distortion = w97_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32); |
3323
87c54a3f8d19
Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation.
lorenm
parents:
3322
diff
changeset
|
2856 else if(s->avctx->me_cmp == FF_CMP_W53) |
4197 | 2857 distortion = w53_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32); |
3323
87c54a3f8d19
Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation.
lorenm
parents:
3322
diff
changeset
|
2858 else{ |
87c54a3f8d19
Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation.
lorenm
parents:
3322
diff
changeset
|
2859 distortion = 0; |
87c54a3f8d19
Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation.
lorenm
parents:
3322
diff
changeset
|
2860 for(i=0; i<4; i++){ |
87c54a3f8d19
Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation.
lorenm
parents:
3322
diff
changeset
|
2861 int off = sx+16*(i&1) + (sy+16*(i>>1))*ref_stride; |
87c54a3f8d19
Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation.
lorenm
parents:
3322
diff
changeset
|
2862 distortion += s->dsp.me_cmp[0](&s->m, src + off, dst + off, ref_stride, 16); |
87c54a3f8d19
Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation.
lorenm
parents:
3322
diff
changeset
|
2863 } |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2864 } |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2865 }else{ |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2866 assert(block_w==8); |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2867 distortion = s->dsp.me_cmp[0](&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, block_w*2); |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2868 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2869 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2870 if(plane_index==0){ |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2871 for(i=0; i<4; i++){ |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2872 /* ..RRr |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2873 * .RXx. |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2874 * rxx.. |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2875 */ |
3051 | 2876 rate += get_block_bits(s, mb_x + (i&1) - (i>>1), mb_y + (i>>1), 1); |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2877 } |
3057 | 2878 if(mb_x == b_stride-2) |
2879 rate += get_block_bits(s, mb_x + 1, mb_y + 1, 1); | |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2880 } |
3051 | 2881 return distortion + rate*penalty_factor; |
2882 } | |
2883 | |
2884 static int get_4block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){ | |
2885 int i, y2; | |
2886 Plane *p= &s->plane[plane_index]; | |
2887 const int block_size = MB_SIZE >> s->block_max_depth; | |
2888 const int block_w = plane_index ? block_size/2 : block_size; | |
2889 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth]; | |
2890 const int obmc_stride= plane_index ? block_size : 2*block_size; | |
2891 const int ref_stride= s->current_picture.linesize[plane_index]; | |
2892 uint8_t *dst= s->current_picture.data[plane_index]; | |
2893 uint8_t *src= s-> input_picture.data[plane_index]; | |
6426 | 2894 //FIXME zero_dst is const but add_yblock changes dst if add is 0 (this is never the case for dst=zero_dst |
2895 // const has only been removed from zero_dst to suppress a warning | |
2896 static IDWTELEM zero_dst[4096]; //FIXME | |
3051 | 2897 const int b_stride = s->b_width << s->block_max_depth; |
2898 const int w= p->width; | |
2899 const int h= p->height; | |
2900 int distortion= 0; | |
2901 int rate= 0; | |
2902 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp); | |
2903 | |
2904 for(i=0; i<9; i++){ | |
2905 int mb_x2= mb_x + (i%3) - 1; | |
2906 int mb_y2= mb_y + (i/3) - 1; | |
2907 int x= block_w*mb_x2 + block_w/2; | |
2908 int y= block_w*mb_y2 + block_w/2; | |
2909 | |
3662
fc714e9a5419
snow cosmetics: merge the sliced and non-sliced versions of add_yblock
lorenm
parents:
3661
diff
changeset
|
2910 add_yblock(s, 0, NULL, zero_dst, dst, obmc, |
3051 | 2911 x, y, block_w, block_w, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, 1, plane_index); |
2912 | |
2913 //FIXME find a cleaner/simpler way to skip the outside stuff | |
2914 for(y2= y; y2<0; y2++) | |
2915 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w); | |
2916 for(y2= h; y2<y+block_w; y2++) | |
2917 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w); | |
2918 if(x<0){ | |
2919 for(y2= y; y2<y+block_w; y2++) | |
2920 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, -x); | |
2921 } | |
2922 if(x+block_w > w){ | |
2923 for(y2= y; y2<y+block_w; y2++) | |
2924 memcpy(dst + w + y2*ref_stride, src + w + y2*ref_stride, x+block_w - w); | |
2925 } | |
2926 | |
2927 assert(block_w== 8 || block_w==16); | |
2928 distortion += s->dsp.me_cmp[block_w==8](&s->m, src + x + y*ref_stride, dst + x + y*ref_stride, ref_stride, block_w); | |
2929 } | |
2930 | |
2931 if(plane_index==0){ | |
2932 BlockNode *b= &s->block[mb_x+mb_y*b_stride]; | |
2933 int merged= same_block(b,b+1) && same_block(b,b+b_stride) && same_block(b,b+b_stride+1); | |
2934 | |
2935 /* ..RRRr | |
2936 * .RXXx. | |
2937 * .RXXx. | |
2938 * rxxx. | |
2939 */ | |
2940 if(merged) | |
2941 rate = get_block_bits(s, mb_x, mb_y, 2); | |
2942 for(i=merged?4:0; i<9; i++){ | |
2943 static const int dxy[9][2] = {{0,0},{1,0},{0,1},{1,1},{2,0},{2,1},{-1,2},{0,2},{1,2}}; | |
2944 rate += get_block_bits(s, mb_x + dxy[i][0], mb_y + dxy[i][1], 1); | |
2945 } | |
2946 } | |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2947 return distortion + rate*penalty_factor; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2948 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2949 |
4283
d6f83e2f8804
rename always_inline to av_always_inline and move to common.h
mru
parents:
4197
diff
changeset
|
2950 static av_always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, const uint8_t *obmc_edged, int *best_rd){ |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2951 const int b_stride= s->b_width << s->block_max_depth; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2952 BlockNode *block= &s->block[mb_x + mb_y * b_stride]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2953 BlockNode backup= *block; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2954 int rd, index, value; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2955 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2956 assert(mb_x>=0 && mb_y>=0); |
2994 | 2957 assert(mb_x<b_stride); |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2958 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2959 if(intra){ |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2960 block->color[0] = p[0]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2961 block->color[1] = p[1]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2962 block->color[2] = p[2]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2963 block->type |= BLOCK_INTRA; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2964 }else{ |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2965 index= (p[0] + 31*p[1]) & (ME_CACHE_SIZE-1); |
3314 | 2966 value= s->me_cache_generation + (p[0]>>10) + (p[1]<<6) + (block->ref<<12); |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2967 if(s->me_cache[index] == value) |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2968 return 0; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2969 s->me_cache[index]= value; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2970 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2971 block->mx= p[0]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2972 block->my= p[1]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2973 block->type &= ~BLOCK_INTRA; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2974 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2975 |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
2976 rd= get_block_rd(s, mb_x, mb_y, 0, obmc_edged); |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2977 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2978 //FIXME chroma |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2979 if(rd < *best_rd){ |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2980 *best_rd= rd; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2981 return 1; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2982 }else{ |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2983 *block= backup; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2984 return 0; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2985 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2986 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
2987 |
6412 | 2988 /* special case for int[2] args we discard afterwards, |
2989 * fixes compilation problem with gcc 2.95 */ | |
4283
d6f83e2f8804
rename always_inline to av_always_inline and move to common.h
mru
parents:
4197
diff
changeset
|
2990 static av_always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, const uint8_t *obmc_edged, int *best_rd){ |
2999
bc83e7a080a5
this fixes compilation with gcc 2.95 which seems to not like array casting... special case inline of check_block which makes the array and passes it
mmu_man
parents:
2998
diff
changeset
|
2991 int p[2] = {p0, p1}; |
3197 | 2992 return check_block(s, mb_x, mb_y, p, 0, obmc_edged, best_rd); |
2999
bc83e7a080a5
this fixes compilation with gcc 2.95 which seems to not like array casting... special case inline of check_block which makes the array and passes it
mmu_man
parents:
2998
diff
changeset
|
2993 } |
bc83e7a080a5
this fixes compilation with gcc 2.95 which seems to not like array casting... special case inline of check_block which makes the array and passes it
mmu_man
parents:
2998
diff
changeset
|
2994 |
4283
d6f83e2f8804
rename always_inline to av_always_inline and move to common.h
mru
parents:
4197
diff
changeset
|
2995 static av_always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int ref, int *best_rd){ |
3051 | 2996 const int b_stride= s->b_width << s->block_max_depth; |
2997 BlockNode *block= &s->block[mb_x + mb_y * b_stride]; | |
2998 BlockNode backup[4]= {block[0], block[1], block[b_stride], block[b_stride+1]}; | |
2999 int rd, index, value; | |
3000 | |
3001 assert(mb_x>=0 && mb_y>=0); | |
3002 assert(mb_x<b_stride); | |
3003 assert(((mb_x|mb_y)&1) == 0); | |
3004 | |
3005 index= (p0 + 31*p1) & (ME_CACHE_SIZE-1); | |
3314 | 3006 value= s->me_cache_generation + (p0>>10) + (p1<<6) + (block->ref<<12); |
3051 | 3007 if(s->me_cache[index] == value) |
3008 return 0; | |
3009 s->me_cache[index]= value; | |
3010 | |
3011 block->mx= p0; | |
3012 block->my= p1; | |
3314 | 3013 block->ref= ref; |
3051 | 3014 block->type &= ~BLOCK_INTRA; |
3015 block[1]= block[b_stride]= block[b_stride+1]= *block; | |
3016 | |
3017 rd= get_4block_rd(s, mb_x, mb_y, 0); | |
3018 | |
3019 //FIXME chroma | |
3020 if(rd < *best_rd){ | |
3021 *best_rd= rd; | |
3022 return 1; | |
3023 }else{ | |
3024 block[0]= backup[0]; | |
3025 block[1]= backup[1]; | |
3026 block[b_stride]= backup[2]; | |
3027 block[b_stride+1]= backup[3]; | |
3028 return 0; | |
3029 } | |
3030 } | |
3031 | |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3032 static void iterative_me(SnowContext *s){ |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3033 int pass, mb_x, mb_y; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3034 const int b_width = s->b_width << s->block_max_depth; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3035 const int b_height= s->b_height << s->block_max_depth; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3036 const int b_stride= b_width; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3037 int color[3]; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3038 |
3194
c30e9bcbb716
seed iterative_me with mvs from conventional search.
lorenm
parents:
3193
diff
changeset
|
3039 { |
c30e9bcbb716
seed iterative_me with mvs from conventional search.
lorenm
parents:
3193
diff
changeset
|
3040 RangeCoder r = s->c; |
c30e9bcbb716
seed iterative_me with mvs from conventional search.
lorenm
parents:
3193
diff
changeset
|
3041 uint8_t state[sizeof(s->block_state)]; |
c30e9bcbb716
seed iterative_me with mvs from conventional search.
lorenm
parents:
3193
diff
changeset
|
3042 memcpy(state, s->block_state, sizeof(s->block_state)); |
c30e9bcbb716
seed iterative_me with mvs from conventional search.
lorenm
parents:
3193
diff
changeset
|
3043 for(mb_y= 0; mb_y<s->b_height; mb_y++) |
c30e9bcbb716
seed iterative_me with mvs from conventional search.
lorenm
parents:
3193
diff
changeset
|
3044 for(mb_x= 0; mb_x<s->b_width; mb_x++) |
c30e9bcbb716
seed iterative_me with mvs from conventional search.
lorenm
parents:
3193
diff
changeset
|
3045 encode_q_branch(s, 0, mb_x, mb_y); |
c30e9bcbb716
seed iterative_me with mvs from conventional search.
lorenm
parents:
3193
diff
changeset
|
3046 s->c = r; |
c30e9bcbb716
seed iterative_me with mvs from conventional search.
lorenm
parents:
3193
diff
changeset
|
3047 memcpy(s->block_state, state, sizeof(s->block_state)); |
c30e9bcbb716
seed iterative_me with mvs from conventional search.
lorenm
parents:
3193
diff
changeset
|
3048 } |
c30e9bcbb716
seed iterative_me with mvs from conventional search.
lorenm
parents:
3193
diff
changeset
|
3049 |
3323
87c54a3f8d19
Snow: fix subband weighting in wavelet cmp functions. use 32x32 cmp in iterative motion estimation.
lorenm
parents:
3322
diff
changeset
|
3050 for(pass=0; pass<25; pass++){ |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3051 int change= 0; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3052 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3053 for(mb_y= 0; mb_y<b_height; mb_y++){ |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3054 for(mb_x= 0; mb_x<b_width; mb_x++){ |
3314 | 3055 int dia_change, i, j, ref; |
3056 int best_rd= INT_MAX, ref_rd; | |
3057 BlockNode backup, ref_b; | |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3058 const int index= mb_x + mb_y * b_stride; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3059 BlockNode *block= &s->block[index]; |
3324 | 3060 BlockNode *tb = mb_y ? &s->block[index-b_stride ] : NULL; |
3061 BlockNode *lb = mb_x ? &s->block[index -1] : NULL; | |
3062 BlockNode *rb = mb_x+1<b_width ? &s->block[index +1] : NULL; | |
3063 BlockNode *bb = mb_y+1<b_height ? &s->block[index+b_stride ] : NULL; | |
3064 BlockNode *tlb= mb_x && mb_y ? &s->block[index-b_stride-1] : NULL; | |
3065 BlockNode *trb= mb_x+1<b_width && mb_y ? &s->block[index-b_stride+1] : NULL; | |
3066 BlockNode *blb= mb_x && mb_y+1<b_height ? &s->block[index+b_stride-1] : NULL; | |
3067 BlockNode *brb= mb_x+1<b_width && mb_y+1<b_height ? &s->block[index+b_stride+1] : NULL; | |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3068 const int b_w= (MB_SIZE >> s->block_max_depth); |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3069 uint8_t obmc_edged[b_w*2][b_w*2]; |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3070 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3071 if(pass && (block->type & BLOCK_OPT)) |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3072 continue; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3073 block->type |= BLOCK_OPT; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3074 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3075 backup= *block; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3076 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3077 if(!s->me_cache_generation) |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3078 memset(s->me_cache, 0, sizeof(s->me_cache)); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3079 s->me_cache_generation += 1<<22; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3080 |
6412 | 3081 //FIXME precalculate |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3082 { |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3083 int x, y; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3084 memcpy(obmc_edged, obmc_tab[s->block_max_depth], b_w*b_w*4); |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3085 if(mb_x==0) |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3086 for(y=0; y<b_w*2; y++) |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3087 memset(obmc_edged[y], obmc_edged[y][0] + obmc_edged[y][b_w-1], b_w); |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3088 if(mb_x==b_stride-1) |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3089 for(y=0; y<b_w*2; y++) |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3090 memset(obmc_edged[y]+b_w, obmc_edged[y][b_w] + obmc_edged[y][b_w*2-1], b_w); |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3091 if(mb_y==0){ |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3092 for(x=0; x<b_w*2; x++) |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3093 obmc_edged[0][x] += obmc_edged[b_w-1][x]; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3094 for(y=1; y<b_w; y++) |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3095 memcpy(obmc_edged[y], obmc_edged[0], b_w*2); |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3096 } |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3097 if(mb_y==b_height-1){ |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3098 for(x=0; x<b_w*2; x++) |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3099 obmc_edged[b_w*2-1][x] += obmc_edged[b_w][x]; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3100 for(y=b_w; y<b_w*2-1; y++) |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3101 memcpy(obmc_edged[y], obmc_edged[b_w*2-1], b_w*2); |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3102 } |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3103 } |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3104 |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3105 //skip stuff outside the picture |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3106 if(mb_x==0 || mb_y==0 || mb_x==b_width-1 || mb_y==b_height-1) |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3107 { |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3108 uint8_t *src= s-> input_picture.data[0]; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3109 uint8_t *dst= s->current_picture.data[0]; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3110 const int stride= s->current_picture.linesize[0]; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3111 const int block_w= MB_SIZE >> s->block_max_depth; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3112 const int sx= block_w*mb_x - block_w/2; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3113 const int sy= block_w*mb_y - block_w/2; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3114 const int w= s->plane[0].width; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3115 const int h= s->plane[0].height; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3116 int y; |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3117 |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3118 for(y=sy; y<0; y++) |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3119 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2); |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3120 for(y=h; y<sy+block_w*2; y++) |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3121 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2); |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3122 if(sx<0){ |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3123 for(y=sy; y<sy+block_w*2; y++) |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3124 memcpy(dst + sx + y*stride, src + sx + y*stride, -sx); |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3125 } |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3126 if(sx+block_w*2 > w){ |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3127 for(y=sy; y<sy+block_w*2; y++) |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3128 memcpy(dst + w + y*stride, src + w + y*stride, sx+block_w*2 - w); |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3129 } |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3130 } |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3131 |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3132 // intra(black) = neighbors' contribution to the current block |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3133 for(i=0; i<3; i++) |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3134 color[i]= get_dc(s, mb_x, mb_y, i); |
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3135 |
5127 | 3136 // get previous score (cannot be cached due to OBMC) |
3057 | 3137 if(pass > 0 && (block->type&BLOCK_INTRA)){ |
3138 int color0[3]= {block->color[0], block->color[1], block->color[2]}; | |
3139 check_block(s, mb_x, mb_y, color0, 1, *obmc_edged, &best_rd); | |
3140 }else | |
3197 | 3141 check_block_inter(s, mb_x, mb_y, block->mx, block->my, *obmc_edged, &best_rd); |
3142 | |
3314 | 3143 ref_b= *block; |
3144 ref_rd= best_rd; | |
3145 for(ref=0; ref < s->ref_frames; ref++){ | |
3146 int16_t (*mvr)[2]= &s->ref_mvs[ref][index]; | |
3147 if(s->ref_scores[ref][index] > s->ref_scores[ref_b.ref][index]*3/2) //FIXME tune threshold | |
3148 continue; | |
3149 block->ref= ref; | |
3150 best_rd= INT_MAX; | |
3151 | |
3152 check_block_inter(s, mb_x, mb_y, mvr[0][0], mvr[0][1], *obmc_edged, &best_rd); | |
3153 check_block_inter(s, mb_x, mb_y, 0, 0, *obmc_edged, &best_rd); | |
3324 | 3154 if(tb) |
3314 | 3155 check_block_inter(s, mb_x, mb_y, mvr[-b_stride][0], mvr[-b_stride][1], *obmc_edged, &best_rd); |
3324 | 3156 if(lb) |
3314 | 3157 check_block_inter(s, mb_x, mb_y, mvr[-1][0], mvr[-1][1], *obmc_edged, &best_rd); |
3324 | 3158 if(rb) |
3314 | 3159 check_block_inter(s, mb_x, mb_y, mvr[1][0], mvr[1][1], *obmc_edged, &best_rd); |
3324 | 3160 if(bb) |
3314 | 3161 check_block_inter(s, mb_x, mb_y, mvr[b_stride][0], mvr[b_stride][1], *obmc_edged, &best_rd); |
3162 | |
3163 /* fullpel ME */ | |
6412 | 3164 //FIXME avoid subpel interpolation / round to nearest integer |
3314 | 3165 do{ |
3166 dia_change=0; | |
3167 for(i=0; i<FFMAX(s->avctx->dia_size, 1); i++){ | |
3168 for(j=0; j<i; j++){ | |
3169 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my+(4*j), *obmc_edged, &best_rd); | |
3170 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my-(4*j), *obmc_edged, &best_rd); | |
3171 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my-(4*j), *obmc_edged, &best_rd); | |
3172 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my+(4*j), *obmc_edged, &best_rd); | |
3173 } | |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3174 } |
3314 | 3175 }while(dia_change); |
3176 /* subpel ME */ | |
3177 do{ | |
3178 static const int square[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},}; | |
3179 dia_change=0; | |
3180 for(i=0; i<8; i++) | |
3181 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+square[i][0], block->my+square[i][1], *obmc_edged, &best_rd); | |
3182 }while(dia_change); | |
3183 //FIXME or try the standard 2 pass qpel or similar | |
3184 | |
3185 mvr[0][0]= block->mx; | |
3186 mvr[0][1]= block->my; | |
3187 if(ref_rd > best_rd){ | |
3188 ref_rd= best_rd; | |
3189 ref_b= *block; | |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3190 } |
3314 | 3191 } |
3192 best_rd= ref_rd; | |
3193 *block= ref_b; | |
2998 | 3194 #if 1 |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
3195 check_block(s, mb_x, mb_y, color, 1, *obmc_edged, &best_rd); |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3196 //FIXME RD style color selection |
2998 | 3197 #endif |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3198 if(!same_block(block, &backup)){ |
3324 | 3199 if(tb ) tb ->type &= ~BLOCK_OPT; |
3200 if(lb ) lb ->type &= ~BLOCK_OPT; | |
3201 if(rb ) rb ->type &= ~BLOCK_OPT; | |
3202 if(bb ) bb ->type &= ~BLOCK_OPT; | |
3203 if(tlb) tlb->type &= ~BLOCK_OPT; | |
3204 if(trb) trb->type &= ~BLOCK_OPT; | |
3205 if(blb) blb->type &= ~BLOCK_OPT; | |
3206 if(brb) brb->type &= ~BLOCK_OPT; | |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3207 change ++; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3208 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3209 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3210 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3211 av_log(NULL, AV_LOG_ERROR, "pass:%d changed:%d\n", pass, change); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3212 if(!change) |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3213 break; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3214 } |
3051 | 3215 |
3216 if(s->block_max_depth == 1){ | |
3217 int change= 0; | |
3218 for(mb_y= 0; mb_y<b_height; mb_y+=2){ | |
3219 for(mb_x= 0; mb_x<b_width; mb_x+=2){ | |
3324 | 3220 int i; |
3051 | 3221 int best_rd, init_rd; |
3222 const int index= mb_x + mb_y * b_stride; | |
3223 BlockNode *b[4]; | |
3224 | |
3225 b[0]= &s->block[index]; | |
3226 b[1]= b[0]+1; | |
3227 b[2]= b[0]+b_stride; | |
3228 b[3]= b[2]+1; | |
3229 if(same_block(b[0], b[1]) && | |
3230 same_block(b[0], b[2]) && | |
3231 same_block(b[0], b[3])) | |
3232 continue; | |
3233 | |
3234 if(!s->me_cache_generation) | |
3235 memset(s->me_cache, 0, sizeof(s->me_cache)); | |
3236 s->me_cache_generation += 1<<22; | |
3237 | |
3238 init_rd= best_rd= get_4block_rd(s, mb_x, mb_y, 0); | |
3239 | |
3314 | 3240 //FIXME more multiref search? |
3051 | 3241 check_4block_inter(s, mb_x, mb_y, |
3242 (b[0]->mx + b[1]->mx + b[2]->mx + b[3]->mx + 2) >> 2, | |
3314 | 3243 (b[0]->my + b[1]->my + b[2]->my + b[3]->my + 2) >> 2, 0, &best_rd); |
3051 | 3244 |
3245 for(i=0; i<4; i++) | |
3246 if(!(b[i]->type&BLOCK_INTRA)) | |
3314 | 3247 check_4block_inter(s, mb_x, mb_y, b[i]->mx, b[i]->my, b[i]->ref, &best_rd); |
3051 | 3248 |
3249 if(init_rd != best_rd) | |
3250 change++; | |
3251 } | |
3252 } | |
3253 av_log(NULL, AV_LOG_ERROR, "pass:4mv changed:%d\n", change*4); | |
3254 } | |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3255 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
3256 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3257 static void quantize(SnowContext *s, SubBand *b, IDWTELEM *dst, DWTELEM *src, int stride, int bias){ |
2138 | 3258 const int w= b->width; |
3259 const int h= b->height; | |
4594 | 3260 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16); |
5575 | 3261 const int qmul= qexp[qlog&(QROOT-1)]<<((qlog>>QSHIFT) + ENCODER_EXTRA_BITS); |
2150 | 3262 int x,y, thres1, thres2; |
2138 | 3263 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3264 if(s->qlog == LOSSLESS_QLOG){ |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3265 for(y=0; y<h; y++) |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3266 for(x=0; x<w; x++) |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3267 dst[x + y*stride]= src[x + y*stride]; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3268 return; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3269 } |
2967 | 3270 |
2138 | 3271 bias= bias ? 0 : (3*qmul)>>3; |
2150 | 3272 thres1= ((qmul - bias)>>QEXPSHIFT) - 1; |
3273 thres2= 2*thres1; | |
2967 | 3274 |
2138 | 3275 if(!bias){ |
3276 for(y=0; y<h; y++){ | |
3277 for(x=0; x<w; x++){ | |
2150 | 3278 int i= src[x + y*stride]; |
2967 | 3279 |
2150 | 3280 if((unsigned)(i+thres1) > thres2){ |
3281 if(i>=0){ | |
3282 i<<= QEXPSHIFT; | |
3283 i/= qmul; //FIXME optimize | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3284 dst[x + y*stride]= i; |
2150 | 3285 }else{ |
3286 i= -i; | |
3287 i<<= QEXPSHIFT; | |
3288 i/= qmul; //FIXME optimize | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3289 dst[x + y*stride]= -i; |
2150 | 3290 } |
3291 }else | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3292 dst[x + y*stride]= 0; |
2138 | 3293 } |
3294 } | |
3295 }else{ | |
3296 for(y=0; y<h; y++){ | |
3297 for(x=0; x<w; x++){ | |
2967 | 3298 int i= src[x + y*stride]; |
3299 | |
2150 | 3300 if((unsigned)(i+thres1) > thres2){ |
3301 if(i>=0){ | |
3302 i<<= QEXPSHIFT; | |
3303 i= (i + bias) / qmul; //FIXME optimize | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3304 dst[x + y*stride]= i; |
2150 | 3305 }else{ |
3306 i= -i; | |
3307 i<<= QEXPSHIFT; | |
3308 i= (i + bias) / qmul; //FIXME optimize | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3309 dst[x + y*stride]= -i; |
2150 | 3310 } |
3311 }else | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3312 dst[x + y*stride]= 0; |
2138 | 3313 } |
3314 } | |
3315 } | |
3316 } | |
3317 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3318 static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int start_y, int end_y){ |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3319 const int w= b->width; |
4594 | 3320 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16); |
2600 | 3321 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT); |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3322 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3323 int x,y; |
2967 | 3324 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3325 if(s->qlog == LOSSLESS_QLOG) return; |
2967 | 3326 |
2634
337217ecbb3e
tighter snow slicing patch by (Yartrebo || yartrebo earthlink net)
michael
parents:
2628
diff
changeset
|
3327 for(y=start_y; y<end_y; y++){ |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3328 // DWTELEM * line = slice_buffer_get_line_from_address(sb, src + (y * stride)); |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3329 IDWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset; |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3330 for(x=0; x<w; x++){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3331 int i= line[x]; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3332 if(i<0){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3333 line[x]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3334 }else if(i>0){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3335 line[x]= (( i*qmul + qadd)>>(QEXPSHIFT)); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3336 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3337 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3338 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3339 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3340 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3341 static void dequantize(SnowContext *s, SubBand *b, IDWTELEM *src, int stride){ |
2138 | 3342 const int w= b->width; |
3343 const int h= b->height; | |
4594 | 3344 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16); |
2600 | 3345 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT); |
2138 | 3346 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT; |
3347 int x,y; | |
2967 | 3348 |
2161 | 3349 if(s->qlog == LOSSLESS_QLOG) return; |
2967 | 3350 |
2138 | 3351 for(y=0; y<h; y++){ |
3352 for(x=0; x<w; x++){ | |
3353 int i= src[x + y*stride]; | |
3354 if(i<0){ | |
3355 src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias | |
3356 }else if(i>0){ | |
3357 src[x + y*stride]= (( i*qmul + qadd)>>(QEXPSHIFT)); | |
3358 } | |
3359 } | |
3360 } | |
3361 } | |
3362 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3363 static void decorrelate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){ |
2138 | 3364 const int w= b->width; |
3365 const int h= b->height; | |
3366 int x,y; | |
2967 | 3367 |
2138 | 3368 for(y=h-1; y>=0; y--){ |
3369 for(x=w-1; x>=0; x--){ | |
3370 int i= x + y*stride; | |
2967 | 3371 |
2138 | 3372 if(x){ |
3373 if(use_median){ | |
3374 if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]); | |
3375 else src[i] -= src[i - 1]; | |
3376 }else{ | |
3377 if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]); | |
3378 else src[i] -= src[i - 1]; | |
3379 } | |
3380 }else{ | |
3381 if(y) src[i] -= src[i - stride]; | |
3382 } | |
3383 } | |
3384 } | |
3385 } | |
3386 | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3387 static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median, int start_y, int end_y){ |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3388 const int w= b->width; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3389 int x,y; |
2967 | 3390 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3391 IDWTELEM * line=0; // silence silly "could be used without having been initialized" warning |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3392 IDWTELEM * prev; |
2967 | 3393 |
2634
337217ecbb3e
tighter snow slicing patch by (Yartrebo || yartrebo earthlink net)
michael
parents:
2628
diff
changeset
|
3394 if (start_y != 0) |
337217ecbb3e
tighter snow slicing patch by (Yartrebo || yartrebo earthlink net)
michael
parents:
2628
diff
changeset
|
3395 line = slice_buffer_get_line(sb, ((start_y - 1) * b->stride_line) + b->buf_y_offset) + b->buf_x_offset; |
2967 | 3396 |
2634
337217ecbb3e
tighter snow slicing patch by (Yartrebo || yartrebo earthlink net)
michael
parents:
2628
diff
changeset
|
3397 for(y=start_y; y<end_y; y++){ |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3398 prev = line; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3399 // line = slice_buffer_get_line_from_address(sb, src + (y * stride)); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3400 line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3401 for(x=0; x<w; x++){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3402 if(x){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3403 if(use_median){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3404 if(y && x+1<w) line[x] += mid_pred(line[x - 1], prev[x], prev[x + 1]); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3405 else line[x] += line[x - 1]; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3406 }else{ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3407 if(y) line[x] += mid_pred(line[x - 1], prev[x], line[x - 1] + prev[x] - prev[x - 1]); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3408 else line[x] += line[x - 1]; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3409 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3410 }else{ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3411 if(y) line[x] += prev[x]; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3412 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3413 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3414 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3415 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3416 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3417 static void correlate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){ |
2138 | 3418 const int w= b->width; |
3419 const int h= b->height; | |
3420 int x,y; | |
2967 | 3421 |
2138 | 3422 for(y=0; y<h; y++){ |
3423 for(x=0; x<w; x++){ | |
3424 int i= x + y*stride; | |
2967 | 3425 |
2138 | 3426 if(x){ |
3427 if(use_median){ | |
3428 if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]); | |
3429 else src[i] += src[i - 1]; | |
3430 }else{ | |
3431 if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]); | |
3432 else src[i] += src[i - 1]; | |
3433 } | |
3434 }else{ | |
3435 if(y) src[i] += src[i - stride]; | |
3436 } | |
3437 } | |
3438 } | |
3439 } | |
3440 | |
5666
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3441 static void encode_qlogs(SnowContext *s){ |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3442 int plane_index, level, orientation; |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3443 |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3444 for(plane_index=0; plane_index<2; plane_index++){ |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3445 for(level=0; level<s->spatial_decomposition_count; level++){ |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3446 for(orientation=level ? 1:0; orientation<4; orientation++){ |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3447 if(orientation==2) continue; |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3448 put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1); |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3449 } |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3450 } |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3451 } |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3452 } |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3453 |
2138 | 3454 static void encode_header(SnowContext *s){ |
5666
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3455 int plane_index, i; |
2967 | 3456 uint8_t kstate[32]; |
3457 | |
3458 memset(kstate, MID_STATE, sizeof(kstate)); | |
2138 | 3459 |
2335 | 3460 put_rac(&s->c, kstate, s->keyframe); |
4331
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3461 if(s->keyframe || s->always_reset){ |
2199 | 3462 reset_contexts(s); |
4331
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3463 s->last_spatial_decomposition_type= |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3464 s->last_qlog= |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3465 s->last_qbias= |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3466 s->last_mv_scale= |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3467 s->last_block_max_depth= 0; |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3468 for(plane_index=0; plane_index<2; plane_index++){ |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3469 Plane *p= &s->plane[plane_index]; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3470 p->last_htaps=0; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3471 p->last_diag_mc=0; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3472 memset(p->last_hcoeff, 0, sizeof(p->last_hcoeff)); |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3473 } |
4331
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3474 } |
2138 | 3475 if(s->keyframe){ |
3476 put_symbol(&s->c, s->header_state, s->version, 0); | |
2335 | 3477 put_rac(&s->c, s->header_state, s->always_reset); |
2138 | 3478 put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0); |
3479 put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0); | |
3480 put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0); | |
3481 put_symbol(&s->c, s->header_state, s->colorspace_type, 0); | |
3482 put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0); | |
3483 put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0); | |
2335 | 3484 put_rac(&s->c, s->header_state, s->spatial_scalability); |
3485 // put_rac(&s->c, s->header_state, s->rate_scalability); | |
3314 | 3486 put_symbol(&s->c, s->header_state, s->max_ref_frames-1, 0); |
2138 | 3487 |
5666
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3488 encode_qlogs(s); |
2138 | 3489 } |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3490 |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3491 if(!s->keyframe){ |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3492 int update_mc=0; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3493 for(plane_index=0; plane_index<2; plane_index++){ |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3494 Plane *p= &s->plane[plane_index]; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3495 update_mc |= p->last_htaps != p->htaps; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3496 update_mc |= p->last_diag_mc != p->diag_mc; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3497 update_mc |= !!memcmp(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff)); |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3498 } |
5667
9242e125395f
do not force the halfpel filter coeffs to be retransmitted on every frame
michael
parents:
5666
diff
changeset
|
3499 put_rac(&s->c, s->header_state, update_mc); |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3500 if(update_mc){ |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3501 for(plane_index=0; plane_index<2; plane_index++){ |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3502 Plane *p= &s->plane[plane_index]; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3503 put_rac(&s->c, s->header_state, p->diag_mc); |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3504 put_symbol(&s->c, s->header_state, p->htaps/2-1, 0); |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3505 for(i= p->htaps/2; i; i--) |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3506 put_symbol(&s->c, s->header_state, FFABS(p->hcoeff[i]), 0); |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3507 } |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3508 } |
5670
2f3431b34f0f
make changing spatial_decomposition_count per frame work
michael
parents:
5668
diff
changeset
|
3509 if(s->last_spatial_decomposition_count != s->spatial_decomposition_count){ |
2f3431b34f0f
make changing spatial_decomposition_count per frame work
michael
parents:
5668
diff
changeset
|
3510 put_rac(&s->c, s->header_state, 1); |
5666
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3511 put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0); |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3512 encode_qlogs(s); |
5670
2f3431b34f0f
make changing spatial_decomposition_count per frame work
michael
parents:
5668
diff
changeset
|
3513 }else |
2f3431b34f0f
make changing spatial_decomposition_count per frame work
michael
parents:
5668
diff
changeset
|
3514 put_rac(&s->c, s->header_state, 0); |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3515 } |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3516 |
4331
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3517 put_symbol(&s->c, s->header_state, s->spatial_decomposition_type - s->last_spatial_decomposition_type, 1); |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3518 put_symbol(&s->c, s->header_state, s->qlog - s->last_qlog , 1); |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3519 put_symbol(&s->c, s->header_state, s->mv_scale - s->last_mv_scale, 1); |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3520 put_symbol(&s->c, s->header_state, s->qbias - s->last_qbias , 1); |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3521 put_symbol(&s->c, s->header_state, s->block_max_depth - s->last_block_max_depth, 1); |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3522 |
6144 | 3523 } |
3524 | |
3525 static void update_last_header_values(SnowContext *s){ | |
3526 int plane_index; | |
3527 | |
3528 if(!s->keyframe){ | |
3529 for(plane_index=0; plane_index<2; plane_index++){ | |
3530 Plane *p= &s->plane[plane_index]; | |
3531 p->last_diag_mc= p->diag_mc; | |
3532 p->last_htaps = p->htaps; | |
3533 memcpy(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff)); | |
3534 } | |
3535 } | |
3536 | |
6414 | 3537 s->last_spatial_decomposition_type = s->spatial_decomposition_type; |
3538 s->last_qlog = s->qlog; | |
3539 s->last_qbias = s->qbias; | |
3540 s->last_mv_scale = s->mv_scale; | |
3541 s->last_block_max_depth = s->block_max_depth; | |
3542 s->last_spatial_decomposition_count = s->spatial_decomposition_count; | |
2138 | 3543 } |
3544 | |
5666
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3545 static void decode_qlogs(SnowContext *s){ |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3546 int plane_index, level, orientation; |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3547 |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3548 for(plane_index=0; plane_index<3; plane_index++){ |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3549 for(level=0; level<s->spatial_decomposition_count; level++){ |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3550 for(orientation=level ? 1:0; orientation<4; orientation++){ |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3551 int q; |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3552 if (plane_index==2) q= s->plane[1].band[level][orientation].qlog; |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3553 else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog; |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3554 else q= get_symbol(&s->c, s->header_state, 1); |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3555 s->plane[plane_index].band[level][orientation].qlog= q; |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3556 } |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3557 } |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3558 } |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3559 } |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3560 |
2138 | 3561 static int decode_header(SnowContext *s){ |
5666
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3562 int plane_index; |
2335 | 3563 uint8_t kstate[32]; |
2138 | 3564 |
2967 | 3565 memset(kstate, MID_STATE, sizeof(kstate)); |
2335 | 3566 |
3567 s->keyframe= get_rac(&s->c, kstate); | |
4331
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3568 if(s->keyframe || s->always_reset){ |
2199 | 3569 reset_contexts(s); |
4331
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3570 s->spatial_decomposition_type= |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3571 s->qlog= |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3572 s->qbias= |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3573 s->mv_scale= |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3574 s->block_max_depth= 0; |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3575 } |
2138 | 3576 if(s->keyframe){ |
3577 s->version= get_symbol(&s->c, s->header_state, 0); | |
3578 if(s->version>0){ | |
3579 av_log(s->avctx, AV_LOG_ERROR, "version %d not supported", s->version); | |
3580 return -1; | |
3581 } | |
2335 | 3582 s->always_reset= get_rac(&s->c, s->header_state); |
2138 | 3583 s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0); |
3584 s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0); | |
3585 s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0); | |
3586 s->colorspace_type= get_symbol(&s->c, s->header_state, 0); | |
3587 s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0); | |
3588 s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0); | |
2335 | 3589 s->spatial_scalability= get_rac(&s->c, s->header_state); |
3590 // s->rate_scalability= get_rac(&s->c, s->header_state); | |
3314 | 3591 s->max_ref_frames= get_symbol(&s->c, s->header_state, 0)+1; |
2138 | 3592 |
5666
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3593 decode_qlogs(s); |
2138 | 3594 } |
2967 | 3595 |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3596 if(!s->keyframe){ |
5667
9242e125395f
do not force the halfpel filter coeffs to be retransmitted on every frame
michael
parents:
5666
diff
changeset
|
3597 if(get_rac(&s->c, s->header_state)){ |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3598 for(plane_index=0; plane_index<2; plane_index++){ |
5807 | 3599 int htaps, i, sum=0; |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3600 Plane *p= &s->plane[plane_index]; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3601 p->diag_mc= get_rac(&s->c, s->header_state); |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3602 htaps= get_symbol(&s->c, s->header_state, 0)*2 + 2; |
5652
941e5deeb2a4
rename HTAPS -> HTAPS_MAX (later is correct after the previous change)
michael
parents:
5651
diff
changeset
|
3603 if((unsigned)htaps > HTAPS_MAX || htaps==0) |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3604 return -1; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3605 p->htaps= htaps; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3606 for(i= htaps/2; i; i--){ |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3607 p->hcoeff[i]= get_symbol(&s->c, s->header_state, 0) * (1-2*(i&1)); |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3608 sum += p->hcoeff[i]; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3609 } |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3610 p->hcoeff[0]= 32-sum; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3611 } |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3612 s->plane[2].diag_mc= s->plane[1].diag_mc; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3613 s->plane[2].htaps = s->plane[1].htaps; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3614 memcpy(s->plane[2].hcoeff, s->plane[1].hcoeff, sizeof(s->plane[1].hcoeff)); |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3615 } |
5666
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3616 if(get_rac(&s->c, s->header_state)){ |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3617 s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0); |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3618 decode_qlogs(s); |
b5c137f3f53a
allow changing of the spatial_decomposition_count and quantization tables
michael
parents:
5665
diff
changeset
|
3619 } |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3620 } |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3621 |
4331
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3622 s->spatial_decomposition_type+= get_symbol(&s->c, s->header_state, 1); |
5588
effa59ca89b3
we only have 2 wavelets, the 3rd was just for experimentation ...
michael
parents:
5587
diff
changeset
|
3623 if(s->spatial_decomposition_type > 1){ |
2138 | 3624 av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type); |
3625 return -1; | |
3626 } | |
2967 | 3627 |
4331
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3628 s->qlog += get_symbol(&s->c, s->header_state, 1); |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3629 s->mv_scale += get_symbol(&s->c, s->header_state, 1); |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3630 s->qbias += get_symbol(&s->c, s->header_state, 1); |
e571dfe677be
store a few values in the header as difference to the last
michael
parents:
4283
diff
changeset
|
3631 s->block_max_depth+= get_symbol(&s->c, s->header_state, 1); |
3303
68721b62a528
sanity checks, some might have been exploitable ...
michael
parents:
3206
diff
changeset
|
3632 if(s->block_max_depth > 1 || s->block_max_depth < 0){ |
2952 | 3633 av_log(s->avctx, AV_LOG_ERROR, "block_max_depth= %d is too large", s->block_max_depth); |
3634 s->block_max_depth= 0; | |
3635 return -1; | |
3636 } | |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
3637 |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
3638 return 0; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
3639 } |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
3640 |
3075 | 3641 static void init_qexp(void){ |
2600 | 3642 int i; |
3643 double v=128; | |
3644 | |
3645 for(i=0; i<QROOT; i++){ | |
3646 qexp[i]= lrintf(v); | |
2967 | 3647 v *= pow(2, 1.0 / QROOT); |
2600 | 3648 } |
3649 } | |
3650 | |
2138 | 3651 static int common_init(AVCodecContext *avctx){ |
3652 SnowContext *s = avctx->priv_data; | |
3653 int width, height; | |
3325
c2a017de6bea
Snow: scale predicted mv based on which reference frame the neighbors used.
lorenm
parents:
3324
diff
changeset
|
3654 int i, j; |
2138 | 3655 |
3656 s->avctx= avctx; | |
2967 | 3657 |
2138 | 3658 dsputil_init(&s->dsp, avctx); |
3659 | |
3660 #define mcf(dx,dy)\ | |
3661 s->dsp.put_qpel_pixels_tab [0][dy+dx/4]=\ | |
3662 s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\ | |
2224
11d54cb7ac4e
100l (forgot to set the 8x8 mc functions, so ME did use the mpeg ones ...)
michael
parents:
2223
diff
changeset
|
3663 s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\ |
11d54cb7ac4e
100l (forgot to set the 8x8 mc functions, so ME did use the mpeg ones ...)
michael
parents:
2223
diff
changeset
|
3664 s->dsp.put_qpel_pixels_tab [1][dy+dx/4]=\ |
11d54cb7ac4e
100l (forgot to set the 8x8 mc functions, so ME did use the mpeg ones ...)
michael
parents:
2223
diff
changeset
|
3665 s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\ |
11d54cb7ac4e
100l (forgot to set the 8x8 mc functions, so ME did use the mpeg ones ...)
michael
parents:
2223
diff
changeset
|
3666 s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4]; |
2138 | 3667 |
3668 mcf( 0, 0) | |
3669 mcf( 4, 0) | |
3670 mcf( 8, 0) | |
3671 mcf(12, 0) | |
3672 mcf( 0, 4) | |
3673 mcf( 4, 4) | |
3674 mcf( 8, 4) | |
3675 mcf(12, 4) | |
3676 mcf( 0, 8) | |
3677 mcf( 4, 8) | |
3678 mcf( 8, 8) | |
3679 mcf(12, 8) | |
3680 mcf( 0,12) | |
3681 mcf( 4,12) | |
3682 mcf( 8,12) | |
3683 mcf(12,12) | |
3684 | |
3685 #define mcfh(dx,dy)\ | |
3686 s->dsp.put_pixels_tab [0][dy/4+dx/8]=\ | |
3687 s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\ | |
2224
11d54cb7ac4e
100l (forgot to set the 8x8 mc functions, so ME did use the mpeg ones ...)
michael
parents:
2223
diff
changeset
|
3688 mc_block_hpel ## dx ## dy ## 16;\ |
11d54cb7ac4e
100l (forgot to set the 8x8 mc functions, so ME did use the mpeg ones ...)
michael
parents:
2223
diff
changeset
|
3689 s->dsp.put_pixels_tab [1][dy/4+dx/8]=\ |
11d54cb7ac4e
100l (forgot to set the 8x8 mc functions, so ME did use the mpeg ones ...)
michael
parents:
2223
diff
changeset
|
3690 s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\ |
11d54cb7ac4e
100l (forgot to set the 8x8 mc functions, so ME did use the mpeg ones ...)
michael
parents:
2223
diff
changeset
|
3691 mc_block_hpel ## dx ## dy ## 8; |
2138 | 3692 |
3693 mcfh(0, 0) | |
3694 mcfh(8, 0) | |
3695 mcfh(0, 8) | |
3696 mcfh(8, 8) | |
2600 | 3697 |
3698 if(!qexp[0]) | |
3699 init_qexp(); | |
3700 | |
2138 | 3701 // dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift); |
2967 | 3702 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
3703 width= s->avctx->width; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
3704 height= s->avctx->height; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
3705 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3706 s->spatial_idwt_buffer= av_mallocz(width*height*sizeof(IDWTELEM)); |
6412 | 3707 s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM)); //FIXME this does not belong here |
2967 | 3708 |
5665
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3709 for(i=0; i<MAX_REF_FRAMES; i++) |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3710 for(j=0; j<MAX_REF_FRAMES; j++) |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3711 scale_mv_ref[i][j] = 256*(i+1)/(j+1); |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3712 |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3713 s->avctx->get_buffer(s->avctx, &s->mconly_picture); |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3714 |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3715 return 0; |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3716 } |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3717 |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3718 static int common_init_after_header(AVCodecContext *avctx){ |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3719 SnowContext *s = avctx->priv_data; |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3720 int plane_index, level, orientation; |
2967 | 3721 |
3722 for(plane_index=0; plane_index<3; plane_index++){ | |
2138 | 3723 int w= s->avctx->width; |
3724 int h= s->avctx->height; | |
3725 | |
3726 if(plane_index){ | |
3727 w>>= s->chroma_h_shift; | |
3728 h>>= s->chroma_v_shift; | |
3729 } | |
3730 s->plane[plane_index].width = w; | |
3731 s->plane[plane_index].height= h; | |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
3732 |
2138 | 3733 for(level=s->spatial_decomposition_count-1; level>=0; level--){ |
3734 for(orientation=level ? 1 : 0; orientation<4; orientation++){ | |
3735 SubBand *b= &s->plane[plane_index].band[level][orientation]; | |
2967 | 3736 |
2138 | 3737 b->buf= s->spatial_dwt_buffer; |
3738 b->level= level; | |
3739 b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level); | |
3740 b->width = (w + !(orientation&1))>>1; | |
3741 b->height= (h + !(orientation>1))>>1; | |
2967 | 3742 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3743 b->stride_line = 1 << (s->spatial_decomposition_count - level); |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3744 b->buf_x_offset = 0; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3745 b->buf_y_offset = 0; |
2967 | 3746 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3747 if(orientation&1){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3748 b->buf += (w+1)>>1; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3749 b->buf_x_offset = (w+1)>>1; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3750 } |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3751 if(orientation>1){ |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3752 b->buf += b->stride>>1; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3753 b->buf_y_offset = b->stride_line >> 1; |
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3754 } |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3755 b->ibuf= s->spatial_idwt_buffer + (b->buf - s->spatial_dwt_buffer); |
2967 | 3756 |
2138 | 3757 if(level) |
3758 b->parent= &s->plane[plane_index].band[level-1][orientation]; | |
5665
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3759 //FIXME avoid this realloc |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3760 av_freep(&b->x_coeff); |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
3761 b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff)); |
2138 | 3762 } |
3763 w= (w+1)>>1; | |
3764 h= (h+1)>>1; | |
3765 } | |
3766 } | |
2967 | 3767 |
2138 | 3768 return 0; |
3769 } | |
3770 | |
3322
0b4f548dfb44
Snow: 10l. 1pass ratecontrol failed to set snow's internal quant.
lorenm
parents:
3314
diff
changeset
|
3771 static int qscale2qlog(int qscale){ |
0b4f548dfb44
Snow: 10l. 1pass ratecontrol failed to set snow's internal quant.
lorenm
parents:
3314
diff
changeset
|
3772 return rint(QROOT*log(qscale / (float)FF_QP2LAMBDA)/log(2)) |
0b4f548dfb44
Snow: 10l. 1pass ratecontrol failed to set snow's internal quant.
lorenm
parents:
3314
diff
changeset
|
3773 + 61*QROOT/8; //<64 >60 |
0b4f548dfb44
Snow: 10l. 1pass ratecontrol failed to set snow's internal quant.
lorenm
parents:
3314
diff
changeset
|
3774 } |
0b4f548dfb44
Snow: 10l. 1pass ratecontrol failed to set snow's internal quant.
lorenm
parents:
3314
diff
changeset
|
3775 |
3661
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
3776 static int ratecontrol_1pass(SnowContext *s, AVFrame *pict) |
3313 | 3777 { |
6412 | 3778 /* Estimate the frame's complexity as a sum of weighted dwt coefficients. |
3313 | 3779 * FIXME we know exact mv bits at this point, |
3780 * but ratecontrol isn't set up to include them. */ | |
3781 uint32_t coef_sum= 0; | |
3661
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
3782 int level, orientation, delta_qlog; |
3313 | 3783 |
3784 for(level=0; level<s->spatial_decomposition_count; level++){ | |
3785 for(orientation=level ? 1 : 0; orientation<4; orientation++){ | |
3786 SubBand *b= &s->plane[0].band[level][orientation]; | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3787 IDWTELEM *buf= b->ibuf; |
3313 | 3788 const int w= b->width; |
3789 const int h= b->height; | |
3790 const int stride= b->stride; | |
4594 | 3791 const int qlog= av_clip(2*QROOT + b->qlog, 0, QROOT*16); |
3313 | 3792 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT); |
3793 const int qdiv= (1<<16)/qmul; | |
3794 int x, y; | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3795 //FIXME this is ugly |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3796 for(y=0; y<h; y++) |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3797 for(x=0; x<w; x++) |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3798 buf[x+y*stride]= b->buf[x+y*stride]; |
3313 | 3799 if(orientation==0) |
3800 decorrelate(s, b, buf, stride, 1, 0); | |
3801 for(y=0; y<h; y++) | |
3802 for(x=0; x<w; x++) | |
3803 coef_sum+= abs(buf[x+y*stride]) * qdiv >> 16; | |
3804 } | |
3805 } | |
3806 | |
3807 /* ugly, ratecontrol just takes a sqrt again */ | |
3808 coef_sum = (uint64_t)coef_sum * coef_sum >> 16; | |
3809 assert(coef_sum < INT_MAX); | |
3810 | |
3811 if(pict->pict_type == I_TYPE){ | |
3812 s->m.current_picture.mb_var_sum= coef_sum; | |
3813 s->m.current_picture.mc_mb_var_sum= 0; | |
3814 }else{ | |
3815 s->m.current_picture.mc_mb_var_sum= coef_sum; | |
3816 s->m.current_picture.mb_var_sum= 0; | |
3817 } | |
3818 | |
3819 pict->quality= ff_rate_estimate_qscale(&s->m, 1); | |
3766 | 3820 if (pict->quality < 0) |
4011
5bce97c30a69
-1 is a valid return value in ratecontrol_1pass() -> 100l for takis
michael
parents:
4001
diff
changeset
|
3821 return INT_MIN; |
3313 | 3822 s->lambda= pict->quality * 3/2; |
3661
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
3823 delta_qlog= qscale2qlog(pict->quality) - s->qlog; |
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
3824 s->qlog+= delta_qlog; |
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
3825 return delta_qlog; |
3313 | 3826 } |
2138 | 3827 |
5909 | 3828 static void calculate_visual_weight(SnowContext *s, Plane *p){ |
2138 | 3829 int width = p->width; |
3830 int height= p->height; | |
2198 | 3831 int level, orientation, x, y; |
2138 | 3832 |
3833 for(level=0; level<s->spatial_decomposition_count; level++){ | |
3834 for(orientation=level ? 1 : 0; orientation<4; orientation++){ | |
3835 SubBand *b= &p->band[level][orientation]; | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3836 IDWTELEM *ibuf= b->ibuf; |
2138 | 3837 int64_t error=0; |
2967 | 3838 |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3839 memset(s->spatial_idwt_buffer, 0, sizeof(*s->spatial_idwt_buffer)*width*height); |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3840 ibuf[b->width/2 + b->height/2*b->stride]= 256*16; |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3841 ff_spatial_idwt(s->spatial_idwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count); |
2138 | 3842 for(y=0; y<height; y++){ |
3843 for(x=0; x<width; x++){ | |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
3844 int64_t d= s->spatial_idwt_buffer[x + y*width]*16; |
2138 | 3845 error += d*d; |
3846 } | |
3847 } | |
3848 | |
3849 b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5); | |
3850 } | |
3851 } | |
3852 } | |
3853 | |
5702 | 3854 #define QUANTIZE2 0 |
3855 | |
3856 #if QUANTIZE2==1 | |
3857 #define Q2_STEP 8 | |
3858 | |
3859 static void find_sse(SnowContext *s, Plane *p, int *score, int score_stride, IDWTELEM *r0, IDWTELEM *r1, int level, int orientation){ | |
3860 SubBand *b= &p->band[level][orientation]; | |
3861 int x, y; | |
3862 int xo=0; | |
3863 int yo=0; | |
3864 int step= 1 << (s->spatial_decomposition_count - level); | |
3865 | |
3866 if(orientation&1) | |
3867 xo= step>>1; | |
3868 if(orientation&2) | |
3869 yo= step>>1; | |
3870 | |
6412 | 3871 //FIXME bias for nonzero ? |
5702 | 3872 //FIXME optimize |
3873 memset(score, 0, sizeof(*score)*score_stride*((p->height + Q2_STEP-1)/Q2_STEP)); | |
3874 for(y=0; y<p->height; y++){ | |
3875 for(x=0; x<p->width; x++){ | |
3876 int sx= (x-xo + step/2) / step / Q2_STEP; | |
3877 int sy= (y-yo + step/2) / step / Q2_STEP; | |
3878 int v= r0[x + y*p->width] - r1[x + y*p->width]; | |
3879 assert(sx>=0 && sy>=0 && sx < score_stride); | |
3880 v= ((v+8)>>4)<<4; | |
3881 score[sx + sy*score_stride] += v*v; | |
3882 assert(score[sx + sy*score_stride] >= 0); | |
3883 } | |
3884 } | |
3885 } | |
3886 | |
3887 static void dequantize_all(SnowContext *s, Plane *p, IDWTELEM *buffer, int width, int height){ | |
3888 int level, orientation; | |
3889 | |
3890 for(level=0; level<s->spatial_decomposition_count; level++){ | |
3891 for(orientation=level ? 1 : 0; orientation<4; orientation++){ | |
3892 SubBand *b= &p->band[level][orientation]; | |
3893 IDWTELEM *dst= buffer + (b->ibuf - s->spatial_idwt_buffer); | |
3894 | |
3895 dequantize(s, b, dst, b->stride); | |
3896 } | |
3897 } | |
3898 } | |
3899 | |
3900 static void dwt_quantize(SnowContext *s, Plane *p, DWTELEM *buffer, int width, int height, int stride, int type){ | |
3901 int level, orientation, ys, xs, x, y, pass; | |
3902 IDWTELEM best_dequant[height * stride]; | |
3903 IDWTELEM idwt2_buffer[height * stride]; | |
3904 const int score_stride= (width + 10)/Q2_STEP; | |
3905 int best_score[(width + 10)/Q2_STEP * (height + 10)/Q2_STEP]; //FIXME size | |
3906 int score[(width + 10)/Q2_STEP * (height + 10)/Q2_STEP]; //FIXME size | |
3907 int threshold= (s->m.lambda * s->m.lambda) >> 6; | |
3908 | |
3909 //FIXME pass the copy cleanly ? | |
3910 | |
3911 // memcpy(dwt_buffer, buffer, height * stride * sizeof(DWTELEM)); | |
3912 ff_spatial_dwt(buffer, width, height, stride, type, s->spatial_decomposition_count); | |
3913 | |
3914 for(level=0; level<s->spatial_decomposition_count; level++){ | |
3915 for(orientation=level ? 1 : 0; orientation<4; orientation++){ | |
3916 SubBand *b= &p->band[level][orientation]; | |
3917 IDWTELEM *dst= best_dequant + (b->ibuf - s->spatial_idwt_buffer); | |
3918 DWTELEM *src= buffer + (b-> buf - s->spatial_dwt_buffer); | |
6412 | 3919 assert(src == b->buf); // code does not depend on this but it is true currently |
5702 | 3920 |
3921 quantize(s, b, dst, src, b->stride, s->qbias); | |
3922 } | |
3923 } | |
3924 for(pass=0; pass<1; pass++){ | |
3925 if(s->qbias == 0) //keyframe | |
3926 continue; | |
3927 for(level=0; level<s->spatial_decomposition_count; level++){ | |
3928 for(orientation=level ? 1 : 0; orientation<4; orientation++){ | |
3929 SubBand *b= &p->band[level][orientation]; | |
3930 IDWTELEM *dst= idwt2_buffer + (b->ibuf - s->spatial_idwt_buffer); | |
3931 IDWTELEM *best_dst= best_dequant + (b->ibuf - s->spatial_idwt_buffer); | |
3932 | |
3933 for(ys= 0; ys<Q2_STEP; ys++){ | |
3934 for(xs= 0; xs<Q2_STEP; xs++){ | |
3935 memcpy(idwt2_buffer, best_dequant, height * stride * sizeof(IDWTELEM)); | |
3936 dequantize_all(s, p, idwt2_buffer, width, height); | |
3937 ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count); | |
3938 find_sse(s, p, best_score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation); | |
3939 memcpy(idwt2_buffer, best_dequant, height * stride * sizeof(IDWTELEM)); | |
3940 for(y=ys; y<b->height; y+= Q2_STEP){ | |
3941 for(x=xs; x<b->width; x+= Q2_STEP){ | |
3942 if(dst[x + y*b->stride]<0) dst[x + y*b->stride]++; | |
3943 if(dst[x + y*b->stride]>0) dst[x + y*b->stride]--; | |
6412 | 3944 //FIXME try more than just -- |
5702 | 3945 } |
3946 } | |
3947 dequantize_all(s, p, idwt2_buffer, width, height); | |
3948 ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count); | |
3949 find_sse(s, p, score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation); | |
3950 for(y=ys; y<b->height; y+= Q2_STEP){ | |
3951 for(x=xs; x<b->width; x+= Q2_STEP){ | |
3952 int score_idx= x/Q2_STEP + (y/Q2_STEP)*score_stride; | |
3953 if(score[score_idx] <= best_score[score_idx] + threshold){ | |
3954 best_score[score_idx]= score[score_idx]; | |
3955 if(best_dst[x + y*b->stride]<0) best_dst[x + y*b->stride]++; | |
3956 if(best_dst[x + y*b->stride]>0) best_dst[x + y*b->stride]--; | |
3957 //FIXME copy instead | |
3958 } | |
3959 } | |
3960 } | |
3961 } | |
3962 } | |
3963 } | |
3964 } | |
3965 } | |
6412 | 3966 memcpy(s->spatial_idwt_buffer, best_dequant, height * stride * sizeof(IDWTELEM)); //FIXME work with that directly instead of copy at the end |
5702 | 3967 } |
3968 | |
5910 | 3969 #endif /* QUANTIZE2==1 */ |
5702 | 3970 |
2138 | 3971 static int encode_init(AVCodecContext *avctx) |
3972 { | |
3973 SnowContext *s = avctx->priv_data; | |
2198 | 3974 int plane_index; |
2138 | 3975 |
2658
d1609cfeb1d0
#defines for strict_std_compliance and split between inofficial extensions and non standarized things
michael
parents:
2635
diff
changeset
|
3976 if(avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL){ |
6412 | 3977 av_log(avctx, AV_LOG_ERROR, "This codec is under development, files encoded with it may not be decodable with future versions!!!\n" |
3978 "Use vstrict=-2 / -strict -2 to use it anyway.\n"); | |
2151 | 3979 return -1; |
3980 } | |
2967 | 3981 |
3327
955096780e7c
Snow: don't try to encode lossless with 9/7 wavelet, because it isn't lossless.
lorenm
parents:
3326
diff
changeset
|
3982 if(avctx->prediction_method == DWT_97 |
955096780e7c
Snow: don't try to encode lossless with 9/7 wavelet, because it isn't lossless.
lorenm
parents:
3326
diff
changeset
|
3983 && (avctx->flags & CODEC_FLAG_QSCALE) |
955096780e7c
Snow: don't try to encode lossless with 9/7 wavelet, because it isn't lossless.
lorenm
parents:
3326
diff
changeset
|
3984 && avctx->global_quality == 0){ |
6412 | 3985 av_log(avctx, AV_LOG_ERROR, "The 9/7 wavelet is incompatible with lossless mode.\n"); |
3327
955096780e7c
Snow: don't try to encode lossless with 9/7 wavelet, because it isn't lossless.
lorenm
parents:
3326
diff
changeset
|
3986 return -1; |
955096780e7c
Snow: don't try to encode lossless with 9/7 wavelet, because it isn't lossless.
lorenm
parents:
3326
diff
changeset
|
3987 } |
955096780e7c
Snow: don't try to encode lossless with 9/7 wavelet, because it isn't lossless.
lorenm
parents:
3326
diff
changeset
|
3988 |
5665
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3989 s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3990 |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3991 s->chroma_h_shift= 1; //FIXME XXX |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3992 s->chroma_v_shift= 1; |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3993 |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3994 s->mv_scale = (avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4; |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3995 s->block_max_depth= (avctx->flags & CODEC_FLAG_4MV ) ? 1 : 0; |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3996 |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3997 for(plane_index=0; plane_index<3; plane_index++){ |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3998 s->plane[plane_index].diag_mc= 1; |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
3999 s->plane[plane_index].htaps= 6; |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
4000 s->plane[plane_index].hcoeff[0]= 40; |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
4001 s->plane[plane_index].hcoeff[1]= -10; |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
4002 s->plane[plane_index].hcoeff[2]= 2; |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
4003 s->plane[plane_index].fast_mc= 1; |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
4004 } |
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
4005 |
2138 | 4006 common_init(avctx); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
4007 alloc_blocks(s); |
2967 | 4008 |
2138 | 4009 s->version=0; |
2967 | 4010 |
2608
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4011 s->m.avctx = avctx; |
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4012 s->m.flags = avctx->flags; |
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4013 s->m.bit_rate= avctx->bit_rate; |
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4014 |
2138 | 4015 s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t)); |
4016 s->m.me.map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t)); | |
4017 s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t)); | |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
4018 s->m.obmc_scratchpad= av_mallocz(MB_SIZE*MB_SIZE*12*sizeof(uint32_t)); |
2138 | 4019 h263_encode_init(&s->m); //mv_penalty |
4020 | |
3314 | 4021 s->max_ref_frames = FFMAX(FFMIN(avctx->refs, MAX_REF_FRAMES), 1); |
4022 | |
2608
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4023 if(avctx->flags&CODEC_FLAG_PASS1){ |
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4024 if(!avctx->stats_out) |
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4025 avctx->stats_out = av_mallocz(256); |
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4026 } |
3322
0b4f548dfb44
Snow: 10l. 1pass ratecontrol failed to set snow's internal quant.
lorenm
parents:
3314
diff
changeset
|
4027 if((avctx->flags&CODEC_FLAG_PASS2) || !(avctx->flags&CODEC_FLAG_QSCALE)){ |
2608
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4028 if(ff_rate_control_init(&s->m) < 0) |
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4029 return -1; |
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4030 } |
3313 | 4031 s->pass1_rc= !(avctx->flags & (CODEC_FLAG_QSCALE|CODEC_FLAG_PASS2)); |
2608
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4032 |
2138 | 4033 avctx->coded_frame= &s->current_picture; |
4034 switch(avctx->pix_fmt){ | |
4035 // case PIX_FMT_YUV444P: | |
4036 // case PIX_FMT_YUV422P: | |
4037 case PIX_FMT_YUV420P: | |
4038 case PIX_FMT_GRAY8: | |
4039 // case PIX_FMT_YUV411P: | |
4040 // case PIX_FMT_YUV410P: | |
4041 s->colorspace_type= 0; | |
4042 break; | |
4494
ce643a22f049
Replace deprecated PIX_FMT names by the newer variants.
diego
parents:
4436
diff
changeset
|
4043 /* case PIX_FMT_RGB32: |
2138 | 4044 s->colorspace= 1; |
4045 break;*/ | |
4046 default: | |
5908 | 4047 av_log(avctx, AV_LOG_ERROR, "pixel format not supported\n"); |
2138 | 4048 return -1; |
4049 } | |
4050 // avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift); | |
4051 s->chroma_h_shift= 1; | |
4052 s->chroma_v_shift= 1; | |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
4053 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
4054 ff_set_cmp(&s->dsp, s->dsp.me_cmp, s->avctx->me_cmp); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
4055 ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, s->avctx->me_sub_cmp); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
4056 |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
4057 s->avctx->get_buffer(s->avctx, &s->input_picture); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
4058 |
3314 | 4059 if(s->avctx->me_method == ME_ITER){ |
4060 int i; | |
4061 int size= s->b_width * s->b_height << 2*s->block_max_depth; | |
4062 for(i=0; i<s->max_ref_frames; i++){ | |
4063 s->ref_mvs[i]= av_mallocz(size*sizeof(int16_t[2])); | |
4064 s->ref_scores[i]= av_mallocz(size*sizeof(uint32_t)); | |
4065 } | |
4066 } | |
4067 | |
2138 | 4068 return 0; |
4069 } | |
4070 | |
5924 | 4071 #define USE_HALFPEL_PLANE 0 |
4072 | |
5633
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4073 static void halfpel_interpol(SnowContext *s, uint8_t *halfpel[4][4], AVFrame *frame){ |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4074 int p,x,y; |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4075 |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4076 assert(!(s->avctx->flags & CODEC_FLAG_EMU_EDGE)); |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4077 |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4078 for(p=0; p<3; p++){ |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4079 int is_chroma= !!p; |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4080 int w= s->avctx->width >>is_chroma; |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4081 int h= s->avctx->height >>is_chroma; |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4082 int ls= frame->linesize[p]; |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4083 uint8_t *src= frame->data[p]; |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4084 |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4085 halfpel[1][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls); |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4086 halfpel[2][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls); |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4087 halfpel[3][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls); |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4088 |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4089 halfpel[0][p]= src; |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4090 for(y=0; y<h; y++){ |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4091 for(x=0; x<w; x++){ |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4092 int i= y*ls + x; |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4093 |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4094 halfpel[1][p][i]= (20*(src[i] + src[i+1]) - 5*(src[i-1] + src[i+2]) + (src[i-2] + src[i+3]) + 16 )>>5; |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4095 } |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4096 } |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4097 for(y=0; y<h; y++){ |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4098 for(x=0; x<w; x++){ |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4099 int i= y*ls + x; |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4100 |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4101 halfpel[2][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5; |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4102 } |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4103 } |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4104 src= halfpel[1][p]; |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4105 for(y=0; y<h; y++){ |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4106 for(x=0; x<w; x++){ |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4107 int i= y*ls + x; |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4108 |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4109 halfpel[3][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5; |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4110 } |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4111 } |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4112 |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4113 //FIXME border! |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4114 } |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4115 } |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4116 |
2138 | 4117 static int frame_start(SnowContext *s){ |
4118 AVFrame tmp; | |
2187 | 4119 int w= s->avctx->width; //FIXME round up to x16 ? |
4120 int h= s->avctx->height; | |
2138 | 4121 |
2187 | 4122 if(s->current_picture.data[0]){ |
4123 draw_edges(s->current_picture.data[0], s->current_picture.linesize[0], w , h , EDGE_WIDTH ); | |
4124 draw_edges(s->current_picture.data[1], s->current_picture.linesize[1], w>>1, h>>1, EDGE_WIDTH/2); | |
4125 draw_edges(s->current_picture.data[2], s->current_picture.linesize[2], w>>1, h>>1, EDGE_WIDTH/2); | |
4126 } | |
4127 | |
3314 | 4128 tmp= s->last_picture[s->max_ref_frames-1]; |
4129 memmove(s->last_picture+1, s->last_picture, (s->max_ref_frames-1)*sizeof(AVFrame)); | |
5633
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4130 memmove(s->halfpel_plane+1, s->halfpel_plane, (s->max_ref_frames-1)*sizeof(void*)*4*4); |
5924 | 4131 if(USE_HALFPEL_PLANE && s->current_picture.data[0]) |
5633
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4132 halfpel_interpol(s, s->halfpel_plane[0], &s->current_picture); |
3314 | 4133 s->last_picture[0]= s->current_picture; |
2138 | 4134 s->current_picture= tmp; |
2967 | 4135 |
3314 | 4136 if(s->keyframe){ |
4137 s->ref_frames= 0; | |
4138 }else{ | |
4139 int i; | |
4140 for(i=0; i<s->max_ref_frames && s->last_picture[i].data[0]; i++) | |
4141 if(i && s->last_picture[i-1].key_frame) | |
4142 break; | |
4143 s->ref_frames= i; | |
4144 } | |
4145 | |
2138 | 4146 s->current_picture.reference= 1; |
4147 if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){ | |
4148 av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n"); | |
4149 return -1; | |
4150 } | |
2967 | 4151 |
3314 | 4152 s->current_picture.key_frame= s->keyframe; |
4153 | |
2138 | 4154 return 0; |
4155 } | |
4156 | |
4157 static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){ | |
4158 SnowContext *s = avctx->priv_data; | |
2335 | 4159 RangeCoder * const c= &s->c; |
2138 | 4160 AVFrame *pict = data; |
4161 const int width= s->avctx->width; | |
4162 const int height= s->avctx->height; | |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
4163 int level, orientation, plane_index, i, y; |
3661
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
4164 uint8_t rc_header_bak[sizeof(s->header_state)]; |
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
4165 uint8_t rc_block_bak[sizeof(s->block_state)]; |
2138 | 4166 |
2335 | 4167 ff_init_range_encoder(c, buf, buf_size); |
4168 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8); | |
2967 | 4169 |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
4170 for(i=0; i<3; i++){ |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
4171 int shift= !!i; |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
4172 for(y=0; y<(height>>shift); y++) |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
4173 memcpy(&s->input_picture.data[i][y * s->input_picture.linesize[i]], |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
4174 &pict->data[i][y * pict->linesize[i]], |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
4175 width>>shift); |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
4176 } |
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
4177 s->new_picture = *pict; |
2138 | 4178 |
3313 | 4179 s->m.picture_number= avctx->frame_number; |
2608
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4180 if(avctx->flags&CODEC_FLAG_PASS2){ |
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4181 s->m.pict_type = |
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4182 pict->pict_type= s->m.rc_context.entry[avctx->frame_number].new_pict_type; |
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4183 s->keyframe= pict->pict_type==FF_I_TYPE; |
3766 | 4184 if(!(avctx->flags&CODEC_FLAG_QSCALE)) { |
3193 | 4185 pict->quality= ff_rate_estimate_qscale(&s->m, 0); |
3766 | 4186 if (pict->quality < 0) |
4187 return -1; | |
4188 } | |
2608
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4189 }else{ |
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4190 s->keyframe= avctx->gop_size==0 || avctx->frame_number % avctx->gop_size == 0; |
3313 | 4191 s->m.pict_type= |
2608
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4192 pict->pict_type= s->keyframe ? FF_I_TYPE : FF_P_TYPE; |
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4193 } |
2967 | 4194 |
3313 | 4195 if(s->pass1_rc && avctx->frame_number == 0) |
4196 pict->quality= 2*FF_QP2LAMBDA; | |
2161 | 4197 if(pict->quality){ |
3322
0b4f548dfb44
Snow: 10l. 1pass ratecontrol failed to set snow's internal quant.
lorenm
parents:
3314
diff
changeset
|
4198 s->qlog= qscale2qlog(pict->quality); |
3313 | 4199 s->lambda = pict->quality * 3/2; |
4200 } | |
4201 if(s->qlog < 0 || (!pict->quality && (avctx->flags & CODEC_FLAG_QSCALE))){ | |
2161 | 4202 s->qlog= LOSSLESS_QLOG; |
3313 | 4203 s->lambda = 0; |
6412 | 4204 }//else keep previous frame's qlog until after motion estimation |
2138 | 4205 |
4206 frame_start(s); | |
4207 | |
2608
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4208 s->m.current_picture_ptr= &s->m.current_picture; |
2138 | 4209 if(pict->pict_type == P_TYPE){ |
4210 int block_width = (width +15)>>4; | |
4211 int block_height= (height+15)>>4; | |
4212 int stride= s->current_picture.linesize[0]; | |
2967 | 4213 |
2138 | 4214 assert(s->current_picture.data[0]); |
3314 | 4215 assert(s->last_picture[0].data[0]); |
2967 | 4216 |
2138 | 4217 s->m.avctx= s->avctx; |
4218 s->m.current_picture.data[0]= s->current_picture.data[0]; | |
3314 | 4219 s->m. last_picture.data[0]= s->last_picture[0].data[0]; |
2138 | 4220 s->m. new_picture.data[0]= s-> input_picture.data[0]; |
4221 s->m. last_picture_ptr= &s->m. last_picture; | |
4222 s->m.linesize= | |
4223 s->m. last_picture.linesize[0]= | |
4224 s->m. new_picture.linesize[0]= | |
4225 s->m.current_picture.linesize[0]= stride; | |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
4226 s->m.uvlinesize= s->current_picture.linesize[1]; |
2138 | 4227 s->m.width = width; |
4228 s->m.height= height; | |
4229 s->m.mb_width = block_width; | |
4230 s->m.mb_height= block_height; | |
4231 s->m.mb_stride= s->m.mb_width+1; | |
4232 s->m.b8_stride= 2*s->m.mb_width+1; | |
4233 s->m.f_code=1; | |
4234 s->m.pict_type= pict->pict_type; | |
4235 s->m.me_method= s->avctx->me_method; | |
4236 s->m.me.scene_change_score=0; | |
4237 s->m.flags= s->avctx->flags; | |
4238 s->m.quarter_sample= (s->avctx->flags & CODEC_FLAG_QPEL)!=0; | |
4239 s->m.out_format= FMT_H263; | |
4240 s->m.unrestricted_mv= 1; | |
4241 | |
3313 | 4242 s->m.lambda = s->lambda; |
2138 | 4243 s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7); |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
4244 s->lambda2= s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT; |
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
4245 |
2138 | 4246 s->m.dsp= s->dsp; //move |
4247 ff_init_me(&s->m); | |
2993
cb0e26759cca
iterative overlapped block based motion estimation for snow
michael
parents:
2979
diff
changeset
|
4248 s->dsp= s->m.dsp; |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
4249 } |
2967 | 4250 |
3661
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
4251 if(s->pass1_rc){ |
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
4252 memcpy(rc_header_bak, s->header_state, sizeof(s->header_state)); |
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
4253 memcpy(rc_block_bak, s->block_state, sizeof(s->block_state)); |
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
4254 } |
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
4255 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
4256 redo_frame: |
2967 | 4257 |
5670
2f3431b34f0f
make changing spatial_decomposition_count per frame work
michael
parents:
5668
diff
changeset
|
4258 if(pict->pict_type == I_TYPE) |
2f3431b34f0f
make changing spatial_decomposition_count per frame work
michael
parents:
5668
diff
changeset
|
4259 s->spatial_decomposition_count= 5; |
2f3431b34f0f
make changing spatial_decomposition_count per frame work
michael
parents:
5668
diff
changeset
|
4260 else |
2f3431b34f0f
make changing spatial_decomposition_count per frame work
michael
parents:
5668
diff
changeset
|
4261 s->spatial_decomposition_count= 5; |
2f3431b34f0f
make changing spatial_decomposition_count per frame work
michael
parents:
5668
diff
changeset
|
4262 |
3313 | 4263 s->m.pict_type = pict->pict_type; |
2138 | 4264 s->qbias= pict->pict_type == P_TYPE ? 2 : 0; |
4265 | |
5670
2f3431b34f0f
make changing spatial_decomposition_count per frame work
michael
parents:
5668
diff
changeset
|
4266 common_init_after_header(avctx); |
2f3431b34f0f
make changing spatial_decomposition_count per frame work
michael
parents:
5668
diff
changeset
|
4267 |
2f3431b34f0f
make changing spatial_decomposition_count per frame work
michael
parents:
5668
diff
changeset
|
4268 if(s->last_spatial_decomposition_count != s->spatial_decomposition_count){ |
2f3431b34f0f
make changing spatial_decomposition_count per frame work
michael
parents:
5668
diff
changeset
|
4269 for(plane_index=0; plane_index<3; plane_index++){ |
5909 | 4270 calculate_visual_weight(s, &s->plane[plane_index]); |
5670
2f3431b34f0f
make changing spatial_decomposition_count per frame work
michael
parents:
5668
diff
changeset
|
4271 } |
2f3431b34f0f
make changing spatial_decomposition_count per frame work
michael
parents:
5668
diff
changeset
|
4272 } |
2f3431b34f0f
make changing spatial_decomposition_count per frame work
michael
parents:
5668
diff
changeset
|
4273 |
2138 | 4274 encode_header(s); |
2608
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4275 s->m.misc_bits = 8*(s->c.bytestream - s->c.bytestream_start); |
3661
b4425339894b
fix snow 1pass rc: the qlog used and the one written to the bitstream weren't always the same.
lorenm
parents:
3556
diff
changeset
|
4276 encode_blocks(s, 1); |
2608
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4277 s->m.mv_bits = 8*(s->c.bytestream - s->c.bytestream_start) - s->m.misc_bits; |
2967 | 4278 |
2138 | 4279 for(plane_index=0; plane_index<3; plane_index++){ |
4280 Plane *p= &s->plane[plane_index]; | |
4281 int w= p->width; | |
4282 int h= p->height; | |
4283 int x, y; | |
2198 | 4284 // int bits= put_bits_count(&s->c.pb); |
2138 | 4285 |
6414 | 4286 if(!(avctx->flags2 & CODEC_FLAG2_MEMC_ONLY)){ |
4287 //FIXME optimize | |
4288 if(pict->data[plane_index]) //FIXME gray hack | |
4289 for(y=0; y<h; y++){ | |
4290 for(x=0; x<w; x++){ | |
4291 s->spatial_idwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<FRAC_BITS; | |
4292 } | |
4293 } | |
4294 predict_plane(s, s->spatial_idwt_buffer, plane_index, 0); | |
4295 | |
4296 if( plane_index==0 | |
4297 && pict->pict_type == P_TYPE | |
4298 && !(avctx->flags&CODEC_FLAG_PASS2) | |
4299 && s->m.me.scene_change_score > s->avctx->scenechange_threshold){ | |
4300 ff_init_range_encoder(c, buf, buf_size); | |
4301 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8); | |
4302 pict->pict_type= FF_I_TYPE; | |
4303 s->keyframe=1; | |
4304 s->current_picture.key_frame=1; | |
4305 goto redo_frame; | |
2138 | 4306 } |
6414 | 4307 |
4308 if(s->qlog == LOSSLESS_QLOG){ | |
4309 for(y=0; y<h; y++){ | |
4310 for(x=0; x<w; x++){ | |
4311 s->spatial_dwt_buffer[y*w + x]= (s->spatial_idwt_buffer[y*w + x] + (1<<(FRAC_BITS-1))-1)>>FRAC_BITS; | |
4312 } | |
2161 | 4313 } |
6414 | 4314 }else{ |
4315 for(y=0; y<h; y++){ | |
4316 for(x=0; x<w; x++){ | |
4317 s->spatial_dwt_buffer[y*w + x]=s->spatial_idwt_buffer[y*w + x]<<ENCODER_EXTRA_BITS; | |
4318 } | |
5575 | 4319 } |
4320 } | |
6414 | 4321 |
4322 /* if(QUANTIZE2) | |
4323 dwt_quantize(s, p, s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type); | |
4324 else*/ | |
4325 ff_spatial_dwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count); | |
4326 | |
4327 if(s->pass1_rc && plane_index==0){ | |
4328 int delta_qlog = ratecontrol_1pass(s, pict); | |
4329 if (delta_qlog <= INT_MIN) | |
4330 return -1; | |
4331 if(delta_qlog){ | |
4332 //reordering qlog in the bitstream would eliminate this reset | |
4333 ff_init_range_encoder(c, buf, buf_size); | |
4334 memcpy(s->header_state, rc_header_bak, sizeof(s->header_state)); | |
4335 memcpy(s->block_state, rc_block_bak, sizeof(s->block_state)); | |
4336 encode_header(s); | |
4337 encode_blocks(s, 0); | |
2161 | 4338 } |
4339 } | |
6414 | 4340 |
4341 for(level=0; level<s->spatial_decomposition_count; level++){ | |
4342 for(orientation=level ? 1 : 0; orientation<4; orientation++){ | |
4343 SubBand *b= &p->band[level][orientation]; | |
4344 | |
4345 if(!QUANTIZE2) | |
4346 quantize(s, b, b->ibuf, b->buf, b->stride, s->qbias); | |
4347 if(orientation==0) | |
4348 decorrelate(s, b, b->ibuf, b->stride, pict->pict_type == P_TYPE, 0); | |
4349 encode_subband(s, b, b->ibuf, b->parent ? b->parent->ibuf : NULL, b->stride, orientation); | |
4350 assert(b->parent==NULL || b->parent->stride == b->stride*2); | |
4351 if(orientation==0) | |
4352 correlate(s, b, b->ibuf, b->stride, 1, 0); | |
4353 } | |
4354 } | |
4355 | |
4356 for(level=0; level<s->spatial_decomposition_count; level++){ | |
4357 for(orientation=level ? 1 : 0; orientation<4; orientation++){ | |
4358 SubBand *b= &p->band[level][orientation]; | |
4359 | |
4360 dequantize(s, b, b->ibuf, b->stride); | |
4361 } | |
4362 } | |
4363 | |
4364 ff_spatial_idwt(s->spatial_idwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count); | |
4365 if(s->qlog == LOSSLESS_QLOG){ | |
4366 for(y=0; y<h; y++){ | |
4367 for(x=0; x<w; x++){ | |
4368 s->spatial_idwt_buffer[y*w + x]<<=FRAC_BITS; | |
4369 } | |
4370 } | |
4371 } | |
4372 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1); | |
4373 }else{ | |
3338
937f14bb0f23
support doing motion estimation and compensation without any residual transform or coding
michael
parents:
3327
diff
changeset
|
4374 //ME/MC only |
937f14bb0f23
support doing motion estimation and compensation without any residual transform or coding
michael
parents:
3327
diff
changeset
|
4375 if(pict->pict_type == I_TYPE){ |
937f14bb0f23
support doing motion estimation and compensation without any residual transform or coding
michael
parents:
3327
diff
changeset
|
4376 for(y=0; y<h; y++){ |
937f14bb0f23
support doing motion estimation and compensation without any residual transform or coding
michael
parents:
3327
diff
changeset
|
4377 for(x=0; x<w; x++){ |
937f14bb0f23
support doing motion estimation and compensation without any residual transform or coding
michael
parents:
3327
diff
changeset
|
4378 s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]= |
937f14bb0f23
support doing motion estimation and compensation without any residual transform or coding
michael
parents:
3327
diff
changeset
|
4379 pict->data[plane_index][y*pict->linesize[plane_index] + x]; |
937f14bb0f23
support doing motion estimation and compensation without any residual transform or coding
michael
parents:
3327
diff
changeset
|
4380 } |
937f14bb0f23
support doing motion estimation and compensation without any residual transform or coding
michael
parents:
3327
diff
changeset
|
4381 } |
937f14bb0f23
support doing motion estimation and compensation without any residual transform or coding
michael
parents:
3327
diff
changeset
|
4382 }else{ |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
4383 memset(s->spatial_idwt_buffer, 0, sizeof(IDWTELEM)*w*h); |
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
4384 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1); |
3338
937f14bb0f23
support doing motion estimation and compensation without any residual transform or coding
michael
parents:
3327
diff
changeset
|
4385 } |
6414 | 4386 } |
2138 | 4387 if(s->avctx->flags&CODEC_FLAG_PSNR){ |
4388 int64_t error= 0; | |
2967 | 4389 |
6414 | 4390 if(pict->data[plane_index]) //FIXME gray hack |
4391 for(y=0; y<h; y++){ | |
4392 for(x=0; x<w; x++){ | |
4393 int d= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x] - pict->data[plane_index][y*pict->linesize[plane_index] + x]; | |
4394 error += d*d; | |
4395 } | |
2138 | 4396 } |
4397 s->avctx->error[plane_index] += error; | |
2232 | 4398 s->current_picture.error[plane_index] = error; |
2138 | 4399 } |
6414 | 4400 |
2138 | 4401 } |
4402 | |
6144 | 4403 update_last_header_values(s); |
4404 | |
5633
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4405 if(s->last_picture[s->max_ref_frames-1].data[0]){ |
3314 | 4406 avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]); |
5633
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4407 for(i=0; i<9; i++) |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4408 if(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3]) |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4409 av_free(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3] - EDGE_WIDTH*(1+s->current_picture.linesize[i%3])); |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4410 } |
2138 | 4411 |
2608
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4412 s->current_picture.coded_picture_number = avctx->frame_number; |
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4413 s->current_picture.pict_type = pict->pict_type; |
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4414 s->current_picture.quality = pict->quality; |
3313 | 4415 s->m.frame_bits = 8*(s->c.bytestream - s->c.bytestream_start); |
4416 s->m.p_tex_bits = s->m.frame_bits - s->m.misc_bits - s->m.mv_bits; | |
4417 s->m.current_picture.display_picture_number = | |
4418 s->m.current_picture.coded_picture_number = avctx->frame_number; | |
4419 s->m.current_picture.quality = pict->quality; | |
4420 s->m.total_bits += 8*(s->c.bytestream - s->c.bytestream_start); | |
4421 if(s->pass1_rc) | |
3766 | 4422 if (ff_rate_estimate_qscale(&s->m, 0) < 0) |
4423 return -1; | |
3313 | 4424 if(avctx->flags&CODEC_FLAG_PASS1) |
2608
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4425 ff_write_pass1_stats(&s->m); |
3313 | 4426 s->m.last_pict_type = s->m.pict_type; |
4123
5c86acb39889
outputs bit spent on various encoding functions (motion vectors, overhead, etc)
gpoirier
parents:
4122
diff
changeset
|
4427 avctx->frame_bits = s->m.frame_bits; |
5c86acb39889
outputs bit spent on various encoding functions (motion vectors, overhead, etc)
gpoirier
parents:
4122
diff
changeset
|
4428 avctx->mv_bits = s->m.mv_bits; |
5c86acb39889
outputs bit spent on various encoding functions (motion vectors, overhead, etc)
gpoirier
parents:
4122
diff
changeset
|
4429 avctx->misc_bits = s->m.misc_bits; |
5c86acb39889
outputs bit spent on various encoding functions (motion vectors, overhead, etc)
gpoirier
parents:
4122
diff
changeset
|
4430 avctx->p_tex_bits = s->m.p_tex_bits; |
2608
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4431 |
2138 | 4432 emms_c(); |
2967 | 4433 |
2335 | 4434 return ff_rac_terminate(c); |
2138 | 4435 } |
4436 | |
4437 static void common_end(SnowContext *s){ | |
3314 | 4438 int plane_index, level, orientation, i; |
2192 | 4439 |
2138 | 4440 av_freep(&s->spatial_dwt_buffer); |
5587
3ae03eacbe9f
use 16bit IDWT (a SIMD implementation of it should be >2x faster then with
michael
parents:
5575
diff
changeset
|
4441 av_freep(&s->spatial_idwt_buffer); |
2138 | 4442 |
2967 | 4443 av_freep(&s->m.me.scratchpad); |
2138 | 4444 av_freep(&s->m.me.map); |
4445 av_freep(&s->m.me.score_map); | |
3033
e8599ab02b38
faster iterative_me: avoid duplicate mc of neighboring blocks.
lorenm
parents:
3020
diff
changeset
|
4446 av_freep(&s->m.obmc_scratchpad); |
2967 | 4447 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
4448 av_freep(&s->block); |
2192 | 4449 |
3314 | 4450 for(i=0; i<MAX_REF_FRAMES; i++){ |
4451 av_freep(&s->ref_mvs[i]); | |
4452 av_freep(&s->ref_scores[i]); | |
4453 if(s->last_picture[i].data[0]) | |
4454 s->avctx->release_buffer(s->avctx, &s->last_picture[i]); | |
4455 } | |
4456 | |
2967 | 4457 for(plane_index=0; plane_index<3; plane_index++){ |
2192 | 4458 for(level=s->spatial_decomposition_count-1; level>=0; level--){ |
4459 for(orientation=level ? 1 : 0; orientation<4; orientation++){ | |
4460 SubBand *b= &s->plane[plane_index].band[level][orientation]; | |
2967 | 4461 |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4462 av_freep(&b->x_coeff); |
2192 | 4463 } |
4464 } | |
4465 } | |
2138 | 4466 } |
4467 | |
4468 static int encode_end(AVCodecContext *avctx) | |
4469 { | |
4470 SnowContext *s = avctx->priv_data; | |
4471 | |
4472 common_end(s); | |
2608
4fb7fa34050b
allow 2pass ratecontrol. also fixes psnr displayed by mencoder.
lorenm
parents:
2607
diff
changeset
|
4473 av_free(avctx->stats_out); |
2138 | 4474 |
4475 return 0; | |
4476 } | |
4477 | |
4478 static int decode_init(AVCodecContext *avctx) | |
4479 { | |
2635 | 4480 avctx->pix_fmt= PIX_FMT_YUV420P; |
2138 | 4481 |
4482 common_init(avctx); | |
2967 | 4483 |
2138 | 4484 return 0; |
4485 } | |
4486 | |
6255 | 4487 static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, const uint8_t *buf, int buf_size){ |
2138 | 4488 SnowContext *s = avctx->priv_data; |
2335 | 4489 RangeCoder * const c= &s->c; |
2138 | 4490 int bytes_read; |
4491 AVFrame *picture = data; | |
5633
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4492 int level, orientation, plane_index, i; |
2138 | 4493 |
2335 | 4494 ff_init_range_decoder(c, buf, buf_size); |
4495 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8); | |
2138 | 4496 |
4497 s->current_picture.pict_type= FF_I_TYPE; //FIXME I vs. P | |
5668 | 4498 if(decode_header(s)<0) |
4499 return -1; | |
5665
834f359e7257
perform init after reading the values needed for init
michael
parents:
5664
diff
changeset
|
4500 common_init_after_header(avctx); |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
4501 |
5662
77297fe0311c
allocate slice buffer after parsing the header containing needed parameters
michael
parents:
5661
diff
changeset
|
4502 // realloc slice buffer for the case that spatial_decomposition_count changed |
77297fe0311c
allocate slice buffer after parsing the header containing needed parameters
michael
parents:
5661
diff
changeset
|
4503 slice_buffer_destroy(&s->sb); |
77297fe0311c
allocate slice buffer after parsing the header containing needed parameters
michael
parents:
5661
diff
changeset
|
4504 slice_buffer_init(&s->sb, s->plane[0].height, (MB_SIZE >> s->block_max_depth) + s->spatial_decomposition_count * 8 + 1, s->plane[0].width, s->spatial_idwt_buffer); |
77297fe0311c
allocate slice buffer after parsing the header containing needed parameters
michael
parents:
5661
diff
changeset
|
4505 |
5651
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
4506 for(plane_index=0; plane_index<3; plane_index++){ |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
4507 Plane *p= &s->plane[plane_index]; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
4508 p->fast_mc= p->diag_mc && p->htaps==6 && p->hcoeff[0]==40 |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
4509 && p->hcoeff[1]==-10 |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
4510 && p->hcoeff[2]==2; |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
4511 } |
ab023c9f03d0
store halfpel filter coefficients in the header as well as the
michael
parents:
5650
diff
changeset
|
4512 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
4513 if(!s->block) alloc_blocks(s); |
2138 | 4514 |
4515 frame_start(s); | |
6412 | 4516 //keyframe flag duplication mess FIXME |
2138 | 4517 if(avctx->debug&FF_DEBUG_PICT_INFO) |
4518 av_log(avctx, AV_LOG_ERROR, "keyframe:%d qlog:%d\n", s->keyframe, s->qlog); | |
2967 | 4519 |
2189
70b27300a496
quad tree based motion compensation (currently only 16x16 & 8x8 OBMC blocks, but can be extended to other block sizes easily)
michael
parents:
2187
diff
changeset
|
4520 decode_blocks(s); |
2138 | 4521 |
4522 for(plane_index=0; plane_index<3; plane_index++){ | |
4523 Plane *p= &s->plane[plane_index]; | |
4524 int w= p->width; | |
4525 int h= p->height; | |
4526 int x, y; | |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4527 int decode_state[MAX_DECOMPOSITIONS][4][1]; /* Stored state info for unpack_coeffs. 1 variable per instance. */ |
2967 | 4528 |
6414 | 4529 if(s->avctx->debug&2048){ |
4530 memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h); | |
4531 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1); | |
4532 | |
4533 for(y=0; y<h; y++){ | |
4534 for(x=0; x<w; x++){ | |
4535 int v= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]; | |
4536 s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v; | |
4537 } | |
2138 | 4538 } |
4539 } | |
6414 | 4540 |
4541 { | |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4542 for(level=0; level<s->spatial_decomposition_count; level++){ |
2634
337217ecbb3e
tighter snow slicing patch by (Yartrebo || yartrebo earthlink net)
michael
parents:
2628
diff
changeset
|
4543 for(orientation=level ? 1 : 0; orientation<4; orientation++){ |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4544 SubBand *b= &p->band[level][orientation]; |
6414 | 4545 unpack_coeffs(s, b, b->parent, orientation); |
4546 } | |
4547 } | |
4548 } | |
4549 | |
4550 { | |
4551 const int mb_h= s->b_height << s->block_max_depth; | |
4552 const int block_size = MB_SIZE >> s->block_max_depth; | |
4553 const int block_w = plane_index ? block_size/2 : block_size; | |
4554 int mb_y; | |
4555 dwt_compose_t cs[MAX_DECOMPOSITIONS]; | |
4556 int yd=0, yq=0; | |
4557 int y; | |
4558 int end_y; | |
4559 | |
4560 ff_spatial_idwt_buffered_init(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count); | |
4561 for(mb_y=0; mb_y<=mb_h; mb_y++){ | |
4562 | |
4563 int slice_starty = block_w*mb_y; | |
4564 int slice_h = block_w*(mb_y+1); | |
4565 if (!(s->keyframe || s->avctx->debug&512)){ | |
4566 slice_starty = FFMAX(0, slice_starty - (block_w >> 1)); | |
4567 slice_h -= (block_w >> 1); | |
4568 } | |
4569 | |
4570 for(level=0; level<s->spatial_decomposition_count; level++){ | |
4571 for(orientation=level ? 1 : 0; orientation<4; orientation++){ | |
4572 SubBand *b= &p->band[level][orientation]; | |
4573 int start_y; | |
4574 int end_y; | |
4575 int our_mb_start = mb_y; | |
4576 int our_mb_end = (mb_y + 1); | |
4577 const int extra= 3; | |
4578 start_y = (mb_y ? ((block_w * our_mb_start) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra: 0); | |
4579 end_y = (((block_w * our_mb_end) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra); | |
4580 if (!(s->keyframe || s->avctx->debug&512)){ | |
4581 start_y = FFMAX(0, start_y - (block_w >> (1+s->spatial_decomposition_count - level))); | |
4582 end_y = FFMAX(0, end_y - (block_w >> (1+s->spatial_decomposition_count - level))); | |
2634
337217ecbb3e
tighter snow slicing patch by (Yartrebo || yartrebo earthlink net)
michael
parents:
2628
diff
changeset
|
4583 } |
6414 | 4584 start_y = FFMIN(b->height, start_y); |
4585 end_y = FFMIN(b->height, end_y); | |
4586 | |
4587 if (start_y != end_y){ | |
4588 if (orientation == 0){ | |
4589 SubBand * correlate_band = &p->band[0][0]; | |
4590 int correlate_end_y = FFMIN(b->height, end_y + 1); | |
4591 int correlate_start_y = FFMIN(b->height, (start_y ? start_y + 1 : 0)); | |
4592 decode_subband_slice_buffered(s, correlate_band, &s->sb, correlate_start_y, correlate_end_y, decode_state[0][0]); | |
4593 correlate_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, 1, 0, correlate_start_y, correlate_end_y); | |
4594 dequantize_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, start_y, end_y); | |
4595 } | |
4596 else | |
4597 decode_subband_slice_buffered(s, b, &s->sb, start_y, end_y, decode_state[level][orientation]); | |
4598 } | |
2634
337217ecbb3e
tighter snow slicing patch by (Yartrebo || yartrebo earthlink net)
michael
parents:
2628
diff
changeset
|
4599 } |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4600 } |
6414 | 4601 |
4602 for(; yd<slice_h; yd+=4){ | |
4603 ff_spatial_idwt_buffered_slice(&s->dsp, cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd); | |
4604 } | |
4605 | |
4606 if(s->qlog == LOSSLESS_QLOG){ | |
4607 for(; yq<slice_h && yq<h; yq++){ | |
4608 IDWTELEM * line = slice_buffer_get_line(&s->sb, yq); | |
4609 for(x=0; x<w; x++){ | |
4610 line[x] <<= FRAC_BITS; | |
4611 } | |
2161 | 4612 } |
4613 } | |
6414 | 4614 |
4615 predict_slice_buffered(s, &s->sb, s->spatial_idwt_buffer, plane_index, 1, mb_y); | |
4616 | |
4617 y = FFMIN(p->height, slice_starty); | |
4618 end_y = FFMIN(p->height, slice_h); | |
4619 while(y < end_y) | |
4620 slice_buffer_release(&s->sb, y++); | |
2161 | 4621 } |
2562 | 4622 |
6414 | 4623 slice_buffer_flush(&s->sb); |
4624 } | |
4625 | |
2138 | 4626 } |
2967 | 4627 |
2138 | 4628 emms_c(); |
4629 | |
5633
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4630 if(s->last_picture[s->max_ref_frames-1].data[0]){ |
3314 | 4631 avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]); |
5633
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4632 for(i=0; i<9; i++) |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4633 if(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3]) |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4634 av_free(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3] - EDGE_WIDTH*(1+s->current_picture.linesize[i%3])); |
873ea64637d9
code to do halfpel interpolation per frame (unfinished and under ifdef but it
michael
parents:
5627
diff
changeset
|
4635 } |
2138 | 4636 |
6414 | 4637 if(!(s->avctx->debug&2048)) |
4638 *picture= s->current_picture; | |
4639 else | |
4640 *picture= s->mconly_picture; | |
2967 | 4641 |
2138 | 4642 *data_size = sizeof(AVFrame); |
2967 | 4643 |
2335 | 4644 bytes_read= c->bytestream - c->bytestream_start; |
4645 if(bytes_read ==0) av_log(s->avctx, AV_LOG_ERROR, "error at end of frame\n"); //FIXME | |
2138 | 4646 |
4647 return bytes_read; | |
4648 } | |
4649 | |
4650 static int decode_end(AVCodecContext *avctx) | |
4651 { | |
4652 SnowContext *s = avctx->priv_data; | |
4653 | |
2589
a5a62827f195
Snow Slicing patch by (Yartrebo) yartrebo earthlink net
michael
parents:
2562
diff
changeset
|
4654 slice_buffer_destroy(&s->sb); |
2967 | 4655 |
2138 | 4656 common_end(s); |
4657 | |
4658 return 0; | |
4659 } | |
4660 | |
4661 AVCodec snow_decoder = { | |
4662 "snow", | |
4663 CODEC_TYPE_VIDEO, | |
4664 CODEC_ID_SNOW, | |
4665 sizeof(SnowContext), | |
4666 decode_init, | |
4667 NULL, | |
4668 decode_end, | |
4669 decode_frame, | |
4670 0 /*CODEC_CAP_DR1*/ /*| CODEC_CAP_DRAW_HORIZ_BAND*/, | |
4671 NULL | |
4672 }; | |
4673 | |
5224 | 4674 #ifdef CONFIG_SNOW_ENCODER |
2138 | 4675 AVCodec snow_encoder = { |
4676 "snow", | |
4677 CODEC_TYPE_VIDEO, | |
4678 CODEC_ID_SNOW, | |
4679 sizeof(SnowContext), | |
4680 encode_init, | |
4681 encode_frame, | |
4682 encode_end, | |
4683 }; | |
2408
a6e4da1c28ee
Disable encoders patch by (Gianluigi Tiesi <mplayer netfarm it>)
michael
parents:
2368
diff
changeset
|
4684 #endif |
2138 | 4685 |
4686 | |
6164
ecaf5226e9b0
Consistently use TEST as the preprocessor condition to enable test code.
diego
parents:
6144
diff
changeset
|
4687 #ifdef TEST |
2138 | 4688 #undef malloc |
4689 #undef free | |
4690 #undef printf | |
5380
389366aa3458
Fix the self tests which are contained in some codecs and are using random().
takis
parents:
5254
diff
changeset
|
4691 #undef random |
2138 | 4692 |
5934 | 4693 int main(void){ |
2138 | 4694 int width=256; |
4695 int height=256; | |
4696 int buffer[2][width*height]; | |
4697 SnowContext s; | |
4698 int i; | |
4699 s.spatial_decomposition_count=6; | |
4700 s.spatial_decomposition_type=1; | |
2967 | 4701 |
2138 | 4702 printf("testing 5/3 DWT\n"); |
4703 for(i=0; i<width*height; i++) | |
4704 buffer[0][i]= buffer[1][i]= random()%54321 - 12345; | |
2967 | 4705 |
2951 | 4706 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count); |
4707 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count); | |
2967 | 4708 |
2138 | 4709 for(i=0; i<width*height; i++) |
4710 if(buffer[0][i]!= buffer[1][i]) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]); | |
4711 | |
4712 printf("testing 9/7 DWT\n"); | |
4713 s.spatial_decomposition_type=0; | |
4714 for(i=0; i<width*height; i++) | |
4715 buffer[0][i]= buffer[1][i]= random()%54321 - 12345; | |
2967 | 4716 |
2951 | 4717 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count); |
4718 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count); | |
2967 | 4719 |
2138 | 4720 for(i=0; i<width*height; i++) |
4001 | 4721 if(FFABS(buffer[0][i] - buffer[1][i])>20) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]); |
2967 | 4722 |
2951 | 4723 #if 0 |
2138 | 4724 printf("testing AC coder\n"); |
4725 memset(s.header_state, 0, sizeof(s.header_state)); | |
2335 | 4726 ff_init_range_encoder(&s.c, buffer[0], 256*256); |
2138 | 4727 ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64); |
2967 | 4728 |
2138 | 4729 for(i=-256; i<256; i++){ |
4001 | 4730 put_symbol(&s.c, s.header_state, i*i*i/3*FFABS(i), 1); |
2138 | 4731 } |
2335 | 4732 ff_rac_terminate(&s.c); |
2138 | 4733 |
4734 memset(s.header_state, 0, sizeof(s.header_state)); | |
2335 | 4735 ff_init_range_decoder(&s.c, buffer[0], 256*256); |
2138 | 4736 ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64); |
2967 | 4737 |
2138 | 4738 for(i=-256; i<256; i++){ |
4739 int j; | |
4740 j= get_symbol(&s.c, s.header_state, 1); | |
4001 | 4741 if(j!=i*i*i/3*FFABS(i)) printf("fsck: %d != %d\n", i, j); |
2138 | 4742 } |
2951 | 4743 #endif |
6414 | 4744 { |
4745 int level, orientation, x, y; | |
4746 int64_t errors[8][4]; | |
4747 int64_t g=0; | |
4748 | |
4749 memset(errors, 0, sizeof(errors)); | |
4750 s.spatial_decomposition_count=3; | |
4751 s.spatial_decomposition_type=0; | |
4752 for(level=0; level<s.spatial_decomposition_count; level++){ | |
4753 for(orientation=level ? 1 : 0; orientation<4; orientation++){ | |
4754 int w= width >> (s.spatial_decomposition_count-level); | |
4755 int h= height >> (s.spatial_decomposition_count-level); | |
4756 int stride= width << (s.spatial_decomposition_count-level); | |
4757 DWTELEM *buf= buffer[0]; | |
4758 int64_t error=0; | |
4759 | |
4760 if(orientation&1) buf+=w; | |
4761 if(orientation>1) buf+=stride>>1; | |
4762 | |
4763 memset(buffer[0], 0, sizeof(int)*width*height); | |
4764 buf[w/2 + h/2*stride]= 256*256; | |
4765 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count); | |
4766 for(y=0; y<height; y++){ | |
4767 for(x=0; x<width; x++){ | |
4768 int64_t d= buffer[0][x + y*width]; | |
4769 error += d*d; | |
4770 if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9 && level==2) printf("%8"PRId64" ", d); | |
4771 } | |
4772 if(FFABS(height/2-y)<9 && level==2) printf("\n"); | |
2138 | 4773 } |
6414 | 4774 error= (int)(sqrt(error)+0.5); |
4775 errors[level][orientation]= error; | |
4776 if(g) g=ff_gcd(g, error); | |
4777 else g= error; | |
2138 | 4778 } |
4779 } | |
6414 | 4780 printf("static int const visual_weight[][4]={\n"); |
4781 for(level=0; level<s.spatial_decomposition_count; level++){ | |
4782 printf(" {"); | |
4783 for(orientation=0; orientation<4; orientation++){ | |
4784 printf("%8"PRId64",", errors[level][orientation]/g); | |
4785 } | |
4786 printf("},\n"); | |
2138 | 4787 } |
6414 | 4788 printf("};\n"); |
4789 { | |
2138 | 4790 int level=2; |
4791 int w= width >> (s.spatial_decomposition_count-level); | |
6168 | 4792 //int h= height >> (s.spatial_decomposition_count-level); |
2138 | 4793 int stride= width << (s.spatial_decomposition_count-level); |
4794 DWTELEM *buf= buffer[0]; | |
4795 int64_t error=0; | |
4796 | |
4797 buf+=w; | |
4798 buf+=stride>>1; | |
2967 | 4799 |
2138 | 4800 memset(buffer[0], 0, sizeof(int)*width*height); |
4801 #if 1 | |
4802 for(y=0; y<height; y++){ | |
4803 for(x=0; x<width; x++){ | |
4804 int tab[4]={0,2,3,1}; | |
4805 buffer[0][x+width*y]= 256*256*tab[(x&1) + 2*(y&1)]; | |
4806 } | |
4807 } | |
2951 | 4808 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count); |
2138 | 4809 #else |
4810 for(y=0; y<h; y++){ | |
4811 for(x=0; x<w; x++){ | |
4812 buf[x + y*stride ]=169; | |
4813 buf[x + y*stride-w]=64; | |
4814 } | |
4815 } | |
2951 | 4816 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count); |
2138 | 4817 #endif |
4818 for(y=0; y<height; y++){ | |
4819 for(x=0; x<width; x++){ | |
4820 int64_t d= buffer[0][x + y*width]; | |
4821 error += d*d; | |
4122
daae66c03857
Replace most of the %lld and %llx by their (cleaner) PRI*64 counterparts.
diego
parents:
4011
diff
changeset
|
4822 if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9) printf("%8"PRId64" ", d); |
2138 | 4823 } |
4001 | 4824 if(FFABS(height/2-y)<9) printf("\n"); |
2138 | 4825 } |
6414 | 4826 } |
4827 | |
2138 | 4828 } |
4829 return 0; | |
4830 } | |
6164
ecaf5226e9b0
Consistently use TEST as the preprocessor condition to enable test code.
diego
parents:
6144
diff
changeset
|
4831 #endif /* TEST */ |