808
|
1 /*
|
|
2 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
|
|
3 *
|
|
4 * This file is part of FFmpeg.
|
|
5 *
|
|
6 * FFmpeg is free software; you can redistribute it and/or
|
|
7 * modify it under the terms of the GNU Lesser General Public
|
|
8 * License as published by the Free Software Foundation; either
|
|
9 * version 2.1 of the License, or (at your option) any later version.
|
|
10 *
|
|
11 * FFmpeg is distributed in the hope that it will be useful,
|
|
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
14 * Lesser General Public License for more details.
|
|
15 *
|
|
16 * You should have received a copy of the GNU Lesser General Public
|
|
17 * License along with FFmpeg; if not, write to the Free Software
|
|
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
19 */
|
|
20
|
|
21 #include "avcodec.h"
|
|
22 #include "common.h"
|
|
23 #include "dsputil.h"
|
|
24 #include "snow.h"
|
|
25
|
|
26 #include "rangecoder.h"
|
|
27
|
|
28 #include "mpegvideo.h"
|
|
29
|
|
30 #undef NDEBUG
|
|
31 #include <assert.h>
|
|
32
|
|
33 static const int8_t quant3[256]={
|
|
34 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
35 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
36 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
37 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
38 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
39 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
40 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
41 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
42 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
|
|
43 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
|
|
44 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
|
|
45 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
|
|
46 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
|
|
47 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
|
|
48 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
|
|
49 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
|
|
50 };
|
|
51 static const int8_t quant3b[256]={
|
|
52 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
53 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
54 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
55 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
56 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
57 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
58 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
59 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
60 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
|
|
61 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
|
|
62 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
|
|
63 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
|
|
64 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
|
|
65 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
|
|
66 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
|
|
67 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
|
|
68 };
|
|
69 static const int8_t quant3bA[256]={
|
|
70 0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
|
|
71 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
|
|
72 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
|
|
73 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
|
|
74 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
|
|
75 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
|
|
76 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
|
|
77 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
|
|
78 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
|
|
79 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
|
|
80 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
|
|
81 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
|
|
82 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
|
|
83 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
|
|
84 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
|
|
85 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
|
|
86 };
|
|
87 static const int8_t quant5[256]={
|
|
88 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
|
89 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
|
90 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
|
91 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
|
92 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
|
93 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
|
94 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
|
95 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
|
96 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
|
|
97 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
|
|
98 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
|
|
99 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
|
|
100 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
|
|
101 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
|
|
102 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
|
|
103 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
|
|
104 };
|
|
105 static const int8_t quant7[256]={
|
|
106 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
|
107 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
|
108 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
|
|
109 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
|
110 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
|
111 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
|
112 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
|
113 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
|
114 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
|
|
115 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
|
|
116 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
|
|
117 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
|
|
118 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
|
|
119 -3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
|
|
120 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
|
|
121 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
|
|
122 };
|
|
123 static const int8_t quant9[256]={
|
|
124 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
|
125 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
|
126 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
|
127 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
|
128 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
|
129 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
|
130 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
|
131 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
|
132 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
|
|
133 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
|
|
134 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
|
|
135 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
|
|
136 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
|
|
137 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
|
|
138 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
|
|
139 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
|
|
140 };
|
|
141 static const int8_t quant11[256]={
|
|
142 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
|
|
143 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
|
144 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
|
145 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
|
146 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
|
147 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
|
148 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
|
149 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
|
150 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
|
|
151 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
|
|
152 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
|
|
153 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
|
|
154 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
|
|
155 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
|
|
156 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
|
|
157 -4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
|
|
158 };
|
|
159 static const int8_t quant13[256]={
|
|
160 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
|
|
161 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
|
162 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
|
163 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
|
|
164 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
|
|
165 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
|
|
166 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
|
|
167 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
|
|
168 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
|
|
169 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
|
|
170 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
|
|
171 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
|
|
172 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
|
|
173 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
|
|
174 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
|
|
175 -4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
|
|
176 };
|
|
177
|
|
178 #if 0 //64*cubic
|
|
179 static const uint8_t obmc32[1024]={
|
|
180 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
181 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
|
|
182 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
|
|
183 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
|
|
184 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
|
|
185 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
|
|
186 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
|
|
187 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
|
|
188 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
|
|
189 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
|
|
190 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
|
|
191 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
|
|
192 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
|
|
193 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
|
|
194 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
|
|
195 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
|
|
196 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
|
|
197 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
|
|
198 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
|
|
199 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
|
|
200 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
|
|
201 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
|
|
202 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
|
|
203 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
|
|
204 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
|
|
205 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
|
|
206 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
|
|
207 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
|
|
208 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
|
|
209 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
|
|
210 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
|
|
211 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
212 //error:0.000022
|
|
213 };
|
|
214 static const uint8_t obmc16[256]={
|
|
215 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
|
|
216 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
|
|
217 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
|
|
218 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
|
|
219 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
|
|
220 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
|
|
221 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
|
|
222 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
|
|
223 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
|
|
224 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
|
|
225 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
|
|
226 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
|
|
227 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
|
|
228 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
|
|
229 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
|
|
230 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
|
|
231 //error:0.000033
|
|
232 };
|
|
233 #elif 1 // 64*linear
|
|
234 static const uint8_t obmc32[1024]={
|
|
235 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0,
|
|
236 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0,
|
|
237 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0,
|
|
238 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0,
|
|
239 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4,
|
|
240 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4,
|
|
241 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4,
|
|
242 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4,
|
|
243 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4,
|
|
244 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4,
|
|
245 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4,
|
|
246 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4,
|
|
247 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8,
|
|
248 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8,
|
|
249 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8,
|
|
250 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8,
|
|
251 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8,
|
|
252 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8,
|
|
253 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8,
|
|
254 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8,
|
|
255 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4,
|
|
256 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4,
|
|
257 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4,
|
|
258 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4,
|
|
259 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4,
|
|
260 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4,
|
|
261 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4,
|
|
262 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4,
|
|
263 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0,
|
|
264 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0,
|
|
265 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0,
|
|
266 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0,
|
|
267 //error:0.000020
|
|
268 };
|
|
269 static const uint8_t obmc16[256]={
|
|
270 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0,
|
|
271 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4,
|
|
272 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4,
|
|
273 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8,
|
|
274 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8,
|
|
275 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
|
|
276 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
|
|
277 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
|
|
278 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
|
|
279 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
|
|
280 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
|
|
281 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8,
|
|
282 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8,
|
|
283 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4,
|
|
284 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4,
|
|
285 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0,
|
|
286 //error:0.000015
|
|
287 };
|
|
288 #else //64*cos
|
|
289 static const uint8_t obmc32[1024]={
|
|
290 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
291 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
|
|
292 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
|
|
293 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
|
|
294 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
|
|
295 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
|
|
296 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
|
|
297 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
|
|
298 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
|
|
299 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
|
|
300 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
|
|
301 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
|
|
302 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
|
|
303 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
|
|
304 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
|
|
305 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
|
|
306 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
|
|
307 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
|
|
308 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
|
|
309 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
|
|
310 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
|
|
311 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
|
|
312 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
|
|
313 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
|
|
314 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
|
|
315 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
|
|
316 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
|
|
317 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
|
|
318 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
|
|
319 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
|
|
320 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
|
|
321 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
322 //error:0.000022
|
|
323 };
|
|
324 static const uint8_t obmc16[256]={
|
|
325 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
|
|
326 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
|
|
327 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
|
|
328 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
|
|
329 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
|
|
330 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
|
|
331 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
|
|
332 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
|
|
333 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
|
|
334 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
|
|
335 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
|
|
336 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
|
|
337 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
|
|
338 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
|
|
339 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
|
|
340 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
|
|
341 //error:0.000022
|
|
342 };
|
|
343 #endif
|
|
344
|
|
345 //linear *64
|
|
346 static const uint8_t obmc8[64]={
|
|
347 4, 12, 20, 28, 28, 20, 12, 4,
|
|
348 12, 36, 60, 84, 84, 60, 36, 12,
|
|
349 20, 60,100,140,140,100, 60, 20,
|
|
350 28, 84,140,196,196,140, 84, 28,
|
|
351 28, 84,140,196,196,140, 84, 28,
|
|
352 20, 60,100,140,140,100, 60, 20,
|
|
353 12, 36, 60, 84, 84, 60, 36, 12,
|
|
354 4, 12, 20, 28, 28, 20, 12, 4,
|
|
355 //error:0.000000
|
|
356 };
|
|
357
|
|
358 //linear *64
|
|
359 static const uint8_t obmc4[16]={
|
|
360 16, 48, 48, 16,
|
|
361 48,144,144, 48,
|
|
362 48,144,144, 48,
|
|
363 16, 48, 48, 16,
|
|
364 //error:0.000000
|
|
365 };
|
|
366
|
|
367 static const uint8_t *obmc_tab[4]={
|
|
368 obmc32, obmc16, obmc8, obmc4
|
|
369 };
|
|
370
|
|
371 static int scale_mv_ref[MAX_REF_FRAMES][MAX_REF_FRAMES];
|
|
372
|
|
373 typedef struct BlockNode{
|
|
374 int16_t mx;
|
|
375 int16_t my;
|
|
376 uint8_t ref;
|
|
377 uint8_t color[3];
|
|
378 uint8_t type;
|
|
379 //#define TYPE_SPLIT 1
|
|
380 #define BLOCK_INTRA 1
|
|
381 #define BLOCK_OPT 2
|
|
382 //#define TYPE_NOCOLOR 4
|
|
383 uint8_t level; //FIXME merge into type?
|
|
384 }BlockNode;
|
|
385
|
|
386 static const BlockNode null_block= { //FIXME add border maybe
|
|
387 .color= {128,128,128},
|
|
388 .mx= 0,
|
|
389 .my= 0,
|
|
390 .ref= 0,
|
|
391 .type= 0,
|
|
392 .level= 0,
|
|
393 };
|
|
394
|
|
395 #define LOG2_MB_SIZE 4
|
|
396 #define MB_SIZE (1<<LOG2_MB_SIZE)
|
|
397
|
|
398 typedef struct x_and_coeff{
|
|
399 int16_t x;
|
|
400 uint16_t coeff;
|
|
401 } x_and_coeff;
|
|
402
|
|
403 typedef struct SubBand{
|
|
404 int level;
|
|
405 int stride;
|
|
406 int width;
|
|
407 int height;
|
|
408 int qlog; ///< log(qscale)/log[2^(1/6)]
|
|
409 DWTELEM *buf;
|
|
410 int buf_x_offset;
|
|
411 int buf_y_offset;
|
|
412 int stride_line; ///< Stride measured in lines, not pixels.
|
|
413 x_and_coeff * x_coeff;
|
|
414 struct SubBand *parent;
|
|
415 uint8_t state[/*7*2*/ 7 + 512][32];
|
|
416 }SubBand;
|
|
417
|
|
418 typedef struct Plane{
|
|
419 int width;
|
|
420 int height;
|
|
421 SubBand band[MAX_DECOMPOSITIONS][4];
|
|
422 }Plane;
|
|
423
|
|
424 typedef struct SnowContext{
|
|
425 // MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX)
|
|
426
|
|
427 AVCodecContext *avctx;
|
|
428 RangeCoder c;
|
|
429 DSPContext dsp;
|
|
430 AVFrame new_picture;
|
|
431 AVFrame input_picture; ///< new_picture with the internal linesizes
|
|
432 AVFrame current_picture;
|
|
433 AVFrame last_picture[MAX_REF_FRAMES];
|
|
434 AVFrame mconly_picture;
|
|
435 // uint8_t q_context[16];
|
|
436 uint8_t header_state[32];
|
|
437 uint8_t block_state[128 + 32*128];
|
|
438 int keyframe;
|
|
439 int always_reset;
|
|
440 int version;
|
|
441 int spatial_decomposition_type;
|
|
442 int temporal_decomposition_type;
|
|
443 int spatial_decomposition_count;
|
|
444 int temporal_decomposition_count;
|
|
445 int max_ref_frames;
|
|
446 int ref_frames;
|
|
447 int16_t (*ref_mvs[MAX_REF_FRAMES])[2];
|
|
448 uint32_t *ref_scores[MAX_REF_FRAMES];
|
|
449 DWTELEM *spatial_dwt_buffer;
|
|
450 int colorspace_type;
|
|
451 int chroma_h_shift;
|
|
452 int chroma_v_shift;
|
|
453 int spatial_scalability;
|
|
454 int qlog;
|
|
455 int lambda;
|
|
456 int lambda2;
|
|
457 int pass1_rc;
|
|
458 int mv_scale;
|
|
459 int qbias;
|
|
460 #define QBIAS_SHIFT 3
|
|
461 int b_width;
|
|
462 int b_height;
|
|
463 int block_max_depth;
|
|
464 Plane plane[MAX_PLANES];
|
|
465 BlockNode *block;
|
|
466 #define ME_CACHE_SIZE 1024
|
|
467 int me_cache[ME_CACHE_SIZE];
|
|
468 int me_cache_generation;
|
|
469 slice_buffer sb;
|
|
470
|
|
471 MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX)
|
|
472 }SnowContext;
|
|
473
|
|
474 typedef struct {
|
|
475 DWTELEM *b0;
|
|
476 DWTELEM *b1;
|
|
477 DWTELEM *b2;
|
|
478 DWTELEM *b3;
|
|
479 int y;
|
|
480 } dwt_compose_t;
|
|
481
|
|
482 #define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num)))
|
|
483 //#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num)))
|
|
484
|
|
485 static void iterative_me(SnowContext *s);
|
|
486
|
|
487 static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, DWTELEM * base_buffer)
|
|
488 {
|
|
489 int i;
|
|
490
|
|
491 buf->base_buffer = base_buffer;
|
|
492 buf->line_count = line_count;
|
|
493 buf->line_width = line_width;
|
|
494 buf->data_count = max_allocated_lines;
|
|
495 buf->line = (DWTELEM * *) av_mallocz (sizeof(DWTELEM *) * line_count);
|
|
496 buf->data_stack = (DWTELEM * *) av_malloc (sizeof(DWTELEM *) * max_allocated_lines);
|
|
497
|
|
498 for (i = 0; i < max_allocated_lines; i++)
|
|
499 {
|
|
500 buf->data_stack[i] = (DWTELEM *) av_malloc (sizeof(DWTELEM) * line_width);
|
|
501 }
|
|
502
|
|
503 buf->data_stack_top = max_allocated_lines - 1;
|
|
504 }
|
|
505
|
|
506 static DWTELEM * slice_buffer_load_line(slice_buffer * buf, int line)
|
|
507 {
|
|
508 int offset;
|
|
509 DWTELEM * buffer;
|
|
510
|
|
511 // av_log(NULL, AV_LOG_DEBUG, "Cache hit: %d\n", line);
|
|
512
|
|
513 assert(buf->data_stack_top >= 0);
|
|
514 // assert(!buf->line[line]);
|
|
515 if (buf->line[line])
|
|
516 return buf->line[line];
|
|
517
|
|
518 offset = buf->line_width * line;
|
|
519 buffer = buf->data_stack[buf->data_stack_top];
|
|
520 buf->data_stack_top--;
|
|
521 buf->line[line] = buffer;
|
|
522
|
|
523 // av_log(NULL, AV_LOG_DEBUG, "slice_buffer_load_line: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
|
|
524
|
|
525 return buffer;
|
|
526 }
|
|
527
|
|
528 static void slice_buffer_release(slice_buffer * buf, int line)
|
|
529 {
|
|
530 int offset;
|
|
531 DWTELEM * buffer;
|
|
532
|
|
533 assert(line >= 0 && line < buf->line_count);
|
|
534 assert(buf->line[line]);
|
|
535
|
|
536 offset = buf->line_width * line;
|
|
537 buffer = buf->line[line];
|
|
538 buf->data_stack_top++;
|
|
539 buf->data_stack[buf->data_stack_top] = buffer;
|
|
540 buf->line[line] = NULL;
|
|
541
|
|
542 // av_log(NULL, AV_LOG_DEBUG, "slice_buffer_release: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
|
|
543 }
|
|
544
|
|
545 static void slice_buffer_flush(slice_buffer * buf)
|
|
546 {
|
|
547 int i;
|
|
548 for (i = 0; i < buf->line_count; i++)
|
|
549 {
|
|
550 if (buf->line[i])
|
|
551 {
|
|
552 // av_log(NULL, AV_LOG_DEBUG, "slice_buffer_flush: line: %d \n", i);
|
|
553 slice_buffer_release(buf, i);
|
|
554 }
|
|
555 }
|
|
556 }
|
|
557
|
|
558 static void slice_buffer_destroy(slice_buffer * buf)
|
|
559 {
|
|
560 int i;
|
|
561 slice_buffer_flush(buf);
|
|
562
|
|
563 for (i = buf->data_count - 1; i >= 0; i--)
|
|
564 {
|
|
565 assert(buf->data_stack[i]);
|
|
566 av_freep(&buf->data_stack[i]);
|
|
567 }
|
|
568 assert(buf->data_stack);
|
|
569 av_freep(&buf->data_stack);
|
|
570 assert(buf->line);
|
|
571 av_freep(&buf->line);
|
|
572 }
|
|
573
|
|
574 #ifdef __sgi
|
|
575 // Avoid a name clash on SGI IRIX
|
|
576 #undef qexp
|
|
577 #endif
|
|
578 #define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
|
|
579 static uint8_t qexp[QROOT];
|
|
580
|
|
581 static inline int mirror(int v, int m){
|
|
582 while((unsigned)v > (unsigned)m){
|
|
583 v=-v;
|
|
584 if(v<0) v+= 2*m;
|
|
585 }
|
|
586 return v;
|
|
587 }
|
|
588
|
|
589 static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
|
|
590 int i;
|
|
591
|
|
592 if(v){
|
|
593 const int a= FFABS(v);
|
|
594 const int e= av_log2(a);
|
|
595 #if 1
|
|
596 const int el= FFMIN(e, 10);
|
|
597 put_rac(c, state+0, 0);
|
|
598
|
|
599 for(i=0; i<el; i++){
|
|
600 put_rac(c, state+1+i, 1); //1..10
|
|
601 }
|
|
602 for(; i<e; i++){
|
|
603 put_rac(c, state+1+9, 1); //1..10
|
|
604 }
|
|
605 put_rac(c, state+1+FFMIN(i,9), 0);
|
|
606
|
|
607 for(i=e-1; i>=el; i--){
|
|
608 put_rac(c, state+22+9, (a>>i)&1); //22..31
|
|
609 }
|
|
610 for(; i>=0; i--){
|
|
611 put_rac(c, state+22+i, (a>>i)&1); //22..31
|
|
612 }
|
|
613
|
|
614 if(is_signed)
|
|
615 put_rac(c, state+11 + el, v < 0); //11..21
|
|
616 #else
|
|
617
|
|
618 put_rac(c, state+0, 0);
|
|
619 if(e<=9){
|
|
620 for(i=0; i<e; i++){
|
|
621 put_rac(c, state+1+i, 1); //1..10
|
|
622 }
|
|
623 put_rac(c, state+1+i, 0);
|
|
624
|
|
625 for(i=e-1; i>=0; i--){
|
|
626 put_rac(c, state+22+i, (a>>i)&1); //22..31
|
|
627 }
|
|
628
|
|
629 if(is_signed)
|
|
630 put_rac(c, state+11 + e, v < 0); //11..21
|
|
631 }else{
|
|
632 for(i=0; i<e; i++){
|
|
633 put_rac(c, state+1+FFMIN(i,9), 1); //1..10
|
|
634 }
|
|
635 put_rac(c, state+1+FFMIN(i,9), 0);
|
|
636
|
|
637 for(i=e-1; i>=0; i--){
|
|
638 put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
|
|
639 }
|
|
640
|
|
641 if(is_signed)
|
|
642 put_rac(c, state+11 + FFMIN(e,10), v < 0); //11..21
|
|
643 }
|
|
644 #endif
|
|
645 }else{
|
|
646 put_rac(c, state+0, 1);
|
|
647 }
|
|
648 }
|
|
649
|
|
650 static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
|
|
651 if(get_rac(c, state+0))
|
|
652 return 0;
|
|
653 else{
|
|
654 int i, e, a;
|
|
655 e= 0;
|
|
656 while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
|
|
657 e++;
|
|
658 }
|
|
659
|
|
660 a= 1;
|
|
661 for(i=e-1; i>=0; i--){
|
|
662 a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
|
|
663 }
|
|
664
|
|
665 if(is_signed && get_rac(c, state+11 + FFMIN(e,10))) //11..21
|
|
666 return -a;
|
|
667 else
|
|
668 return a;
|
|
669 }
|
|
670 }
|
|
671
|
|
672 static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){
|
|
673 int i;
|
|
674 int r= log2>=0 ? 1<<log2 : 1;
|
|
675
|
|
676 assert(v>=0);
|
|
677 assert(log2>=-4);
|
|
678
|
|
679 while(v >= r){
|
|
680 put_rac(c, state+4+log2, 1);
|
|
681 v -= r;
|
|
682 log2++;
|
|
683 if(log2>0) r+=r;
|
|
684 }
|
|
685 put_rac(c, state+4+log2, 0);
|
|
686
|
|
687 for(i=log2-1; i>=0; i--){
|
|
688 put_rac(c, state+31-i, (v>>i)&1);
|
|
689 }
|
|
690 }
|
|
691
|
|
692 static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
|
|
693 int i;
|
|
694 int r= log2>=0 ? 1<<log2 : 1;
|
|
695 int v=0;
|
|
696
|
|
697 assert(log2>=-4);
|
|
698
|
|
699 while(get_rac(c, state+4+log2)){
|
|
700 v+= r;
|
|
701 log2++;
|
|
702 if(log2>0) r+=r;
|
|
703 }
|
|
704
|
|
705 for(i=log2-1; i>=0; i--){
|
|
706 v+= get_rac(c, state+31-i)<<i;
|
|
707 }
|
|
708
|
|
709 return v;
|
|
710 }
|
|
711
|
|
712 static always_inline void lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
|
|
713 const int mirror_left= !highpass;
|
|
714 const int mirror_right= (width&1) ^ highpass;
|
|
715 const int w= (width>>1) - 1 + (highpass & width);
|
|
716 int i;
|
|
717
|
|
718 #define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
|
|
719 if(mirror_left){
|
|
720 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
|
|
721 dst += dst_step;
|
|
722 src += src_step;
|
|
723 }
|
|
724
|
|
725 for(i=0; i<w; i++){
|
|
726 dst[i*dst_step] = LIFT(src[i*src_step], ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift), inverse);
|
|
727 }
|
|
728
|
|
729 if(mirror_right){
|
|
730 dst[w*dst_step] = LIFT(src[w*src_step], ((mul*2*ref[w*ref_step]+add)>>shift), inverse);
|
|
731 }
|
|
732 }
|
|
733
|
|
734 #ifndef lift5
|
|
735 static always_inline void lift5(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
|
|
736 const int mirror_left= !highpass;
|
|
737 const int mirror_right= (width&1) ^ highpass;
|
|
738 const int w= (width>>1) - 1 + (highpass & width);
|
|
739 int i;
|
|
740
|
|
741 if(mirror_left){
|
|
742 int r= 3*2*ref[0];
|
|
743 r += r>>4;
|
|
744 r += r>>8;
|
|
745 dst[0] = LIFT(src[0], ((r+add)>>shift), inverse);
|
|
746 dst += dst_step;
|
|
747 src += src_step;
|
|
748 }
|
|
749
|
|
750 for(i=0; i<w; i++){
|
|
751 int r= 3*(ref[i*ref_step] + ref[(i+1)*ref_step]);
|
|
752 r += r>>4;
|
|
753 r += r>>8;
|
|
754 dst[i*dst_step] = LIFT(src[i*src_step], ((r+add)>>shift), inverse);
|
|
755 }
|
|
756
|
|
757 if(mirror_right){
|
|
758 int r= 3*2*ref[w*ref_step];
|
|
759 r += r>>4;
|
|
760 r += r>>8;
|
|
761 dst[w*dst_step] = LIFT(src[w*src_step], ((r+add)>>shift), inverse);
|
|
762 }
|
|
763 }
|
|
764 #endif
|
|
765
|
|
766 #ifndef liftS
|
|
767 static always_inline void liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
|
|
768 const int mirror_left= !highpass;
|
|
769 const int mirror_right= (width&1) ^ highpass;
|
|
770 const int w= (width>>1) - 1 + (highpass & width);
|
|
771 int i;
|
|
772
|
|
773 assert(shift == 4);
|
|
774 #define LIFTS(src, ref, inv) ((inv) ? (src) - (((ref) - 4*(src))>>shift): (16*4*(src) + 4*(ref) + 8 + (5<<27))/(5*16) - (1<<23))
|
|
775 if(mirror_left){
|
|
776 dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
|
|
777 dst += dst_step;
|
|
778 src += src_step;
|
|
779 }
|
|
780
|
|
781 for(i=0; i<w; i++){
|
|
782 dst[i*dst_step] = LIFTS(src[i*src_step], mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add, inverse);
|
|
783 }
|
|
784
|
|
785 if(mirror_right){
|
|
786 dst[w*dst_step] = LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
|
|
787 }
|
|
788 }
|
|
789 #endif
|
|
790
|
|
791
|
|
792 static void inplace_lift(DWTELEM *dst, int width, int *coeffs, int n, int shift, int start, int inverse){
|
|
793 int x, i;
|
|
794
|
|
795 for(x=start; x<width; x+=2){
|
|
796 int64_t sum=0;
|
|
797
|
|
798 for(i=0; i<n; i++){
|
|
799 int x2= x + 2*i - n + 1;
|
|
800 if (x2< 0) x2= -x2;
|
|
801 else if(x2>=width) x2= 2*width-x2-2;
|
|
802 sum += coeffs[i]*(int64_t)dst[x2];
|
|
803 }
|
|
804 if(inverse) dst[x] -= (sum + (1<<shift)/2)>>shift;
|
|
805 else dst[x] += (sum + (1<<shift)/2)>>shift;
|
|
806 }
|
|
807 }
|
|
808
|
|
809 static void inplace_liftV(DWTELEM *dst, int width, int height, int stride, int *coeffs, int n, int shift, int start, int inverse){
|
|
810 int x, y, i;
|
|
811 for(y=start; y<height; y+=2){
|
|
812 for(x=0; x<width; x++){
|
|
813 int64_t sum=0;
|
|
814
|
|
815 for(i=0; i<n; i++){
|
|
816 int y2= y + 2*i - n + 1;
|
|
817 if (y2< 0) y2= -y2;
|
|
818 else if(y2>=height) y2= 2*height-y2-2;
|
|
819 sum += coeffs[i]*(int64_t)dst[x + y2*stride];
|
|
820 }
|
|
821 if(inverse) dst[x + y*stride] -= (sum + (1<<shift)/2)>>shift;
|
|
822 else dst[x + y*stride] += (sum + (1<<shift)/2)>>shift;
|
|
823 }
|
|
824 }
|
|
825 }
|
|
826
|
|
827 #define SCALEX 1
|
|
828 #define LX0 0
|
|
829 #define LX1 1
|
|
830
|
|
831 #if 0 // more accurate 9/7
|
|
832 #define N1 2
|
|
833 #define SHIFT1 14
|
|
834 #define COEFFS1 (int[]){-25987,-25987}
|
|
835 #define N2 2
|
|
836 #define SHIFT2 19
|
|
837 #define COEFFS2 (int[]){-27777,-27777}
|
|
838 #define N3 2
|
|
839 #define SHIFT3 15
|
|
840 #define COEFFS3 (int[]){28931,28931}
|
|
841 #define N4 2
|
|
842 #define SHIFT4 15
|
|
843 #define COEFFS4 (int[]){14533,14533}
|
|
844 #elif 1 // 13/7 CRF
|
|
845 #define N1 4
|
|
846 #define SHIFT1 4
|
|
847 #define COEFFS1 (int[]){1,-9,-9,1}
|
|
848 #define N2 4
|
|
849 #define SHIFT2 4
|
|
850 #define COEFFS2 (int[]){-1,5,5,-1}
|
|
851 #define N3 0
|
|
852 #define SHIFT3 1
|
|
853 #define COEFFS3 NULL
|
|
854 #define N4 0
|
|
855 #define SHIFT4 1
|
|
856 #define COEFFS4 NULL
|
|
857 #elif 1 // 3/5
|
|
858 #define LX0 1
|
|
859 #define LX1 0
|
|
860 #define SCALEX 0.5
|
|
861 #define N1 2
|
|
862 #define SHIFT1 1
|
|
863 #define COEFFS1 (int[]){1,1}
|
|
864 #define N2 2
|
|
865 #define SHIFT2 2
|
|
866 #define COEFFS2 (int[]){-1,-1}
|
|
867 #define N3 0
|
|
868 #define SHIFT3 0
|
|
869 #define COEFFS3 NULL
|
|
870 #define N4 0
|
|
871 #define SHIFT4 0
|
|
872 #define COEFFS4 NULL
|
|
873 #elif 1 // 11/5
|
|
874 #define N1 0
|
|
875 #define SHIFT1 1
|
|
876 #define COEFFS1 NULL
|
|
877 #define N2 2
|
|
878 #define SHIFT2 2
|
|
879 #define COEFFS2 (int[]){-1,-1}
|
|
880 #define N3 2
|
|
881 #define SHIFT3 0
|
|
882 #define COEFFS3 (int[]){-1,-1}
|
|
883 #define N4 4
|
|
884 #define SHIFT4 7
|
|
885 #define COEFFS4 (int[]){-5,29,29,-5}
|
|
886 #define SCALEX 4
|
|
887 #elif 1 // 9/7 CDF
|
|
888 #define N1 2
|
|
889 #define SHIFT1 7
|
|
890 #define COEFFS1 (int[]){-203,-203}
|
|
891 #define N2 2
|
|
892 #define SHIFT2 12
|
|
893 #define COEFFS2 (int[]){-217,-217}
|
|
894 #define N3 2
|
|
895 #define SHIFT3 7
|
|
896 #define COEFFS3 (int[]){113,113}
|
|
897 #define N4 2
|
|
898 #define SHIFT4 9
|
|
899 #define COEFFS4 (int[]){227,227}
|
|
900 #define SCALEX 1
|
|
901 #elif 1 // 7/5 CDF
|
|
902 #define N1 0
|
|
903 #define SHIFT1 1
|
|
904 #define COEFFS1 NULL
|
|
905 #define N2 2
|
|
906 #define SHIFT2 2
|
|
907 #define COEFFS2 (int[]){-1,-1}
|
|
908 #define N3 2
|
|
909 #define SHIFT3 0
|
|
910 #define COEFFS3 (int[]){-1,-1}
|
|
911 #define N4 2
|
|
912 #define SHIFT4 4
|
|
913 #define COEFFS4 (int[]){3,3}
|
|
914 #elif 1 // 9/7 MN
|
|
915 #define N1 4
|
|
916 #define SHIFT1 4
|
|
917 #define COEFFS1 (int[]){1,-9,-9,1}
|
|
918 #define N2 2
|
|
919 #define SHIFT2 2
|
|
920 #define COEFFS2 (int[]){1,1}
|
|
921 #define N3 0
|
|
922 #define SHIFT3 1
|
|
923 #define COEFFS3 NULL
|
|
924 #define N4 0
|
|
925 #define SHIFT4 1
|
|
926 #define COEFFS4 NULL
|
|
927 #else // 13/7 CRF
|
|
928 #define N1 4
|
|
929 #define SHIFT1 4
|
|
930 #define COEFFS1 (int[]){1,-9,-9,1}
|
|
931 #define N2 4
|
|
932 #define SHIFT2 4
|
|
933 #define COEFFS2 (int[]){-1,5,5,-1}
|
|
934 #define N3 0
|
|
935 #define SHIFT3 1
|
|
936 #define COEFFS3 NULL
|
|
937 #define N4 0
|
|
938 #define SHIFT4 1
|
|
939 #define COEFFS4 NULL
|
|
940 #endif
|
|
941 static void horizontal_decomposeX(DWTELEM *b, int width){
|
|
942 DWTELEM temp[width];
|
|
943 const int width2= width>>1;
|
|
944 const int w2= (width+1)>>1;
|
|
945 int x;
|
|
946
|
|
947 inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 0);
|
|
948 inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 0);
|
|
949 inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 0);
|
|
950 inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 0);
|
|
951
|
|
952 for(x=0; x<width2; x++){
|
|
953 temp[x ]= b[2*x ];
|
|
954 temp[x+w2]= b[2*x + 1];
|
|
955 }
|
|
956 if(width&1)
|
|
957 temp[x ]= b[2*x ];
|
|
958 memcpy(b, temp, width*sizeof(int));
|
|
959 }
|
|
960
|
|
961 static void horizontal_composeX(DWTELEM *b, int width){
|
|
962 DWTELEM temp[width];
|
|
963 const int width2= width>>1;
|
|
964 int x;
|
|
965 const int w2= (width+1)>>1;
|
|
966
|
|
967 memcpy(temp, b, width*sizeof(int));
|
|
968 for(x=0; x<width2; x++){
|
|
969 b[2*x ]= temp[x ];
|
|
970 b[2*x + 1]= temp[x+w2];
|
|
971 }
|
|
972 if(width&1)
|
|
973 b[2*x ]= temp[x ];
|
|
974
|
|
975 inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 1);
|
|
976 inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 1);
|
|
977 inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 1);
|
|
978 inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 1);
|
|
979 }
|
|
980
|
|
981 static void spatial_decomposeX(DWTELEM *buffer, int width, int height, int stride){
|
|
982 int x, y;
|
|
983
|
|
984 for(y=0; y<height; y++){
|
|
985 for(x=0; x<width; x++){
|
|
986 buffer[y*stride + x] *= SCALEX;
|
|
987 }
|
|
988 }
|
|
989
|
|
990 for(y=0; y<height; y++){
|
|
991 horizontal_decomposeX(buffer + y*stride, width);
|
|
992 }
|
|
993
|
|
994 inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 0);
|
|
995 inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 0);
|
|
996 inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 0);
|
|
997 inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 0);
|
|
998 }
|
|
999
|
|
1000 static void spatial_composeX(DWTELEM *buffer, int width, int height, int stride){
|
|
1001 int x, y;
|
|
1002
|
|
1003 inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 1);
|
|
1004 inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 1);
|
|
1005 inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 1);
|
|
1006 inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 1);
|
|
1007
|
|
1008 for(y=0; y<height; y++){
|
|
1009 horizontal_composeX(buffer + y*stride, width);
|
|
1010 }
|
|
1011
|
|
1012 for(y=0; y<height; y++){
|
|
1013 for(x=0; x<width; x++){
|
|
1014 buffer[y*stride + x] /= SCALEX;
|
|
1015 }
|
|
1016 }
|
|
1017 }
|
|
1018
|
|
1019 static void horizontal_decompose53i(DWTELEM *b, int width){
|
|
1020 DWTELEM temp[width];
|
|
1021 const int width2= width>>1;
|
|
1022 int x;
|
|
1023 const int w2= (width+1)>>1;
|
|
1024
|
|
1025 for(x=0; x<width2; x++){
|
|
1026 temp[x ]= b[2*x ];
|
|
1027 temp[x+w2]= b[2*x + 1];
|
|
1028 }
|
|
1029 if(width&1)
|
|
1030 temp[x ]= b[2*x ];
|
|
1031 #if 0
|
|
1032 {
|
|
1033 int A1,A2,A3,A4;
|
|
1034 A2= temp[1 ];
|
|
1035 A4= temp[0 ];
|
|
1036 A1= temp[0+width2];
|
|
1037 A1 -= (A2 + A4)>>1;
|
|
1038 A4 += (A1 + 1)>>1;
|
|
1039 b[0+width2] = A1;
|
|
1040 b[0 ] = A4;
|
|
1041 for(x=1; x+1<width2; x+=2){
|
|
1042 A3= temp[x+width2];
|
|
1043 A4= temp[x+1 ];
|
|
1044 A3 -= (A2 + A4)>>1;
|
|
1045 A2 += (A1 + A3 + 2)>>2;
|
|
1046 b[x+width2] = A3;
|
|
1047 b[x ] = A2;
|
|
1048
|
|
1049 A1= temp[x+1+width2];
|
|
1050 A2= temp[x+2 ];
|
|
1051 A1 -= (A2 + A4)>>1;
|
|
1052 A4 += (A1 + A3 + 2)>>2;
|
|
1053 b[x+1+width2] = A1;
|
|
1054 b[x+1 ] = A4;
|
|
1055 }
|
|
1056 A3= temp[width-1];
|
|
1057 A3 -= A2;
|
|
1058 A2 += (A1 + A3 + 2)>>2;
|
|
1059 b[width -1] = A3;
|
|
1060 b[width2-1] = A2;
|
|
1061 }
|
|
1062 #else
|
|
1063 lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
|
|
1064 lift(b , temp , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 0);
|
|
1065 #endif
|
|
1066 }
|
|
1067
|
|
1068 static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
|
|
1069 int i;
|
|
1070
|
|
1071 for(i=0; i<width; i++){
|
|
1072 b1[i] -= (b0[i] + b2[i])>>1;
|
|
1073 }
|
|
1074 }
|
|
1075
|
|
1076 static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
|
|
1077 int i;
|
|
1078
|
|
1079 for(i=0; i<width; i++){
|
|
1080 b1[i] += (b0[i] + b2[i] + 2)>>2;
|
|
1081 }
|
|
1082 }
|
|
1083
|
|
1084 static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){
|
|
1085 int y;
|
|
1086 DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
|
|
1087 DWTELEM *b1= buffer + mirror(-2 , height-1)*stride;
|
|
1088
|
|
1089 for(y=-2; y<height; y+=2){
|
|
1090 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
|
|
1091 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
|
|
1092
|
|
1093 {START_TIMER
|
|
1094 if(y+1<(unsigned)height) horizontal_decompose53i(b2, width);
|
|
1095 if(y+2<(unsigned)height) horizontal_decompose53i(b3, width);
|
|
1096 STOP_TIMER("horizontal_decompose53i")}
|
|
1097
|
|
1098 {START_TIMER
|
|
1099 if(y+1<(unsigned)height) vertical_decompose53iH0(b1, b2, b3, width);
|
|
1100 if(y+0<(unsigned)height) vertical_decompose53iL0(b0, b1, b2, width);
|
|
1101 STOP_TIMER("vertical_decompose53i*")}
|
|
1102
|
|
1103 b0=b2;
|
|
1104 b1=b3;
|
|
1105 }
|
|
1106 }
|
|
1107
|
|
1108 static void horizontal_decompose97i(DWTELEM *b, int width){
|
|
1109 DWTELEM temp[width];
|
|
1110 const int w2= (width+1)>>1;
|
|
1111
|
|
1112 lift (temp+w2, b +1, b , 1, 2, 2, width, -W_AM, W_AO, W_AS, 1, 0);
|
|
1113 liftS(temp , b , temp+w2, 1, 2, 1, width, -W_BM, W_BO, W_BS, 0, 0);
|
|
1114 lift5(b +w2, temp+w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 0);
|
|
1115 lift (b , temp , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 0);
|
|
1116 }
|
|
1117
|
|
1118
|
|
1119 static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
|
|
1120 int i;
|
|
1121
|
|
1122 for(i=0; i<width; i++){
|
|
1123 b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
|
|
1124 }
|
|
1125 }
|
|
1126
|
|
1127 static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
|
|
1128 int i;
|
|
1129
|
|
1130 for(i=0; i<width; i++){
|
|
1131 #ifdef lift5
|
|
1132 b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
|
|
1133 #else
|
|
1134 int r= 3*(b0[i] + b2[i]);
|
|
1135 r+= r>>4;
|
|
1136 r+= r>>8;
|
|
1137 b1[i] += (r+W_CO)>>W_CS;
|
|
1138 #endif
|
|
1139 }
|
|
1140 }
|
|
1141
|
|
1142 static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
|
|
1143 int i;
|
|
1144
|
|
1145 for(i=0; i<width; i++){
|
|
1146 #ifdef liftS
|
|
1147 b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
|
|
1148 #else
|
|
1149 b1[i] = (16*4*b1[i] - 4*(b0[i] + b2[i]) + 8*5 + (5<<27)) / (5*16) - (1<<23);
|
|
1150 #endif
|
|
1151 }
|
|
1152 }
|
|
1153
|
|
1154 static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
|
|
1155 int i;
|
|
1156
|
|
1157 for(i=0; i<width; i++){
|
|
1158 b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
|
|
1159 }
|
|
1160 }
|
|
1161
|
|
1162 static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){
|
|
1163 int y;
|
|
1164 DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride;
|
|
1165 DWTELEM *b1= buffer + mirror(-4 , height-1)*stride;
|
|
1166 DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
|
|
1167 DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;
|
|
1168
|
|
1169 for(y=-4; y<height; y+=2){
|
|
1170 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
|
|
1171 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
|
|
1172
|
|
1173 {START_TIMER
|
|
1174 if(y+3<(unsigned)height) horizontal_decompose97i(b4, width);
|
|
1175 if(y+4<(unsigned)height) horizontal_decompose97i(b5, width);
|
|
1176 if(width>400){
|
|
1177 STOP_TIMER("horizontal_decompose97i")
|
|
1178 }}
|
|
1179
|
|
1180 {START_TIMER
|
|
1181 if(y+3<(unsigned)height) vertical_decompose97iH0(b3, b4, b5, width);
|
|
1182 if(y+2<(unsigned)height) vertical_decompose97iL0(b2, b3, b4, width);
|
|
1183 if(y+1<(unsigned)height) vertical_decompose97iH1(b1, b2, b3, width);
|
|
1184 if(y+0<(unsigned)height) vertical_decompose97iL1(b0, b1, b2, width);
|
|
1185
|
|
1186 if(width>400){
|
|
1187 STOP_TIMER("vertical_decompose97i")
|
|
1188 }}
|
|
1189
|
|
1190 b0=b2;
|
|
1191 b1=b3;
|
|
1192 b2=b4;
|
|
1193 b3=b5;
|
|
1194 }
|
|
1195 }
|
|
1196
|
|
1197 void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
|
|
1198 int level;
|
|
1199
|
|
1200 for(level=0; level<decomposition_count; level++){
|
|
1201 switch(type){
|
|
1202 case DWT_97: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break;
|
|
1203 case DWT_53: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break;
|
|
1204 case DWT_X: spatial_decomposeX (buffer, width>>level, height>>level, stride<<level); break;
|
|
1205 }
|
|
1206 }
|
|
1207 }
|
|
1208
|
|
1209 static void horizontal_compose53i(DWTELEM *b, int width){
|
|
1210 DWTELEM temp[width];
|
|
1211 const int width2= width>>1;
|
|
1212 const int w2= (width+1)>>1;
|
|
1213 int x;
|
|
1214
|
|
1215 #if 0
|
|
1216 int A1,A2,A3,A4;
|
|
1217 A2= temp[1 ];
|
|
1218 A4= temp[0 ];
|
|
1219 A1= temp[0+width2];
|
|
1220 A1 -= (A2 + A4)>>1;
|
|
1221 A4 += (A1 + 1)>>1;
|
|
1222 b[0+width2] = A1;
|
|
1223 b[0 ] = A4;
|
|
1224 for(x=1; x+1<width2; x+=2){
|
|
1225 A3= temp[x+width2];
|
|
1226 A4= temp[x+1 ];
|
|
1227 A3 -= (A2 + A4)>>1;
|
|
1228 A2 += (A1 + A3 + 2)>>2;
|
|
1229 b[x+width2] = A3;
|
|
1230 b[x ] = A2;
|
|
1231
|
|
1232 A1= temp[x+1+width2];
|
|
1233 A2= temp[x+2 ];
|
|
1234 A1 -= (A2 + A4)>>1;
|
|
1235 A4 += (A1 + A3 + 2)>>2;
|
|
1236 b[x+1+width2] = A1;
|
|
1237 b[x+1 ] = A4;
|
|
1238 }
|
|
1239 A3= temp[width-1];
|
|
1240 A3 -= A2;
|
|
1241 A2 += (A1 + A3 + 2)>>2;
|
|
1242 b[width -1] = A3;
|
|
1243 b[width2-1] = A2;
|
|
1244 #else
|
|
1245 lift(temp , b , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 1);
|
|
1246 lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1);
|
|
1247 #endif
|
|
1248 for(x=0; x<width2; x++){
|
|
1249 b[2*x ]= temp[x ];
|
|
1250 b[2*x + 1]= temp[x+w2];
|
|
1251 }
|
|
1252 if(width&1)
|
|
1253 b[2*x ]= temp[x ];
|
|
1254 }
|
|
1255
|
|
1256 static void vertical_compose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
|
|
1257 int i;
|
|
1258
|
|
1259 for(i=0; i<width; i++){
|
|
1260 b1[i] += (b0[i] + b2[i])>>1;
|
|
1261 }
|
|
1262 }
|
|
1263
|
|
1264 static void vertical_compose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
|
|
1265 int i;
|
|
1266
|
|
1267 for(i=0; i<width; i++){
|
|
1268 b1[i] -= (b0[i] + b2[i] + 2)>>2;
|
|
1269 }
|
|
1270 }
|
|
1271
|
|
1272 static void spatial_compose53i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
|
|
1273 cs->b0 = slice_buffer_get_line(sb, mirror(-1-1, height-1) * stride_line);
|
|
1274 cs->b1 = slice_buffer_get_line(sb, mirror(-1 , height-1) * stride_line);
|
|
1275 cs->y = -1;
|
|
1276 }
|
|
1277
|
|
1278 static void spatial_compose53i_init(dwt_compose_t *cs, DWTELEM *buffer, int height, int stride){
|
|
1279 cs->b0 = buffer + mirror(-1-1, height-1)*stride;
|
|
1280 cs->b1 = buffer + mirror(-1 , height-1)*stride;
|
|
1281 cs->y = -1;
|
|
1282 }
|
|
1283
|
|
1284 static void spatial_compose53i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
|
|
1285 int y= cs->y;
|
|
1286
|
|
1287 DWTELEM *b0= cs->b0;
|
|
1288 DWTELEM *b1= cs->b1;
|
|
1289 DWTELEM *b2= slice_buffer_get_line(sb, mirror(y+1, height-1) * stride_line);
|
|
1290 DWTELEM *b3= slice_buffer_get_line(sb, mirror(y+2, height-1) * stride_line);
|
|
1291
|
|
1292 {START_TIMER
|
|
1293 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
|
|
1294 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
|
|
1295 STOP_TIMER("vertical_compose53i*")}
|
|
1296
|
|
1297 {START_TIMER
|
|
1298 if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
|
|
1299 if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
|
|
1300 STOP_TIMER("horizontal_compose53i")}
|
|
1301
|
|
1302 cs->b0 = b2;
|
|
1303 cs->b1 = b3;
|
|
1304 cs->y += 2;
|
|
1305 }
|
|
1306
|
|
1307 static void spatial_compose53i_dy(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride){
|
|
1308 int y= cs->y;
|
|
1309 DWTELEM *b0= cs->b0;
|
|
1310 DWTELEM *b1= cs->b1;
|
|
1311 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
|
|
1312 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
|
|
1313
|
|
1314 {START_TIMER
|
|
1315 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
|
|
1316 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
|
|
1317 STOP_TIMER("vertical_compose53i*")}
|
|
1318
|
|
1319 {START_TIMER
|
|
1320 if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
|
|
1321 if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
|
|
1322 STOP_TIMER("horizontal_compose53i")}
|
|
1323
|
|
1324 cs->b0 = b2;
|
|
1325 cs->b1 = b3;
|
|
1326 cs->y += 2;
|
|
1327 }
|
|
1328
|
|
1329 static void spatial_compose53i(DWTELEM *buffer, int width, int height, int stride){
|
|
1330 dwt_compose_t cs;
|
|
1331 spatial_compose53i_init(&cs, buffer, height, stride);
|
|
1332 while(cs.y <= height)
|
|
1333 spatial_compose53i_dy(&cs, buffer, width, height, stride);
|
|
1334 }
|
|
1335
|
|
1336
|
|
1337 void ff_snow_horizontal_compose97i(DWTELEM *b, int width){
|
|
1338 DWTELEM temp[width];
|
|
1339 const int w2= (width+1)>>1;
|
|
1340
|
|
1341 lift (temp , b , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 1);
|
|
1342 lift5(temp+w2, b +w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 1);
|
|
1343 liftS(b , temp , temp+w2, 2, 1, 1, width, -W_BM, W_BO, W_BS, 0, 1);
|
|
1344 lift (b+1 , temp+w2, b , 2, 1, 2, width, -W_AM, W_AO, W_AS, 1, 1);
|
|
1345 }
|
|
1346
|
|
1347 static void vertical_compose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
|
|
1348 int i;
|
|
1349
|
|
1350 for(i=0; i<width; i++){
|
|
1351 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
|
|
1352 }
|
|
1353 }
|
|
1354
|
|
1355 static void vertical_compose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
|
|
1356 int i;
|
|
1357
|
|
1358 for(i=0; i<width; i++){
|
|
1359 #ifdef lift5
|
|
1360 b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
|
|
1361 #else
|
|
1362 int r= 3*(b0[i] + b2[i]);
|
|
1363 r+= r>>4;
|
|
1364 r+= r>>8;
|
|
1365 b1[i] -= (r+W_CO)>>W_CS;
|
|
1366 #endif
|
|
1367 }
|
|
1368 }
|
|
1369
|
|
1370 static void vertical_compose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
|
|
1371 int i;
|
|
1372
|
|
1373 for(i=0; i<width; i++){
|
|
1374 #ifdef liftS
|
|
1375 b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
|
|
1376 #else
|
|
1377 b1[i] += (W_BM*(b0[i] + b2[i])+4*b1[i]+W_BO)>>W_BS;
|
|
1378 #endif
|
|
1379 }
|
|
1380 }
|
|
1381
|
|
1382 static void vertical_compose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
|
|
1383 int i;
|
|
1384
|
|
1385 for(i=0; i<width; i++){
|
|
1386 b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
|
|
1387 }
|
|
1388 }
|
|
1389
|
|
1390 void ff_snow_vertical_compose97i(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width){
|
|
1391 int i;
|
|
1392
|
|
1393 for(i=0; i<width; i++){
|
|
1394 #ifndef lift5
|
|
1395 int r;
|
|
1396 #endif
|
|
1397 b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS;
|
|
1398 #ifdef lift5
|
|
1399 b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS;
|
|
1400 #else
|
|
1401 r= 3*(b2[i] + b4[i]);
|
|
1402 r+= r>>4;
|
|
1403 r+= r>>8;
|
|
1404 b3[i] -= (r+W_CO)>>W_CS;
|
|
1405 #endif
|
|
1406 #ifdef liftS
|
|
1407 b2[i] += (W_BM*(b1[i] + b3[i])+W_BO)>>W_BS;
|
|
1408 #else
|
|
1409 b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS;
|
|
1410 #endif
|
|
1411 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
|
|
1412 }
|
|
1413 }
|
|
1414
|
|
1415 static void spatial_compose97i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
|
|
1416 cs->b0 = slice_buffer_get_line(sb, mirror(-3-1, height-1) * stride_line);
|
|
1417 cs->b1 = slice_buffer_get_line(sb, mirror(-3 , height-1) * stride_line);
|
|
1418 cs->b2 = slice_buffer_get_line(sb, mirror(-3+1, height-1) * stride_line);
|
|
1419 cs->b3 = slice_buffer_get_line(sb, mirror(-3+2, height-1) * stride_line);
|
|
1420 cs->y = -3;
|
|
1421 }
|
|
1422
|
|
1423 static void spatial_compose97i_init(dwt_compose_t *cs, DWTELEM *buffer, int height, int stride){
|
|
1424 cs->b0 = buffer + mirror(-3-1, height-1)*stride;
|
|
1425 cs->b1 = buffer + mirror(-3 , height-1)*stride;
|
|
1426 cs->b2 = buffer + mirror(-3+1, height-1)*stride;
|
|
1427 cs->b3 = buffer + mirror(-3+2, height-1)*stride;
|
|
1428 cs->y = -3;
|
|
1429 }
|
|
1430
|
|
1431 static void spatial_compose97i_dy_buffered(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
|
|
1432 int y = cs->y;
|
|
1433
|
|
1434 DWTELEM *b0= cs->b0;
|
|
1435 DWTELEM *b1= cs->b1;
|
|
1436 DWTELEM *b2= cs->b2;
|
|
1437 DWTELEM *b3= cs->b3;
|
|
1438 DWTELEM *b4= slice_buffer_get_line(sb, mirror(y + 3, height - 1) * stride_line);
|
|
1439 DWTELEM *b5= slice_buffer_get_line(sb, mirror(y + 4, height - 1) * stride_line);
|
|
1440
|
|
1441 {START_TIMER
|
|
1442 if(y>0 && y+4<height){
|
|
1443 dsp->vertical_compose97i(b0, b1, b2, b3, b4, b5, width);
|
|
1444 }else{
|
|
1445 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
|
|
1446 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
|
|
1447 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
|
|
1448 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
|
|
1449 }
|
|
1450 if(width>400){
|
|
1451 STOP_TIMER("vertical_compose97i")}}
|
|
1452
|
|
1453 {START_TIMER
|
|
1454 if(y-1<(unsigned)height) dsp->horizontal_compose97i(b0, width);
|
|
1455 if(y+0<(unsigned)height) dsp->horizontal_compose97i(b1, width);
|
|
1456 if(width>400 && y+0<(unsigned)height){
|
|
1457 STOP_TIMER("horizontal_compose97i")}}
|
|
1458
|
|
1459 cs->b0=b2;
|
|
1460 cs->b1=b3;
|
|
1461 cs->b2=b4;
|
|
1462 cs->b3=b5;
|
|
1463 cs->y += 2;
|
|
1464 }
|
|
1465
|
|
1466 static void spatial_compose97i_dy(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride){
|
|
1467 int y = cs->y;
|
|
1468 DWTELEM *b0= cs->b0;
|
|
1469 DWTELEM *b1= cs->b1;
|
|
1470 DWTELEM *b2= cs->b2;
|
|
1471 DWTELEM *b3= cs->b3;
|
|
1472 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
|
|
1473 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
|
|
1474
|
|
1475 {START_TIMER
|
|
1476 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
|
|
1477 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
|
|
1478 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
|
|
1479 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
|
|
1480 if(width>400){
|
|
1481 STOP_TIMER("vertical_compose97i")}}
|
|
1482
|
|
1483 {START_TIMER
|
|
1484 if(y-1<(unsigned)height) ff_snow_horizontal_compose97i(b0, width);
|
|
1485 if(y+0<(unsigned)height) ff_snow_horizontal_compose97i(b1, width);
|
|
1486 if(width>400 && b0 <= b2){
|
|
1487 STOP_TIMER("horizontal_compose97i")}}
|
|
1488
|
|
1489 cs->b0=b2;
|
|
1490 cs->b1=b3;
|
|
1491 cs->b2=b4;
|
|
1492 cs->b3=b5;
|
|
1493 cs->y += 2;
|
|
1494 }
|
|
1495
|
|
1496 static void spatial_compose97i(DWTELEM *buffer, int width, int height, int stride){
|
|
1497 dwt_compose_t cs;
|
|
1498 spatial_compose97i_init(&cs, buffer, height, stride);
|
|
1499 while(cs.y <= height)
|
|
1500 spatial_compose97i_dy(&cs, buffer, width, height, stride);
|
|
1501 }
|
|
1502
|
|
1503 static void ff_spatial_idwt_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count){
|
|
1504 int level;
|
|
1505 for(level=decomposition_count-1; level>=0; level--){
|
|
1506 switch(type){
|
|
1507 case DWT_97: spatial_compose97i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
|
|
1508 case DWT_53: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
|
|
1509 /* not slicified yet */
|
|
1510 case DWT_X: /*spatial_composeX(buffer, width>>level, height>>level, stride<<level); break;*/
|
|
1511 av_log(NULL, AV_LOG_ERROR, "spatial_composeX neither buffered nor slicified yet.\n"); break;
|
|
1512 }
|
|
1513 }
|
|
1514 }
|
|
1515
|
|
1516 static void ff_spatial_idwt_init(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
|
|
1517 int level;
|
|
1518 for(level=decomposition_count-1; level>=0; level--){
|
|
1519 switch(type){
|
|
1520 case DWT_97: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break;
|
|
1521 case DWT_53: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break;
|
|
1522 /* not slicified yet */
|
|
1523 case DWT_X: spatial_composeX(buffer, width>>level, height>>level, stride<<level); break;
|
|
1524 }
|
|
1525 }
|
|
1526 }
|
|
1527
|
|
1528 static void ff_spatial_idwt_slice(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){
|
|
1529 const int support = type==1 ? 3 : 5;
|
|
1530 int level;
|
|
1531 if(type==2) return;
|
|
1532
|
|
1533 for(level=decomposition_count-1; level>=0; level--){
|
|
1534 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
|
|
1535 switch(type){
|
|
1536 case DWT_97: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
|
|
1537 break;
|
|
1538 case DWT_53: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
|
|
1539 break;
|
|
1540 case DWT_X: break;
|
|
1541 }
|
|
1542 }
|
|
1543 }
|
|
1544 }
|
|
1545
|
|
1546 static void ff_spatial_idwt_buffered_slice(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){
|
|
1547 const int support = type==1 ? 3 : 5;
|
|
1548 int level;
|
|
1549 if(type==2) return;
|
|
1550
|
|
1551 for(level=decomposition_count-1; level>=0; level--){
|
|
1552 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
|
|
1553 switch(type){
|
|
1554 case DWT_97: spatial_compose97i_dy_buffered(dsp, cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
|
|
1555 break;
|
|
1556 case DWT_53: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
|
|
1557 break;
|
|
1558 case DWT_X: break;
|
|
1559 }
|
|
1560 }
|
|
1561 }
|
|
1562 }
|
|
1563
|
|
1564 static void ff_spatial_idwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
|
|
1565 if(type==2){
|
|
1566 int level;
|
|
1567 for(level=decomposition_count-1; level>=0; level--)
|
|
1568 spatial_composeX (buffer, width>>level, height>>level, stride<<level);
|
|
1569 }else{
|
|
1570 dwt_compose_t cs[MAX_DECOMPOSITIONS];
|
|
1571 int y;
|
|
1572 ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count);
|
|
1573 for(y=0; y<height; y+=4)
|
|
1574 ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y);
|
|
1575 }
|
|
1576 }
|
|
1577
|
|
1578 static int encode_subband_c0run(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
|
|
1579 const int w= b->width;
|
|
1580 const int h= b->height;
|
|
1581 int x, y;
|
|
1582
|
|
1583 if(1){
|
|
1584 int run=0;
|
|
1585 int runs[w*h];
|
|
1586 int run_index=0;
|
|
1587 int max_index;
|
|
1588
|
|
1589 for(y=0; y<h; y++){
|
|
1590 for(x=0; x<w; x++){
|
|
1591 int v, p=0;
|
|
1592 int /*ll=0, */l=0, lt=0, t=0, rt=0;
|
|
1593 v= src[x + y*stride];
|
|
1594
|
|
1595 if(y){
|
|
1596 t= src[x + (y-1)*stride];
|
|
1597 if(x){
|
|
1598 lt= src[x - 1 + (y-1)*stride];
|
|
1599 }
|
|
1600 if(x + 1 < w){
|
|
1601 rt= src[x + 1 + (y-1)*stride];
|
|
1602 }
|
|
1603 }
|
|
1604 if(x){
|
|
1605 l= src[x - 1 + y*stride];
|
|
1606 /*if(x > 1){
|
|
1607 if(orientation==1) ll= src[y + (x-2)*stride];
|
|
1608 else ll= src[x - 2 + y*stride];
|
|
1609 }*/
|
|
1610 }
|
|
1611 if(parent){
|
|
1612 int px= x>>1;
|
|
1613 int py= y>>1;
|
|
1614 if(px<b->parent->width && py<b->parent->height)
|
|
1615 p= parent[px + py*2*stride];
|
|
1616 }
|
|
1617 if(!(/*ll|*/l|lt|t|rt|p)){
|
|
1618 if(v){
|
|
1619 runs[run_index++]= run;
|
|
1620 run=0;
|
|
1621 }else{
|
|
1622 run++;
|
|
1623 }
|
|
1624 }
|
|
1625 }
|
|
1626 }
|
|
1627 max_index= run_index;
|
|
1628 runs[run_index++]= run;
|
|
1629 run_index=0;
|
|
1630 run= runs[run_index++];
|
|
1631
|
|
1632 put_symbol2(&s->c, b->state[30], max_index, 0);
|
|
1633 if(run_index <= max_index)
|
|
1634 put_symbol2(&s->c, b->state[1], run, 3);
|
|
1635
|
|
1636 for(y=0; y<h; y++){
|
|
1637 if(s->c.bytestream_end - s->c.bytestream < w*40){
|
|
1638 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
|
|
1639 return -1;
|
|
1640 }
|
|
1641 for(x=0; x<w; x++){
|
|
1642 int v, p=0;
|
|
1643 int /*ll=0, */l=0, lt=0, t=0, rt=0;
|
|
1644 v= src[x + y*stride];
|
|
1645
|
|
1646 if(y){
|
|
1647 t= src[x + (y-1)*stride];
|
|
1648 if(x){
|
|
1649 lt= src[x - 1 + (y-1)*stride];
|
|
1650 }
|
|
1651 if(x + 1 < w){
|
|
1652 rt= src[x + 1 + (y-1)*stride];
|
|
1653 }
|
|
1654 }
|
|
1655 if(x){
|
|
1656 l= src[x - 1 + y*stride];
|
|
1657 /*if(x > 1){
|
|
1658 if(orientation==1) ll= src[y + (x-2)*stride];
|
|
1659 else ll= src[x - 2 + y*stride];
|
|
1660 }*/
|
|
1661 }
|
|
1662 if(parent){
|
|
1663 int px= x>>1;
|
|
1664 int py= y>>1;
|
|
1665 if(px<b->parent->width && py<b->parent->height)
|
|
1666 p= parent[px + py*2*stride];
|
|
1667 }
|
|
1668 if(/*ll|*/l|lt|t|rt|p){
|
|
1669 int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
|
|
1670
|
|
1671 put_rac(&s->c, &b->state[0][context], !!v);
|
|
1672 }else{
|
|
1673 if(!run){
|
|
1674 run= runs[run_index++];
|
|
1675
|
|
1676 if(run_index <= max_index)
|
|
1677 put_symbol2(&s->c, b->state[1], run, 3);
|
|
1678 assert(v);
|
|
1679 }else{
|
|
1680 run--;
|
|
1681 assert(!v);
|
|
1682 }
|
|
1683 }
|
|
1684 if(v){
|
|
1685 int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
|
|
1686 int l2= 2*FFABS(l) + (l<0);
|
|
1687 int t2= 2*FFABS(t) + (t<0);
|
|
1688
|
|
1689 put_symbol2(&s->c, b->state[context + 2], FFABS(v)-1, context-4);
|
|
1690 put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0);
|
|
1691 }
|
|
1692 }
|
|
1693 }
|
|
1694 }
|
|
1695 return 0;
|
|
1696 }
|
|
1697
|
|
1698 static int encode_subband(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
|
|
1699 // encode_subband_qtree(s, b, src, parent, stride, orientation);
|
|
1700 // encode_subband_z0run(s, b, src, parent, stride, orientation);
|
|
1701 return encode_subband_c0run(s, b, src, parent, stride, orientation);
|
|
1702 // encode_subband_dzr(s, b, src, parent, stride, orientation);
|
|
1703 }
|
|
1704
|
|
1705 static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){
|
|
1706 const int w= b->width;
|
|
1707 const int h= b->height;
|
|
1708 int x,y;
|
|
1709
|
|
1710 if(1){
|
|
1711 int run, runs;
|
|
1712 x_and_coeff *xc= b->x_coeff;
|
|
1713 x_and_coeff *prev_xc= NULL;
|
|
1714 x_and_coeff *prev2_xc= xc;
|
|
1715 x_and_coeff *parent_xc= parent ? parent->x_coeff : NULL;
|
|
1716 x_and_coeff *prev_parent_xc= parent_xc;
|
|
1717
|
|
1718 runs= get_symbol2(&s->c, b->state[30], 0);
|
|
1719 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
|
|
1720 else run= INT_MAX;
|
|
1721
|
|
1722 for(y=0; y<h; y++){
|
|
1723 int v=0;
|
|
1724 int lt=0, t=0, rt=0;
|
|
1725
|
|
1726 if(y && prev_xc->x == 0){
|
|
1727 rt= prev_xc->coeff;
|
|
1728 }
|
|
1729 for(x=0; x<w; x++){
|
|
1730 int p=0;
|
|
1731 const int l= v;
|
|
1732
|
|
1733 lt= t; t= rt;
|
|
1734
|
|
1735 if(y){
|
|
1736 if(prev_xc->x <= x)
|
|
1737 prev_xc++;
|
|
1738 if(prev_xc->x == x + 1)
|
|
1739 rt= prev_xc->coeff;
|
|
1740 else
|
|
1741 rt=0;
|
|
1742 }
|
|
1743 if(parent_xc){
|
|
1744 if(x>>1 > parent_xc->x){
|
|
1745 parent_xc++;
|
|
1746 }
|
|
1747 if(x>>1 == parent_xc->x){
|
|
1748 p= parent_xc->coeff;
|
|
1749 }
|
|
1750 }
|
|
1751 if(/*ll|*/l|lt|t|rt|p){
|
|
1752 int context= av_log2(/*FFABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1));
|
|
1753
|
|
1754 v=get_rac(&s->c, &b->state[0][context]);
|
|
1755 if(v){
|
|
1756 v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1);
|
|
1757 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]);
|
|
1758
|
|
1759 xc->x=x;
|
|
1760 (xc++)->coeff= v;
|
|
1761 }
|
|
1762 }else{
|
|
1763 if(!run){
|
|
1764 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
|
|
1765 else run= INT_MAX;
|
|
1766 v= 2*(get_symbol2(&s->c, b->state[0 + 2], 0-4) + 1);
|
|
1767 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3]);
|
|
1768
|
|
1769 xc->x=x;
|
|
1770 (xc++)->coeff= v;
|
|
1771 }else{
|
|
1772 int max_run;
|
|
1773 run--;
|
|
1774 v=0;
|
|
1775
|
|
1776 if(y) max_run= FFMIN(run, prev_xc->x - x - 2);
|
|
1777 else max_run= FFMIN(run, w-x-1);
|
|
1778 if(parent_xc)
|
|
1779 max_run= FFMIN(max_run, 2*parent_xc->x - x - 1);
|
|
1780 x+= max_run;
|
|
1781 run-= max_run;
|
|
1782 }
|
|
1783 }
|
|
1784 }
|
|
1785 (xc++)->x= w+1; //end marker
|
|
1786 prev_xc= prev2_xc;
|
|
1787 prev2_xc= xc;
|
|
1788
|
|
1789 if(parent_xc){
|
|
1790 if(y&1){
|
|
1791 while(parent_xc->x != parent->width+1)
|
|
1792 parent_xc++;
|
|
1793 parent_xc++;
|
|
1794 prev_parent_xc= parent_xc;
|
|
1795 }else{
|
|
1796 parent_xc= prev_parent_xc;
|
|
1797 }
|
|
1798 }
|
|
1799 }
|
|
1800
|
|
1801 (xc++)->x= w+1; //end marker
|
|
1802 }
|
|
1803 }
|
|
1804
|
|
1805 static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){
|
|
1806 const int w= b->width;
|
|
1807 int y;
|
|
1808 const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
|
|
1809 int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
|
|
1810 int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
|
|
1811 int new_index = 0;
|
|
1812
|
|
1813 START_TIMER
|
|
1814
|
|
1815 if(b->buf == s->spatial_dwt_buffer || s->qlog == LOSSLESS_QLOG){
|
|
1816 qadd= 0;
|
|
1817 qmul= 1<<QEXPSHIFT;
|
|
1818 }
|
|
1819
|
|
1820 /* If we are on the second or later slice, restore our index. */
|
|
1821 if (start_y != 0)
|
|
1822 new_index = save_state[0];
|
|
1823
|
|
1824
|
|
1825 for(y=start_y; y<h; y++){
|
|
1826 int x = 0;
|
|
1827 int v;
|
|
1828 DWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset;
|
|
1829 memset(line, 0, b->width*sizeof(DWTELEM));
|
|
1830 v = b->x_coeff[new_index].coeff;
|
|
1831 x = b->x_coeff[new_index++].x;
|
|
1832 while(x < w)
|
|
1833 {
|
|
1834 register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT;
|
|
1835 register int u= -(v&1);
|
|
1836 line[x] = (t^u) - u;
|
|
1837
|
|
1838 v = b->x_coeff[new_index].coeff;
|
|
1839 x = b->x_coeff[new_index++].x;
|
|
1840 }
|
|
1841 }
|
|
1842 if(w > 200 && start_y != 0/*level+1 == s->spatial_decomposition_count*/){
|
|
1843 STOP_TIMER("decode_subband")
|
|
1844 }
|
|
1845
|
|
1846 /* Save our variables for the next slice. */
|
|
1847 save_state[0] = new_index;
|
|
1848
|
|
1849 return;
|
|
1850 }
|
|
1851
|
|
1852 static void reset_contexts(SnowContext *s){
|
|
1853 int plane_index, level, orientation;
|
|
1854
|
|
1855 for(plane_index=0; plane_index<3; plane_index++){
|
|
1856 for(level=0; level<s->spatial_decomposition_count; level++){
|
|
1857 for(orientation=level ? 1:0; orientation<4; orientation++){
|
|
1858 memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
|
|
1859 }
|
|
1860 }
|
|
1861 }
|
|
1862 memset(s->header_state, MID_STATE, sizeof(s->header_state));
|
|
1863 memset(s->block_state, MID_STATE, sizeof(s->block_state));
|
|
1864 }
|
|
1865
|
|
1866 static int alloc_blocks(SnowContext *s){
|
|
1867 int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
|
|
1868 int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
|
|
1869
|
|
1870 s->b_width = w;
|
|
1871 s->b_height= h;
|
|
1872
|
|
1873 s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
|
|
1874 return 0;
|
|
1875 }
|
|
1876
|
|
1877 static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){
|
|
1878 uint8_t *bytestream= d->bytestream;
|
|
1879 uint8_t *bytestream_start= d->bytestream_start;
|
|
1880 *d= *s;
|
|
1881 d->bytestream= bytestream;
|
|
1882 d->bytestream_start= bytestream_start;
|
|
1883 }
|
|
1884
|
|
1885 //near copy & paste from dsputil, FIXME
|
|
1886 static int pix_sum(uint8_t * pix, int line_size, int w)
|
|
1887 {
|
|
1888 int s, i, j;
|
|
1889
|
|
1890 s = 0;
|
|
1891 for (i = 0; i < w; i++) {
|
|
1892 for (j = 0; j < w; j++) {
|
|
1893 s += pix[0];
|
|
1894 pix ++;
|
|
1895 }
|
|
1896 pix += line_size - w;
|
|
1897 }
|
|
1898 return s;
|
|
1899 }
|
|
1900
|
|
1901 //near copy & paste from dsputil, FIXME
|
|
1902 static int pix_norm1(uint8_t * pix, int line_size, int w)
|
|
1903 {
|
|
1904 int s, i, j;
|
|
1905 uint32_t *sq = squareTbl + 256;
|
|
1906
|
|
1907 s = 0;
|
|
1908 for (i = 0; i < w; i++) {
|
|
1909 for (j = 0; j < w; j ++) {
|
|
1910 s += sq[pix[0]];
|
|
1911 pix ++;
|
|
1912 }
|
|
1913 pix += line_size - w;
|
|
1914 }
|
|
1915 return s;
|
|
1916 }
|
|
1917
|
|
1918 static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int ref, int type){
|
|
1919 const int w= s->b_width << s->block_max_depth;
|
|
1920 const int rem_depth= s->block_max_depth - level;
|
|
1921 const int index= (x + y*w) << rem_depth;
|
|
1922 const int block_w= 1<<rem_depth;
|
|
1923 BlockNode block;
|
|
1924 int i,j;
|
|
1925
|
|
1926 block.color[0]= l;
|
|
1927 block.color[1]= cb;
|
|
1928 block.color[2]= cr;
|
|
1929 block.mx= mx;
|
|
1930 block.my= my;
|
|
1931 block.ref= ref;
|
|
1932 block.type= type;
|
|
1933 block.level= level;
|
|
1934
|
|
1935 for(j=0; j<block_w; j++){
|
|
1936 for(i=0; i<block_w; i++){
|
|
1937 s->block[index + i + j*w]= block;
|
|
1938 }
|
|
1939 }
|
|
1940 }
|
|
1941
|
|
1942 static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
|
|
1943 const int offset[3]= {
|
|
1944 y*c-> stride + x,
|
|
1945 ((y*c->uvstride + x)>>1),
|
|
1946 ((y*c->uvstride + x)>>1),
|
|
1947 };
|
|
1948 int i;
|
|
1949 for(i=0; i<3; i++){
|
|
1950 c->src[0][i]= src [i];
|
|
1951 c->ref[0][i]= ref [i] + offset[i];
|
|
1952 }
|
|
1953 assert(!ref_index);
|
|
1954 }
|
|
1955
|
|
1956 static inline void pred_mv(SnowContext *s, int *mx, int *my, int ref,
|
|
1957 BlockNode *left, BlockNode *top, BlockNode *tr){
|
|
1958 if(s->ref_frames == 1){
|
|
1959 *mx = mid_pred(left->mx, top->mx, tr->mx);
|
|
1960 *my = mid_pred(left->my, top->my, tr->my);
|
|
1961 }else{
|
|
1962 const int *scale = scale_mv_ref[ref];
|
|
1963 *mx = mid_pred(left->mx * scale[left->ref] + 128 >>8,
|
|
1964 top ->mx * scale[top ->ref] + 128 >>8,
|
|
1965 tr ->mx * scale[tr ->ref] + 128 >>8);
|
|
1966 *my = mid_pred(left->my * scale[left->ref] + 128 >>8,
|
|
1967 top ->my * scale[top ->ref] + 128 >>8,
|
|
1968 tr ->my * scale[tr ->ref] + 128 >>8);
|
|
1969 }
|
|
1970 }
|
|
1971
|
|
1972 //FIXME copy&paste
|
|
1973 #define P_LEFT P[1]
|
|
1974 #define P_TOP P[2]
|
|
1975 #define P_TOPRIGHT P[3]
|
|
1976 #define P_MEDIAN P[4]
|
|
1977 #define P_MV1 P[9]
|
|
1978 #define FLAG_QPEL 1 //must be 1
|
|
1979
|
|
1980 static int encode_q_branch(SnowContext *s, int level, int x, int y){
|
|
1981 uint8_t p_buffer[1024];
|
|
1982 uint8_t i_buffer[1024];
|
|
1983 uint8_t p_state[sizeof(s->block_state)];
|
|
1984 uint8_t i_state[sizeof(s->block_state)];
|
|
1985 RangeCoder pc, ic;
|
|
1986 uint8_t *pbbak= s->c.bytestream;
|
|
1987 uint8_t *pbbak_start= s->c.bytestream_start;
|
|
1988 int score, score2, iscore, i_len, p_len, block_s, sum;
|
|
1989 const int w= s->b_width << s->block_max_depth;
|
|
1990 const int h= s->b_height << s->block_max_depth;
|
|
1991 const int rem_depth= s->block_max_depth - level;
|
|
1992 const int index= (x + y*w) << rem_depth;
|
|
1993 const int block_w= 1<<(LOG2_MB_SIZE - level);
|
|
1994 int trx= (x+1)<<rem_depth;
|
|
1995 int try= (y+1)<<rem_depth;
|
|
1996 BlockNode *left = x ? &s->block[index-1] : &null_block;
|
|
1997 BlockNode *top = y ? &s->block[index-w] : &null_block;
|
|
1998 BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
|
|
1999 BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
|
|
2000 BlockNode *tl = y && x ? &s->block[index-w-1] : left;
|
|
2001 BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
|
|
2002 int pl = left->color[0];
|
|
2003 int pcb= left->color[1];
|
|
2004 int pcr= left->color[2];
|
|
2005 int pmx, pmy;
|
|
2006 int mx=0, my=0;
|
|
2007 int l,cr,cb;
|
|
2008 const int stride= s->current_picture.linesize[0];
|
|
2009 const int uvstride= s->current_picture.linesize[1];
|
|
2010 uint8_t *current_data[3]= { s->input_picture.data[0] + (x + y* stride)*block_w,
|
|
2011 s->input_picture.data[1] + (x + y*uvstride)*block_w/2,
|
|
2012 s->input_picture.data[2] + (x + y*uvstride)*block_w/2};
|
|
2013 int P[10][2];
|
|
2014 int16_t last_mv[3][2];
|
|
2015 int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
|
|
2016 const int shift= 1+qpel;
|
|
2017 MotionEstContext *c= &s->m.me;
|
|
2018 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
|
|
2019 int mx_context= av_log2(2*FFABS(left->mx - top->mx));
|
|
2020 int my_context= av_log2(2*FFABS(left->my - top->my));
|
|
2021 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
|
|
2022 int ref, best_ref, ref_score, ref_mx, ref_my;
|
|
2023
|
|
2024 assert(sizeof(s->block_state) >= 256);
|
|
2025 if(s->keyframe){
|
|
2026 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
|
|
2027 return 0;
|
|
2028 }
|
|
2029
|
|
2030 // clip predictors / edge ?
|
|
2031
|
|
2032 P_LEFT[0]= left->mx;
|
|
2033 P_LEFT[1]= left->my;
|
|
2034 P_TOP [0]= top->mx;
|
|
2035 P_TOP [1]= top->my;
|
|
2036 P_TOPRIGHT[0]= tr->mx;
|
|
2037 P_TOPRIGHT[1]= tr->my;
|
|
2038
|
|
2039 last_mv[0][0]= s->block[index].mx;
|
|
2040 last_mv[0][1]= s->block[index].my;
|
|
2041 last_mv[1][0]= right->mx;
|
|
2042 last_mv[1][1]= right->my;
|
|
2043 last_mv[2][0]= bottom->mx;
|
|
2044 last_mv[2][1]= bottom->my;
|
|
2045
|
|
2046 s->m.mb_stride=2;
|
|
2047 s->m.mb_x=
|
|
2048 s->m.mb_y= 0;
|
|
2049 s->m.me.skip= 0;
|
|
2050
|
|
2051 assert(s->m.me. stride == stride);
|
|
2052 assert(s->m.me.uvstride == uvstride);
|
|
2053
|
|
2054 c->penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
|
|
2055 c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
|
|
2056 c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
|
|
2057 c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
|
|
2058
|
|
2059 c->xmin = - x*block_w - 16+2;
|
|
2060 c->ymin = - y*block_w - 16+2;
|
|
2061 c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
|
|
2062 c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
|
|
2063
|
|
2064 if(P_LEFT[0] > (c->xmax<<shift)) P_LEFT[0] = (c->xmax<<shift);
|
|
2065 if(P_LEFT[1] > (c->ymax<<shift)) P_LEFT[1] = (c->ymax<<shift);
|
|
2066 if(P_TOP[0] > (c->xmax<<shift)) P_TOP[0] = (c->xmax<<shift);
|
|
2067 if(P_TOP[1] > (c->ymax<<shift)) P_TOP[1] = (c->ymax<<shift);
|
|
2068 if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
|
|
2069 if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
|
|
2070 if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
|
|
2071
|
|
2072 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
|
|
2073 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
|
|
2074
|
|
2075 if (!y) {
|
|
2076 c->pred_x= P_LEFT[0];
|
|
2077 c->pred_y= P_LEFT[1];
|
|
2078 } else {
|
|
2079 c->pred_x = P_MEDIAN[0];
|
|
2080 c->pred_y = P_MEDIAN[1];
|
|
2081 }
|
|
2082
|
|
2083 score= INT_MAX;
|
|
2084 best_ref= 0;
|
|
2085 for(ref=0; ref<s->ref_frames; ref++){
|
|
2086 init_ref(c, current_data, s->last_picture[ref].data, NULL, block_w*x, block_w*y, 0);
|
|
2087
|
|
2088 ref_score= ff_epzs_motion_search(&s->m, &ref_mx, &ref_my, P, 0, /*ref_index*/ 0, last_mv,
|
|
2089 (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
|
|
2090
|
|
2091 assert(ref_mx >= c->xmin);
|
|
2092 assert(ref_mx <= c->xmax);
|
|
2093 assert(ref_my >= c->ymin);
|
|
2094 assert(ref_my <= c->ymax);
|
|
2095
|
|
2096 ref_score= s->m.me.sub_motion_search(&s->m, &ref_mx, &ref_my, ref_score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
|
|
2097 ref_score= ff_get_mb_score(&s->m, ref_mx, ref_my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
|
|
2098 ref_score+= 2*av_log2(2*ref)*c->penalty_factor;
|
|
2099 if(s->ref_mvs[ref]){
|
|
2100 s->ref_mvs[ref][index][0]= ref_mx;
|
|
2101 s->ref_mvs[ref][index][1]= ref_my;
|
|
2102 s->ref_scores[ref][index]= ref_score;
|
|
2103 }
|
|
2104 if(score > ref_score){
|
|
2105 score= ref_score;
|
|
2106 best_ref= ref;
|
|
2107 mx= ref_mx;
|
|
2108 my= ref_my;
|
|
2109 }
|
|
2110 }
|
|
2111 //FIXME if mb_cmp != SSE then intra cant be compared currently and mb_penalty vs. lambda2
|
|
2112
|
|
2113 // subpel search
|
|
2114 pc= s->c;
|
|
2115 pc.bytestream_start=
|
|
2116 pc.bytestream= p_buffer; //FIXME end/start? and at the other stoo
|
|
2117 memcpy(p_state, s->block_state, sizeof(s->block_state));
|
|
2118
|
|
2119 if(level!=s->block_max_depth)
|
|
2120 put_rac(&pc, &p_state[4 + s_context], 1);
|
|
2121 put_rac(&pc, &p_state[1 + left->type + top->type], 0);
|
|
2122 if(s->ref_frames > 1)
|
|
2123 put_symbol(&pc, &p_state[128 + 1024 + 32*ref_context], best_ref, 0);
|
|
2124 pred_mv(s, &pmx, &pmy, best_ref, left, top, tr);
|
|
2125 put_symbol(&pc, &p_state[128 + 32*(mx_context + 16*!!best_ref)], mx - pmx, 1);
|
|
2126 put_symbol(&pc, &p_state[128 + 32*(my_context + 16*!!best_ref)], my - pmy, 1);
|
|
2127 p_len= pc.bytestream - pc.bytestream_start;
|
|
2128 score += (s->lambda2*(p_len*8
|
|
2129 + (pc.outstanding_count - s->c.outstanding_count)*8
|
|
2130 + (-av_log2(pc.range) + av_log2(s->c.range))
|
|
2131 ))>>FF_LAMBDA_SHIFT;
|
|
2132
|
|
2133 block_s= block_w*block_w;
|
|
2134 sum = pix_sum(current_data[0], stride, block_w);
|
|
2135 l= (sum + block_s/2)/block_s;
|
|
2136 iscore = pix_norm1(current_data[0], stride, block_w) - 2*l*sum + l*l*block_s;
|
|
2137
|
|
2138 block_s= block_w*block_w>>2;
|
|
2139 sum = pix_sum(current_data[1], uvstride, block_w>>1);
|
|
2140 cb= (sum + block_s/2)/block_s;
|
|
2141 // iscore += pix_norm1(¤t_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
|
|
2142 sum = pix_sum(current_data[2], uvstride, block_w>>1);
|
|
2143 cr= (sum + block_s/2)/block_s;
|
|
2144 // iscore += pix_norm1(¤t_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
|
|
2145
|
|
2146 ic= s->c;
|
|
2147 ic.bytestream_start=
|
|
2148 ic.bytestream= i_buffer; //FIXME end/start? and at the other stoo
|
|
2149 memcpy(i_state, s->block_state, sizeof(s->block_state));
|
|
2150 if(level!=s->block_max_depth)
|
|
2151 put_rac(&ic, &i_state[4 + s_context], 1);
|
|
2152 put_rac(&ic, &i_state[1 + left->type + top->type], 1);
|
|
2153 put_symbol(&ic, &i_state[32], l-pl , 1);
|
|
2154 put_symbol(&ic, &i_state[64], cb-pcb, 1);
|
|
2155 put_symbol(&ic, &i_state[96], cr-pcr, 1);
|
|
2156 i_len= ic.bytestream - ic.bytestream_start;
|
|
2157 iscore += (s->lambda2*(i_len*8
|
|
2158 + (ic.outstanding_count - s->c.outstanding_count)*8
|
|
2159 + (-av_log2(ic.range) + av_log2(s->c.range))
|
|
2160 ))>>FF_LAMBDA_SHIFT;
|
|
2161
|
|
2162 // assert(score==256*256*256*64-1);
|
|
2163 assert(iscore < 255*255*256 + s->lambda2*10);
|
|
2164 assert(iscore >= 0);
|
|
2165 assert(l>=0 && l<=255);
|
|
2166 assert(pl>=0 && pl<=255);
|
|
2167
|
|
2168 if(level==0){
|
|
2169 int varc= iscore >> 8;
|
|
2170 int vard= score >> 8;
|
|
2171 if (vard <= 64 || vard < varc)
|
|
2172 c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
|
|
2173 else
|
|
2174 c->scene_change_score+= s->m.qscale;
|
|
2175 }
|
|
2176
|
|
2177 if(level!=s->block_max_depth){
|
|
2178 put_rac(&s->c, &s->block_state[4 + s_context], 0);
|
|
2179 score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
|
|
2180 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
|
|
2181 score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
|
|
2182 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
|
|
2183 score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead
|
|
2184
|
|
2185 if(score2 < score && score2 < iscore)
|
|
2186 return score2;
|
|
2187 }
|
|
2188
|
|
2189 if(iscore < score){
|
|
2190 pred_mv(s, &pmx, &pmy, 0, left, top, tr);
|
|
2191 memcpy(pbbak, i_buffer, i_len);
|
|
2192 s->c= ic;
|
|
2193 s->c.bytestream_start= pbbak_start;
|
|
2194 s->c.bytestream= pbbak + i_len;
|
|
2195 set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, 0, BLOCK_INTRA);
|
|
2196 memcpy(s->block_state, i_state, sizeof(s->block_state));
|
|
2197 return iscore;
|
|
2198 }else{
|
|
2199 memcpy(pbbak, p_buffer, p_len);
|
|
2200 s->c= pc;
|
|
2201 s->c.bytestream_start= pbbak_start;
|
|
2202 s->c.bytestream= pbbak + p_len;
|
|
2203 set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, best_ref, 0);
|
|
2204 memcpy(s->block_state, p_state, sizeof(s->block_state));
|
|
2205 return score;
|
|
2206 }
|
|
2207 }
|
|
2208
|
|
2209 static always_inline int same_block(BlockNode *a, BlockNode *b){
|
|
2210 if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){
|
|
2211 return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2]));
|
|
2212 }else{
|
|
2213 return !((a->mx - b->mx) | (a->my - b->my) | (a->ref - b->ref) | ((a->type ^ b->type)&BLOCK_INTRA));
|
|
2214 }
|
|
2215 }
|
|
2216
|
|
2217 static void encode_q_branch2(SnowContext *s, int level, int x, int y){
|
|
2218 const int w= s->b_width << s->block_max_depth;
|
|
2219 const int rem_depth= s->block_max_depth - level;
|
|
2220 const int index= (x + y*w) << rem_depth;
|
|
2221 int trx= (x+1)<<rem_depth;
|
|
2222 BlockNode *b= &s->block[index];
|
|
2223 BlockNode *left = x ? &s->block[index-1] : &null_block;
|
|
2224 BlockNode *top = y ? &s->block[index-w] : &null_block;
|
|
2225 BlockNode *tl = y && x ? &s->block[index-w-1] : left;
|
|
2226 BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
|
|
2227 int pl = left->color[0];
|
|
2228 int pcb= left->color[1];
|
|
2229 int pcr= left->color[2];
|
|
2230 int pmx, pmy;
|
|
2231 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
|
|
2232 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 16*!!b->ref;
|
|
2233 int my_context= av_log2(2*FFABS(left->my - top->my)) + 16*!!b->ref;
|
|
2234 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
|
|
2235
|
|
2236 if(s->keyframe){
|
|
2237 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
|
|
2238 return;
|
|
2239 }
|
|
2240
|
|
2241 if(level!=s->block_max_depth){
|
|
2242 if(same_block(b,b+1) && same_block(b,b+w) && same_block(b,b+w+1)){
|
|
2243 put_rac(&s->c, &s->block_state[4 + s_context], 1);
|
|
2244 }else{
|
|
2245 put_rac(&s->c, &s->block_state[4 + s_context], 0);
|
|
2246 encode_q_branch2(s, level+1, 2*x+0, 2*y+0);
|
|
2247 encode_q_branch2(s, level+1, 2*x+1, 2*y+0);
|
|
2248 encode_q_branch2(s, level+1, 2*x+0, 2*y+1);
|
|
2249 encode_q_branch2(s, level+1, 2*x+1, 2*y+1);
|
|
2250 return;
|
|
2251 }
|
|
2252 }
|
|
2253 if(b->type & BLOCK_INTRA){
|
|
2254 pred_mv(s, &pmx, &pmy, 0, left, top, tr);
|
|
2255 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 1);
|
|
2256 put_symbol(&s->c, &s->block_state[32], b->color[0]-pl , 1);
|
|
2257 put_symbol(&s->c, &s->block_state[64], b->color[1]-pcb, 1);
|
|
2258 put_symbol(&s->c, &s->block_state[96], b->color[2]-pcr, 1);
|
|
2259 set_blocks(s, level, x, y, b->color[0], b->color[1], b->color[2], pmx, pmy, 0, BLOCK_INTRA);
|
|
2260 }else{
|
|
2261 pred_mv(s, &pmx, &pmy, b->ref, left, top, tr);
|
|
2262 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 0);
|
|
2263 if(s->ref_frames > 1)
|
|
2264 put_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], b->ref, 0);
|
|
2265 put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1);
|
|
2266 put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1);
|
|
2267 set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, b->ref, 0);
|
|
2268 }
|
|
2269 }
|
|
2270
|
|
2271 static void decode_q_branch(SnowContext *s, int level, int x, int y){
|
|
2272 const int w= s->b_width << s->block_max_depth;
|
|
2273 const int rem_depth= s->block_max_depth - level;
|
|
2274 const int index= (x + y*w) << rem_depth;
|
|
2275 int trx= (x+1)<<rem_depth;
|
|
2276 BlockNode *left = x ? &s->block[index-1] : &null_block;
|
|
2277 BlockNode *top = y ? &s->block[index-w] : &null_block;
|
|
2278 BlockNode *tl = y && x ? &s->block[index-w-1] : left;
|
|
2279 BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
|
|
2280 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
|
|
2281
|
|
2282 if(s->keyframe){
|
|
2283 set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, null_block.ref, BLOCK_INTRA);
|
|
2284 return;
|
|
2285 }
|
|
2286
|
|
2287 if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){
|
|
2288 int type;
|
|
2289 int l = left->color[0];
|
|
2290 int cb= left->color[1];
|
|
2291 int cr= left->color[2];
|
|
2292 int mx= mid_pred(left->mx, top->mx, tr->mx);
|
|
2293 int my= mid_pred(left->my, top->my, tr->my);
|
|
2294 int ref = 0;
|
|
2295 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
|
|
2296 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 0*av_log2(2*FFABS(tr->mx - top->mx));
|
|
2297 int my_context= av_log2(2*FFABS(left->my - top->my)) + 0*av_log2(2*FFABS(tr->my - top->my));
|
|
2298
|
|
2299 type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
|
|
2300
|
|
2301 if(type){
|
|
2302 pred_mv(s, &mx, &my, 0, left, top, tr);
|
|
2303 l += get_symbol(&s->c, &s->block_state[32], 1);
|
|
2304 cb+= get_symbol(&s->c, &s->block_state[64], 1);
|
|
2305 cr+= get_symbol(&s->c, &s->block_state[96], 1);
|
|
2306 }else{
|
|
2307 if(s->ref_frames > 1)
|
|
2308 ref= get_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], 0);
|
|
2309 pred_mv(s, &mx, &my, ref, left, top, tr);
|
|
2310 mx+= get_symbol(&s->c, &s->block_state[128 + 32*(mx_context + 16*!!ref)], 1);
|
|
2311 my+= get_symbol(&s->c, &s->block_state[128 + 32*(my_context + 16*!!ref)], 1);
|
|
2312 }
|
|
2313 set_blocks(s, level, x, y, l, cb, cr, mx, my, ref, type);
|
|
2314 }else{
|
|
2315 decode_q_branch(s, level+1, 2*x+0, 2*y+0);
|
|
2316 decode_q_branch(s, level+1, 2*x+1, 2*y+0);
|
|
2317 decode_q_branch(s, level+1, 2*x+0, 2*y+1);
|
|
2318 decode_q_branch(s, level+1, 2*x+1, 2*y+1);
|
|
2319 }
|
|
2320 }
|
|
2321
|
|
2322 static void encode_blocks(SnowContext *s, int search){
|
|
2323 int x, y;
|
|
2324 int w= s->b_width;
|
|
2325 int h= s->b_height;
|
|
2326
|
|
2327 if(s->avctx->me_method == ME_ITER && !s->keyframe && search)
|
|
2328 iterative_me(s);
|
|
2329
|
|
2330 for(y=0; y<h; y++){
|
|
2331 if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit
|
|
2332 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
|
|
2333 return;
|
|
2334 }
|
|
2335 for(x=0; x<w; x++){
|
|
2336 if(s->avctx->me_method == ME_ITER || !search)
|
|
2337 encode_q_branch2(s, 0, x, y);
|
|
2338 else
|
|
2339 encode_q_branch (s, 0, x, y);
|
|
2340 }
|
|
2341 }
|
|
2342 }
|
|
2343
|
|
2344 static void decode_blocks(SnowContext *s){
|
|
2345 int x, y;
|
|
2346 int w= s->b_width;
|
|
2347 int h= s->b_height;
|
|
2348
|
|
2349 for(y=0; y<h; y++){
|
|
2350 for(x=0; x<w; x++){
|
|
2351 decode_q_branch(s, 0, x, y);
|
|
2352 }
|
|
2353 }
|
|
2354 }
|
|
2355
|
|
2356 static void mc_block(uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
|
|
2357 int x, y;
|
|
2358 START_TIMER
|
|
2359 for(y=0; y < b_h+5; y++){
|
|
2360 for(x=0; x < b_w; x++){
|
|
2361 int a0= src[x ];
|
|
2362 int a1= src[x + 1];
|
|
2363 int a2= src[x + 2];
|
|
2364 int a3= src[x + 3];
|
|
2365 int a4= src[x + 4];
|
|
2366 int a5= src[x + 5];
|
|
2367 // int am= 9*(a1+a2) - (a0+a3);
|
|
2368 int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
|
|
2369 // int am= 18*(a2+a3) - 2*(a1+a4);
|
|
2370 // int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
|
|
2371 // int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;
|
|
2372
|
|
2373 // if(b_w==16) am= 8*(a1+a2);
|
|
2374
|
|
2375 if(dx<8) am = (32*a2*( 8-dx) + am* dx + 128)>>8;
|
|
2376 else am = ( am*(16-dx) + 32*a3*(dx-8) + 128)>>8;
|
|
2377
|
|
2378 /* FIXME Try increasing tmp buffer to 16 bits and not clipping here. Should give marginally better results. - Robert*/
|
|
2379 if(am&(~255)) am= ~(am>>31);
|
|
2380
|
|
2381 tmp[x] = am;
|
|
2382
|
|
2383 /* if (dx< 4) tmp[x + y*stride]= (16*a1*( 4-dx) + aL* dx + 32)>>6;
|
|
2384 else if(dx< 8) tmp[x + y*stride]= ( aL*( 8-dx) + am*(dx- 4) + 32)>>6;
|
|
2385 else if(dx<12) tmp[x + y*stride]= ( am*(12-dx) + aR*(dx- 8) + 32)>>6;
|
|
2386 else tmp[x + y*stride]= ( aR*(16-dx) + 16*a2*(dx-12) + 32)>>6;*/
|
|
2387 }
|
|
2388 tmp += stride;
|
|
2389 src += stride;
|
|
2390 }
|
|
2391 tmp -= (b_h+5)*stride;
|
|
2392
|
|
2393 for(y=0; y < b_h; y++){
|
|
2394 for(x=0; x < b_w; x++){
|
|
2395 int a0= tmp[x + 0*stride];
|
|
2396 int a1= tmp[x + 1*stride];
|
|
2397 int a2= tmp[x + 2*stride];
|
|
2398 int a3= tmp[x + 3*stride];
|
|
2399 int a4= tmp[x + 4*stride];
|
|
2400 int a5= tmp[x + 5*stride];
|
|
2401 int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
|
|
2402 // int am= 18*(a2+a3) - 2*(a1+a4);
|
|
2403 /* int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
|
|
2404 int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;*/
|
|
2405
|
|
2406 // if(b_w==16) am= 8*(a1+a2);
|
|
2407
|
|
2408 if(dy<8) am = (32*a2*( 8-dy) + am* dy + 128)>>8;
|
|
2409 else am = ( am*(16-dy) + 32*a3*(dy-8) + 128)>>8;
|
|
2410
|
|
2411 if(am&(~255)) am= ~(am>>31);
|
|
2412
|
|
2413 dst[x] = am;
|
|
2414 /* if (dy< 4) tmp[x + y*stride]= (16*a1*( 4-dy) + aL* dy + 32)>>6;
|
|
2415 else if(dy< 8) tmp[x + y*stride]= ( aL*( 8-dy) + am*(dy- 4) + 32)>>6;
|
|
2416 else if(dy<12) tmp[x + y*stride]= ( am*(12-dy) + aR*(dy- 8) + 32)>>6;
|
|
2417 else tmp[x + y*stride]= ( aR*(16-dy) + 16*a2*(dy-12) + 32)>>6;*/
|
|
2418 }
|
|
2419 dst += stride;
|
|
2420 tmp += stride;
|
|
2421 }
|
|
2422 STOP_TIMER("mc_block")
|
|
2423 }
|
|
2424
|
|
2425 #define mca(dx,dy,b_w)\
|
|
2426 static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, uint8_t *src, int stride, int h){\
|
|
2427 uint8_t tmp[stride*(b_w+5)];\
|
|
2428 assert(h==b_w);\
|
|
2429 mc_block(dst, src-2-2*stride, tmp, stride, b_w, b_w, dx, dy);\
|
|
2430 }
|
|
2431
|
|
2432 mca( 0, 0,16)
|
|
2433 mca( 8, 0,16)
|
|
2434 mca( 0, 8,16)
|
|
2435 mca( 8, 8,16)
|
|
2436 mca( 0, 0,8)
|
|
2437 mca( 8, 0,8)
|
|
2438 mca( 0, 8,8)
|
|
2439 mca( 8, 8,8)
|
|
2440
|
|
2441 static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
|
|
2442 if(block->type & BLOCK_INTRA){
|
|
2443 int x, y;
|
|
2444 const int color = block->color[plane_index];
|
|
2445 const int color4= color*0x01010101;
|
|
2446 if(b_w==32){
|
|
2447 for(y=0; y < b_h; y++){
|
|
2448 *(uint32_t*)&dst[0 + y*stride]= color4;
|
|
2449 *(uint32_t*)&dst[4 + y*stride]= color4;
|
|
2450 *(uint32_t*)&dst[8 + y*stride]= color4;
|
|
2451 *(uint32_t*)&dst[12+ y*stride]= color4;
|
|
2452 *(uint32_t*)&dst[16+ y*stride]= color4;
|
|
2453 *(uint32_t*)&dst[20+ y*stride]= color4;
|
|
2454 *(uint32_t*)&dst[24+ y*stride]= color4;
|
|
2455 *(uint32_t*)&dst[28+ y*stride]= color4;
|
|
2456 }
|
|
2457 }else if(b_w==16){
|
|
2458 for(y=0; y < b_h; y++){
|
|
2459 *(uint32_t*)&dst[0 + y*stride]= color4;
|
|
2460 *(uint32_t*)&dst[4 + y*stride]= color4;
|
|
2461 *(uint32_t*)&dst[8 + y*stride]= color4;
|
|
2462 *(uint32_t*)&dst[12+ y*stride]= color4;
|
|
2463 }
|
|
2464 }else if(b_w==8){
|
|
2465 for(y=0; y < b_h; y++){
|
|
2466 *(uint32_t*)&dst[0 + y*stride]= color4;
|
|
2467 *(uint32_t*)&dst[4 + y*stride]= color4;
|
|
2468 }
|
|
2469 }else if(b_w==4){
|
|
2470 for(y=0; y < b_h; y++){
|
|
2471 *(uint32_t*)&dst[0 + y*stride]= color4;
|
|
2472 }
|
|
2473 }else{
|
|
2474 for(y=0; y < b_h; y++){
|
|
2475 for(x=0; x < b_w; x++){
|
|
2476 dst[x + y*stride]= color;
|
|
2477 }
|
|
2478 }
|
|
2479 }
|
|
2480 }else{
|
|
2481 uint8_t *src= s->last_picture[block->ref].data[plane_index];
|
|
2482 const int scale= plane_index ? s->mv_scale : 2*s->mv_scale;
|
|
2483 int mx= block->mx*scale;
|
|
2484 int my= block->my*scale;
|
|
2485 const int dx= mx&15;
|
|
2486 const int dy= my&15;
|
|
2487 const int tab_index= 3 - (b_w>>2) + (b_w>>4);
|
|
2488 sx += (mx>>4) - 2;
|
|
2489 sy += (my>>4) - 2;
|
|
2490 src += sx + sy*stride;
|
|
2491 if( (unsigned)sx >= w - b_w - 4
|
|
2492 || (unsigned)sy >= h - b_h - 4){
|
|
2493 ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+5, b_h+5, sx, sy, w, h);
|
|
2494 src= tmp + MB_SIZE;
|
|
2495 }
|
|
2496 // assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h);
|
|
2497 // assert(!(b_w&(b_w-1)));
|
|
2498 assert(b_w>1 && b_h>1);
|
|
2499 assert(tab_index>=0 && tab_index<4 || b_w==32);
|
|
2500 if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)))
|
|
2501 mc_block(dst, src, tmp, stride, b_w, b_h, dx, dy);
|
|
2502 else if(b_w==32){
|
|
2503 int y;
|
|
2504 for(y=0; y<b_h; y+=16){
|
|
2505 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 2 + (y+2)*stride,stride);
|
|
2506 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 18 + (y+2)*stride,stride);
|
|
2507 }
|
|
2508 }else if(b_w==b_h)
|
|
2509 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst,src + 2 + 2*stride,stride);
|
|
2510 else if(b_w==2*b_h){
|
|
2511 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst ,src + 2 + 2*stride,stride);
|
|
2512 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 2 + b_h + 2*stride,stride);
|
|
2513 }else{
|
|
2514 assert(2*b_w==b_h);
|
|
2515 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst ,src + 2 + 2*stride ,stride);
|
|
2516 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst+b_w*stride,src + 2 + 2*stride+b_w*stride,stride);
|
|
2517 }
|
|
2518 }
|
|
2519 }
|
|
2520
|
|
2521 void ff_snow_inner_add_yblock(uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
|
|
2522 int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
|
|
2523 int y, x;
|
|
2524 DWTELEM * dst;
|
|
2525 for(y=0; y<b_h; y++){
|
|
2526 //FIXME ugly missue of obmc_stride
|
|
2527 uint8_t *obmc1= obmc + y*obmc_stride;
|
|
2528 uint8_t *obmc2= obmc1+ (obmc_stride>>1);
|
|
2529 uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
|
|
2530 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
|
|
2531 dst = slice_buffer_get_line(sb, src_y + y);
|
|
2532 for(x=0; x<b_w; x++){
|
|
2533 int v= obmc1[x] * block[3][x + y*src_stride]
|
|
2534 +obmc2[x] * block[2][x + y*src_stride]
|
|
2535 +obmc3[x] * block[1][x + y*src_stride]
|
|
2536 +obmc4[x] * block[0][x + y*src_stride];
|
|
2537
|
|
2538 v <<= 8 - LOG2_OBMC_MAX;
|
|
2539 if(FRAC_BITS != 8){
|
|
2540 v += 1<<(7 - FRAC_BITS);
|
|
2541 v >>= 8 - FRAC_BITS;
|
|
2542 }
|
|
2543 if(add){
|
|
2544 v += dst[x + src_x];
|
|
2545 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
|
|
2546 if(v&(~255)) v= ~(v>>31);
|
|
2547 dst8[x + y*src_stride] = v;
|
|
2548 }else{
|
|
2549 dst[x + src_x] -= v;
|
|
2550 }
|
|
2551 }
|
|
2552 }
|
|
2553 }
|
|
2554
|
|
2555 //FIXME name clenup (b_w, block_w, b_width stuff)
|
|
2556 static always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer *sb, DWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){
|
|
2557 const int b_width = s->b_width << s->block_max_depth;
|
|
2558 const int b_height= s->b_height << s->block_max_depth;
|
|
2559 const int b_stride= b_width;
|
|
2560 BlockNode *lt= &s->block[b_x + b_y*b_stride];
|
|
2561 BlockNode *rt= lt+1;
|
|
2562 BlockNode *lb= lt+b_stride;
|
|
2563 BlockNode *rb= lb+1;
|
|
2564 uint8_t *block[4];
|
|
2565 int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
|
|
2566 uint8_t tmp[src_stride*7*MB_SIZE]; //FIXME align
|
|
2567 uint8_t *ptmp;
|
|
2568 int x,y;
|
|
2569
|
|
2570 if(b_x<0){
|
|
2571 lt= rt;
|
|
2572 lb= rb;
|
|
2573 }else if(b_x + 1 >= b_width){
|
|
2574 rt= lt;
|
|
2575 rb= lb;
|
|
2576 }
|
|
2577 if(b_y<0){
|
|
2578 lt= lb;
|
|
2579 rt= rb;
|
|
2580 }else if(b_y + 1 >= b_height){
|
|
2581 lb= lt;
|
|
2582 rb= rt;
|
|
2583 }
|
|
2584
|
|
2585 if(src_x<0){ //FIXME merge with prev & always round internal width upto *16
|
|
2586 obmc -= src_x;
|
|
2587 b_w += src_x;
|
|
2588 if(!sliced && !offset_dst)
|
|
2589 dst -= src_x;
|
|
2590 src_x=0;
|
|
2591 }else if(src_x + b_w > w){
|
|
2592 b_w = w - src_x;
|
|
2593 }
|
|
2594 if(src_y<0){
|
|
2595 obmc -= src_y*obmc_stride;
|
|
2596 b_h += src_y;
|
|
2597 if(!sliced && !offset_dst)
|
|
2598 dst -= src_y*dst_stride;
|
|
2599 src_y=0;
|
|
2600 }else if(src_y + b_h> h){
|
|
2601 b_h = h - src_y;
|
|
2602 }
|
|
2603
|
|
2604 if(b_w<=0 || b_h<=0) return;
|
|
2605
|
|
2606 assert(src_stride > 2*MB_SIZE + 5);
|
|
2607 if(!sliced && offset_dst)
|
|
2608 dst += src_x + src_y*dst_stride;
|
|
2609 dst8+= src_x + src_y*src_stride;
|
|
2610 // src += src_x + src_y*src_stride;
|
|
2611
|
|
2612 ptmp= tmp + 3*tmp_step;
|
|
2613 block[0]= ptmp;
|
|
2614 ptmp+=tmp_step;
|
|
2615 pred_block(s, block[0], tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
|
|
2616
|
|
2617 if(same_block(lt, rt)){
|
|
2618 block[1]= block[0];
|
|
2619 }else{
|
|
2620 block[1]= ptmp;
|
|
2621 ptmp+=tmp_step;
|
|
2622 pred_block(s, block[1], tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
|
|
2623 }
|
|
2624
|
|
2625 if(same_block(lt, lb)){
|
|
2626 block[2]= block[0];
|
|
2627 }else if(same_block(rt, lb)){
|
|
2628 block[2]= block[1];
|
|
2629 }else{
|
|
2630 block[2]= ptmp;
|
|
2631 ptmp+=tmp_step;
|
|
2632 pred_block(s, block[2], tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
|
|
2633 }
|
|
2634
|
|
2635 if(same_block(lt, rb) ){
|
|
2636 block[3]= block[0];
|
|
2637 }else if(same_block(rt, rb)){
|
|
2638 block[3]= block[1];
|
|
2639 }else if(same_block(lb, rb)){
|
|
2640 block[3]= block[2];
|
|
2641 }else{
|
|
2642 block[3]= ptmp;
|
|
2643 pred_block(s, block[3], tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
|
|
2644 }
|
|
2645 #if 0
|
|
2646 for(y=0; y<b_h; y++){
|
|
2647 for(x=0; x<b_w; x++){
|
|
2648 int v= obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
|
|
2649 if(add) dst[x + y*dst_stride] += v;
|
|
2650 else dst[x + y*dst_stride] -= v;
|
|
2651 }
|
|
2652 }
|
|
2653 for(y=0; y<b_h; y++){
|
|
2654 uint8_t *obmc2= obmc + (obmc_stride>>1);
|
|
2655 for(x=0; x<b_w; x++){
|
|
2656 int v= obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
|
|
2657 if(add) dst[x + y*dst_stride] += v;
|
|
2658 else dst[x + y*dst_stride] -= v;
|
|
2659 }
|
|
2660 }
|
|
2661 for(y=0; y<b_h; y++){
|
|
2662 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
|
|
2663 for(x=0; x<b_w; x++){
|
|
2664 int v= obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
|
|
2665 if(add) dst[x + y*dst_stride] += v;
|
|
2666 else dst[x + y*dst_stride] -= v;
|
|
2667 }
|
|
2668 }
|
|
2669 for(y=0; y<b_h; y++){
|
|
2670 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
|
|
2671 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
|
|
2672 for(x=0; x<b_w; x++){
|
|
2673 int v= obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
|
|
2674 if(add) dst[x + y*dst_stride] += v;
|
|
2675 else dst[x + y*dst_stride] -= v;
|
|
2676 }
|
|
2677 }
|
|
2678 #else
|
|
2679 if(sliced){
|
|
2680 START_TIMER
|
|
2681
|
|
2682 s->dsp.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
|
|
2683 STOP_TIMER("inner_add_yblock")
|
|
2684 }else
|
|
2685 for(y=0; y<b_h; y++){
|
|
2686 //FIXME ugly missue of obmc_stride
|
|
2687 uint8_t *obmc1= obmc + y*obmc_stride;
|
|
2688 uint8_t *obmc2= obmc1+ (obmc_stride>>1);
|
|
2689 uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
|
|
2690 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
|
|
2691 for(x=0; x<b_w; x++){
|
|
2692 int v= obmc1[x] * block[3][x + y*src_stride]
|
|
2693 +obmc2[x] * block[2][x + y*src_stride]
|
|
2694 +obmc3[x] * block[1][x + y*src_stride]
|
|
2695 +obmc4[x] * block[0][x + y*src_stride];
|
|
2696
|
|
2697 v <<= 8 - LOG2_OBMC_MAX;
|
|
2698 if(FRAC_BITS != 8){
|
|
2699 v += 1<<(7 - FRAC_BITS);
|
|
2700 v >>= 8 - FRAC_BITS;
|
|
2701 }
|
|
2702 if(add){
|
|
2703 v += dst[x + y*dst_stride];
|
|
2704 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
|
|
2705 if(v&(~255)) v= ~(v>>31);
|
|
2706 dst8[x + y*src_stride] = v;
|
|
2707 }else{
|
|
2708 dst[x + y*dst_stride] -= v;
|
|
2709 }
|
|
2710 }
|
|
2711 }
|
|
2712 #endif
|
|
2713 }
|
|
2714
|
|
2715 static always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, DWTELEM * old_buffer, int plane_index, int add, int mb_y){
|
|
2716 Plane *p= &s->plane[plane_index];
|
|
2717 const int mb_w= s->b_width << s->block_max_depth;
|
|
2718 const int mb_h= s->b_height << s->block_max_depth;
|
|
2719 int x, y, mb_x;
|
|
2720 int block_size = MB_SIZE >> s->block_max_depth;
|
|
2721 int block_w = plane_index ? block_size/2 : block_size;
|
|
2722 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
|
|
2723 int obmc_stride= plane_index ? block_size : 2*block_size;
|
|
2724 int ref_stride= s->current_picture.linesize[plane_index];
|
|
2725 uint8_t *dst8= s->current_picture.data[plane_index];
|
|
2726 int w= p->width;
|
|
2727 int h= p->height;
|
|
2728 START_TIMER
|
|
2729
|
|
2730 if(s->keyframe || (s->avctx->debug&512)){
|
|
2731 if(mb_y==mb_h)
|
|
2732 return;
|
|
2733
|
|
2734 if(add){
|
|
2735 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++)
|
|
2736 {
|
|
2737 // DWTELEM * line = slice_buffer_get_line(sb, y);
|
|
2738 DWTELEM * line = sb->line[y];
|
|
2739 for(x=0; x<w; x++)
|
|
2740 {
|
|
2741 // int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
|
|
2742 int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
|
|
2743 v >>= FRAC_BITS;
|
|
2744 if(v&(~255)) v= ~(v>>31);
|
|
2745 dst8[x + y*ref_stride]= v;
|
|
2746 }
|
|
2747 }
|
|
2748 }else{
|
|
2749 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++)
|
|
2750 {
|
|
2751 // DWTELEM * line = slice_buffer_get_line(sb, y);
|
|
2752 DWTELEM * line = sb->line[y];
|
|
2753 for(x=0; x<w; x++)
|
|
2754 {
|
|
2755 line[x] -= 128 << FRAC_BITS;
|
|
2756 // buf[x + y*w]-= 128<<FRAC_BITS;
|
|
2757 }
|
|
2758 }
|
|
2759 }
|
|
2760
|
|
2761 return;
|
|
2762 }
|
|
2763
|
|
2764 for(mb_x=0; mb_x<=mb_w; mb_x++){
|
|
2765 START_TIMER
|
|
2766
|
|
2767 add_yblock(s, 1, sb, old_buffer, dst8, obmc,
|
|
2768 block_w*mb_x - block_w/2,
|
|
2769 block_w*mb_y - block_w/2,
|
|
2770 block_w, block_w,
|
|
2771 w, h,
|
|
2772 w, ref_stride, obmc_stride,
|
|
2773 mb_x - 1, mb_y - 1,
|
|
2774 add, 0, plane_index);
|
|
2775
|
|
2776 STOP_TIMER("add_yblock")
|
|
2777 }
|
|
2778
|
|
2779 STOP_TIMER("predict_slice")
|
|
2780 }
|
|
2781
|
|
2782 static always_inline void predict_slice(SnowContext *s, DWTELEM *buf, int plane_index, int add, int mb_y){
|
|
2783 Plane *p= &s->plane[plane_index];
|
|
2784 const int mb_w= s->b_width << s->block_max_depth;
|
|
2785 const int mb_h= s->b_height << s->block_max_depth;
|
|
2786 int x, y, mb_x;
|
|
2787 int block_size = MB_SIZE >> s->block_max_depth;
|
|
2788 int block_w = plane_index ? block_size/2 : block_size;
|
|
2789 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
|
|
2790 const int obmc_stride= plane_index ? block_size : 2*block_size;
|
|
2791 int ref_stride= s->current_picture.linesize[plane_index];
|
|
2792 uint8_t *dst8= s->current_picture.data[plane_index];
|
|
2793 int w= p->width;
|
|
2794 int h= p->height;
|
|
2795 START_TIMER
|
|
2796
|
|
2797 if(s->keyframe || (s->avctx->debug&512)){
|
|
2798 if(mb_y==mb_h)
|
|
2799 return;
|
|
2800
|
|
2801 if(add){
|
|
2802 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
|
|
2803 for(x=0; x<w; x++){
|
|
2804 int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
|
|
2805 v >>= FRAC_BITS;
|
|
2806 if(v&(~255)) v= ~(v>>31);
|
|
2807 dst8[x + y*ref_stride]= v;
|
|
2808 }
|
|
2809 }
|
|
2810 }else{
|
|
2811 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
|
|
2812 for(x=0; x<w; x++){
|
|
2813 buf[x + y*w]-= 128<<FRAC_BITS;
|
|
2814 }
|
|
2815 }
|
|
2816 }
|
|
2817
|
|
2818 return;
|
|
2819 }
|
|
2820
|
|
2821 for(mb_x=0; mb_x<=mb_w; mb_x++){
|
|
2822 START_TIMER
|
|
2823
|
|
2824 add_yblock(s, 0, NULL, buf, dst8, obmc,
|
|
2825 block_w*mb_x - block_w/2,
|
|
2826 block_w*mb_y - block_w/2,
|
|
2827 block_w, block_w,
|
|
2828 w, h,
|
|
2829 w, ref_stride, obmc_stride,
|
|
2830 mb_x - 1, mb_y - 1,
|
|
2831 add, 1, plane_index);
|
|
2832
|
|
2833 STOP_TIMER("add_yblock")
|
|
2834 }
|
|
2835
|
|
2836 STOP_TIMER("predict_slice")
|
|
2837 }
|
|
2838
|
|
2839 static always_inline void predict_plane(SnowContext *s, DWTELEM *buf, int plane_index, int add){
|
|
2840 const int mb_h= s->b_height << s->block_max_depth;
|
|
2841 int mb_y;
|
|
2842 for(mb_y=0; mb_y<=mb_h; mb_y++)
|
|
2843 predict_slice(s, buf, plane_index, add, mb_y);
|
|
2844 }
|
|
2845
|
|
2846 static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){
|
|
2847 int i, x2, y2;
|
|
2848 Plane *p= &s->plane[plane_index];
|
|
2849 const int block_size = MB_SIZE >> s->block_max_depth;
|
|
2850 const int block_w = plane_index ? block_size/2 : block_size;
|
|
2851 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
|
|
2852 const int obmc_stride= plane_index ? block_size : 2*block_size;
|
|
2853 const int ref_stride= s->current_picture.linesize[plane_index];
|
|
2854 uint8_t *src= s-> input_picture.data[plane_index];
|
|
2855 DWTELEM *dst= (DWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
|
|
2856 const int b_stride = s->b_width << s->block_max_depth;
|
|
2857 const int w= p->width;
|
|
2858 const int h= p->height;
|
|
2859 int index= mb_x + mb_y*b_stride;
|
|
2860 BlockNode *b= &s->block[index];
|
|
2861 BlockNode backup= *b;
|
|
2862 int ab=0;
|
|
2863 int aa=0;
|
|
2864
|
|
2865 b->type|= BLOCK_INTRA;
|
|
2866 b->color[plane_index]= 0;
|
|
2867 memset(dst, 0, obmc_stride*obmc_stride*sizeof(DWTELEM));
|
|
2868
|
|
2869 for(i=0; i<4; i++){
|
|
2870 int mb_x2= mb_x + (i &1) - 1;
|
|
2871 int mb_y2= mb_y + (i>>1) - 1;
|
|
2872 int x= block_w*mb_x2 + block_w/2;
|
|
2873 int y= block_w*mb_y2 + block_w/2;
|
|
2874
|
|
2875 add_yblock(s, 0, NULL, dst + ((i&1)+(i>>1)*obmc_stride)*block_w, NULL, obmc,
|
|
2876 x, y, block_w, block_w, w, h, obmc_stride, ref_stride, obmc_stride, mb_x2, mb_y2, 0, 0, plane_index);
|
|
2877
|
|
2878 for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_w); y2++){
|
|
2879 for(x2= FFMAX(x, 0); x2<FFMIN(w, x+block_w); x2++){
|
|
2880 int index= x2-(block_w*mb_x - block_w/2) + (y2-(block_w*mb_y - block_w/2))*obmc_stride;
|
|
2881 int obmc_v= obmc[index];
|
|
2882 int d;
|
|
2883 if(y<0) obmc_v += obmc[index + block_w*obmc_stride];
|
|
2884 if(x<0) obmc_v += obmc[index + block_w];
|
|
2885 if(y+block_w>h) obmc_v += obmc[index - block_w*obmc_stride];
|
|
2886 if(x+block_w>w) obmc_v += obmc[index - block_w];
|
|
2887 //FIXME precalc this or simplify it somehow else
|
|
2888
|
|
2889 d = -dst[index] + (1<<(FRAC_BITS-1));
|
|
2890 dst[index] = d;
|
|
2891 ab += (src[x2 + y2*ref_stride] - (d>>FRAC_BITS)) * obmc_v;
|
|
2892 aa += obmc_v * obmc_v; //FIXME precalclate this
|
|
2893 }
|
|
2894 }
|
|
2895 }
|
|
2896 *b= backup;
|
|
2897
|
|
2898 return clip(((ab<<LOG2_OBMC_MAX) + aa/2)/aa, 0, 255); //FIXME we shouldnt need cliping
|
|
2899 }
|
|
2900
|
|
2901 static inline int get_block_bits(SnowContext *s, int x, int y, int w){
|
|
2902 const int b_stride = s->b_width << s->block_max_depth;
|
|
2903 const int b_height = s->b_height<< s->block_max_depth;
|
|
2904 int index= x + y*b_stride;
|
|
2905 BlockNode *b = &s->block[index];
|
|
2906 BlockNode *left = x ? &s->block[index-1] : &null_block;
|
|
2907 BlockNode *top = y ? &s->block[index-b_stride] : &null_block;
|
|
2908 BlockNode *tl = y && x ? &s->block[index-b_stride-1] : left;
|
|
2909 BlockNode *tr = y && x+w<b_stride ? &s->block[index-b_stride+w] : tl;
|
|
2910 int dmx, dmy;
|
|
2911 // int mx_context= av_log2(2*FFABS(left->mx - top->mx));
|
|
2912 // int my_context= av_log2(2*FFABS(left->my - top->my));
|
|
2913
|
|
2914 if(x<0 || x>=b_stride || y>=b_height)
|
|
2915 return 0;
|
|
2916 /*
|
|
2917 1 0 0
|
|
2918 01X 1-2 1
|
|
2919 001XX 3-6 2-3
|
|
2920 0001XXX 7-14 4-7
|
|
2921 00001XXXX 15-30 8-15
|
|
2922 */
|
|
2923 //FIXME try accurate rate
|
|
2924 //FIXME intra and inter predictors if surrounding blocks arent the same type
|
|
2925 if(b->type & BLOCK_INTRA){
|
|
2926 return 3+2*( av_log2(2*FFABS(left->color[0] - b->color[0]))
|
|
2927 + av_log2(2*FFABS(left->color[1] - b->color[1]))
|
|
2928 + av_log2(2*FFABS(left->color[2] - b->color[2])));
|
|
2929 }else{
|
|
2930 pred_mv(s, &dmx, &dmy, b->ref, left, top, tr);
|
|
2931 dmx-= b->mx;
|
|
2932 dmy-= b->my;
|
|
2933 return 2*(1 + av_log2(2*FFABS(dmx)) //FIXME kill the 2* can be merged in lambda
|
|
2934 + av_log2(2*FFABS(dmy))
|
|
2935 + av_log2(2*b->ref));
|
|
2936 }
|
|
2937 }
|
|
2938
|
|
2939 static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, const uint8_t *obmc_edged){
|
|
2940 Plane *p= &s->plane[plane_index];
|
|
2941 const int block_size = MB_SIZE >> s->block_max_depth;
|
|
2942 const int block_w = plane_index ? block_size/2 : block_size;
|
|
2943 const int obmc_stride= plane_index ? block_size : 2*block_size;
|
|
2944 const int ref_stride= s->current_picture.linesize[plane_index];
|
|
2945 uint8_t *dst= s->current_picture.data[plane_index];
|
|
2946 uint8_t *src= s-> input_picture.data[plane_index];
|
|
2947 DWTELEM *pred= (DWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
|
|
2948 uint8_t cur[ref_stride*2*MB_SIZE]; //FIXME alignment
|
|
2949 uint8_t tmp[ref_stride*(2*MB_SIZE+5)];
|
|
2950 const int b_stride = s->b_width << s->block_max_depth;
|
|
2951 const int b_height = s->b_height<< s->block_max_depth;
|
|
2952 const int w= p->width;
|
|
2953 const int h= p->height;
|
|
2954 int distortion;
|
|
2955 int rate= 0;
|
|
2956 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
|
|
2957 int sx= block_w*mb_x - block_w/2;
|
|
2958 int sy= block_w*mb_y - block_w/2;
|
|
2959 int x0= FFMAX(0,-sx);
|
|
2960 int y0= FFMAX(0,-sy);
|
|
2961 int x1= FFMIN(block_w*2, w-sx);
|
|
2962 int y1= FFMIN(block_w*2, h-sy);
|
|
2963 int i,x,y;
|
|
2964
|
|
2965 pred_block(s, cur, tmp, ref_stride, sx, sy, block_w*2, block_w*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h);
|
|
2966
|
|
2967 for(y=y0; y<y1; y++){
|
|
2968 const uint8_t *obmc1= obmc_edged + y*obmc_stride;
|
|
2969 const DWTELEM *pred1 = pred + y*obmc_stride;
|
|
2970 uint8_t *cur1 = cur + y*ref_stride;
|
|
2971 uint8_t *dst1 = dst + sx + (sy+y)*ref_stride;
|
|
2972 for(x=x0; x<x1; x++){
|
|
2973 int v = (cur1[x] * obmc1[x]) << (FRAC_BITS - LOG2_OBMC_MAX);
|
|
2974 v = (v + pred1[x]) >> FRAC_BITS;
|
|
2975 if(v&(~255)) v= ~(v>>31);
|
|
2976 dst1[x] = v;
|
|
2977 }
|
|
2978 }
|
|
2979
|
|
2980 /* copy the regions where obmc[] = (uint8_t)256 */
|
|
2981 if(LOG2_OBMC_MAX == 8
|
|
2982 && (mb_x == 0 || mb_x == b_stride-1)
|
|
2983 && (mb_y == 0 || mb_y == b_height-1)){
|
|
2984 if(mb_x == 0)
|
|
2985 x1 = block_w;
|
|
2986 else
|
|
2987 x0 = block_w;
|
|
2988 if(mb_y == 0)
|
|
2989 y1 = block_w;
|
|
2990 else
|
|
2991 y0 = block_w;
|
|
2992 for(y=y0; y<y1; y++)
|
|
2993 memcpy(dst + sx+x0 + (sy+y)*ref_stride, cur + x0 + y*ref_stride, x1-x0);
|
|
2994 }
|
|
2995
|
|
2996 if(block_w==16){
|
|
2997 /* FIXME rearrange dsputil to fit 32x32 cmp functions */
|
|
2998 /* FIXME check alignment of the cmp wavelet vs the encoding wavelet */
|
|
2999 /* FIXME cmps overlap but don't cover the wavelet's whole support,
|
|
3000 * so improving the score of one block is not strictly guaranteed to
|
|
3001 * improve the score of the whole frame, so iterative motion est
|
|
3002 * doesn't always converge. */
|
|
3003 if(s->avctx->me_cmp == FF_CMP_W97)
|
|
3004 distortion = w97_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
|
|
3005 else if(s->avctx->me_cmp == FF_CMP_W53)
|
|
3006 distortion = w53_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
|
|
3007 else{
|
|
3008 distortion = 0;
|
|
3009 for(i=0; i<4; i++){
|
|
3010 int off = sx+16*(i&1) + (sy+16*(i>>1))*ref_stride;
|
|
3011 distortion += s->dsp.me_cmp[0](&s->m, src + off, dst + off, ref_stride, 16);
|
|
3012 }
|
|
3013 }
|
|
3014 }else{
|
|
3015 assert(block_w==8);
|
|
3016 distortion = s->dsp.me_cmp[0](&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, block_w*2);
|
|
3017 }
|
|
3018
|
|
3019 if(plane_index==0){
|
|
3020 for(i=0; i<4; i++){
|
|
3021 /* ..RRr
|
|
3022 * .RXx.
|
|
3023 * rxx..
|
|
3024 */
|
|
3025 rate += get_block_bits(s, mb_x + (i&1) - (i>>1), mb_y + (i>>1), 1);
|
|
3026 }
|
|
3027 if(mb_x == b_stride-2)
|
|
3028 rate += get_block_bits(s, mb_x + 1, mb_y + 1, 1);
|
|
3029 }
|
|
3030 return distortion + rate*penalty_factor;
|
|
3031 }
|
|
3032
|
|
3033 static int get_4block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){
|
|
3034 int i, y2;
|
|
3035 Plane *p= &s->plane[plane_index];
|
|
3036 const int block_size = MB_SIZE >> s->block_max_depth;
|
|
3037 const int block_w = plane_index ? block_size/2 : block_size;
|
|
3038 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
|
|
3039 const int obmc_stride= plane_index ? block_size : 2*block_size;
|
|
3040 const int ref_stride= s->current_picture.linesize[plane_index];
|
|
3041 uint8_t *dst= s->current_picture.data[plane_index];
|
|
3042 uint8_t *src= s-> input_picture.data[plane_index];
|
|
3043 static const DWTELEM zero_dst[4096]; //FIXME
|
|
3044 const int b_stride = s->b_width << s->block_max_depth;
|
|
3045 const int w= p->width;
|
|
3046 const int h= p->height;
|
|
3047 int distortion= 0;
|
|
3048 int rate= 0;
|
|
3049 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
|
|
3050
|
|
3051 for(i=0; i<9; i++){
|
|
3052 int mb_x2= mb_x + (i%3) - 1;
|
|
3053 int mb_y2= mb_y + (i/3) - 1;
|
|
3054 int x= block_w*mb_x2 + block_w/2;
|
|
3055 int y= block_w*mb_y2 + block_w/2;
|
|
3056
|
|
3057 add_yblock(s, 0, NULL, zero_dst, dst, obmc,
|
|
3058 x, y, block_w, block_w, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, 1, plane_index);
|
|
3059
|
|
3060 //FIXME find a cleaner/simpler way to skip the outside stuff
|
|
3061 for(y2= y; y2<0; y2++)
|
|
3062 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
|
|
3063 for(y2= h; y2<y+block_w; y2++)
|
|
3064 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
|
|
3065 if(x<0){
|
|
3066 for(y2= y; y2<y+block_w; y2++)
|
|
3067 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, -x);
|
|
3068 }
|
|
3069 if(x+block_w > w){
|
|
3070 for(y2= y; y2<y+block_w; y2++)
|
|
3071 memcpy(dst + w + y2*ref_stride, src + w + y2*ref_stride, x+block_w - w);
|
|
3072 }
|
|
3073
|
|
3074 assert(block_w== 8 || block_w==16);
|
|
3075 distortion += s->dsp.me_cmp[block_w==8](&s->m, src + x + y*ref_stride, dst + x + y*ref_stride, ref_stride, block_w);
|
|
3076 }
|
|
3077
|
|
3078 if(plane_index==0){
|
|
3079 BlockNode *b= &s->block[mb_x+mb_y*b_stride];
|
|
3080 int merged= same_block(b,b+1) && same_block(b,b+b_stride) && same_block(b,b+b_stride+1);
|
|
3081
|
|
3082 /* ..RRRr
|
|
3083 * .RXXx.
|
|
3084 * .RXXx.
|
|
3085 * rxxx.
|
|
3086 */
|
|
3087 if(merged)
|
|
3088 rate = get_block_bits(s, mb_x, mb_y, 2);
|
|
3089 for(i=merged?4:0; i<9; i++){
|
|
3090 static const int dxy[9][2] = {{0,0},{1,0},{0,1},{1,1},{2,0},{2,1},{-1,2},{0,2},{1,2}};
|
|
3091 rate += get_block_bits(s, mb_x + dxy[i][0], mb_y + dxy[i][1], 1);
|
|
3092 }
|
|
3093 }
|
|
3094 return distortion + rate*penalty_factor;
|
|
3095 }
|
|
3096
|
|
3097 static always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, const uint8_t *obmc_edged, int *best_rd){
|
|
3098 const int b_stride= s->b_width << s->block_max_depth;
|
|
3099 BlockNode *block= &s->block[mb_x + mb_y * b_stride];
|
|
3100 BlockNode backup= *block;
|
|
3101 int rd, index, value;
|
|
3102
|
|
3103 assert(mb_x>=0 && mb_y>=0);
|
|
3104 assert(mb_x<b_stride);
|
|
3105
|
|
3106 if(intra){
|
|
3107 block->color[0] = p[0];
|
|
3108 block->color[1] = p[1];
|
|
3109 block->color[2] = p[2];
|
|
3110 block->type |= BLOCK_INTRA;
|
|
3111 }else{
|
|
3112 index= (p[0] + 31*p[1]) & (ME_CACHE_SIZE-1);
|
|
3113 value= s->me_cache_generation + (p[0]>>10) + (p[1]<<6) + (block->ref<<12);
|
|
3114 if(s->me_cache[index] == value)
|
|
3115 return 0;
|
|
3116 s->me_cache[index]= value;
|
|
3117
|
|
3118 block->mx= p[0];
|
|
3119 block->my= p[1];
|
|
3120 block->type &= ~BLOCK_INTRA;
|
|
3121 }
|
|
3122
|
|
3123 rd= get_block_rd(s, mb_x, mb_y, 0, obmc_edged);
|
|
3124
|
|
3125 //FIXME chroma
|
|
3126 if(rd < *best_rd){
|
|
3127 *best_rd= rd;
|
|
3128 return 1;
|
|
3129 }else{
|
|
3130 *block= backup;
|
|
3131 return 0;
|
|
3132 }
|
|
3133 }
|
|
3134
|
|
3135 /* special case for int[2] args we discard afterward, fixes compilation prob with gcc 2.95 */
|
|
3136 static always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, const uint8_t *obmc_edged, int *best_rd){
|
|
3137 int p[2] = {p0, p1};
|
|
3138 return check_block(s, mb_x, mb_y, p, 0, obmc_edged, best_rd);
|
|
3139 }
|
|
3140
|
|
3141 static always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int ref, int *best_rd){
|
|
3142 const int b_stride= s->b_width << s->block_max_depth;
|
|
3143 BlockNode *block= &s->block[mb_x + mb_y * b_stride];
|
|
3144 BlockNode backup[4]= {block[0], block[1], block[b_stride], block[b_stride+1]};
|
|
3145 int rd, index, value;
|
|
3146
|
|
3147 assert(mb_x>=0 && mb_y>=0);
|
|
3148 assert(mb_x<b_stride);
|
|
3149 assert(((mb_x|mb_y)&1) == 0);
|
|
3150
|
|
3151 index= (p0 + 31*p1) & (ME_CACHE_SIZE-1);
|
|
3152 value= s->me_cache_generation + (p0>>10) + (p1<<6) + (block->ref<<12);
|
|
3153 if(s->me_cache[index] == value)
|
|
3154 return 0;
|
|
3155 s->me_cache[index]= value;
|
|
3156
|
|
3157 block->mx= p0;
|
|
3158 block->my= p1;
|
|
3159 block->ref= ref;
|
|
3160 block->type &= ~BLOCK_INTRA;
|
|
3161 block[1]= block[b_stride]= block[b_stride+1]= *block;
|
|
3162
|
|
3163 rd= get_4block_rd(s, mb_x, mb_y, 0);
|
|
3164
|
|
3165 //FIXME chroma
|
|
3166 if(rd < *best_rd){
|
|
3167 *best_rd= rd;
|
|
3168 return 1;
|
|
3169 }else{
|
|
3170 block[0]= backup[0];
|
|
3171 block[1]= backup[1];
|
|
3172 block[b_stride]= backup[2];
|
|
3173 block[b_stride+1]= backup[3];
|
|
3174 return 0;
|
|
3175 }
|
|
3176 }
|
|
3177
|
|
3178 static void iterative_me(SnowContext *s){
|
|
3179 int pass, mb_x, mb_y;
|
|
3180 const int b_width = s->b_width << s->block_max_depth;
|
|
3181 const int b_height= s->b_height << s->block_max_depth;
|
|
3182 const int b_stride= b_width;
|
|
3183 int color[3];
|
|
3184
|
|
3185 {
|
|
3186 RangeCoder r = s->c;
|
|
3187 uint8_t state[sizeof(s->block_state)];
|
|
3188 memcpy(state, s->block_state, sizeof(s->block_state));
|
|
3189 for(mb_y= 0; mb_y<s->b_height; mb_y++)
|
|
3190 for(mb_x= 0; mb_x<s->b_width; mb_x++)
|
|
3191 encode_q_branch(s, 0, mb_x, mb_y);
|
|
3192 s->c = r;
|
|
3193 memcpy(s->block_state, state, sizeof(s->block_state));
|
|
3194 }
|
|
3195
|
|
3196 for(pass=0; pass<25; pass++){
|
|
3197 int change= 0;
|
|
3198
|
|
3199 for(mb_y= 0; mb_y<b_height; mb_y++){
|
|
3200 for(mb_x= 0; mb_x<b_width; mb_x++){
|
|
3201 int dia_change, i, j, ref;
|
|
3202 int best_rd= INT_MAX, ref_rd;
|
|
3203 BlockNode backup, ref_b;
|
|
3204 const int index= mb_x + mb_y * b_stride;
|
|
3205 BlockNode *block= &s->block[index];
|
|
3206 BlockNode *tb = mb_y ? &s->block[index-b_stride ] : NULL;
|
|
3207 BlockNode *lb = mb_x ? &s->block[index -1] : NULL;
|
|
3208 BlockNode *rb = mb_x+1<b_width ? &s->block[index +1] : NULL;
|
|
3209 BlockNode *bb = mb_y+1<b_height ? &s->block[index+b_stride ] : NULL;
|
|
3210 BlockNode *tlb= mb_x && mb_y ? &s->block[index-b_stride-1] : NULL;
|
|
3211 BlockNode *trb= mb_x+1<b_width && mb_y ? &s->block[index-b_stride+1] : NULL;
|
|
3212 BlockNode *blb= mb_x && mb_y+1<b_height ? &s->block[index+b_stride-1] : NULL;
|
|
3213 BlockNode *brb= mb_x+1<b_width && mb_y+1<b_height ? &s->block[index+b_stride+1] : NULL;
|
|
3214 const int b_w= (MB_SIZE >> s->block_max_depth);
|
|
3215 uint8_t obmc_edged[b_w*2][b_w*2];
|
|
3216
|
|
3217 if(pass && (block->type & BLOCK_OPT))
|
|
3218 continue;
|
|
3219 block->type |= BLOCK_OPT;
|
|
3220
|
|
3221 backup= *block;
|
|
3222
|
|
3223 if(!s->me_cache_generation)
|
|
3224 memset(s->me_cache, 0, sizeof(s->me_cache));
|
|
3225 s->me_cache_generation += 1<<22;
|
|
3226
|
|
3227 //FIXME precalc
|
|
3228 {
|
|
3229 int x, y;
|
|
3230 memcpy(obmc_edged, obmc_tab[s->block_max_depth], b_w*b_w*4);
|
|
3231 if(mb_x==0)
|
|
3232 for(y=0; y<b_w*2; y++)
|
|
3233 memset(obmc_edged[y], obmc_edged[y][0] + obmc_edged[y][b_w-1], b_w);
|
|
3234 if(mb_x==b_stride-1)
|
|
3235 for(y=0; y<b_w*2; y++)
|
|
3236 memset(obmc_edged[y]+b_w, obmc_edged[y][b_w] + obmc_edged[y][b_w*2-1], b_w);
|
|
3237 if(mb_y==0){
|
|
3238 for(x=0; x<b_w*2; x++)
|
|
3239 obmc_edged[0][x] += obmc_edged[b_w-1][x];
|
|
3240 for(y=1; y<b_w; y++)
|
|
3241 memcpy(obmc_edged[y], obmc_edged[0], b_w*2);
|
|
3242 }
|
|
3243 if(mb_y==b_height-1){
|
|
3244 for(x=0; x<b_w*2; x++)
|
|
3245 obmc_edged[b_w*2-1][x] += obmc_edged[b_w][x];
|
|
3246 for(y=b_w; y<b_w*2-1; y++)
|
|
3247 memcpy(obmc_edged[y], obmc_edged[b_w*2-1], b_w*2);
|
|
3248 }
|
|
3249 }
|
|
3250
|
|
3251 //skip stuff outside the picture
|
|
3252 if(mb_x==0 || mb_y==0 || mb_x==b_width-1 || mb_y==b_height-1)
|
|
3253 {
|
|
3254 uint8_t *src= s-> input_picture.data[0];
|
|
3255 uint8_t *dst= s->current_picture.data[0];
|
|
3256 const int stride= s->current_picture.linesize[0];
|
|
3257 const int block_w= MB_SIZE >> s->block_max_depth;
|
|
3258 const int sx= block_w*mb_x - block_w/2;
|
|
3259 const int sy= block_w*mb_y - block_w/2;
|
|
3260 const int w= s->plane[0].width;
|
|
3261 const int h= s->plane[0].height;
|
|
3262 int y;
|
|
3263
|
|
3264 for(y=sy; y<0; y++)
|
|
3265 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
|
|
3266 for(y=h; y<sy+block_w*2; y++)
|
|
3267 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
|
|
3268 if(sx<0){
|
|
3269 for(y=sy; y<sy+block_w*2; y++)
|
|
3270 memcpy(dst + sx + y*stride, src + sx + y*stride, -sx);
|
|
3271 }
|
|
3272 if(sx+block_w*2 > w){
|
|
3273 for(y=sy; y<sy+block_w*2; y++)
|
|
3274 memcpy(dst + w + y*stride, src + w + y*stride, sx+block_w*2 - w);
|
|
3275 }
|
|
3276 }
|
|
3277
|
|
3278 // intra(black) = neighbors' contribution to the current block
|
|
3279 for(i=0; i<3; i++)
|
|
3280 color[i]= get_dc(s, mb_x, mb_y, i);
|
|
3281
|
|
3282 // get previous score (cant be cached due to OBMC)
|
|
3283 if(pass > 0 && (block->type&BLOCK_INTRA)){
|
|
3284 int color0[3]= {block->color[0], block->color[1], block->color[2]};
|
|
3285 check_block(s, mb_x, mb_y, color0, 1, *obmc_edged, &best_rd);
|
|
3286 }else
|
|
3287 check_block_inter(s, mb_x, mb_y, block->mx, block->my, *obmc_edged, &best_rd);
|
|
3288
|
|
3289 ref_b= *block;
|
|
3290 ref_rd= best_rd;
|
|
3291 for(ref=0; ref < s->ref_frames; ref++){
|
|
3292 int16_t (*mvr)[2]= &s->ref_mvs[ref][index];
|
|
3293 if(s->ref_scores[ref][index] > s->ref_scores[ref_b.ref][index]*3/2) //FIXME tune threshold
|
|
3294 continue;
|
|
3295 block->ref= ref;
|
|
3296 best_rd= INT_MAX;
|
|
3297
|
|
3298 check_block_inter(s, mb_x, mb_y, mvr[0][0], mvr[0][1], *obmc_edged, &best_rd);
|
|
3299 check_block_inter(s, mb_x, mb_y, 0, 0, *obmc_edged, &best_rd);
|
|
3300 if(tb)
|
|
3301 check_block_inter(s, mb_x, mb_y, mvr[-b_stride][0], mvr[-b_stride][1], *obmc_edged, &best_rd);
|
|
3302 if(lb)
|
|
3303 check_block_inter(s, mb_x, mb_y, mvr[-1][0], mvr[-1][1], *obmc_edged, &best_rd);
|
|
3304 if(rb)
|
|
3305 check_block_inter(s, mb_x, mb_y, mvr[1][0], mvr[1][1], *obmc_edged, &best_rd);
|
|
3306 if(bb)
|
|
3307 check_block_inter(s, mb_x, mb_y, mvr[b_stride][0], mvr[b_stride][1], *obmc_edged, &best_rd);
|
|
3308
|
|
3309 /* fullpel ME */
|
|
3310 //FIXME avoid subpel interpol / round to nearest integer
|
|
3311 do{
|
|
3312 dia_change=0;
|
|
3313 for(i=0; i<FFMAX(s->avctx->dia_size, 1); i++){
|
|
3314 for(j=0; j<i; j++){
|
|
3315 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
|
|
3316 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
|
|
3317 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
|
|
3318 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
|
|
3319 }
|
|
3320 }
|
|
3321 }while(dia_change);
|
|
3322 /* subpel ME */
|
|
3323 do{
|
|
3324 static const int square[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},};
|
|
3325 dia_change=0;
|
|
3326 for(i=0; i<8; i++)
|
|
3327 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+square[i][0], block->my+square[i][1], *obmc_edged, &best_rd);
|
|
3328 }while(dia_change);
|
|
3329 //FIXME or try the standard 2 pass qpel or similar
|
|
3330
|
|
3331 mvr[0][0]= block->mx;
|
|
3332 mvr[0][1]= block->my;
|
|
3333 if(ref_rd > best_rd){
|
|
3334 ref_rd= best_rd;
|
|
3335 ref_b= *block;
|
|
3336 }
|
|
3337 }
|
|
3338 best_rd= ref_rd;
|
|
3339 *block= ref_b;
|
|
3340 #if 1
|
|
3341 check_block(s, mb_x, mb_y, color, 1, *obmc_edged, &best_rd);
|
|
3342 //FIXME RD style color selection
|
|
3343 #endif
|
|
3344 if(!same_block(block, &backup)){
|
|
3345 if(tb ) tb ->type &= ~BLOCK_OPT;
|
|
3346 if(lb ) lb ->type &= ~BLOCK_OPT;
|
|
3347 if(rb ) rb ->type &= ~BLOCK_OPT;
|
|
3348 if(bb ) bb ->type &= ~BLOCK_OPT;
|
|
3349 if(tlb) tlb->type &= ~BLOCK_OPT;
|
|
3350 if(trb) trb->type &= ~BLOCK_OPT;
|
|
3351 if(blb) blb->type &= ~BLOCK_OPT;
|
|
3352 if(brb) brb->type &= ~BLOCK_OPT;
|
|
3353 change ++;
|
|
3354 }
|
|
3355 }
|
|
3356 }
|
|
3357 av_log(NULL, AV_LOG_ERROR, "pass:%d changed:%d\n", pass, change);
|
|
3358 if(!change)
|
|
3359 break;
|
|
3360 }
|
|
3361
|
|
3362 if(s->block_max_depth == 1){
|
|
3363 int change= 0;
|
|
3364 for(mb_y= 0; mb_y<b_height; mb_y+=2){
|
|
3365 for(mb_x= 0; mb_x<b_width; mb_x+=2){
|
|
3366 int i;
|
|
3367 int best_rd, init_rd;
|
|
3368 const int index= mb_x + mb_y * b_stride;
|
|
3369 BlockNode *b[4];
|
|
3370
|
|
3371 b[0]= &s->block[index];
|
|
3372 b[1]= b[0]+1;
|
|
3373 b[2]= b[0]+b_stride;
|
|
3374 b[3]= b[2]+1;
|
|
3375 if(same_block(b[0], b[1]) &&
|
|
3376 same_block(b[0], b[2]) &&
|
|
3377 same_block(b[0], b[3]))
|
|
3378 continue;
|
|
3379
|
|
3380 if(!s->me_cache_generation)
|
|
3381 memset(s->me_cache, 0, sizeof(s->me_cache));
|
|
3382 s->me_cache_generation += 1<<22;
|
|
3383
|
|
3384 init_rd= best_rd= get_4block_rd(s, mb_x, mb_y, 0);
|
|
3385
|
|
3386 //FIXME more multiref search?
|
|
3387 check_4block_inter(s, mb_x, mb_y,
|
|
3388 (b[0]->mx + b[1]->mx + b[2]->mx + b[3]->mx + 2) >> 2,
|
|
3389 (b[0]->my + b[1]->my + b[2]->my + b[3]->my + 2) >> 2, 0, &best_rd);
|
|
3390
|
|
3391 for(i=0; i<4; i++)
|
|
3392 if(!(b[i]->type&BLOCK_INTRA))
|
|
3393 check_4block_inter(s, mb_x, mb_y, b[i]->mx, b[i]->my, b[i]->ref, &best_rd);
|
|
3394
|
|
3395 if(init_rd != best_rd)
|
|
3396 change++;
|
|
3397 }
|
|
3398 }
|
|
3399 av_log(NULL, AV_LOG_ERROR, "pass:4mv changed:%d\n", change*4);
|
|
3400 }
|
|
3401 }
|
|
3402
|
|
3403 static void quantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int bias){
|
|
3404 const int level= b->level;
|
|
3405 const int w= b->width;
|
|
3406 const int h= b->height;
|
|
3407 const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
|
|
3408 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
|
|
3409 int x,y, thres1, thres2;
|
|
3410 // START_TIMER
|
|
3411
|
|
3412 if(s->qlog == LOSSLESS_QLOG) return;
|
|
3413
|
|
3414 bias= bias ? 0 : (3*qmul)>>3;
|
|
3415 thres1= ((qmul - bias)>>QEXPSHIFT) - 1;
|
|
3416 thres2= 2*thres1;
|
|
3417
|
|
3418 if(!bias){
|
|
3419 for(y=0; y<h; y++){
|
|
3420 for(x=0; x<w; x++){
|
|
3421 int i= src[x + y*stride];
|
|
3422
|
|
3423 if((unsigned)(i+thres1) > thres2){
|
|
3424 if(i>=0){
|
|
3425 i<<= QEXPSHIFT;
|
|
3426 i/= qmul; //FIXME optimize
|
|
3427 src[x + y*stride]= i;
|
|
3428 }else{
|
|
3429 i= -i;
|
|
3430 i<<= QEXPSHIFT;
|
|
3431 i/= qmul; //FIXME optimize
|
|
3432 src[x + y*stride]= -i;
|
|
3433 }
|
|
3434 }else
|
|
3435 src[x + y*stride]= 0;
|
|
3436 }
|
|
3437 }
|
|
3438 }else{
|
|
3439 for(y=0; y<h; y++){
|
|
3440 for(x=0; x<w; x++){
|
|
3441 int i= src[x + y*stride];
|
|
3442
|
|
3443 if((unsigned)(i+thres1) > thres2){
|
|
3444 if(i>=0){
|
|
3445 i<<= QEXPSHIFT;
|
|
3446 i= (i + bias) / qmul; //FIXME optimize
|
|
3447 src[x + y*stride]= i;
|
|
3448 }else{
|
|
3449 i= -i;
|
|
3450 i<<= QEXPSHIFT;
|
|
3451 i= (i + bias) / qmul; //FIXME optimize
|
|
3452 src[x + y*stride]= -i;
|
|
3453 }
|
|
3454 }else
|
|
3455 src[x + y*stride]= 0;
|
|
3456 }
|
|
3457 }
|
|
3458 }
|
|
3459 if(level+1 == s->spatial_decomposition_count){
|
|
3460 // STOP_TIMER("quantize")
|
|
3461 }
|
|
3462 }
|
|
3463
|
|
3464 static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, DWTELEM *src, int stride, int start_y, int end_y){
|
|
3465 const int w= b->width;
|
|
3466 const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
|
|
3467 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
|
|
3468 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
|
|
3469 int x,y;
|
|
3470 START_TIMER
|
|
3471
|
|
3472 if(s->qlog == LOSSLESS_QLOG) return;
|
|
3473
|
|
3474 for(y=start_y; y<end_y; y++){
|
|
3475 // DWTELEM * line = slice_buffer_get_line_from_address(sb, src + (y * stride));
|
|
3476 DWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
|
|
3477 for(x=0; x<w; x++){
|
|
3478 int i= line[x];
|
|
3479 if(i<0){
|
|
3480 line[x]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
|
|
3481 }else if(i>0){
|
|
3482 line[x]= (( i*qmul + qadd)>>(QEXPSHIFT));
|
|
3483 }
|
|
3484 }
|
|
3485 }
|
|
3486 if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
|
|
3487 STOP_TIMER("dquant")
|
|
3488 }
|
|
3489 }
|
|
3490
|
|
3491 static void dequantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride){
|
|
3492 const int w= b->width;
|
|
3493 const int h= b->height;
|
|
3494 const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
|
|
3495 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
|
|
3496 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
|
|
3497 int x,y;
|
|
3498 START_TIMER
|
|
3499
|
|
3500 if(s->qlog == LOSSLESS_QLOG) return;
|
|
3501
|
|
3502 for(y=0; y<h; y++){
|
|
3503 for(x=0; x<w; x++){
|
|
3504 int i= src[x + y*stride];
|
|
3505 if(i<0){
|
|
3506 src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
|
|
3507 }else if(i>0){
|
|
3508 src[x + y*stride]= (( i*qmul + qadd)>>(QEXPSHIFT));
|
|
3509 }
|
|
3510 }
|
|
3511 }
|
|
3512 if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
|
|
3513 STOP_TIMER("dquant")
|
|
3514 }
|
|
3515 }
|
|
3516
|
|
3517 static void decorrelate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
|
|
3518 const int w= b->width;
|
|
3519 const int h= b->height;
|
|
3520 int x,y;
|
|
3521
|
|
3522 for(y=h-1; y>=0; y--){
|
|
3523 for(x=w-1; x>=0; x--){
|
|
3524 int i= x + y*stride;
|
|
3525
|
|
3526 if(x){
|
|
3527 if(use_median){
|
|
3528 if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
|
|
3529 else src[i] -= src[i - 1];
|
|
3530 }else{
|
|
3531 if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
|
|
3532 else src[i] -= src[i - 1];
|
|
3533 }
|
|
3534 }else{
|
|
3535 if(y) src[i] -= src[i - stride];
|
|
3536 }
|
|
3537 }
|
|
3538 }
|
|
3539 }
|
|
3540
|
|
3541 static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median, int start_y, int end_y){
|
|
3542 const int w= b->width;
|
|
3543 int x,y;
|
|
3544
|
|
3545 // START_TIMER
|
|
3546
|
|
3547 DWTELEM * line;
|
|
3548 DWTELEM * prev;
|
|
3549
|
|
3550 if (start_y != 0)
|
|
3551 line = slice_buffer_get_line(sb, ((start_y - 1) * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
|
|
3552
|
|
3553 for(y=start_y; y<end_y; y++){
|
|
3554 prev = line;
|
|
3555 // line = slice_buffer_get_line_from_address(sb, src + (y * stride));
|
|
3556 line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
|
|
3557 for(x=0; x<w; x++){
|
|
3558 if(x){
|
|
3559 if(use_median){
|
|
3560 if(y && x+1<w) line[x] += mid_pred(line[x - 1], prev[x], prev[x + 1]);
|
|
3561 else line[x] += line[x - 1];
|
|
3562 }else{
|
|
3563 if(y) line[x] += mid_pred(line[x - 1], prev[x], line[x - 1] + prev[x] - prev[x - 1]);
|
|
3564 else line[x] += line[x - 1];
|
|
3565 }
|
|
3566 }else{
|
|
3567 if(y) line[x] += prev[x];
|
|
3568 }
|
|
3569 }
|
|
3570 }
|
|
3571
|
|
3572 // STOP_TIMER("correlate")
|
|
3573 }
|
|
3574
|
|
3575 static void correlate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
|
|
3576 const int w= b->width;
|
|
3577 const int h= b->height;
|
|
3578 int x,y;
|
|
3579
|
|
3580 for(y=0; y<h; y++){
|
|
3581 for(x=0; x<w; x++){
|
|
3582 int i= x + y*stride;
|
|
3583
|
|
3584 if(x){
|
|
3585 if(use_median){
|
|
3586 if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
|
|
3587 else src[i] += src[i - 1];
|
|
3588 }else{
|
|
3589 if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
|
|
3590 else src[i] += src[i - 1];
|
|
3591 }
|
|
3592 }else{
|
|
3593 if(y) src[i] += src[i - stride];
|
|
3594 }
|
|
3595 }
|
|
3596 }
|
|
3597 }
|
|
3598
|
|
3599 static void encode_header(SnowContext *s){
|
|
3600 int plane_index, level, orientation;
|
|
3601 uint8_t kstate[32];
|
|
3602
|
|
3603 memset(kstate, MID_STATE, sizeof(kstate));
|
|
3604
|
|
3605 put_rac(&s->c, kstate, s->keyframe);
|
|
3606 if(s->keyframe || s->always_reset)
|
|
3607 reset_contexts(s);
|
|
3608 if(s->keyframe){
|
|
3609 put_symbol(&s->c, s->header_state, s->version, 0);
|
|
3610 put_rac(&s->c, s->header_state, s->always_reset);
|
|
3611 put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0);
|
|
3612 put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0);
|
|
3613 put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
|
|
3614 put_symbol(&s->c, s->header_state, s->colorspace_type, 0);
|
|
3615 put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0);
|
|
3616 put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0);
|
|
3617 put_rac(&s->c, s->header_state, s->spatial_scalability);
|
|
3618 // put_rac(&s->c, s->header_state, s->rate_scalability);
|
|
3619 put_symbol(&s->c, s->header_state, s->max_ref_frames-1, 0);
|
|
3620
|
|
3621 for(plane_index=0; plane_index<2; plane_index++){
|
|
3622 for(level=0; level<s->spatial_decomposition_count; level++){
|
|
3623 for(orientation=level ? 1:0; orientation<4; orientation++){
|
|
3624 if(orientation==2) continue;
|
|
3625 put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1);
|
|
3626 }
|
|
3627 }
|
|
3628 }
|
|
3629 }
|
|
3630 put_symbol(&s->c, s->header_state, s->spatial_decomposition_type, 0);
|
|
3631 put_symbol(&s->c, s->header_state, s->qlog, 1);
|
|
3632 put_symbol(&s->c, s->header_state, s->mv_scale, 0);
|
|
3633 put_symbol(&s->c, s->header_state, s->qbias, 1);
|
|
3634 put_symbol(&s->c, s->header_state, s->block_max_depth, 0);
|
|
3635 }
|
|
3636
|
|
3637 static int decode_header(SnowContext *s){
|
|
3638 int plane_index, level, orientation;
|
|
3639 uint8_t kstate[32];
|
|
3640
|
|
3641 memset(kstate, MID_STATE, sizeof(kstate));
|
|
3642
|
|
3643 s->keyframe= get_rac(&s->c, kstate);
|
|
3644 if(s->keyframe || s->always_reset)
|
|
3645 reset_contexts(s);
|
|
3646 if(s->keyframe){
|
|
3647 s->version= get_symbol(&s->c, s->header_state, 0);
|
|
3648 if(s->version>0){
|
|
3649 av_log(s->avctx, AV_LOG_ERROR, "version %d not supported", s->version);
|
|
3650 return -1;
|
|
3651 }
|
|
3652 s->always_reset= get_rac(&s->c, s->header_state);
|
|
3653 s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0);
|
|
3654 s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0);
|
|
3655 s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0);
|
|
3656 s->colorspace_type= get_symbol(&s->c, s->header_state, 0);
|
|
3657 s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0);
|
|
3658 s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0);
|
|
3659 s->spatial_scalability= get_rac(&s->c, s->header_state);
|
|
3660 // s->rate_scalability= get_rac(&s->c, s->header_state);
|
|
3661 s->max_ref_frames= get_symbol(&s->c, s->header_state, 0)+1;
|
|
3662
|
|
3663 for(plane_index=0; plane_index<3; plane_index++){
|
|
3664 for(level=0; level<s->spatial_decomposition_count; level++){
|
|
3665 for(orientation=level ? 1:0; orientation<4; orientation++){
|
|
3666 int q;
|
|
3667 if (plane_index==2) q= s->plane[1].band[level][orientation].qlog;
|
|
3668 else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog;
|
|
3669 else q= get_symbol(&s->c, s->header_state, 1);
|
|
3670 s->plane[plane_index].band[level][orientation].qlog= q;
|
|
3671 }
|
|
3672 }
|
|
3673 }
|
|
3674 }
|
|
3675
|
|
3676 s->spatial_decomposition_type= get_symbol(&s->c, s->header_state, 0);
|
|
3677 if(s->spatial_decomposition_type > 2){
|
|
3678 av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type);
|
|
3679 return -1;
|
|
3680 }
|
|
3681
|
|
3682 s->qlog= get_symbol(&s->c, s->header_state, 1);
|
|
3683 s->mv_scale= get_symbol(&s->c, s->header_state, 0);
|
|
3684 s->qbias= get_symbol(&s->c, s->header_state, 1);
|
|
3685 s->block_max_depth= get_symbol(&s->c, s->header_state, 0);
|
|
3686 if(s->block_max_depth > 1 || s->block_max_depth < 0){
|
|
3687 av_log(s->avctx, AV_LOG_ERROR, "block_max_depth= %d is too large", s->block_max_depth);
|
|
3688 s->block_max_depth= 0;
|
|
3689 return -1;
|
|
3690 }
|
|
3691
|
|
3692 return 0;
|
|
3693 }
|
|
3694
|
|
3695 static void init_qexp(void){
|
|
3696 int i;
|
|
3697 double v=128;
|
|
3698
|
|
3699 for(i=0; i<QROOT; i++){
|
|
3700 qexp[i]= lrintf(v);
|
|
3701 v *= pow(2, 1.0 / QROOT);
|
|
3702 }
|
|
3703 }
|
|
3704
|
|
3705 static int common_init(AVCodecContext *avctx){
|
|
3706 SnowContext *s = avctx->priv_data;
|
|
3707 int width, height;
|
|
3708 int level, orientation, plane_index, dec;
|
|
3709 int i, j;
|
|
3710
|
|
3711 s->avctx= avctx;
|
|
3712
|
|
3713 dsputil_init(&s->dsp, avctx);
|
|
3714
|
|
3715 #define mcf(dx,dy)\
|
|
3716 s->dsp.put_qpel_pixels_tab [0][dy+dx/4]=\
|
|
3717 s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
|
|
3718 s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\
|
|
3719 s->dsp.put_qpel_pixels_tab [1][dy+dx/4]=\
|
|
3720 s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
|
|
3721 s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4];
|
|
3722
|
|
3723 mcf( 0, 0)
|
|
3724 mcf( 4, 0)
|
|
3725 mcf( 8, 0)
|
|
3726 mcf(12, 0)
|
|
3727 mcf( 0, 4)
|
|
3728 mcf( 4, 4)
|
|
3729 mcf( 8, 4)
|
|
3730 mcf(12, 4)
|
|
3731 mcf( 0, 8)
|
|
3732 mcf( 4, 8)
|
|
3733 mcf( 8, 8)
|
|
3734 mcf(12, 8)
|
|
3735 mcf( 0,12)
|
|
3736 mcf( 4,12)
|
|
3737 mcf( 8,12)
|
|
3738 mcf(12,12)
|
|
3739
|
|
3740 #define mcfh(dx,dy)\
|
|
3741 s->dsp.put_pixels_tab [0][dy/4+dx/8]=\
|
|
3742 s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
|
|
3743 mc_block_hpel ## dx ## dy ## 16;\
|
|
3744 s->dsp.put_pixels_tab [1][dy/4+dx/8]=\
|
|
3745 s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
|
|
3746 mc_block_hpel ## dx ## dy ## 8;
|
|
3747
|
|
3748 mcfh(0, 0)
|
|
3749 mcfh(8, 0)
|
|
3750 mcfh(0, 8)
|
|
3751 mcfh(8, 8)
|
|
3752
|
|
3753 if(!qexp[0])
|
|
3754 init_qexp();
|
|
3755
|
|
3756 dec= s->spatial_decomposition_count= 5;
|
|
3757 s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type
|
|
3758
|
|
3759 s->chroma_h_shift= 1; //FIXME XXX
|
|
3760 s->chroma_v_shift= 1;
|
|
3761
|
|
3762 // dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
|
|
3763
|
|
3764 width= s->avctx->width;
|
|
3765 height= s->avctx->height;
|
|
3766
|
|
3767 s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM));
|
|
3768
|
|
3769 s->mv_scale= (s->avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4;
|
|
3770 s->block_max_depth= (s->avctx->flags & CODEC_FLAG_4MV) ? 1 : 0;
|
|
3771
|
|
3772 for(plane_index=0; plane_index<3; plane_index++){
|
|
3773 int w= s->avctx->width;
|
|
3774 int h= s->avctx->height;
|
|
3775
|
|
3776 if(plane_index){
|
|
3777 w>>= s->chroma_h_shift;
|
|
3778 h>>= s->chroma_v_shift;
|
|
3779 }
|
|
3780 s->plane[plane_index].width = w;
|
|
3781 s->plane[plane_index].height= h;
|
|
3782 //av_log(NULL, AV_LOG_DEBUG, "%d %d\n", w, h);
|
|
3783 for(level=s->spatial_decomposition_count-1; level>=0; level--){
|
|
3784 for(orientation=level ? 1 : 0; orientation<4; orientation++){
|
|
3785 SubBand *b= &s->plane[plane_index].band[level][orientation];
|
|
3786
|
|
3787 b->buf= s->spatial_dwt_buffer;
|
|
3788 b->level= level;
|
|
3789 b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
|
|
3790 b->width = (w + !(orientation&1))>>1;
|
|
3791 b->height= (h + !(orientation>1))>>1;
|
|
3792
|
|
3793 b->stride_line = 1 << (s->spatial_decomposition_count - level);
|
|
3794 b->buf_x_offset = 0;
|
|
3795 b->buf_y_offset = 0;
|
|
3796
|
|
3797 if(orientation&1){
|
|
3798 b->buf += (w+1)>>1;
|
|
3799 b->buf_x_offset = (w+1)>>1;
|
|
3800 }
|
|
3801 if(orientation>1){
|
|
3802 b->buf += b->stride>>1;
|
|
3803 b->buf_y_offset = b->stride_line >> 1;
|
|
3804 }
|
|
3805
|
|
3806 if(level)
|
|
3807 b->parent= &s->plane[plane_index].band[level-1][orientation];
|
|
3808 b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff));
|
|
3809 }
|
|
3810 w= (w+1)>>1;
|
|
3811 h= (h+1)>>1;
|
|
3812 }
|
|
3813 }
|
|
3814
|
|
3815 for(i=0; i<MAX_REF_FRAMES; i++)
|
|
3816 for(j=0; j<MAX_REF_FRAMES; j++)
|
|
3817 scale_mv_ref[i][j] = 256*(i+1)/(j+1);
|
|
3818
|
|
3819 reset_contexts(s);
|
|
3820 /*
|
|
3821 width= s->width= avctx->width;
|
|
3822 height= s->height= avctx->height;
|
|
3823
|
|
3824 assert(width && height);
|
|
3825 */
|
|
3826 s->avctx->get_buffer(s->avctx, &s->mconly_picture);
|
|
3827
|
|
3828 return 0;
|
|
3829 }
|
|
3830
|
|
3831 static int qscale2qlog(int qscale){
|
|
3832 return rint(QROOT*log(qscale / (float)FF_QP2LAMBDA)/log(2))
|
|
3833 + 61*QROOT/8; //<64 >60
|
|
3834 }
|
|
3835
|
|
3836 static int ratecontrol_1pass(SnowContext *s, AVFrame *pict)
|
|
3837 {
|
|
3838 /* estimate the frame's complexity as a sum of weighted dwt coefs.
|
|
3839 * FIXME we know exact mv bits at this point,
|
|
3840 * but ratecontrol isn't set up to include them. */
|
|
3841 uint32_t coef_sum= 0;
|
|
3842 int level, orientation, delta_qlog;
|
|
3843
|
|
3844 for(level=0; level<s->spatial_decomposition_count; level++){
|
|
3845 for(orientation=level ? 1 : 0; orientation<4; orientation++){
|
|
3846 SubBand *b= &s->plane[0].band[level][orientation];
|
|
3847 DWTELEM *buf= b->buf;
|
|
3848 const int w= b->width;
|
|
3849 const int h= b->height;
|
|
3850 const int stride= b->stride;
|
|
3851 const int qlog= clip(2*QROOT + b->qlog, 0, QROOT*16);
|
|
3852 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
|
|
3853 const int qdiv= (1<<16)/qmul;
|
|
3854 int x, y;
|
|
3855 if(orientation==0)
|
|
3856 decorrelate(s, b, buf, stride, 1, 0);
|
|
3857 for(y=0; y<h; y++)
|
|
3858 for(x=0; x<w; x++)
|
|
3859 coef_sum+= abs(buf[x+y*stride]) * qdiv >> 16;
|
|
3860 if(orientation==0)
|
|
3861 correlate(s, b, buf, stride, 1, 0);
|
|
3862 }
|
|
3863 }
|
|
3864
|
|
3865 /* ugly, ratecontrol just takes a sqrt again */
|
|
3866 coef_sum = (uint64_t)coef_sum * coef_sum >> 16;
|
|
3867 assert(coef_sum < INT_MAX);
|
|
3868
|
|
3869 if(pict->pict_type == I_TYPE){
|
|
3870 s->m.current_picture.mb_var_sum= coef_sum;
|
|
3871 s->m.current_picture.mc_mb_var_sum= 0;
|
|
3872 }else{
|
|
3873 s->m.current_picture.mc_mb_var_sum= coef_sum;
|
|
3874 s->m.current_picture.mb_var_sum= 0;
|
|
3875 }
|
|
3876
|
|
3877 pict->quality= ff_rate_estimate_qscale(&s->m, 1);
|
|
3878 if (pict->quality < 0)
|
|
3879 return INT_MIN;
|
|
3880 s->lambda= pict->quality * 3/2;
|
|
3881 delta_qlog= qscale2qlog(pict->quality) - s->qlog;
|
|
3882 s->qlog+= delta_qlog;
|
|
3883 return delta_qlog;
|
|
3884 }
|
|
3885
|
|
3886 static void calculate_vissual_weight(SnowContext *s, Plane *p){
|
|
3887 int width = p->width;
|
|
3888 int height= p->height;
|
|
3889 int level, orientation, x, y;
|
|
3890
|
|
3891 for(level=0; level<s->spatial_decomposition_count; level++){
|
|
3892 for(orientation=level ? 1 : 0; orientation<4; orientation++){
|
|
3893 SubBand *b= &p->band[level][orientation];
|
|
3894 DWTELEM *buf= b->buf;
|
|
3895 int64_t error=0;
|
|
3896
|
|
3897 memset(s->spatial_dwt_buffer, 0, sizeof(int)*width*height);
|
|
3898 buf[b->width/2 + b->height/2*b->stride]= 256*256;
|
|
3899 ff_spatial_idwt(s->spatial_dwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
|
|
3900 for(y=0; y<height; y++){
|
|
3901 for(x=0; x<width; x++){
|
|
3902 int64_t d= s->spatial_dwt_buffer[x + y*width];
|
|
3903 error += d*d;
|
|
3904 }
|
|
3905 }
|
|
3906
|
|
3907 b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5);
|
|
3908 // av_log(NULL, AV_LOG_DEBUG, "%d %d %d\n", level, orientation, b->qlog/*, sqrt(error)*/);
|
|
3909 }
|
|
3910 }
|
|
3911 }
|
|
3912
|
|
3913 static int encode_init(AVCodecContext *avctx)
|
|
3914 {
|
|
3915 SnowContext *s = avctx->priv_data;
|
|
3916 int plane_index;
|
|
3917
|
|
3918 if(avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL){
|
|
3919 av_log(avctx, AV_LOG_ERROR, "this codec is under development, files encoded with it may not be decodable with future versions!!!\n"
|
|
3920 "use vstrict=-2 / -strict -2 to use it anyway\n");
|
|
3921 return -1;
|
|
3922 }
|
|
3923
|
|
3924 if(avctx->prediction_method == DWT_97
|
|
3925 && (avctx->flags & CODEC_FLAG_QSCALE)
|
|
3926 && avctx->global_quality == 0){
|
|
3927 av_log(avctx, AV_LOG_ERROR, "the 9/7 wavelet is incompatible with lossless mode\n");
|
|
3928 return -1;
|
|
3929 }
|
|
3930
|
|
3931 common_init(avctx);
|
|
3932 alloc_blocks(s);
|
|
3933
|
|
3934 s->version=0;
|
|
3935
|
|
3936 s->m.avctx = avctx;
|
|
3937 s->m.flags = avctx->flags;
|
|
3938 s->m.bit_rate= avctx->bit_rate;
|
|
3939
|
|
3940 s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
|
|
3941 s->m.me.map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
|
|
3942 s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
|
|
3943 s->m.obmc_scratchpad= av_mallocz(MB_SIZE*MB_SIZE*12*sizeof(uint32_t));
|
|
3944 h263_encode_init(&s->m); //mv_penalty
|
|
3945
|
|
3946 s->max_ref_frames = FFMAX(FFMIN(avctx->refs, MAX_REF_FRAMES), 1);
|
|
3947
|
|
3948 if(avctx->flags&CODEC_FLAG_PASS1){
|
|
3949 if(!avctx->stats_out)
|
|
3950 avctx->stats_out = av_mallocz(256);
|
|
3951 }
|
|
3952 if((avctx->flags&CODEC_FLAG_PASS2) || !(avctx->flags&CODEC_FLAG_QSCALE)){
|
|
3953 if(ff_rate_control_init(&s->m) < 0)
|
|
3954 return -1;
|
|
3955 }
|
|
3956 s->pass1_rc= !(avctx->flags & (CODEC_FLAG_QSCALE|CODEC_FLAG_PASS2));
|
|
3957
|
|
3958 for(plane_index=0; plane_index<3; plane_index++){
|
|
3959 calculate_vissual_weight(s, &s->plane[plane_index]);
|
|
3960 }
|
|
3961
|
|
3962
|
|
3963 avctx->coded_frame= &s->current_picture;
|
|
3964 switch(avctx->pix_fmt){
|
|
3965 // case PIX_FMT_YUV444P:
|
|
3966 // case PIX_FMT_YUV422P:
|
|
3967 case PIX_FMT_YUV420P:
|
|
3968 case PIX_FMT_GRAY8:
|
|
3969 // case PIX_FMT_YUV411P:
|
|
3970 // case PIX_FMT_YUV410P:
|
|
3971 s->colorspace_type= 0;
|
|
3972 break;
|
|
3973 /* case PIX_FMT_RGBA32:
|
|
3974 s->colorspace= 1;
|
|
3975 break;*/
|
|
3976 default:
|
|
3977 av_log(avctx, AV_LOG_ERROR, "format not supported\n");
|
|
3978 return -1;
|
|
3979 }
|
|
3980 // avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
|
|
3981 s->chroma_h_shift= 1;
|
|
3982 s->chroma_v_shift= 1;
|
|
3983
|
|
3984 ff_set_cmp(&s->dsp, s->dsp.me_cmp, s->avctx->me_cmp);
|
|
3985 ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, s->avctx->me_sub_cmp);
|
|
3986
|
|
3987 s->avctx->get_buffer(s->avctx, &s->input_picture);
|
|
3988
|
|
3989 if(s->avctx->me_method == ME_ITER){
|
|
3990 int i;
|
|
3991 int size= s->b_width * s->b_height << 2*s->block_max_depth;
|
|
3992 for(i=0; i<s->max_ref_frames; i++){
|
|
3993 s->ref_mvs[i]= av_mallocz(size*sizeof(int16_t[2]));
|
|
3994 s->ref_scores[i]= av_mallocz(size*sizeof(uint32_t));
|
|
3995 }
|
|
3996 }
|
|
3997
|
|
3998 return 0;
|
|
3999 }
|
|
4000
|
|
4001 static int frame_start(SnowContext *s){
|
|
4002 AVFrame tmp;
|
|
4003 int w= s->avctx->width; //FIXME round up to x16 ?
|
|
4004 int h= s->avctx->height;
|
|
4005
|
|
4006 if(s->current_picture.data[0]){
|
|
4007 draw_edges(s->current_picture.data[0], s->current_picture.linesize[0], w , h , EDGE_WIDTH );
|
|
4008 draw_edges(s->current_picture.data[1], s->current_picture.linesize[1], w>>1, h>>1, EDGE_WIDTH/2);
|
|
4009 draw_edges(s->current_picture.data[2], s->current_picture.linesize[2], w>>1, h>>1, EDGE_WIDTH/2);
|
|
4010 }
|
|
4011
|
|
4012 tmp= s->last_picture[s->max_ref_frames-1];
|
|
4013 memmove(s->last_picture+1, s->last_picture, (s->max_ref_frames-1)*sizeof(AVFrame));
|
|
4014 s->last_picture[0]= s->current_picture;
|
|
4015 s->current_picture= tmp;
|
|
4016
|
|
4017 if(s->keyframe){
|
|
4018 s->ref_frames= 0;
|
|
4019 }else{
|
|
4020 int i;
|
|
4021 for(i=0; i<s->max_ref_frames && s->last_picture[i].data[0]; i++)
|
|
4022 if(i && s->last_picture[i-1].key_frame)
|
|
4023 break;
|
|
4024 s->ref_frames= i;
|
|
4025 }
|
|
4026
|
|
4027 s->current_picture.reference= 1;
|
|
4028 if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){
|
|
4029 av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
|
|
4030 return -1;
|
|
4031 }
|
|
4032
|
|
4033 s->current_picture.key_frame= s->keyframe;
|
|
4034
|
|
4035 return 0;
|
|
4036 }
|
|
4037
|
|
4038 static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
|
|
4039 SnowContext *s = avctx->priv_data;
|
|
4040 RangeCoder * const c= &s->c;
|
|
4041 AVFrame *pict = data;
|
|
4042 const int width= s->avctx->width;
|
|
4043 const int height= s->avctx->height;
|
|
4044 int level, orientation, plane_index, i, y;
|
|
4045 uint8_t rc_header_bak[sizeof(s->header_state)];
|
|
4046 uint8_t rc_block_bak[sizeof(s->block_state)];
|
|
4047
|
|
4048 ff_init_range_encoder(c, buf, buf_size);
|
|
4049 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
|
|
4050
|
|
4051 for(i=0; i<3; i++){
|
|
4052 int shift= !!i;
|
|
4053 for(y=0; y<(height>>shift); y++)
|
|
4054 memcpy(&s->input_picture.data[i][y * s->input_picture.linesize[i]],
|
|
4055 &pict->data[i][y * pict->linesize[i]],
|
|
4056 width>>shift);
|
|
4057 }
|
|
4058 s->new_picture = *pict;
|
|
4059
|
|
4060 s->m.picture_number= avctx->frame_number;
|
|
4061 if(avctx->flags&CODEC_FLAG_PASS2){
|
|
4062 s->m.pict_type =
|
|
4063 pict->pict_type= s->m.rc_context.entry[avctx->frame_number].new_pict_type;
|
|
4064 s->keyframe= pict->pict_type==FF_I_TYPE;
|
|
4065 if(!(avctx->flags&CODEC_FLAG_QSCALE)) {
|
|
4066 pict->quality= ff_rate_estimate_qscale(&s->m, 0);
|
|
4067 if (pict->quality < 0)
|
|
4068 return -1;
|
|
4069 }
|
|
4070 }else{
|
|
4071 s->keyframe= avctx->gop_size==0 || avctx->frame_number % avctx->gop_size == 0;
|
|
4072 s->m.pict_type=
|
|
4073 pict->pict_type= s->keyframe ? FF_I_TYPE : FF_P_TYPE;
|
|
4074 }
|
|
4075
|
|
4076 if(s->pass1_rc && avctx->frame_number == 0)
|
|
4077 pict->quality= 2*FF_QP2LAMBDA;
|
|
4078 if(pict->quality){
|
|
4079 s->qlog= qscale2qlog(pict->quality);
|
|
4080 s->lambda = pict->quality * 3/2;
|
|
4081 }
|
|
4082 if(s->qlog < 0 || (!pict->quality && (avctx->flags & CODEC_FLAG_QSCALE))){
|
|
4083 s->qlog= LOSSLESS_QLOG;
|
|
4084 s->lambda = 0;
|
|
4085 }//else keep previous frame's qlog until after motion est
|
|
4086
|
|
4087 frame_start(s);
|
|
4088
|
|
4089 s->m.current_picture_ptr= &s->m.current_picture;
|
|
4090 if(pict->pict_type == P_TYPE){
|
|
4091 int block_width = (width +15)>>4;
|
|
4092 int block_height= (height+15)>>4;
|
|
4093 int stride= s->current_picture.linesize[0];
|
|
4094
|
|
4095 assert(s->current_picture.data[0]);
|
|
4096 assert(s->last_picture[0].data[0]);
|
|
4097
|
|
4098 s->m.avctx= s->avctx;
|
|
4099 s->m.current_picture.data[0]= s->current_picture.data[0];
|
|
4100 s->m. last_picture.data[0]= s->last_picture[0].data[0];
|
|
4101 s->m. new_picture.data[0]= s-> input_picture.data[0];
|
|
4102 s->m. last_picture_ptr= &s->m. last_picture;
|
|
4103 s->m.linesize=
|
|
4104 s->m. last_picture.linesize[0]=
|
|
4105 s->m. new_picture.linesize[0]=
|
|
4106 s->m.current_picture.linesize[0]= stride;
|
|
4107 s->m.uvlinesize= s->current_picture.linesize[1];
|
|
4108 s->m.width = width;
|
|
4109 s->m.height= height;
|
|
4110 s->m.mb_width = block_width;
|
|
4111 s->m.mb_height= block_height;
|
|
4112 s->m.mb_stride= s->m.mb_width+1;
|
|
4113 s->m.b8_stride= 2*s->m.mb_width+1;
|
|
4114 s->m.f_code=1;
|
|
4115 s->m.pict_type= pict->pict_type;
|
|
4116 s->m.me_method= s->avctx->me_method;
|
|
4117 s->m.me.scene_change_score=0;
|
|
4118 s->m.flags= s->avctx->flags;
|
|
4119 s->m.quarter_sample= (s->avctx->flags & CODEC_FLAG_QPEL)!=0;
|
|
4120 s->m.out_format= FMT_H263;
|
|
4121 s->m.unrestricted_mv= 1;
|
|
4122
|
|
4123 s->m.lambda = s->lambda;
|
|
4124 s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
|
|
4125 s->lambda2= s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
|
|
4126
|
|
4127 s->m.dsp= s->dsp; //move
|
|
4128 ff_init_me(&s->m);
|
|
4129 s->dsp= s->m.dsp;
|
|
4130 }
|
|
4131
|
|
4132 if(s->pass1_rc){
|
|
4133 memcpy(rc_header_bak, s->header_state, sizeof(s->header_state));
|
|
4134 memcpy(rc_block_bak, s->block_state, sizeof(s->block_state));
|
|
4135 }
|
|
4136
|
|
4137 redo_frame:
|
|
4138
|
|
4139 s->m.pict_type = pict->pict_type;
|
|
4140 s->qbias= pict->pict_type == P_TYPE ? 2 : 0;
|
|
4141
|
|
4142 encode_header(s);
|
|
4143 s->m.misc_bits = 8*(s->c.bytestream - s->c.bytestream_start);
|
|
4144 encode_blocks(s, 1);
|
|
4145 s->m.mv_bits = 8*(s->c.bytestream - s->c.bytestream_start) - s->m.misc_bits;
|
|
4146
|
|
4147 for(plane_index=0; plane_index<3; plane_index++){
|
|
4148 Plane *p= &s->plane[plane_index];
|
|
4149 int w= p->width;
|
|
4150 int h= p->height;
|
|
4151 int x, y;
|
|
4152 // int bits= put_bits_count(&s->c.pb);
|
|
4153
|
|
4154 if(!(avctx->flags2 & CODEC_FLAG2_MEMC_ONLY)){
|
|
4155 //FIXME optimize
|
|
4156 if(pict->data[plane_index]) //FIXME gray hack
|
|
4157 for(y=0; y<h; y++){
|
|
4158 for(x=0; x<w; x++){
|
|
4159 s->spatial_dwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<FRAC_BITS;
|
|
4160 }
|
|
4161 }
|
|
4162 predict_plane(s, s->spatial_dwt_buffer, plane_index, 0);
|
|
4163
|
|
4164 if( plane_index==0
|
|
4165 && pict->pict_type == P_TYPE
|
|
4166 && !(avctx->flags&CODEC_FLAG_PASS2)
|
|
4167 && s->m.me.scene_change_score > s->avctx->scenechange_threshold){
|
|
4168 ff_init_range_encoder(c, buf, buf_size);
|
|
4169 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
|
|
4170 pict->pict_type= FF_I_TYPE;
|
|
4171 s->keyframe=1;
|
|
4172 s->current_picture.key_frame=1;
|
|
4173 reset_contexts(s);
|
|
4174 goto redo_frame;
|
|
4175 }
|
|
4176
|
|
4177 if(s->qlog == LOSSLESS_QLOG){
|
|
4178 for(y=0; y<h; y++){
|
|
4179 for(x=0; x<w; x++){
|
|
4180 s->spatial_dwt_buffer[y*w + x]= (s->spatial_dwt_buffer[y*w + x] + (1<<(FRAC_BITS-1))-1)>>FRAC_BITS;
|
|
4181 }
|
|
4182 }
|
|
4183 }
|
|
4184
|
|
4185 ff_spatial_dwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
|
|
4186
|
|
4187 if(s->pass1_rc && plane_index==0){
|
|
4188 int delta_qlog = ratecontrol_1pass(s, pict);
|
|
4189 if (delta_qlog <= INT_MIN)
|
|
4190 return -1;
|
|
4191 if(delta_qlog){
|
|
4192 //reordering qlog in the bitstream would eliminate this reset
|
|
4193 ff_init_range_encoder(c, buf, buf_size);
|
|
4194 memcpy(s->header_state, rc_header_bak, sizeof(s->header_state));
|
|
4195 memcpy(s->block_state, rc_block_bak, sizeof(s->block_state));
|
|
4196 encode_header(s);
|
|
4197 encode_blocks(s, 0);
|
|
4198 }
|
|
4199 }
|
|
4200
|
|
4201 for(level=0; level<s->spatial_decomposition_count; level++){
|
|
4202 for(orientation=level ? 1 : 0; orientation<4; orientation++){
|
|
4203 SubBand *b= &p->band[level][orientation];
|
|
4204
|
|
4205 quantize(s, b, b->buf, b->stride, s->qbias);
|
|
4206 if(orientation==0)
|
|
4207 decorrelate(s, b, b->buf, b->stride, pict->pict_type == P_TYPE, 0);
|
|
4208 encode_subband(s, b, b->buf, b->parent ? b->parent->buf : NULL, b->stride, orientation);
|
|
4209 assert(b->parent==NULL || b->parent->stride == b->stride*2);
|
|
4210 if(orientation==0)
|
|
4211 correlate(s, b, b->buf, b->stride, 1, 0);
|
|
4212 }
|
|
4213 }
|
|
4214 // av_log(NULL, AV_LOG_DEBUG, "plane:%d bits:%d\n", plane_index, put_bits_count(&s->c.pb) - bits);
|
|
4215
|
|
4216 for(level=0; level<s->spatial_decomposition_count; level++){
|
|
4217 for(orientation=level ? 1 : 0; orientation<4; orientation++){
|
|
4218 SubBand *b= &p->band[level][orientation];
|
|
4219
|
|
4220 dequantize(s, b, b->buf, b->stride);
|
|
4221 }
|
|
4222 }
|
|
4223
|
|
4224 ff_spatial_idwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
|
|
4225 if(s->qlog == LOSSLESS_QLOG){
|
|
4226 for(y=0; y<h; y++){
|
|
4227 for(x=0; x<w; x++){
|
|
4228 s->spatial_dwt_buffer[y*w + x]<<=FRAC_BITS;
|
|
4229 }
|
|
4230 }
|
|
4231 }
|
|
4232 {START_TIMER
|
|
4233 predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
|
|
4234 STOP_TIMER("pred-conv")}
|
|
4235 }else{
|
|
4236 //ME/MC only
|
|
4237 if(pict->pict_type == I_TYPE){
|
|
4238 for(y=0; y<h; y++){
|
|
4239 for(x=0; x<w; x++){
|
|
4240 s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]=
|
|
4241 pict->data[plane_index][y*pict->linesize[plane_index] + x];
|
|
4242 }
|
|
4243 }
|
|
4244 }else{
|
|
4245 memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h);
|
|
4246 predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
|
|
4247 }
|
|
4248 }
|
|
4249 if(s->avctx->flags&CODEC_FLAG_PSNR){
|
|
4250 int64_t error= 0;
|
|
4251
|
|
4252 if(pict->data[plane_index]) //FIXME gray hack
|
|
4253 for(y=0; y<h; y++){
|
|
4254 for(x=0; x<w; x++){
|
|
4255 int d= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x] - pict->data[plane_index][y*pict->linesize[plane_index] + x];
|
|
4256 error += d*d;
|
|
4257 }
|
|
4258 }
|
|
4259 s->avctx->error[plane_index] += error;
|
|
4260 s->current_picture.error[plane_index] = error;
|
|
4261 }
|
|
4262 }
|
|
4263
|
|
4264 if(s->last_picture[s->max_ref_frames-1].data[0])
|
|
4265 avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]);
|
|
4266
|
|
4267 s->current_picture.coded_picture_number = avctx->frame_number;
|
|
4268 s->current_picture.pict_type = pict->pict_type;
|
|
4269 s->current_picture.quality = pict->quality;
|
|
4270 s->m.frame_bits = 8*(s->c.bytestream - s->c.bytestream_start);
|
|
4271 s->m.p_tex_bits = s->m.frame_bits - s->m.misc_bits - s->m.mv_bits;
|
|
4272 s->m.current_picture.display_picture_number =
|
|
4273 s->m.current_picture.coded_picture_number = avctx->frame_number;
|
|
4274 s->m.current_picture.quality = pict->quality;
|
|
4275 s->m.total_bits += 8*(s->c.bytestream - s->c.bytestream_start);
|
|
4276 if(s->pass1_rc)
|
|
4277 if (ff_rate_estimate_qscale(&s->m, 0) < 0)
|
|
4278 return -1;
|
|
4279 if(avctx->flags&CODEC_FLAG_PASS1)
|
|
4280 ff_write_pass1_stats(&s->m);
|
|
4281 s->m.last_pict_type = s->m.pict_type;
|
|
4282
|
|
4283 emms_c();
|
|
4284
|
|
4285 return ff_rac_terminate(c);
|
|
4286 }
|
|
4287
|
|
4288 static void common_end(SnowContext *s){
|
|
4289 int plane_index, level, orientation, i;
|
|
4290
|
|
4291 av_freep(&s->spatial_dwt_buffer);
|
|
4292
|
|
4293 av_freep(&s->m.me.scratchpad);
|
|
4294 av_freep(&s->m.me.map);
|
|
4295 av_freep(&s->m.me.score_map);
|
|
4296 av_freep(&s->m.obmc_scratchpad);
|
|
4297
|
|
4298 av_freep(&s->block);
|
|
4299
|
|
4300 for(i=0; i<MAX_REF_FRAMES; i++){
|
|
4301 av_freep(&s->ref_mvs[i]);
|
|
4302 av_freep(&s->ref_scores[i]);
|
|
4303 if(s->last_picture[i].data[0])
|
|
4304 s->avctx->release_buffer(s->avctx, &s->last_picture[i]);
|
|
4305 }
|
|
4306
|
|
4307 for(plane_index=0; plane_index<3; plane_index++){
|
|
4308 for(level=s->spatial_decomposition_count-1; level>=0; level--){
|
|
4309 for(orientation=level ? 1 : 0; orientation<4; orientation++){
|
|
4310 SubBand *b= &s->plane[plane_index].band[level][orientation];
|
|
4311
|
|
4312 av_freep(&b->x_coeff);
|
|
4313 }
|
|
4314 }
|
|
4315 }
|
|
4316 }
|
|
4317
|
|
4318 static int encode_end(AVCodecContext *avctx)
|
|
4319 {
|
|
4320 SnowContext *s = avctx->priv_data;
|
|
4321
|
|
4322 common_end(s);
|
|
4323 av_free(avctx->stats_out);
|
|
4324
|
|
4325 return 0;
|
|
4326 }
|
|
4327
|
|
4328 static int decode_init(AVCodecContext *avctx)
|
|
4329 {
|
|
4330 SnowContext *s = avctx->priv_data;
|
|
4331 int block_size;
|
|
4332
|
|
4333 avctx->pix_fmt= PIX_FMT_YUV420P;
|
|
4334
|
|
4335 common_init(avctx);
|
|
4336
|
|
4337 block_size = MB_SIZE >> s->block_max_depth;
|
|
4338 slice_buffer_init(&s->sb, s->plane[0].height, (block_size) + (s->spatial_decomposition_count * (s->spatial_decomposition_count + 3)) + 1, s->plane[0].width, s->spatial_dwt_buffer);
|
|
4339
|
|
4340 return 0;
|
|
4341 }
|
|
4342
|
|
4343 static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8_t *buf, int buf_size){
|
|
4344 SnowContext *s = avctx->priv_data;
|
|
4345 RangeCoder * const c= &s->c;
|
|
4346 int bytes_read;
|
|
4347 AVFrame *picture = data;
|
|
4348 int level, orientation, plane_index;
|
|
4349
|
|
4350 ff_init_range_decoder(c, buf, buf_size);
|
|
4351 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
|
|
4352
|
|
4353 s->current_picture.pict_type= FF_I_TYPE; //FIXME I vs. P
|
|
4354 decode_header(s);
|
|
4355 if(!s->block) alloc_blocks(s);
|
|
4356
|
|
4357 frame_start(s);
|
|
4358 //keyframe flag dupliaction mess FIXME
|
|
4359 if(avctx->debug&FF_DEBUG_PICT_INFO)
|
|
4360 av_log(avctx, AV_LOG_ERROR, "keyframe:%d qlog:%d\n", s->keyframe, s->qlog);
|
|
4361
|
|
4362 decode_blocks(s);
|
|
4363
|
|
4364 for(plane_index=0; plane_index<3; plane_index++){
|
|
4365 Plane *p= &s->plane[plane_index];
|
|
4366 int w= p->width;
|
|
4367 int h= p->height;
|
|
4368 int x, y;
|
|
4369 int decode_state[MAX_DECOMPOSITIONS][4][1]; /* Stored state info for unpack_coeffs. 1 variable per instance. */
|
|
4370
|
|
4371 if(s->avctx->debug&2048){
|
|
4372 memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h);
|
|
4373 predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
|
|
4374
|
|
4375 for(y=0; y<h; y++){
|
|
4376 for(x=0; x<w; x++){
|
|
4377 int v= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x];
|
|
4378 s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v;
|
|
4379 }
|
|
4380 }
|
|
4381 }
|
|
4382
|
|
4383 { START_TIMER
|
|
4384 for(level=0; level<s->spatial_decomposition_count; level++){
|
|
4385 for(orientation=level ? 1 : 0; orientation<4; orientation++){
|
|
4386 SubBand *b= &p->band[level][orientation];
|
|
4387 unpack_coeffs(s, b, b->parent, orientation);
|
|
4388 }
|
|
4389 }
|
|
4390 STOP_TIMER("unpack coeffs");
|
|
4391 }
|
|
4392
|
|
4393 {START_TIMER
|
|
4394 const int mb_h= s->b_height << s->block_max_depth;
|
|
4395 const int block_size = MB_SIZE >> s->block_max_depth;
|
|
4396 const int block_w = plane_index ? block_size/2 : block_size;
|
|
4397 int mb_y;
|
|
4398 dwt_compose_t cs[MAX_DECOMPOSITIONS];
|
|
4399 int yd=0, yq=0;
|
|
4400 int y;
|
|
4401 int end_y;
|
|
4402
|
|
4403 ff_spatial_idwt_buffered_init(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count);
|
|
4404 for(mb_y=0; mb_y<=mb_h; mb_y++){
|
|
4405
|
|
4406 int slice_starty = block_w*mb_y;
|
|
4407 int slice_h = block_w*(mb_y+1);
|
|
4408 if (!(s->keyframe || s->avctx->debug&512)){
|
|
4409 slice_starty = FFMAX(0, slice_starty - (block_w >> 1));
|
|
4410 slice_h -= (block_w >> 1);
|
|
4411 }
|
|
4412
|
|
4413 {
|
|
4414 START_TIMER
|
|
4415 for(level=0; level<s->spatial_decomposition_count; level++){
|
|
4416 for(orientation=level ? 1 : 0; orientation<4; orientation++){
|
|
4417 SubBand *b= &p->band[level][orientation];
|
|
4418 int start_y;
|
|
4419 int end_y;
|
|
4420 int our_mb_start = mb_y;
|
|
4421 int our_mb_end = (mb_y + 1);
|
|
4422 const int extra= 3;
|
|
4423 start_y = (mb_y ? ((block_w * our_mb_start) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra: 0);
|
|
4424 end_y = (((block_w * our_mb_end) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra);
|
|
4425 if (!(s->keyframe || s->avctx->debug&512)){
|
|
4426 start_y = FFMAX(0, start_y - (block_w >> (1+s->spatial_decomposition_count - level)));
|
|
4427 end_y = FFMAX(0, end_y - (block_w >> (1+s->spatial_decomposition_count - level)));
|
|
4428 }
|
|
4429 start_y = FFMIN(b->height, start_y);
|
|
4430 end_y = FFMIN(b->height, end_y);
|
|
4431
|
|
4432 if (start_y != end_y){
|
|
4433 if (orientation == 0){
|
|
4434 SubBand * correlate_band = &p->band[0][0];
|
|
4435 int correlate_end_y = FFMIN(b->height, end_y + 1);
|
|
4436 int correlate_start_y = FFMIN(b->height, (start_y ? start_y + 1 : 0));
|
|
4437 decode_subband_slice_buffered(s, correlate_band, &s->sb, correlate_start_y, correlate_end_y, decode_state[0][0]);
|
|
4438 correlate_slice_buffered(s, &s->sb, correlate_band, correlate_band->buf, correlate_band->stride, 1, 0, correlate_start_y, correlate_end_y);
|
|
4439 dequantize_slice_buffered(s, &s->sb, correlate_band, correlate_band->buf, correlate_band->stride, start_y, end_y);
|
|
4440 }
|
|
4441 else
|
|
4442 decode_subband_slice_buffered(s, b, &s->sb, start_y, end_y, decode_state[level][orientation]);
|
|
4443 }
|
|
4444 }
|
|
4445 }
|
|
4446 STOP_TIMER("decode_subband_slice");
|
|
4447 }
|
|
4448
|
|
4449 { START_TIMER
|
|
4450 for(; yd<slice_h; yd+=4){
|
|
4451 ff_spatial_idwt_buffered_slice(&s->dsp, cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd);
|
|
4452 }
|
|
4453 STOP_TIMER("idwt slice");}
|
|
4454
|
|
4455
|
|
4456 if(s->qlog == LOSSLESS_QLOG){
|
|
4457 for(; yq<slice_h && yq<h; yq++){
|
|
4458 DWTELEM * line = slice_buffer_get_line(&s->sb, yq);
|
|
4459 for(x=0; x<w; x++){
|
|
4460 line[x] <<= FRAC_BITS;
|
|
4461 }
|
|
4462 }
|
|
4463 }
|
|
4464
|
|
4465 predict_slice_buffered(s, &s->sb, s->spatial_dwt_buffer, plane_index, 1, mb_y);
|
|
4466
|
|
4467 y = FFMIN(p->height, slice_starty);
|
|
4468 end_y = FFMIN(p->height, slice_h);
|
|
4469 while(y < end_y)
|
|
4470 slice_buffer_release(&s->sb, y++);
|
|
4471 }
|
|
4472
|
|
4473 slice_buffer_flush(&s->sb);
|
|
4474
|
|
4475 STOP_TIMER("idwt + predict_slices")}
|
|
4476 }
|
|
4477
|
|
4478 emms_c();
|
|
4479
|
|
4480 if(s->last_picture[s->max_ref_frames-1].data[0])
|
|
4481 avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]);
|
|
4482
|
|
4483 if(!(s->avctx->debug&2048))
|
|
4484 *picture= s->current_picture;
|
|
4485 else
|
|
4486 *picture= s->mconly_picture;
|
|
4487
|
|
4488 *data_size = sizeof(AVFrame);
|
|
4489
|
|
4490 bytes_read= c->bytestream - c->bytestream_start;
|
|
4491 if(bytes_read ==0) av_log(s->avctx, AV_LOG_ERROR, "error at end of frame\n"); //FIXME
|
|
4492
|
|
4493 return bytes_read;
|
|
4494 }
|
|
4495
|
|
4496 static int decode_end(AVCodecContext *avctx)
|
|
4497 {
|
|
4498 SnowContext *s = avctx->priv_data;
|
|
4499
|
|
4500 slice_buffer_destroy(&s->sb);
|
|
4501
|
|
4502 common_end(s);
|
|
4503
|
|
4504 return 0;
|
|
4505 }
|
|
4506
|
|
4507 AVCodec snow_decoder = {
|
|
4508 "snow",
|
|
4509 CODEC_TYPE_VIDEO,
|
|
4510 CODEC_ID_SNOW,
|
|
4511 sizeof(SnowContext),
|
|
4512 decode_init,
|
|
4513 NULL,
|
|
4514 decode_end,
|
|
4515 decode_frame,
|
|
4516 0 /*CODEC_CAP_DR1*/ /*| CODEC_CAP_DRAW_HORIZ_BAND*/,
|
|
4517 NULL
|
|
4518 };
|
|
4519
|
|
4520 #ifdef CONFIG_ENCODERS
|
|
4521 AVCodec snow_encoder = {
|
|
4522 "snow",
|
|
4523 CODEC_TYPE_VIDEO,
|
|
4524 CODEC_ID_SNOW,
|
|
4525 sizeof(SnowContext),
|
|
4526 encode_init,
|
|
4527 encode_frame,
|
|
4528 encode_end,
|
|
4529 };
|
|
4530 #endif
|
|
4531
|
|
4532
|
|
4533 #if 0
|
|
4534 #undef malloc
|
|
4535 #undef free
|
|
4536 #undef printf
|
|
4537
|
|
4538 int main(){
|
|
4539 int width=256;
|
|
4540 int height=256;
|
|
4541 int buffer[2][width*height];
|
|
4542 SnowContext s;
|
|
4543 int i;
|
|
4544 s.spatial_decomposition_count=6;
|
|
4545 s.spatial_decomposition_type=1;
|
|
4546
|
|
4547 printf("testing 5/3 DWT\n");
|
|
4548 for(i=0; i<width*height; i++)
|
|
4549 buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
|
|
4550
|
|
4551 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
|
|
4552 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
|
|
4553
|
|
4554 for(i=0; i<width*height; i++)
|
|
4555 if(buffer[0][i]!= buffer[1][i]) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
|
|
4556
|
|
4557 printf("testing 9/7 DWT\n");
|
|
4558 s.spatial_decomposition_type=0;
|
|
4559 for(i=0; i<width*height; i++)
|
|
4560 buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
|
|
4561
|
|
4562 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
|
|
4563 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
|
|
4564
|
|
4565 for(i=0; i<width*height; i++)
|
|
4566 if(FFABS(buffer[0][i] - buffer[1][i])>20) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
|
|
4567
|
|
4568 #if 0
|
|
4569 printf("testing AC coder\n");
|
|
4570 memset(s.header_state, 0, sizeof(s.header_state));
|
|
4571 ff_init_range_encoder(&s.c, buffer[0], 256*256);
|
|
4572 ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
|
|
4573
|
|
4574 for(i=-256; i<256; i++){
|
|
4575 START_TIMER
|
|
4576 put_symbol(&s.c, s.header_state, i*i*i/3*FFABS(i), 1);
|
|
4577 STOP_TIMER("put_symbol")
|
|
4578 }
|
|
4579 ff_rac_terminate(&s.c);
|
|
4580
|
|
4581 memset(s.header_state, 0, sizeof(s.header_state));
|
|
4582 ff_init_range_decoder(&s.c, buffer[0], 256*256);
|
|
4583 ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
|
|
4584
|
|
4585 for(i=-256; i<256; i++){
|
|
4586 int j;
|
|
4587 START_TIMER
|
|
4588 j= get_symbol(&s.c, s.header_state, 1);
|
|
4589 STOP_TIMER("get_symbol")
|
|
4590 if(j!=i*i*i/3*FFABS(i)) printf("fsck: %d != %d\n", i, j);
|
|
4591 }
|
|
4592 #endif
|
|
4593 {
|
|
4594 int level, orientation, x, y;
|
|
4595 int64_t errors[8][4];
|
|
4596 int64_t g=0;
|
|
4597
|
|
4598 memset(errors, 0, sizeof(errors));
|
|
4599 s.spatial_decomposition_count=3;
|
|
4600 s.spatial_decomposition_type=0;
|
|
4601 for(level=0; level<s.spatial_decomposition_count; level++){
|
|
4602 for(orientation=level ? 1 : 0; orientation<4; orientation++){
|
|
4603 int w= width >> (s.spatial_decomposition_count-level);
|
|
4604 int h= height >> (s.spatial_decomposition_count-level);
|
|
4605 int stride= width << (s.spatial_decomposition_count-level);
|
|
4606 DWTELEM *buf= buffer[0];
|
|
4607 int64_t error=0;
|
|
4608
|
|
4609 if(orientation&1) buf+=w;
|
|
4610 if(orientation>1) buf+=stride>>1;
|
|
4611
|
|
4612 memset(buffer[0], 0, sizeof(int)*width*height);
|
|
4613 buf[w/2 + h/2*stride]= 256*256;
|
|
4614 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
|
|
4615 for(y=0; y<height; y++){
|
|
4616 for(x=0; x<width; x++){
|
|
4617 int64_t d= buffer[0][x + y*width];
|
|
4618 error += d*d;
|
|
4619 if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9 && level==2) printf("%8lld ", d);
|
|
4620 }
|
|
4621 if(FFABS(height/2-y)<9 && level==2) printf("\n");
|
|
4622 }
|
|
4623 error= (int)(sqrt(error)+0.5);
|
|
4624 errors[level][orientation]= error;
|
|
4625 if(g) g=ff_gcd(g, error);
|
|
4626 else g= error;
|
|
4627 }
|
|
4628 }
|
|
4629 printf("static int const visual_weight[][4]={\n");
|
|
4630 for(level=0; level<s.spatial_decomposition_count; level++){
|
|
4631 printf(" {");
|
|
4632 for(orientation=0; orientation<4; orientation++){
|
|
4633 printf("%8lld,", errors[level][orientation]/g);
|
|
4634 }
|
|
4635 printf("},\n");
|
|
4636 }
|
|
4637 printf("};\n");
|
|
4638 {
|
|
4639 int level=2;
|
|
4640 int orientation=3;
|
|
4641 int w= width >> (s.spatial_decomposition_count-level);
|
|
4642 int h= height >> (s.spatial_decomposition_count-level);
|
|
4643 int stride= width << (s.spatial_decomposition_count-level);
|
|
4644 DWTELEM *buf= buffer[0];
|
|
4645 int64_t error=0;
|
|
4646
|
|
4647 buf+=w;
|
|
4648 buf+=stride>>1;
|
|
4649
|
|
4650 memset(buffer[0], 0, sizeof(int)*width*height);
|
|
4651 #if 1
|
|
4652 for(y=0; y<height; y++){
|
|
4653 for(x=0; x<width; x++){
|
|
4654 int tab[4]={0,2,3,1};
|
|
4655 buffer[0][x+width*y]= 256*256*tab[(x&1) + 2*(y&1)];
|
|
4656 }
|
|
4657 }
|
|
4658 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
|
|
4659 #else
|
|
4660 for(y=0; y<h; y++){
|
|
4661 for(x=0; x<w; x++){
|
|
4662 buf[x + y*stride ]=169;
|
|
4663 buf[x + y*stride-w]=64;
|
|
4664 }
|
|
4665 }
|
|
4666 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
|
|
4667 #endif
|
|
4668 for(y=0; y<height; y++){
|
|
4669 for(x=0; x<width; x++){
|
|
4670 int64_t d= buffer[0][x + y*width];
|
|
4671 error += d*d;
|
|
4672 if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9) printf("%8lld ", d);
|
|
4673 }
|
|
4674 if(FFABS(height/2-y)<9) printf("\n");
|
|
4675 }
|
|
4676 }
|
|
4677
|
|
4678 }
|
|
4679 return 0;
|
|
4680 }
|
|
4681 #endif
|
|
4682
|