comparison snow.c @ 2562:d0a58dca5ad2 libavcodec

slice-based idwt (10% faster decoding)
author lorenm
date Tue, 15 Mar 2005 21:05:34 +0000
parents d3885f927bc7
children a5a62827f195
comparison
equal deleted inserted replaced
2561:eb72c01df6ed 2562:d0a58dca5ad2
428 BlockNode *block; 428 BlockNode *block;
429 429
430 MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independent of MpegEncContext, so this will be removed then (FIXME/XXX) 430 MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independent of MpegEncContext, so this will be removed then (FIXME/XXX)
431 }SnowContext; 431 }SnowContext;
432 432
/*
 * State carried between successive steps of one level's vertical compose
 * pass, so that the pass can be resumed two rows at a time instead of being
 * run over the whole buffer in a single loop.
 */
433 typedef struct {
434 DWTELEM *b0; /* b0..b3: row pointers stored at the end of one *_dy step for the next one. */
435 DWTELEM *b1; /* spatial_compose53i_init/_dy only set and read b0 and b1; */
436 DWTELEM *b2; /* spatial_compose97i_init/_dy use all four. */
437 DWTELEM *b3;
438 int y; /* row position of the next step; every *_dy call adds 2 */
439 } dwt_compose_t;
440
433 #ifdef __sgi 441 #ifdef __sgi
434 // Avoid a name clash on SGI IRIX 442 // Avoid a name clash on SGI IRIX
435 #undef qexp 443 #undef qexp
436 #endif 444 #endif
437 #define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0 445 #define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
1164 for(i=0; i<width; i++){ 1172 for(i=0; i<width; i++){
1165 b1[i] -= (b0[i] + b2[i] + 2)>>2; 1173 b1[i] -= (b0[i] + b2[i] + 2)>>2;
1166 } 1174 }
1167 } 1175 }
1168 1176
1169 static void spatial_compose53i(DWTELEM *buffer, int width, int height, int stride){ 1177 static void spatial_compose53i_init(dwt_compose_t *cs, DWTELEM *buffer, int height, int stride){
1170 int y; 1178 cs->b0 = buffer + mirror(-1-1, height-1)*stride;
1171 DWTELEM *b0= buffer + mirror(-1-1, height-1)*stride; 1179 cs->b1 = buffer + mirror(-1 , height-1)*stride;
1172 DWTELEM *b1= buffer + mirror(-1 , height-1)*stride; 1180 cs->y = -1;
1173 1181 }
1174 for(y=-1; y<=height; y+=2){ 1182
1175 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride; 1183 static void spatial_compose53i_dy(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride){
1176 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride; 1184 int y= cs->y;
1185 DWTELEM *b0= cs->b0;
1186 DWTELEM *b1= cs->b1;
1187 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1188 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
1177 1189
1178 {START_TIMER 1190 {START_TIMER
1179 if(b1 <= b3) vertical_compose53iL0(b1, b2, b3, width); 1191 if(b1 <= b3) vertical_compose53iL0(b1, b2, b3, width);
1180 if(b0 <= b2) vertical_compose53iH0(b0, b1, b2, width); 1192 if(b0 <= b2) vertical_compose53iH0(b0, b1, b2, width);
1181 STOP_TIMER("vertical_compose53i*")} 1193 STOP_TIMER("vertical_compose53i*")}
1183 {START_TIMER 1195 {START_TIMER
1184 if(y-1 >= 0) horizontal_compose53i(b0, width); 1196 if(y-1 >= 0) horizontal_compose53i(b0, width);
1185 if(b0 <= b2) horizontal_compose53i(b1, width); 1197 if(b0 <= b2) horizontal_compose53i(b1, width);
1186 STOP_TIMER("horizontal_compose53i")} 1198 STOP_TIMER("horizontal_compose53i")}
1187 1199
1188 b0=b2; 1200 cs->b0 = b2;
1189 b1=b3; 1201 cs->b1 = b3;
1190 } 1202 cs->y += 2;
1203 }
1204
/*
 * Whole-buffer form of the 53i compose pass, expressed through the
 * incremental helpers: spatial_compose53i_init() sets cs.y to -1 and each
 * spatial_compose53i_dy() call adds 2, so this visits the same y values as
 * the former "for(y=-1; y<=height; y+=2)" loop.
 */
1205 static void spatial_compose53i(DWTELEM *buffer, int width, int height, int stride){
1206 dwt_compose_t cs;
1207 spatial_compose53i_init(&cs, buffer, height, stride);
1208 while(cs.y <= height)
1209 spatial_compose53i_dy(&cs, buffer, width, height, stride);
1191 } 1210 }
1192 1211
1193 1212
1194 static void horizontal_compose97i(DWTELEM *b, int width){ 1213 static void horizontal_compose97i(DWTELEM *b, int width){
1195 DWTELEM temp[width]; 1214 DWTELEM temp[width];
1238 for(i=0; i<width; i++){ 1257 for(i=0; i<width; i++){
1239 b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS; 1258 b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1240 } 1259 }
1241 } 1260 }
1242 1261
1243 static void spatial_compose97i(DWTELEM *buffer, int width, int height, int stride){ 1262 static void spatial_compose97i_init(dwt_compose_t *cs, DWTELEM *buffer, int height, int stride){
1244 int y; 1263 cs->b0 = buffer + mirror(-3-1, height-1)*stride;
1245 DWTELEM *b0= buffer + mirror(-3-1, height-1)*stride; 1264 cs->b1 = buffer + mirror(-3 , height-1)*stride;
1246 DWTELEM *b1= buffer + mirror(-3 , height-1)*stride; 1265 cs->b2 = buffer + mirror(-3+1, height-1)*stride;
1247 DWTELEM *b2= buffer + mirror(-3+1, height-1)*stride; 1266 cs->b3 = buffer + mirror(-3+2, height-1)*stride;
1248 DWTELEM *b3= buffer + mirror(-3+2, height-1)*stride; 1267 cs->y = -3;
1249 1268 }
1250 for(y=-3; y<=height; y+=2){ 1269
1251 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride; 1270 static void spatial_compose97i_dy(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride){
1252 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride; 1271 int y = cs->y;
1272 DWTELEM *b0= cs->b0;
1273 DWTELEM *b1= cs->b1;
1274 DWTELEM *b2= cs->b2;
1275 DWTELEM *b3= cs->b3;
1276 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1277 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1253 1278
1254 if(stride == width && y+4 < height && 0){ 1279 if(stride == width && y+4 < height && 0){
1255 int x; 1280 int x;
1256 for(x=0; x<width/2; x++) 1281 for(x=0; x<width/2; x++)
1257 b5[x] += 64*2; 1282 b5[x] += 64*2;
1270 {START_TIMER 1295 {START_TIMER
1271 if(y-1>= 0) horizontal_compose97i(b0, width); 1296 if(y-1>= 0) horizontal_compose97i(b0, width);
1272 if(b0 <= b2) horizontal_compose97i(b1, width); 1297 if(b0 <= b2) horizontal_compose97i(b1, width);
1273 if(width>400 && b0 <= b2){ 1298 if(width>400 && b0 <= b2){
1274 STOP_TIMER("horizontal_compose97i")}} 1299 STOP_TIMER("horizontal_compose97i")}}
1275 1300
1276 b0=b2; 1301 cs->b0=b2;
1277 b1=b3; 1302 cs->b1=b3;
1278 b2=b4; 1303 cs->b2=b4;
1279 b3=b5; 1304 cs->b3=b5;
1280 } 1305 cs->y += 2;
1281 } 1306 }
1282 1307
1283 void ff_spatial_idwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){ 1308 static void spatial_compose97i(DWTELEM *buffer, int width, int height, int stride){
1309 dwt_compose_t cs;
1310 spatial_compose97i_init(&cs, buffer, height, stride);
1311 while(cs.y <= height)
1312 spatial_compose97i_dy(&cs, buffer, width, height, stride);
1313 }
1314
1315 void ff_spatial_idwt_init(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1284 int level; 1316 int level;
1285
1286 for(level=decomposition_count-1; level>=0; level--){ 1317 for(level=decomposition_count-1; level>=0; level--){
1287 switch(type){ 1318 switch(type){
1288 case 0: spatial_compose97i(buffer, width>>level, height>>level, stride<<level); break; 1319 case 0: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break;
1289 case 1: spatial_compose53i(buffer, width>>level, height>>level, stride<<level); break; 1320 case 1: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break;
1290 case 2: spatial_composeX (buffer, width>>level, height>>level, stride<<level); break; 1321 /* not slicified yet */
1291 } 1322 case 2: spatial_composeX(buffer, width>>level, height>>level, stride<<level); break;
1323 }
1324 }
1325 }
1326
/*
 * Run the pending compose steps of every decomposition level up to a row
 * limit derived from y.
 *
 * Levels are visited from decomposition_count-1 down to 0. For each level the
 * matching *_dy step is repeated while cs[level].y has not passed
 * FFMIN((y>>level)+support, height>>level), where support is 3 for type 1 and
 * 5 otherwise; every step moves cs[level].y forward by 2.
 *
 * The callers in this file prepare cs with ff_spatial_idwt_init() before the
 * first call. For type 2 this function does nothing: ff_spatial_idwt_init()
 * has already run spatial_composeX() for every level.
 *
 * NOTE(review): the switch inside the while loop has no default, and only
 * cases 0 and 1 advance cs[level].y. "case 2" is unreachable because of the
 * early return, but any other type value would leave cs[level].y unchanged
 * and the while loop would not terminate once entered. Confirm that callers
 * only pass 0, 1 or 2.
 */
1327 void ff_spatial_idwt_slice(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){
1328 const int support = type==1 ? 3 : 5; /* extra rows past y>>level that are composed for this slice */
1329 int level;
1330 if(type==2) return; /* not sliced: already fully composed by ff_spatial_idwt_init() */
1331
1332 for(level=decomposition_count-1; level>=0; level--){
1333 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1334 switch(type){
1335 case 0: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1336 break;
1337 case 1: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1338 break;
1339 case 2: break;
1340 }
1341 }
1342 }
1343 }
1344
/*
 * Whole-buffer idwt entry point.
 *
 * type 2: spatial_composeX() is called once per level, from level
 * decomposition_count-1 down to 0.
 * other types: one dwt_compose_t per level is set up with
 * ff_spatial_idwt_init(), then ff_spatial_idwt_slice() is called for
 * y = 0, 4, 8, ... while y < height.
 *
 * NOTE(review): cs has MAX_DECOMPOSITIONS entries and is indexed up to
 * decomposition_count-1; nothing here checks that decomposition_count does
 * not exceed MAX_DECOMPOSITIONS. Presumably the caller guarantees it; confirm.
 */
1345 void ff_spatial_idwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1346 if(type==2){
1347 int level;
1348 for(level=decomposition_count-1; level>=0; level--)
1349 spatial_composeX (buffer, width>>level, height>>level, stride<<level);
1350 }else{
1351 dwt_compose_t cs[MAX_DECOMPOSITIONS];
1352 int y;
1353 ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count);
1354 for(y=0; y<height; y+=4)
1355 ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y);
1292 } 1356 }
1293 } 1357 }
1294 1358
1295 static int encode_subband_c0run(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){ 1359 static int encode_subband_c0run(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
1296 const int w= b->width; 1360 const int w= b->width;
2197 } 2261 }
2198 } 2262 }
2199 #endif 2263 #endif
2200 } 2264 }
2201 2265
2202 static always_inline void predict_plane(SnowContext *s, DWTELEM *buf, int plane_index, int add){ 2266 static always_inline void predict_slice(SnowContext *s, DWTELEM *buf, int plane_index, int add, int mb_y){
2203 Plane *p= &s->plane[plane_index]; 2267 Plane *p= &s->plane[plane_index];
2204 const int mb_w= s->b_width << s->block_max_depth; 2268 const int mb_w= s->b_width << s->block_max_depth;
2205 const int mb_h= s->b_height << s->block_max_depth; 2269 const int mb_h= s->b_height << s->block_max_depth;
2206 int x, y, mb_x, mb_y; 2270 int x, y, mb_x;
2207 int block_size = MB_SIZE >> s->block_max_depth; 2271 int block_size = MB_SIZE >> s->block_max_depth;
2208 int block_w = plane_index ? block_size/2 : block_size; 2272 int block_w = plane_index ? block_size/2 : block_size;
2209 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth]; 2273 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2210 int obmc_stride= plane_index ? block_size : 2*block_size; 2274 int obmc_stride= plane_index ? block_size : 2*block_size;
2211 int ref_stride= s->current_picture.linesize[plane_index]; 2275 int ref_stride= s->current_picture.linesize[plane_index];
2214 int w= p->width; 2278 int w= p->width;
2215 int h= p->height; 2279 int h= p->height;
2216 START_TIMER 2280 START_TIMER
2217 2281
2218 if(s->keyframe || (s->avctx->debug&512)){ 2282 if(s->keyframe || (s->avctx->debug&512)){
2283 if(mb_y==mb_h)
2284 return;
2285
2219 if(add){ 2286 if(add){
2220 for(y=0; y<h; y++){ 2287 for(y=block_w*mb_y; y<block_w*(mb_y+1); y++){
2221 for(x=0; x<w; x++){ 2288 for(x=0; x<w; x++){
2222 int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1)); 2289 int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2223 v >>= FRAC_BITS; 2290 v >>= FRAC_BITS;
2224 if(v&(~255)) v= ~(v>>31); 2291 if(v&(~255)) v= ~(v>>31);
2225 dst8[x + y*ref_stride]= v; 2292 dst8[x + y*ref_stride]= v;
2226 } 2293 }
2227 } 2294 }
2228 }else{ 2295 }else{
2229 for(y=0; y<h; y++){ 2296 for(y=block_w*mb_y; y<block_w*(mb_y+1); y++){
2230 for(x=0; x<w; x++){ 2297 for(x=0; x<w; x++){
2231 buf[x + y*w]-= 128<<FRAC_BITS; 2298 buf[x + y*w]-= 128<<FRAC_BITS;
2232 } 2299 }
2233 } 2300 }
2234 } 2301 }
2235 2302
2236 return; 2303 return;
2237 } 2304 }
2238 2305
2239 for(mb_y=0; mb_y<=mb_h; mb_y++){
2240 for(mb_x=0; mb_x<=mb_w; mb_x++){ 2306 for(mb_x=0; mb_x<=mb_w; mb_x++){
2241 START_TIMER 2307 START_TIMER
2242 2308
2243 add_yblock(s, buf, dst8, ref, obmc, 2309 add_yblock(s, buf, dst8, ref, obmc,
2244 block_w*mb_x - block_w/2, 2310 block_w*mb_x - block_w/2,
2249 mb_x - 1, mb_y - 1, 2315 mb_x - 1, mb_y - 1,
2250 add, plane_index); 2316 add, plane_index);
2251 2317
2252 STOP_TIMER("add_yblock") 2318 STOP_TIMER("add_yblock")
2253 } 2319 }
2254 } 2320
2255 2321 STOP_TIMER("predict_slice")
2256 STOP_TIMER("predict_plane") 2322 }
2323
/*
 * Whole-plane wrapper around predict_slice(): calls it once for every mb_y
 * from 0 to mb_h inclusive, where mb_h = s->b_height << s->block_max_depth.
 * buf, plane_index and add are passed through unchanged.
 */
2324 static always_inline void predict_plane(SnowContext *s, DWTELEM *buf, int plane_index, int add){
2325 const int mb_h= s->b_height << s->block_max_depth;
2326 int mb_y;
2327 for(mb_y=0; mb_y<=mb_h; mb_y++) /* "<=": mb_h+1 slices in total */
2328 predict_slice(s, buf, plane_index, add, mb_y);
2257 } 2329 }
2258 2330
2259 static void quantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int bias){ 2331 static void quantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int bias){
2260 const int level= b->level; 2332 const int level= b->level;
2261 const int w= b->width; 2333 const int w= b->width;
2954 assert(b->buf == s->spatial_dwt_buffer); 3026 assert(b->buf == s->spatial_dwt_buffer);
2955 } 3027 }
2956 } 3028 }
2957 } 3029 }
2958 3030
2959 ff_spatial_idwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count); 3031 {START_TIMER
3032 const int mb_h= s->b_height << s->block_max_depth;
3033 const int block_size = MB_SIZE >> s->block_max_depth;
3034 const int block_w = plane_index ? block_size/2 : block_size;
3035 int mb_y;
3036 dwt_compose_t cs[MAX_DECOMPOSITIONS];
3037 int yd=0, yq=0;
3038
3039 ff_spatial_idwt_init(cs, s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
3040 for(mb_y=0; mb_y<=mb_h; mb_y++){
3041 const int slice_h = block_w*(mb_y+1);
3042 for(; yd<slice_h; yd+=4)
3043 ff_spatial_idwt_slice(cs, s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count, yd);
3044
2960 if(s->qlog == LOSSLESS_QLOG){ 3045 if(s->qlog == LOSSLESS_QLOG){
2961 for(y=0; y<h; y++){ 3046 for(; yq<slice_h && yq<h; yq++){
2962 for(x=0; x<w; x++){ 3047 for(x=0; x<w; x++){
2963 s->spatial_dwt_buffer[y*w + x]<<=FRAC_BITS; 3048 s->spatial_dwt_buffer[yq*w + x]<<=FRAC_BITS;
2964 } 3049 }
2965 } 3050 }
2966 } 3051 }
2967 {START_TIMER 3052
2968 predict_plane(s, s->spatial_dwt_buffer, plane_index, 1); 3053 predict_slice(s, s->spatial_dwt_buffer, plane_index, 1, mb_y);
2969 STOP_TIMER("predict_plane conv2")} 3054 }
3055 STOP_TIMER("idwt + predict_slices")}
2970 } 3056 }
2971 3057
2972 emms_c(); 3058 emms_c();
2973 3059
2974 if(s->last_picture.data[0]) 3060 if(s->last_picture.data[0])