Mercurial > libavcodec.hg
comparison snow.c @ 2562:d0a58dca5ad2 libavcodec
slice-based idwt (10% faster decoding)
author | lorenm |
---|---|
date | Tue, 15 Mar 2005 21:05:34 +0000 |
parents | d3885f927bc7 |
children | a5a62827f195 |
comparison
equal
deleted
inserted
replaced
2561:eb72c01df6ed | 2562:d0a58dca5ad2 |
---|---|
428 BlockNode *block; | 428 BlockNode *block; |
429 | 429 |
430 MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independent of MpegEncContext, so this will be removed then (FIXME/XXX) | 430 MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independent of MpegEncContext, so this will be removed then (FIXME/XXX) |
431 }SnowContext; | 431 }SnowContext; |
432 | 432 |
433 typedef struct { | |
434 DWTELEM *b0; | |
435 DWTELEM *b1; | |
436 DWTELEM *b2; | |
437 DWTELEM *b3; | |
438 int y; | |
439 } dwt_compose_t; | |
440 | |
433 #ifdef __sgi | 441 #ifdef __sgi |
434 // Avoid a name clash on SGI IRIX | 442 // Avoid a name clash on SGI IRIX |
435 #undef qexp | 443 #undef qexp |
436 #endif | 444 #endif |
437 #define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0 | 445 #define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0 |
1164 for(i=0; i<width; i++){ | 1172 for(i=0; i<width; i++){ |
1165 b1[i] -= (b0[i] + b2[i] + 2)>>2; | 1173 b1[i] -= (b0[i] + b2[i] + 2)>>2; |
1166 } | 1174 } |
1167 } | 1175 } |
1168 | 1176 |
1169 static void spatial_compose53i(DWTELEM *buffer, int width, int height, int stride){ | 1177 static void spatial_compose53i_init(dwt_compose_t *cs, DWTELEM *buffer, int height, int stride){ |
1170 int y; | 1178 cs->b0 = buffer + mirror(-1-1, height-1)*stride; |
1171 DWTELEM *b0= buffer + mirror(-1-1, height-1)*stride; | 1179 cs->b1 = buffer + mirror(-1 , height-1)*stride; |
1172 DWTELEM *b1= buffer + mirror(-1 , height-1)*stride; | 1180 cs->y = -1; |
1173 | 1181 } |
1174 for(y=-1; y<=height; y+=2){ | 1182 |
1175 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride; | 1183 static void spatial_compose53i_dy(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride){ |
1176 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride; | 1184 int y= cs->y; |
1185 DWTELEM *b0= cs->b0; | |
1186 DWTELEM *b1= cs->b1; | |
1187 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride; | |
1188 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride; | |
1177 | 1189 |
1178 {START_TIMER | 1190 {START_TIMER |
1179 if(b1 <= b3) vertical_compose53iL0(b1, b2, b3, width); | 1191 if(b1 <= b3) vertical_compose53iL0(b1, b2, b3, width); |
1180 if(b0 <= b2) vertical_compose53iH0(b0, b1, b2, width); | 1192 if(b0 <= b2) vertical_compose53iH0(b0, b1, b2, width); |
1181 STOP_TIMER("vertical_compose53i*")} | 1193 STOP_TIMER("vertical_compose53i*")} |
1183 {START_TIMER | 1195 {START_TIMER |
1184 if(y-1 >= 0) horizontal_compose53i(b0, width); | 1196 if(y-1 >= 0) horizontal_compose53i(b0, width); |
1185 if(b0 <= b2) horizontal_compose53i(b1, width); | 1197 if(b0 <= b2) horizontal_compose53i(b1, width); |
1186 STOP_TIMER("horizontal_compose53i")} | 1198 STOP_TIMER("horizontal_compose53i")} |
1187 | 1199 |
1188 b0=b2; | 1200 cs->b0 = b2; |
1189 b1=b3; | 1201 cs->b1 = b3; |
1190 } | 1202 cs->y += 2; |
1203 } | |
1204 | |
1205 static void spatial_compose53i(DWTELEM *buffer, int width, int height, int stride){ | |
1206 dwt_compose_t cs; | |
1207 spatial_compose53i_init(&cs, buffer, height, stride); | |
1208 while(cs.y <= height) | |
1209 spatial_compose53i_dy(&cs, buffer, width, height, stride); | |
1191 } | 1210 } |
1192 | 1211 |
1193 | 1212 |
1194 static void horizontal_compose97i(DWTELEM *b, int width){ | 1213 static void horizontal_compose97i(DWTELEM *b, int width){ |
1195 DWTELEM temp[width]; | 1214 DWTELEM temp[width]; |
1238 for(i=0; i<width; i++){ | 1257 for(i=0; i<width; i++){ |
1239 b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS; | 1258 b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS; |
1240 } | 1259 } |
1241 } | 1260 } |
1242 | 1261 |
1243 static void spatial_compose97i(DWTELEM *buffer, int width, int height, int stride){ | 1262 static void spatial_compose97i_init(dwt_compose_t *cs, DWTELEM *buffer, int height, int stride){ |
1244 int y; | 1263 cs->b0 = buffer + mirror(-3-1, height-1)*stride; |
1245 DWTELEM *b0= buffer + mirror(-3-1, height-1)*stride; | 1264 cs->b1 = buffer + mirror(-3 , height-1)*stride; |
1246 DWTELEM *b1= buffer + mirror(-3 , height-1)*stride; | 1265 cs->b2 = buffer + mirror(-3+1, height-1)*stride; |
1247 DWTELEM *b2= buffer + mirror(-3+1, height-1)*stride; | 1266 cs->b3 = buffer + mirror(-3+2, height-1)*stride; |
1248 DWTELEM *b3= buffer + mirror(-3+2, height-1)*stride; | 1267 cs->y = -3; |
1249 | 1268 } |
1250 for(y=-3; y<=height; y+=2){ | 1269 |
1251 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride; | 1270 static void spatial_compose97i_dy(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride){ |
1252 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride; | 1271 int y = cs->y; |
1272 DWTELEM *b0= cs->b0; | |
1273 DWTELEM *b1= cs->b1; | |
1274 DWTELEM *b2= cs->b2; | |
1275 DWTELEM *b3= cs->b3; | |
1276 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride; | |
1277 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride; | |
1253 | 1278 |
1254 if(stride == width && y+4 < height && 0){ | 1279 if(stride == width && y+4 < height && 0){ |
1255 int x; | 1280 int x; |
1256 for(x=0; x<width/2; x++) | 1281 for(x=0; x<width/2; x++) |
1257 b5[x] += 64*2; | 1282 b5[x] += 64*2; |
1270 {START_TIMER | 1295 {START_TIMER |
1271 if(y-1>= 0) horizontal_compose97i(b0, width); | 1296 if(y-1>= 0) horizontal_compose97i(b0, width); |
1272 if(b0 <= b2) horizontal_compose97i(b1, width); | 1297 if(b0 <= b2) horizontal_compose97i(b1, width); |
1273 if(width>400 && b0 <= b2){ | 1298 if(width>400 && b0 <= b2){ |
1274 STOP_TIMER("horizontal_compose97i")}} | 1299 STOP_TIMER("horizontal_compose97i")}} |
1275 | 1300 |
1276 b0=b2; | 1301 cs->b0=b2; |
1277 b1=b3; | 1302 cs->b1=b3; |
1278 b2=b4; | 1303 cs->b2=b4; |
1279 b3=b5; | 1304 cs->b3=b5; |
1280 } | 1305 cs->y += 2; |
1281 } | 1306 } |
1282 | 1307 |
1283 void ff_spatial_idwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){ | 1308 static void spatial_compose97i(DWTELEM *buffer, int width, int height, int stride){ |
1309 dwt_compose_t cs; | |
1310 spatial_compose97i_init(&cs, buffer, height, stride); | |
1311 while(cs.y <= height) | |
1312 spatial_compose97i_dy(&cs, buffer, width, height, stride); | |
1313 } | |
1314 | |
1315 void ff_spatial_idwt_init(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){ | |
1284 int level; | 1316 int level; |
1285 | |
1286 for(level=decomposition_count-1; level>=0; level--){ | 1317 for(level=decomposition_count-1; level>=0; level--){ |
1287 switch(type){ | 1318 switch(type){ |
1288 case 0: spatial_compose97i(buffer, width>>level, height>>level, stride<<level); break; | 1319 case 0: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break; |
1289 case 1: spatial_compose53i(buffer, width>>level, height>>level, stride<<level); break; | 1320 case 1: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break; |
1290 case 2: spatial_composeX (buffer, width>>level, height>>level, stride<<level); break; | 1321 /* not slicified yet */ |
1291 } | 1322 case 2: spatial_composeX(buffer, width>>level, height>>level, stride<<level); break; |
1323 } | |
1324 } | |
1325 } | |
1326 | |
1327 void ff_spatial_idwt_slice(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){ | |
1328 const int support = type==1 ? 3 : 5; | |
1329 int level; | |
1330 if(type==2) return; | |
1331 | |
1332 for(level=decomposition_count-1; level>=0; level--){ | |
1333 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){ | |
1334 switch(type){ | |
1335 case 0: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level); | |
1336 break; | |
1337 case 1: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level); | |
1338 break; | |
1339 case 2: break; | |
1340 } | |
1341 } | |
1342 } | |
1343 } | |
1344 | |
1345 void ff_spatial_idwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){ | |
1346 if(type==2){ | |
1347 int level; | |
1348 for(level=decomposition_count-1; level>=0; level--) | |
1349 spatial_composeX (buffer, width>>level, height>>level, stride<<level); | |
1350 }else{ | |
1351 dwt_compose_t cs[MAX_DECOMPOSITIONS]; | |
1352 int y; | |
1353 ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count); | |
1354 for(y=0; y<height; y+=4) | |
1355 ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y); | |
1292 } | 1356 } |
1293 } | 1357 } |
1294 | 1358 |
1295 static int encode_subband_c0run(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){ | 1359 static int encode_subband_c0run(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){ |
1296 const int w= b->width; | 1360 const int w= b->width; |
2197 } | 2261 } |
2198 } | 2262 } |
2199 #endif | 2263 #endif |
2200 } | 2264 } |
2201 | 2265 |
2202 static always_inline void predict_plane(SnowContext *s, DWTELEM *buf, int plane_index, int add){ | 2266 static always_inline void predict_slice(SnowContext *s, DWTELEM *buf, int plane_index, int add, int mb_y){ |
2203 Plane *p= &s->plane[plane_index]; | 2267 Plane *p= &s->plane[plane_index]; |
2204 const int mb_w= s->b_width << s->block_max_depth; | 2268 const int mb_w= s->b_width << s->block_max_depth; |
2205 const int mb_h= s->b_height << s->block_max_depth; | 2269 const int mb_h= s->b_height << s->block_max_depth; |
2206 int x, y, mb_x, mb_y; | 2270 int x, y, mb_x; |
2207 int block_size = MB_SIZE >> s->block_max_depth; | 2271 int block_size = MB_SIZE >> s->block_max_depth; |
2208 int block_w = plane_index ? block_size/2 : block_size; | 2272 int block_w = plane_index ? block_size/2 : block_size; |
2209 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth]; | 2273 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth]; |
2210 int obmc_stride= plane_index ? block_size : 2*block_size; | 2274 int obmc_stride= plane_index ? block_size : 2*block_size; |
2211 int ref_stride= s->current_picture.linesize[plane_index]; | 2275 int ref_stride= s->current_picture.linesize[plane_index]; |
2214 int w= p->width; | 2278 int w= p->width; |
2215 int h= p->height; | 2279 int h= p->height; |
2216 START_TIMER | 2280 START_TIMER |
2217 | 2281 |
2218 if(s->keyframe || (s->avctx->debug&512)){ | 2282 if(s->keyframe || (s->avctx->debug&512)){ |
2283 if(mb_y==mb_h) | |
2284 return; | |
2285 | |
2219 if(add){ | 2286 if(add){ |
2220 for(y=0; y<h; y++){ | 2287 for(y=block_w*mb_y; y<block_w*(mb_y+1); y++){ |
2221 for(x=0; x<w; x++){ | 2288 for(x=0; x<w; x++){ |
2222 int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1)); | 2289 int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1)); |
2223 v >>= FRAC_BITS; | 2290 v >>= FRAC_BITS; |
2224 if(v&(~255)) v= ~(v>>31); | 2291 if(v&(~255)) v= ~(v>>31); |
2225 dst8[x + y*ref_stride]= v; | 2292 dst8[x + y*ref_stride]= v; |
2226 } | 2293 } |
2227 } | 2294 } |
2228 }else{ | 2295 }else{ |
2229 for(y=0; y<h; y++){ | 2296 for(y=block_w*mb_y; y<block_w*(mb_y+1); y++){ |
2230 for(x=0; x<w; x++){ | 2297 for(x=0; x<w; x++){ |
2231 buf[x + y*w]-= 128<<FRAC_BITS; | 2298 buf[x + y*w]-= 128<<FRAC_BITS; |
2232 } | 2299 } |
2233 } | 2300 } |
2234 } | 2301 } |
2235 | 2302 |
2236 return; | 2303 return; |
2237 } | 2304 } |
2238 | 2305 |
2239 for(mb_y=0; mb_y<=mb_h; mb_y++){ | |
2240 for(mb_x=0; mb_x<=mb_w; mb_x++){ | 2306 for(mb_x=0; mb_x<=mb_w; mb_x++){ |
2241 START_TIMER | 2307 START_TIMER |
2242 | 2308 |
2243 add_yblock(s, buf, dst8, ref, obmc, | 2309 add_yblock(s, buf, dst8, ref, obmc, |
2244 block_w*mb_x - block_w/2, | 2310 block_w*mb_x - block_w/2, |
2249 mb_x - 1, mb_y - 1, | 2315 mb_x - 1, mb_y - 1, |
2250 add, plane_index); | 2316 add, plane_index); |
2251 | 2317 |
2252 STOP_TIMER("add_yblock") | 2318 STOP_TIMER("add_yblock") |
2253 } | 2319 } |
2254 } | 2320 |
2255 | 2321 STOP_TIMER("predict_slice") |
2256 STOP_TIMER("predict_plane") | 2322 } |
2323 | |
2324 static always_inline void predict_plane(SnowContext *s, DWTELEM *buf, int plane_index, int add){ | |
2325 const int mb_h= s->b_height << s->block_max_depth; | |
2326 int mb_y; | |
2327 for(mb_y=0; mb_y<=mb_h; mb_y++) | |
2328 predict_slice(s, buf, plane_index, add, mb_y); | |
2257 } | 2329 } |
2258 | 2330 |
2259 static void quantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int bias){ | 2331 static void quantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int bias){ |
2260 const int level= b->level; | 2332 const int level= b->level; |
2261 const int w= b->width; | 2333 const int w= b->width; |
2954 assert(b->buf == s->spatial_dwt_buffer); | 3026 assert(b->buf == s->spatial_dwt_buffer); |
2955 } | 3027 } |
2956 } | 3028 } |
2957 } | 3029 } |
2958 | 3030 |
2959 ff_spatial_idwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count); | 3031 {START_TIMER |
3032 const int mb_h= s->b_height << s->block_max_depth; | |
3033 const int block_size = MB_SIZE >> s->block_max_depth; | |
3034 const int block_w = plane_index ? block_size/2 : block_size; | |
3035 int mb_y; | |
3036 dwt_compose_t cs[MAX_DECOMPOSITIONS]; | |
3037 int yd=0, yq=0; | |
3038 | |
3039 ff_spatial_idwt_init(cs, s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count); | |
3040 for(mb_y=0; mb_y<=mb_h; mb_y++){ | |
3041 const int slice_h = block_w*(mb_y+1); | |
3042 for(; yd<slice_h; yd+=4) | |
3043 ff_spatial_idwt_slice(cs, s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count, yd); | |
3044 | |
2960 if(s->qlog == LOSSLESS_QLOG){ | 3045 if(s->qlog == LOSSLESS_QLOG){ |
2961 for(y=0; y<h; y++){ | 3046 for(; yq<slice_h && yq<h; yq++){ |
2962 for(x=0; x<w; x++){ | 3047 for(x=0; x<w; x++){ |
2963 s->spatial_dwt_buffer[y*w + x]<<=FRAC_BITS; | 3048 s->spatial_dwt_buffer[yq*w + x]<<=FRAC_BITS; |
2964 } | 3049 } |
2965 } | 3050 } |
2966 } | 3051 } |
2967 {START_TIMER | 3052 |
2968 predict_plane(s, s->spatial_dwt_buffer, plane_index, 1); | 3053 predict_slice(s, s->spatial_dwt_buffer, plane_index, 1, mb_y); |
2969 STOP_TIMER("predict_plane conv2")} | 3054 } |
3055 STOP_TIMER("idwt + predict_slices")} | |
2970 } | 3056 } |
2971 | 3057 |
2972 emms_c(); | 3058 emms_c(); |
2973 | 3059 |
2974 if(s->last_picture.data[0]) | 3060 if(s->last_picture.data[0]) |