libavcodec.hg: h264.c comparison

comparison h264.c @ 3315:cfd452a6560b libavcodec

h264: faster fill_rectangle()

author	lorenm
date	Sun, 28 May 2006 22:28:08 +0000
parents	9637da0a9c1b
children	7278f730af27

comparison

Legend: equal, deleted, inserted, replaced

-:aea2230e6033
+:cfd452a6560b
 * @param size the size of val (1 or 4), should be a constant
 */
 static always_inline void fill_rectangle(void *vp, int w, int h, int stride, uint32_t val, int size){
 uint8_t *p= (uint8_t*)vp;
 assert(size==1 || size==4);
+assert(w<=4);
 w      *= size;
 stride *= size;
 assert((((long)vp)&(FFMIN(w, STRIDE_ALIGN)-1)) == 0);
 assert((stride&(w-1))==0);
-//FIXME check what gcc generates for 64 bit on x86 and possibly write a 32 bit ver of it
+if(w==2){
-if(w==2 && h==2){
+const uint16_t v= size==4 ? val : val*0x0101;
-*(uint16_t*)(p + 0)=
+*(uint16_t*)(p + 0*stride)= v;
-*(uint16_t*)(p + stride)= size==4 ? val : val*0x0101;
+if(h==1) return;
-}else if(w==2 && h==4){
+*(uint16_t*)(p + 1*stride)= v;
-*(uint16_t*)(p + 0*stride)=
+if(h==2) return;
-*(uint16_t*)(p + 1*stride)=
 *(uint16_t*)(p + 2*stride)=
-*(uint16_t*)(p + 3*stride)= size==4 ? val : val*0x0101;
+*(uint16_t*)(p + 3*stride)= v;
-}else if(w==4 && h==1){
+}else if(w==4){
-*(uint32_t*)(p + 0*stride)= size==4 ? val : val*0x01010101;
+const uint32_t v= size==4 ? val : val*0x01010101;
-}else if(w==4 && h==2){
+*(uint32_t*)(p + 0*stride)= v;
-*(uint32_t*)(p + 0*stride)=
+if(h==1) return;
-*(uint32_t*)(p + 1*stride)= size==4 ? val : val*0x01010101;
+*(uint32_t*)(p + 1*stride)= v;
-}else if(w==4 && h==4){
+if(h==2) return;
-*(uint32_t*)(p + 0*stride)=
-*(uint32_t*)(p + 1*stride)=
 *(uint32_t*)(p + 2*stride)=
-*(uint32_t*)(p + 3*stride)= size==4 ? val : val*0x01010101;
+*(uint32_t*)(p + 3*stride)= v;
-}else if(w==8 && h==1){
+}else if(w==8){
-*(uint32_t*)(p + 0)=
+//gcc can't optimize 64bit math on x86_32
-*(uint32_t*)(p + 4)= size==4 ? val : val*0x01010101;
+#if defined(ARCH_X86_64) || (defined(MP_WORDSIZE) && MP_WORDSIZE >= 64)
-}else if(w==8 && h==2){
+const uint64_t v= val*0x0100000001ULL;
-*(uint32_t*)(p + 0 + 0*stride)=
+*(uint64_t*)(p + 0*stride)= v;
-*(uint32_t*)(p + 4 + 0*stride)=
+if(h==1) return;
-*(uint32_t*)(p + 0 + 1*stride)=
+*(uint64_t*)(p + 1*stride)= v;
-*(uint32_t*)(p + 4 + 1*stride)=  size==4 ? val : val*0x01010101;
+if(h==2) return;
-}else if(w==8 && h==4){
-*(uint64_t*)(p + 0*stride)=
-*(uint64_t*)(p + 1*stride)=
 *(uint64_t*)(p + 2*stride)=
-*(uint64_t*)(p + 3*stride)= size==4 ? val*0x0100000001ULL : val*0x0101010101010101ULL;
+*(uint64_t*)(p + 3*stride)= v;
-}else if(w==16 && h==2){
+}else if(w==16){
+const uint64_t v= val*0x0100000001ULL;
 *(uint64_t*)(p + 0+0*stride)=
 *(uint64_t*)(p + 8+0*stride)=
 *(uint64_t*)(p + 0+1*stride)=
-*(uint64_t*)(p + 8+1*stride)= size==4 ? val*0x0100000001ULL : val*0x0101010101010101ULL;
+*(uint64_t*)(p + 8+1*stride)= v;
-}else if(w==16 && h==4){
+if(h==2) return;
-*(uint64_t*)(p + 0+0*stride)=
-*(uint64_t*)(p + 8+0*stride)=
-*(uint64_t*)(p + 0+1*stride)=
-*(uint64_t*)(p + 8+1*stride)=
 *(uint64_t*)(p + 0+2*stride)=
 *(uint64_t*)(p + 8+2*stride)=
 *(uint64_t*)(p + 0+3*stride)=
-*(uint64_t*)(p + 8+3*stride)= size==4 ? val*0x0100000001ULL : val*0x0101010101010101ULL;
+*(uint64_t*)(p + 8+3*stride)= v;
+#else
+*(uint32_t*)(p + 0+0*stride)=
+*(uint32_t*)(p + 4+0*stride)= val;
+if(h==1) return;
+*(uint32_t*)(p + 0+1*stride)=
+*(uint32_t*)(p + 4+1*stride)= val;
+if(h==2) return;
+*(uint32_t*)(p + 0+2*stride)=
+*(uint32_t*)(p + 4+2*stride)=
+*(uint32_t*)(p + 0+3*stride)=
+*(uint32_t*)(p + 4+3*stride)= val;
+}else if(w==16){
+*(uint32_t*)(p + 0+0*stride)=
+*(uint32_t*)(p + 4+0*stride)=
+*(uint32_t*)(p + 8+0*stride)=
+*(uint32_t*)(p +12+0*stride)=
+*(uint32_t*)(p + 0+1*stride)=
+*(uint32_t*)(p + 4+1*stride)=
+*(uint32_t*)(p + 8+1*stride)=
+*(uint32_t*)(p +12+1*stride)= val;
+if(h==2) return;
+*(uint32_t*)(p + 0+2*stride)=
+*(uint32_t*)(p + 4+2*stride)=
+*(uint32_t*)(p + 8+2*stride)=
+*(uint32_t*)(p +12+2*stride)=
+*(uint32_t*)(p + 0+3*stride)=
+*(uint32_t*)(p + 4+3*stride)=
+*(uint32_t*)(p + 8+3*stride)=
+*(uint32_t*)(p +12+3*stride)= val;
+#endif
 }else
 assert(0);
+assert(h==4);
 }
 static void fill_caches(H264Context *h, int mb_type, int for_deblock){
 MpegEncContext * const s = &h->s;
 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;

Mercurial > libavcodec.hg

comparison h264.c @ 3315:cfd452a6560b libavcodec