comparison h264.c @ 5545:397cb90b66d0 libavcodec

Chained assignments such as a = b = c = d = e; perform their stores right-to-left, so when the destinations are written in ascending order the stores are actually issued in reverse (descending) order. This patch removes such chained assignments from fill_rectangle(), replacing each with separate assignment statements. Patch by Alexander Strange %astrange A ithinksw P com% Original thread: Date: Aug 14, 2007 5:36 AM Subject: [FFmpeg-devel] [PATCH] two small h264 optimizations
author gpoirier
date Tue, 14 Aug 2007 22:28:09 +0000
parents 4c3b1222ff57
children 81226e690378
comparison
equal deleted inserted replaced
5544:4117a942b325 5545:397cb90b66d0
89 const uint16_t v= size==4 ? val : val*0x0101; 89 const uint16_t v= size==4 ? val : val*0x0101;
90 *(uint16_t*)(p + 0*stride)= v; 90 *(uint16_t*)(p + 0*stride)= v;
91 if(h==1) return; 91 if(h==1) return;
92 *(uint16_t*)(p + 1*stride)= v; 92 *(uint16_t*)(p + 1*stride)= v;
93 if(h==2) return; 93 if(h==2) return;
94 *(uint16_t*)(p + 2*stride)= 94 *(uint16_t*)(p + 2*stride)= v;
95 *(uint16_t*)(p + 3*stride)= v; 95 *(uint16_t*)(p + 3*stride)= v;
96 }else if(w==4){ 96 }else if(w==4){
97 const uint32_t v= size==4 ? val : val*0x01010101; 97 const uint32_t v= size==4 ? val : val*0x01010101;
98 *(uint32_t*)(p + 0*stride)= v; 98 *(uint32_t*)(p + 0*stride)= v;
99 if(h==1) return; 99 if(h==1) return;
100 *(uint32_t*)(p + 1*stride)= v; 100 *(uint32_t*)(p + 1*stride)= v;
101 if(h==2) return; 101 if(h==2) return;
102 *(uint32_t*)(p + 2*stride)= 102 *(uint32_t*)(p + 2*stride)= v;
103 *(uint32_t*)(p + 3*stride)= v; 103 *(uint32_t*)(p + 3*stride)= v;
104 }else if(w==8){ 104 }else if(w==8){
105 //gcc can't optimize 64bit math on x86_32 105 //gcc can't optimize 64bit math on x86_32
106 #if defined(ARCH_X86_64) || (defined(MP_WORDSIZE) && MP_WORDSIZE >= 64) 106 #if defined(ARCH_X86_64) || (defined(MP_WORDSIZE) && MP_WORDSIZE >= 64)
107 const uint64_t v= val*0x0100000001ULL; 107 const uint64_t v= val*0x0100000001ULL;
108 *(uint64_t*)(p + 0*stride)= v; 108 *(uint64_t*)(p + 0*stride)= v;
109 if(h==1) return; 109 if(h==1) return;
110 *(uint64_t*)(p + 1*stride)= v; 110 *(uint64_t*)(p + 1*stride)= v;
111 if(h==2) return; 111 if(h==2) return;
112 *(uint64_t*)(p + 2*stride)= 112 *(uint64_t*)(p + 2*stride)= v;
113 *(uint64_t*)(p + 3*stride)= v; 113 *(uint64_t*)(p + 3*stride)= v;
114 }else if(w==16){ 114 }else if(w==16){
115 const uint64_t v= val*0x0100000001ULL; 115 const uint64_t v= val*0x0100000001ULL;
116 *(uint64_t*)(p + 0+0*stride)= 116 *(uint64_t*)(p + 0+0*stride)= v;
117 *(uint64_t*)(p + 8+0*stride)= 117 *(uint64_t*)(p + 8+0*stride)= v;
118 *(uint64_t*)(p + 0+1*stride)= 118 *(uint64_t*)(p + 0+1*stride)= v;
119 *(uint64_t*)(p + 8+1*stride)= v; 119 *(uint64_t*)(p + 8+1*stride)= v;
120 if(h==2) return; 120 if(h==2) return;
121 *(uint64_t*)(p + 0+2*stride)= 121 *(uint64_t*)(p + 0+2*stride)= v;
122 *(uint64_t*)(p + 8+2*stride)= 122 *(uint64_t*)(p + 8+2*stride)= v;
123 *(uint64_t*)(p + 0+3*stride)= 123 *(uint64_t*)(p + 0+3*stride)= v;
124 *(uint64_t*)(p + 8+3*stride)= v; 124 *(uint64_t*)(p + 8+3*stride)= v;
125 #else 125 #else
126 *(uint32_t*)(p + 0+0*stride)= 126 *(uint32_t*)(p + 0+0*stride)= val;
127 *(uint32_t*)(p + 4+0*stride)= val; 127 *(uint32_t*)(p + 4+0*stride)= val;
128 if(h==1) return; 128 if(h==1) return;
129 *(uint32_t*)(p + 0+1*stride)= 129 *(uint32_t*)(p + 0+1*stride)= val;
130 *(uint32_t*)(p + 4+1*stride)= val; 130 *(uint32_t*)(p + 4+1*stride)= val;
131 if(h==2) return; 131 if(h==2) return;
132 *(uint32_t*)(p + 0+2*stride)= 132 *(uint32_t*)(p + 0+2*stride)= val;
133 *(uint32_t*)(p + 4+2*stride)= 133 *(uint32_t*)(p + 4+2*stride)= val;
134 *(uint32_t*)(p + 0+3*stride)= 134 *(uint32_t*)(p + 0+3*stride)= val;
135 *(uint32_t*)(p + 4+3*stride)= val; 135 *(uint32_t*)(p + 4+3*stride)= val;
136 }else if(w==16){ 136 }else if(w==16){
137 *(uint32_t*)(p + 0+0*stride)= 137 *(uint32_t*)(p + 0+0*stride)= val;
138 *(uint32_t*)(p + 4+0*stride)= 138 *(uint32_t*)(p + 4+0*stride)= val;
139 *(uint32_t*)(p + 8+0*stride)= 139 *(uint32_t*)(p + 8+0*stride)= val;
140 *(uint32_t*)(p +12+0*stride)= 140 *(uint32_t*)(p +12+0*stride)= val;
141 *(uint32_t*)(p + 0+1*stride)= 141 *(uint32_t*)(p + 0+1*stride)= val;
142 *(uint32_t*)(p + 4+1*stride)= 142 *(uint32_t*)(p + 4+1*stride)= val;
143 *(uint32_t*)(p + 8+1*stride)= 143 *(uint32_t*)(p + 8+1*stride)= val;
144 *(uint32_t*)(p +12+1*stride)= val; 144 *(uint32_t*)(p +12+1*stride)= val;
145 if(h==2) return; 145 if(h==2) return;
146 *(uint32_t*)(p + 0+2*stride)= 146 *(uint32_t*)(p + 0+2*stride)= val;
147 *(uint32_t*)(p + 4+2*stride)= 147 *(uint32_t*)(p + 4+2*stride)= val;
148 *(uint32_t*)(p + 8+2*stride)= 148 *(uint32_t*)(p + 8+2*stride)= val;
149 *(uint32_t*)(p +12+2*stride)= 149 *(uint32_t*)(p +12+2*stride)= val;
150 *(uint32_t*)(p + 0+3*stride)= 150 *(uint32_t*)(p + 0+3*stride)= val;
151 *(uint32_t*)(p + 4+3*stride)= 151 *(uint32_t*)(p + 4+3*stride)= val;
152 *(uint32_t*)(p + 8+3*stride)= 152 *(uint32_t*)(p + 8+3*stride)= val;
153 *(uint32_t*)(p +12+3*stride)= val; 153 *(uint32_t*)(p +12+3*stride)= val;
154 #endif 154 #endif
155 }else 155 }else
156 assert(0); 156 assert(0);
157 assert(h==4); 157 assert(h==4);