Mercurial > libavcodec.hg
comparison h264.c @ 5545:397cb90b66d0 libavcodec
Chained assignments like a = b = c = d = e; perform their stores from right to left,
so if you write the destinations in the natural order, the stores are executed in reverse.
This replaces the chained assignments in fill_rectangle() with separate assignments.
patch by Alexander Strange %astrange A ithinksw P com%
Original thread:
Date: Aug 14, 2007 5:36 AM
Subject: [FFmpeg-devel] [PATCH] two small h264 optimizations
author | gpoirier |
---|---|
date | Tue, 14 Aug 2007 22:28:09 +0000 |
parents | 4c3b1222ff57 |
children | 81226e690378 |
comparison
equal
deleted
inserted
replaced
5544:4117a942b325 | 5545:397cb90b66d0 |
---|---|
89 const uint16_t v= size==4 ? val : val*0x0101; | 89 const uint16_t v= size==4 ? val : val*0x0101; |
90 *(uint16_t*)(p + 0*stride)= v; | 90 *(uint16_t*)(p + 0*stride)= v; |
91 if(h==1) return; | 91 if(h==1) return; |
92 *(uint16_t*)(p + 1*stride)= v; | 92 *(uint16_t*)(p + 1*stride)= v; |
93 if(h==2) return; | 93 if(h==2) return; |
94 *(uint16_t*)(p + 2*stride)= | 94 *(uint16_t*)(p + 2*stride)= v; |
95 *(uint16_t*)(p + 3*stride)= v; | 95 *(uint16_t*)(p + 3*stride)= v; |
96 }else if(w==4){ | 96 }else if(w==4){ |
97 const uint32_t v= size==4 ? val : val*0x01010101; | 97 const uint32_t v= size==4 ? val : val*0x01010101; |
98 *(uint32_t*)(p + 0*stride)= v; | 98 *(uint32_t*)(p + 0*stride)= v; |
99 if(h==1) return; | 99 if(h==1) return; |
100 *(uint32_t*)(p + 1*stride)= v; | 100 *(uint32_t*)(p + 1*stride)= v; |
101 if(h==2) return; | 101 if(h==2) return; |
102 *(uint32_t*)(p + 2*stride)= | 102 *(uint32_t*)(p + 2*stride)= v; |
103 *(uint32_t*)(p + 3*stride)= v; | 103 *(uint32_t*)(p + 3*stride)= v; |
104 }else if(w==8){ | 104 }else if(w==8){ |
105 //gcc can't optimize 64bit math on x86_32 | 105 //gcc can't optimize 64bit math on x86_32 |
106 #if defined(ARCH_X86_64) || (defined(MP_WORDSIZE) && MP_WORDSIZE >= 64) | 106 #if defined(ARCH_X86_64) || (defined(MP_WORDSIZE) && MP_WORDSIZE >= 64) |
107 const uint64_t v= val*0x0100000001ULL; | 107 const uint64_t v= val*0x0100000001ULL; |
108 *(uint64_t*)(p + 0*stride)= v; | 108 *(uint64_t*)(p + 0*stride)= v; |
109 if(h==1) return; | 109 if(h==1) return; |
110 *(uint64_t*)(p + 1*stride)= v; | 110 *(uint64_t*)(p + 1*stride)= v; |
111 if(h==2) return; | 111 if(h==2) return; |
112 *(uint64_t*)(p + 2*stride)= | 112 *(uint64_t*)(p + 2*stride)= v; |
113 *(uint64_t*)(p + 3*stride)= v; | 113 *(uint64_t*)(p + 3*stride)= v; |
114 }else if(w==16){ | 114 }else if(w==16){ |
115 const uint64_t v= val*0x0100000001ULL; | 115 const uint64_t v= val*0x0100000001ULL; |
116 *(uint64_t*)(p + 0+0*stride)= | 116 *(uint64_t*)(p + 0+0*stride)= v; |
117 *(uint64_t*)(p + 8+0*stride)= | 117 *(uint64_t*)(p + 8+0*stride)= v; |
118 *(uint64_t*)(p + 0+1*stride)= | 118 *(uint64_t*)(p + 0+1*stride)= v; |
119 *(uint64_t*)(p + 8+1*stride)= v; | 119 *(uint64_t*)(p + 8+1*stride)= v; |
120 if(h==2) return; | 120 if(h==2) return; |
121 *(uint64_t*)(p + 0+2*stride)= | 121 *(uint64_t*)(p + 0+2*stride)= v; |
122 *(uint64_t*)(p + 8+2*stride)= | 122 *(uint64_t*)(p + 8+2*stride)= v; |
123 *(uint64_t*)(p + 0+3*stride)= | 123 *(uint64_t*)(p + 0+3*stride)= v; |
124 *(uint64_t*)(p + 8+3*stride)= v; | 124 *(uint64_t*)(p + 8+3*stride)= v; |
125 #else | 125 #else |
126 *(uint32_t*)(p + 0+0*stride)= | 126 *(uint32_t*)(p + 0+0*stride)= val; |
127 *(uint32_t*)(p + 4+0*stride)= val; | 127 *(uint32_t*)(p + 4+0*stride)= val; |
128 if(h==1) return; | 128 if(h==1) return; |
129 *(uint32_t*)(p + 0+1*stride)= | 129 *(uint32_t*)(p + 0+1*stride)= val; |
130 *(uint32_t*)(p + 4+1*stride)= val; | 130 *(uint32_t*)(p + 4+1*stride)= val; |
131 if(h==2) return; | 131 if(h==2) return; |
132 *(uint32_t*)(p + 0+2*stride)= | 132 *(uint32_t*)(p + 0+2*stride)= val; |
133 *(uint32_t*)(p + 4+2*stride)= | 133 *(uint32_t*)(p + 4+2*stride)= val; |
134 *(uint32_t*)(p + 0+3*stride)= | 134 *(uint32_t*)(p + 0+3*stride)= val; |
135 *(uint32_t*)(p + 4+3*stride)= val; | 135 *(uint32_t*)(p + 4+3*stride)= val; |
136 }else if(w==16){ | 136 }else if(w==16){ |
137 *(uint32_t*)(p + 0+0*stride)= | 137 *(uint32_t*)(p + 0+0*stride)= val; |
138 *(uint32_t*)(p + 4+0*stride)= | 138 *(uint32_t*)(p + 4+0*stride)= val; |
139 *(uint32_t*)(p + 8+0*stride)= | 139 *(uint32_t*)(p + 8+0*stride)= val; |
140 *(uint32_t*)(p +12+0*stride)= | 140 *(uint32_t*)(p +12+0*stride)= val; |
141 *(uint32_t*)(p + 0+1*stride)= | 141 *(uint32_t*)(p + 0+1*stride)= val; |
142 *(uint32_t*)(p + 4+1*stride)= | 142 *(uint32_t*)(p + 4+1*stride)= val; |
143 *(uint32_t*)(p + 8+1*stride)= | 143 *(uint32_t*)(p + 8+1*stride)= val; |
144 *(uint32_t*)(p +12+1*stride)= val; | 144 *(uint32_t*)(p +12+1*stride)= val; |
145 if(h==2) return; | 145 if(h==2) return; |
146 *(uint32_t*)(p + 0+2*stride)= | 146 *(uint32_t*)(p + 0+2*stride)= val; |
147 *(uint32_t*)(p + 4+2*stride)= | 147 *(uint32_t*)(p + 4+2*stride)= val; |
148 *(uint32_t*)(p + 8+2*stride)= | 148 *(uint32_t*)(p + 8+2*stride)= val; |
149 *(uint32_t*)(p +12+2*stride)= | 149 *(uint32_t*)(p +12+2*stride)= val; |
150 *(uint32_t*)(p + 0+3*stride)= | 150 *(uint32_t*)(p + 0+3*stride)= val; |
151 *(uint32_t*)(p + 4+3*stride)= | 151 *(uint32_t*)(p + 4+3*stride)= val; |
152 *(uint32_t*)(p + 8+3*stride)= | 152 *(uint32_t*)(p + 8+3*stride)= val; |
153 *(uint32_t*)(p +12+3*stride)= val; | 153 *(uint32_t*)(p +12+3*stride)= val; |
154 #endif | 154 #endif |
155 }else | 155 }else |
156 assert(0); | 156 assert(0); |
157 assert(h==4); | 157 assert(h==4); |