comparison x86/snowdsp_mmx.c @ 11485:0f0cd6b5791f libavcodec

Separate DWT from snow and dsputil. This moves the DWT functions from snow.c and dsputil.c to a file of their own. A new struct, DWTContext, holds the function pointers previously part of DSPContext.
author mru
date Sun, 14 Mar 2010 17:50:12 +0000
parents b57409c0c286
children 3fc4c625b6f3
comparison
equal deleted inserted replaced
11484:5330f17dc769 11485:0f0cd6b5791f
20 */ 20 */
21 21
22 #include "libavutil/x86_cpu.h" 22 #include "libavutil/x86_cpu.h"
23 #include "libavcodec/avcodec.h" 23 #include "libavcodec/avcodec.h"
24 #include "libavcodec/snow.h" 24 #include "libavcodec/snow.h"
25 #include "libavcodec/dwt.h"
25 #include "dsputil_mmx.h" 26 #include "dsputil_mmx.h"
26 27
27 void ff_snow_horizontal_compose97i_sse2(IDWTELEM *b, int width){ 28 static void ff_snow_horizontal_compose97i_sse2(IDWTELEM *b, int width){
28 const int w2= (width+1)>>1; 29 const int w2= (width+1)>>1;
29 DECLARE_ALIGNED(16, IDWTELEM, temp)[width>>1]; 30 DECLARE_ALIGNED(16, IDWTELEM, temp)[width>>1];
30 const int w_l= (width>>1); 31 const int w_l= (width>>1);
31 const int w_r= w2 - 1; 32 const int w_r= w2 - 1;
32 int i; 33 int i;
211 ); 212 );
212 } 213 }
213 } 214 }
214 } 215 }
215 216
216 void ff_snow_horizontal_compose97i_mmx(IDWTELEM *b, int width){ 217 static void ff_snow_horizontal_compose97i_mmx(IDWTELEM *b, int width){
217 const int w2= (width+1)>>1; 218 const int w2= (width+1)>>1;
218 IDWTELEM temp[width >> 1]; 219 IDWTELEM temp[width >> 1];
219 const int w_l= (width>>1); 220 const int w_l= (width>>1);
220 const int w_r= w2 - 1; 221 const int w_r= w2 - 1;
221 int i; 222 int i;
434 "movdqa %%"s0", %%"t0" \n\t"\ 435 "movdqa %%"s0", %%"t0" \n\t"\
435 "movdqa %%"s1", %%"t1" \n\t"\ 436 "movdqa %%"s1", %%"t1" \n\t"\
436 "movdqa %%"s2", %%"t2" \n\t"\ 437 "movdqa %%"s2", %%"t2" \n\t"\
437 "movdqa %%"s3", %%"t3" \n\t" 438 "movdqa %%"s3", %%"t3" \n\t"
438 439
439 void ff_snow_vertical_compose97i_sse2(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width){ 440 static void ff_snow_vertical_compose97i_sse2(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width){
440 x86_reg i = width; 441 x86_reg i = width;
441 442
442 while(i & 0x1F) 443 while(i & 0x1F)
443 { 444 {
444 i--; 445 i--;
532 "movq %%"s1", %%"t1" \n\t"\ 533 "movq %%"s1", %%"t1" \n\t"\
533 "movq %%"s2", %%"t2" \n\t"\ 534 "movq %%"s2", %%"t2" \n\t"\
534 "movq %%"s3", %%"t3" \n\t" 535 "movq %%"s3", %%"t3" \n\t"
535 536
536 537
537 void ff_snow_vertical_compose97i_mmx(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width){ 538 static void ff_snow_vertical_compose97i_mmx(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width){
538 x86_reg i = width; 539 x86_reg i = width;
539 while(i & 15) 540 while(i & 15)
540 { 541 {
541 i--; 542 i--;
542 b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS; 543 b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS;
845 snow_inner_add_yblock_mmx_accum("0", "536", "8") 846 snow_inner_add_yblock_mmx_accum("0", "536", "8")
846 snow_inner_add_yblock_mmx_mix("16", "8") 847 snow_inner_add_yblock_mmx_mix("16", "8")
847 snow_inner_add_yblock_mmx_end("32") 848 snow_inner_add_yblock_mmx_end("32")
848 } 849 }
849 850
850 void ff_snow_inner_add_yblock_sse2(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, 851 static void ff_snow_inner_add_yblock_sse2(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
851 int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){ 852 int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
852 853
853 if (b_w == 16) 854 if (b_w == 16)
854 inner_add_yblock_bw_16_obmc_32_sse2(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8); 855 inner_add_yblock_bw_16_obmc_32_sse2(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
855 else if (b_w == 8 && obmc_stride == 16) { 856 else if (b_w == 8 && obmc_stride == 16) {
859 inner_add_yblock_bw_8_obmc_16_mmx(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8); 860 inner_add_yblock_bw_8_obmc_16_mmx(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
860 } else 861 } else
861 ff_snow_inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8); 862 ff_snow_inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
862 } 863 }
863 864
864 void ff_snow_inner_add_yblock_mmx(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, 865 static void ff_snow_inner_add_yblock_mmx(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
865 int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){ 866 int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
866 if (b_w == 16) 867 if (b_w == 16)
867 inner_add_yblock_bw_16_obmc_32_mmx(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8); 868 inner_add_yblock_bw_16_obmc_32_mmx(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
868 else if (b_w == 8 && obmc_stride == 16) 869 else if (b_w == 8 && obmc_stride == 16)
869 inner_add_yblock_bw_8_obmc_16_mmx(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8); 870 inner_add_yblock_bw_8_obmc_16_mmx(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
870 else 871 else
871 ff_snow_inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8); 872 ff_snow_inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
872 } 873 }
874
875 void ff_dwt_init_x86(DWTContext *c) /* Fills in function pointers of c according to the flags returned by mm_support(). */
876 {
877 mm_flags = mm_support(); /* mm_flags is not declared in this function; presumably a variable defined elsewhere -- confirm */
878
879 if (mm_flags & FF_MM_MMX) { /* no pointer in c is touched unless the MMX flag is set */
880 if(mm_flags & FF_MM_SSE2 & 0){ /* the trailing "& 0" forces this condition to zero, so the SSE2 assignments below never run; NOTE(review): looks like a deliberate disable -- confirm */
881 c->horizontal_compose97i = ff_snow_horizontal_compose97i_sse2;
882 #if HAVE_7REGS
883 c->vertical_compose97i = ff_snow_vertical_compose97i_sse2; /* only compiled in when HAVE_7REGS is nonzero */
884 #endif
885 c->inner_add_yblock = ff_snow_inner_add_yblock_sse2;
886 }
887 else{
888 if(mm_flags & FF_MM_MMX2){ /* the compose97i MMX versions are installed only when the MMX2 flag is also set */
889 c->horizontal_compose97i = ff_snow_horizontal_compose97i_mmx;
890 #if HAVE_7REGS
891 c->vertical_compose97i = ff_snow_vertical_compose97i_mmx; /* only compiled in when HAVE_7REGS is nonzero */
892 #endif
893 }
894 c->inner_add_yblock = ff_snow_inner_add_yblock_mmx; /* assigned on this path whether or not the MMX2 flag is set */
895 }
896 }
897 }