Mercurial > libavcodec.hg
comparison x86/snowdsp_mmx.c @ 11485:0f0cd6b5791f libavcodec
Separate DWT from snow and dsputil
This moves the DWT functions from snow.c and dsputil.c to a file of
their own. A new struct, DWTContext, holds the function pointers
previously part of DSPContext.
author | mru |
---|---|
date | Sun, 14 Mar 2010 17:50:12 +0000 |
parents | b57409c0c286 |
children | 3fc4c625b6f3 |
comparison legend:
equal
deleted
inserted
replaced
11484:5330f17dc769 | 11485:0f0cd6b5791f |
---|---|
20 */ | 20 */ |
21 | 21 |
22 #include "libavutil/x86_cpu.h" | 22 #include "libavutil/x86_cpu.h" |
23 #include "libavcodec/avcodec.h" | 23 #include "libavcodec/avcodec.h" |
24 #include "libavcodec/snow.h" | 24 #include "libavcodec/snow.h" |
25 #include "libavcodec/dwt.h" | |
25 #include "dsputil_mmx.h" | 26 #include "dsputil_mmx.h" |
26 | 27 |
27 void ff_snow_horizontal_compose97i_sse2(IDWTELEM *b, int width){ | 28 static void ff_snow_horizontal_compose97i_sse2(IDWTELEM *b, int width){ |
28 const int w2= (width+1)>>1; | 29 const int w2= (width+1)>>1; |
29 DECLARE_ALIGNED(16, IDWTELEM, temp)[width>>1]; | 30 DECLARE_ALIGNED(16, IDWTELEM, temp)[width>>1]; |
30 const int w_l= (width>>1); | 31 const int w_l= (width>>1); |
31 const int w_r= w2 - 1; | 32 const int w_r= w2 - 1; |
32 int i; | 33 int i; |
211 ); | 212 ); |
212 } | 213 } |
213 } | 214 } |
214 } | 215 } |
215 | 216 |
216 void ff_snow_horizontal_compose97i_mmx(IDWTELEM *b, int width){ | 217 static void ff_snow_horizontal_compose97i_mmx(IDWTELEM *b, int width){ |
217 const int w2= (width+1)>>1; | 218 const int w2= (width+1)>>1; |
218 IDWTELEM temp[width >> 1]; | 219 IDWTELEM temp[width >> 1]; |
219 const int w_l= (width>>1); | 220 const int w_l= (width>>1); |
220 const int w_r= w2 - 1; | 221 const int w_r= w2 - 1; |
221 int i; | 222 int i; |
434 "movdqa %%"s0", %%"t0" \n\t"\ | 435 "movdqa %%"s0", %%"t0" \n\t"\ |
435 "movdqa %%"s1", %%"t1" \n\t"\ | 436 "movdqa %%"s1", %%"t1" \n\t"\ |
436 "movdqa %%"s2", %%"t2" \n\t"\ | 437 "movdqa %%"s2", %%"t2" \n\t"\ |
437 "movdqa %%"s3", %%"t3" \n\t" | 438 "movdqa %%"s3", %%"t3" \n\t" |
438 | 439 |
439 void ff_snow_vertical_compose97i_sse2(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width){ | 440 static void ff_snow_vertical_compose97i_sse2(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width){ |
440 x86_reg i = width; | 441 x86_reg i = width; |
441 | 442 |
442 while(i & 0x1F) | 443 while(i & 0x1F) |
443 { | 444 { |
444 i--; | 445 i--; |
532 "movq %%"s1", %%"t1" \n\t"\ | 533 "movq %%"s1", %%"t1" \n\t"\ |
533 "movq %%"s2", %%"t2" \n\t"\ | 534 "movq %%"s2", %%"t2" \n\t"\ |
534 "movq %%"s3", %%"t3" \n\t" | 535 "movq %%"s3", %%"t3" \n\t" |
535 | 536 |
536 | 537 |
537 void ff_snow_vertical_compose97i_mmx(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width){ | 538 static void ff_snow_vertical_compose97i_mmx(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width){ |
538 x86_reg i = width; | 539 x86_reg i = width; |
539 while(i & 15) | 540 while(i & 15) |
540 { | 541 { |
541 i--; | 542 i--; |
542 b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS; | 543 b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS; |
845 snow_inner_add_yblock_mmx_accum("0", "536", "8") | 846 snow_inner_add_yblock_mmx_accum("0", "536", "8") |
846 snow_inner_add_yblock_mmx_mix("16", "8") | 847 snow_inner_add_yblock_mmx_mix("16", "8") |
847 snow_inner_add_yblock_mmx_end("32") | 848 snow_inner_add_yblock_mmx_end("32") |
848 } | 849 } |
849 | 850 |
850 void ff_snow_inner_add_yblock_sse2(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, | 851 static void ff_snow_inner_add_yblock_sse2(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, |
851 int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){ | 852 int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){ |
852 | 853 |
853 if (b_w == 16) | 854 if (b_w == 16) |
854 inner_add_yblock_bw_16_obmc_32_sse2(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8); | 855 inner_add_yblock_bw_16_obmc_32_sse2(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8); |
855 else if (b_w == 8 && obmc_stride == 16) { | 856 else if (b_w == 8 && obmc_stride == 16) { |
859 inner_add_yblock_bw_8_obmc_16_mmx(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8); | 860 inner_add_yblock_bw_8_obmc_16_mmx(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8); |
860 } else | 861 } else |
861 ff_snow_inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8); | 862 ff_snow_inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8); |
862 } | 863 } |
863 | 864 |
864 void ff_snow_inner_add_yblock_mmx(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, | 865 static void ff_snow_inner_add_yblock_mmx(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, |
865 int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){ | 866 int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){ |
866 if (b_w == 16) | 867 if (b_w == 16) |
867 inner_add_yblock_bw_16_obmc_32_mmx(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8); | 868 inner_add_yblock_bw_16_obmc_32_mmx(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8); |
868 else if (b_w == 8 && obmc_stride == 16) | 869 else if (b_w == 8 && obmc_stride == 16) |
869 inner_add_yblock_bw_8_obmc_16_mmx(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8); | 870 inner_add_yblock_bw_8_obmc_16_mmx(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8); |
870 else | 871 else |
871 ff_snow_inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8); | 872 ff_snow_inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8); |
872 } | 873 } |
874 | |
875 void ff_dwt_init_x86(DWTContext *c) | |
876 { | |
877 mm_flags = mm_support(); | |
878 | |
879 if (mm_flags & FF_MM_MMX) { | |
880 if(mm_flags & FF_MM_SSE2 & 0){ | |
881 c->horizontal_compose97i = ff_snow_horizontal_compose97i_sse2; | |
882 #if HAVE_7REGS | |
883 c->vertical_compose97i = ff_snow_vertical_compose97i_sse2; | |
884 #endif | |
885 c->inner_add_yblock = ff_snow_inner_add_yblock_sse2; | |
886 } | |
887 else{ | |
888 if(mm_flags & FF_MM_MMX2){ | |
889 c->horizontal_compose97i = ff_snow_horizontal_compose97i_mmx; | |
890 #if HAVE_7REGS | |
891 c->vertical_compose97i = ff_snow_vertical_compose97i_mmx; | |
892 #endif | |
893 } | |
894 c->inner_add_yblock = ff_snow_inner_add_yblock_mmx; | |
895 } | |
896 } | |
897 } |