Mercurial > libavcodec.hg
annotate armv4l/mpegvideo_iwmmxt.c @ 3198:6b9f0c4fbdbe libavcodec
First part of a series of speed-enhancing patches.
This one sets up a snow.h and makes snow use the dsputil function pointer
framework to access the three functions that will be implemented in asm
in the other parts of the patchset.
Patch by Robert Edele < yartrebo AH earthlink POIS net>
Original thread:
Subject: [Ffmpeg-devel] [PATCH] Snow mmx+sse2 asm optimizations
Date: Sun, 05 Feb 2006 12:47:14 -0500
author | gpoirier |
---|---|
date | Thu, 16 Mar 2006 19:18:18 +0000 |
parents | 930e56f92c57 |
children | c537a97eec66 |
rev | line source |
---|---|
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
1 #include "../dsputil.h" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
2 #include "../mpegvideo.h" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
3 #include "../avcodec.h" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
4 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
5 static void dct_unquantize_h263_intra_iwmmxt(MpegEncContext *s, |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
6 DCTELEM *block, int n, int qscale) |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
7 { |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
8 int level, qmul, qadd; |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
9 int nCoeffs; |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
10 DCTELEM *block_orig = block; |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
11 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
12 assert(s->block_last_index[n]>=0); |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
13 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
14 qmul = qscale << 1; |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
15 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
16 if (!s->h263_aic) { |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
17 if (n < 4) |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
18 level = block[0] * s->y_dc_scale; |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
19 else |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
20 level = block[0] * s->c_dc_scale; |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
21 qadd = (qscale - 1) | 1; |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
22 }else{ |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
23 qadd = 0; |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
24 level = block[0]; |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
25 } |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
26 if(s->ac_pred) |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
27 nCoeffs=63; |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
28 else |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
29 nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ]; |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
30 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
31 __asm__ __volatile__ ( |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
32 /* "movd %1, %%mm6 \n\t" //qmul */ |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
33 /* "packssdw %%mm6, %%mm6 \n\t" */ |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
34 /* "packssdw %%mm6, %%mm6 \n\t" */ |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
35 "tbcsth wr6, %[qmul] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
36 /* "movd %2, %%mm5 \n\t" //qadd */ |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
37 /* "packssdw %%mm5, %%mm5 \n\t" */ |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
38 /* "packssdw %%mm5, %%mm5 \n\t" */ |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
39 "tbcsth wr5, %[qadd] \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
40 "wzero wr7 \n\t" /* "pxor %%mm7, %%mm7 \n\t" */ |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
41 "wzero wr4 \n\t" /* "pxor %%mm4, %%mm4 \n\t" */ |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
42 "wsubh wr7, wr5, wr7 \n\t" /* "psubw %%mm5, %%mm7 \n\t" */ |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
43 "1: \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
44 "wldrd wr2, [%[block]] \n\t" /* "movq (%0, %3), %%mm0 \n\t" */ |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
45 "wldrd wr3, [%[block], #8] \n\t" /* "movq 8(%0, %3), %%mm1 \n\t" */ |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
46 "wmulsl wr0, wr6, wr2 \n\t" /* "pmullw %%mm6, %%mm0 \n\t" */ |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
47 "wmulsl wr1, wr6, wr3 \n\t" /* "pmullw %%mm6, %%mm1 \n\t" */ |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
48 /* "movq (%0, %3), %%mm2 \n\t" */ |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
49 /* "movq 8(%0, %3), %%mm3 \n\t" */ |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
50 "wcmpgtsh wr2, wr4, wr2 \n\t" /* "pcmpgtw %%mm4, %%mm2 \n\t" // block[i] < 0 ? -1 : 0 */ |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
51 "wcmpgtsh wr3, wr4, wr2 \n\t" /* "pcmpgtw %%mm4, %%mm3 \n\t" // block[i] < 0 ? -1 : 0 */ |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
52 "wxor wr0, wr2, wr0 \n\t" /* "pxor %%mm2, %%mm0 \n\t" */ |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
53 "wxor wr1, wr3, wr1 \n\t" /* "pxor %%mm3, %%mm1 \n\t" */ |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
54 "waddh wr0, wr7, wr0 \n\t" /* "paddw %%mm7, %%mm0 \n\t" */ |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
55 "waddh wr1, wr7, wr1 \n\t" /* "paddw %%mm7, %%mm1 \n\t" */ |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
56 "wxor wr2, wr0, wr2 \n\t" /* "pxor %%mm0, %%mm2 \n\t" */ |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
57 "wxor wr3, wr1, wr3 \n\t" /* "pxor %%mm1, %%mm3 \n\t" */ |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
58 "wcmpeqh wr0, wr7, wr0 \n\t" /* "pcmpeqw %%mm7, %%mm0 \n\t" // block[i] == 0 ? -1 : 0 */ |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
59 "wcmpeqh wr1, wr7, wr1 \n\t" /* "pcmpeqw %%mm7, %%mm1 \n\t" // block[i] == 0 ? -1 : 0 */ |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
60 "wandn wr0, wr2, wr0 \n\t" /* "pandn %%mm2, %%mm0 \n\t" */ |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
61 "wandn wr1, wr3, wr1 \n\t" /* "pandn %%mm3, %%mm1 \n\t" */ |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
62 "wstrd wr0, [%[block]] \n\t" /* "movq %%mm0, (%0, %3) \n\t" */ |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
63 "wstrd wr1, [%[block], #8] \n\t" /* "movq %%mm1, 8(%0, %3) \n\t" */ |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
64 "add %[block], %[block], #16 \n\t" /* "addl $16, %3 \n\t" */ |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
65 "subs %[i], %[i], #1 \n\t" |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
66 "bne 1b \n\t" /* "jng 1b \n\t" */ |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
67 :[block]"+r"(block) |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
68 :[i]"r"((nCoeffs + 8) / 8), [qmul]"r"(qmul), [qadd]"r"(qadd) |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
69 :"memory"); |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
70 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
71 block_orig[0] = level; |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
72 } |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
73 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
#if 0
/*
 * Compiled out. H.263 inter-block inverse quantization that hands the work to
 * ippiQuantInvInter_Compact_H263_16s_I(), passing the number of coefficients
 * to process (last coefficient position + 1) and the quantizer scale.
 */
static void dct_unquantize_h263_inter_iwmmxt(MpegEncContext *s,
                                  DCTELEM *block, int n, int qscale)
{
    int last_coeff;

    assert(s->block_last_index[n]>=0);

    /* With AC prediction the whole block is processed; otherwise stop at the
     * raster position of the last coded coefficient. */
    last_coeff = s->ac_pred
               ? 63
               : s->inter_scantable.raster_end[ s->block_last_index[n] ];

    ippiQuantInvInter_Compact_H263_16s_I(block, last_coeff + 1, qscale);
}
#endif
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
90 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
91 void MPV_common_init_iwmmxt(MpegEncContext *s) |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
92 { |
2776
930e56f92c57
IWMMXT configure support + runtime selection patch by (Gildas Bazin, gbazin : altern org)
michael
parents:
2734
diff
changeset
|
93 if (!(mm_flags & MM_IWMMXT)) return; |
930e56f92c57
IWMMXT configure support + runtime selection patch by (Gildas Bazin, gbazin : altern org)
michael
parents:
2734
diff
changeset
|
94 |
2734
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
95 s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_iwmmxt; |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
96 #if 0 |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
97 s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_iwmmxt; |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
98 #endif |
aeea63c97878
Better ARM support for mplayer/ffmpeg, ported from atty fork
michael
parents:
diff
changeset
|
99 } |