Mercurial > libavcodec.hg
comparison alpha/mpegvideo_alpha.c @ 8617:2d6a70824dfe libavcodec
Alpha: factor common parts from dct_unquantize_h263_intra/inter_axp
author | mru |
---|---|
date | Sat, 17 Jan 2009 21:44:58 +0000 |
parents | 225104b68cbb |
children |
comparison
legend: equal | deleted | inserted | replaced
8616:225104b68cbb | 8617:2d6a70824dfe |
---|---|
21 | 21 |
22 #include "libavcodec/dsputil.h" | 22 #include "libavcodec/dsputil.h" |
23 #include "libavcodec/mpegvideo.h" | 23 #include "libavcodec/mpegvideo.h" |
24 #include "asm.h" | 24 #include "asm.h" |
25 | 25 |
26 static void dct_unquantize_h263_intra_axp(MpegEncContext *s, DCTELEM *block, | 26 static void dct_unquantize_h263_axp(DCTELEM *block, int n_coeffs, |
27 int n, int qscale) | 27 uint64_t qscale, uint64_t qadd) |
28 { | 28 { |
29 int i, n_coeffs; | 29 uint64_t qmul = qscale << 1; |
30 uint64_t qmul, qadd; | 30 uint64_t correction = WORD_VEC(qmul * 255 >> 8); |
31 uint64_t correction; | 31 int i; |
32 DCTELEM *orig_block = block; | |
33 DCTELEM block0 = block[0]; | |
34 | 32 |
35 qmul = qscale << 1; | 33 qadd = WORD_VEC(qadd); |
36 /* This mask kills spill from negative subwords to the next subword. */ | |
37 correction = WORD_VEC(qmul * 255 >> 8); | |
38 | |
39 if (!s->h263_aic) { | |
40 if (n < 4) | |
41 block0 *= s->y_dc_scale; | |
42 else | |
43 block0 *= s->c_dc_scale; | |
44 qadd = WORD_VEC((qscale - 1) | 1); | |
45 } else { | |
46 qadd = 0; | |
47 } | |
48 | |
49 if(s->ac_pred) | |
50 n_coeffs = 63; | |
51 else | |
52 n_coeffs = s->inter_scantable.raster_end[s->block_last_index[n]]; | |
53 | 34 |
54 for(i = 0; i <= n_coeffs; block += 4, i += 4) { | 35 for(i = 0; i <= n_coeffs; block += 4, i += 4) { |
55 uint64_t levels, negmask, zeros, add, sub; | 36 uint64_t levels, negmask, zeros, add, sub; |
56 | 37 |
57 levels = ldq(block); | 38 levels = ldq(block); |
84 levels += add; | 65 levels += add; |
85 levels -= sub; | 66 levels -= sub; |
86 | 67 |
87 stq(levels, block); | 68 stq(levels, block); |
88 } | 69 } |
70 } | |
89 | 71 |
90 orig_block[0] = block0; | 72 static void dct_unquantize_h263_intra_axp(MpegEncContext *s, DCTELEM *block, |
73 int n, int qscale) | |
74 { | |
75 int n_coeffs; | |
76 uint64_t qadd; | |
77 DCTELEM block0 = block[0]; | |
78 | |
79 if (!s->h263_aic) { | |
80 if (n < 4) | |
81 block0 *= s->y_dc_scale; | |
82 else | |
83 block0 *= s->c_dc_scale; | |
84 qadd = (qscale - 1) | 1; | |
85 } else { | |
86 qadd = 0; | |
87 } | |
88 | |
89 if(s->ac_pred) | |
90 n_coeffs = 63; | |
91 else | |
92 n_coeffs = s->inter_scantable.raster_end[s->block_last_index[n]]; | |
93 | |
94 dct_unquantize_h263_axp(block, n_coeffs, qscale, qadd); | |
95 | |
96 block[0] = block0; | |
91 } | 97 } |
92 | 98 |
93 static void dct_unquantize_h263_inter_axp(MpegEncContext *s, DCTELEM *block, | 99 static void dct_unquantize_h263_inter_axp(MpegEncContext *s, DCTELEM *block, |
94 int n, int qscale) | 100 int n, int qscale) |
95 { | 101 { |
96 int i, n_coeffs; | 102 int n_coeffs = s->inter_scantable.raster_end[s->block_last_index[n]]; |
97 uint64_t qmul, qadd; | 103 dct_unquantize_h263_axp(block, n_coeffs, qscale, (qscale - 1) | 1); |
98 uint64_t correction; | |
99 | |
100 qadd = WORD_VEC((qscale - 1) | 1); | |
101 qmul = qscale << 1; | |
102 /* This mask kills spill from negative subwords to the next subword. */ | |
103 correction = WORD_VEC((qmul - 1) + 1); /* multiplication / addition */ | |
104 | |
105 n_coeffs = s->inter_scantable.raster_end[s->block_last_index[n]]; | |
106 | |
107 for(i = 0; i <= n_coeffs; block += 4, i += 4) { | |
108 uint64_t levels, negmask, zeros, add; | |
109 | |
110 levels = ldq(block); | |
111 if (levels == 0) | |
112 continue; | |
113 | |
114 #ifdef __alpha_max__ | |
115 /* I don't think the speed difference justifies runtime | |
116 detection. */ | |
117 negmask = maxsw4(levels, -1); /* negative -> ffff (-1) */ | |
118 negmask = minsw4(negmask, 0); /* positive -> 0000 (0) */ | |
119 #else | |
120 negmask = cmpbge(WORD_VEC(0x7fff), levels); | |
121 negmask &= (negmask >> 1) | (1 << 7); | |
122 negmask = zap(-1, negmask); | |
123 #endif | |
124 | |
125 zeros = cmpbge(0, levels); | |
126 zeros &= zeros >> 1; | |
127 /* zeros |= zeros << 1 is not needed since qadd <= 255, so | |
128 zapping the lower byte suffices. */ | |
129 | |
130 levels *= qmul; | |
131 levels -= correction & (negmask << 16); | |
132 | |
133 /* Negate qadd for negative levels. */ | |
134 add = qadd ^ negmask; | |
135 add += WORD_VEC(0x0001) & negmask; | |
136 /* Set qadd to 0 for levels == 0. */ | |
137 add = zap(add, zeros); | |
138 | |
139 levels += add; | |
140 | |
141 stq(levels, block); | |
142 } | |
143 } | 104 } |
144 | 105 |
145 void MPV_common_init_axp(MpegEncContext *s) | 106 void MPV_common_init_axp(MpegEncContext *s) |
146 { | 107 { |
147 s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_axp; | 108 s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_axp; |