annotate i386/mpegvideo_mmx_template.c @ 635:3e0f62e5eed6 libavcodec

dct cleanup more accurate mmx dct (dont discard bits for fun) fixing mmx quantizer bug for qscale%2==1 (bias was slightly wrong)
author michaelni
date Sun, 01 Sep 2002 16:52:33 +0000
parents bb6a69f9d409
children eaa9ef2e2557
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
220
0b234715e205 (commit by michael)
arpi_esp
parents:
diff changeset
1 /*
429
718a22dc121f license/copyright change
glantau
parents: 350
diff changeset
2 * MPEG video MMX templates
718a22dc121f license/copyright change
glantau
parents: 350
diff changeset
3 *
718a22dc121f license/copyright change
glantau
parents: 350
diff changeset
4 * Copyright (c) 2002 Michael Niedermayer <michaelni@gmx.at>
718a22dc121f license/copyright change
glantau
parents: 350
diff changeset
5 *
718a22dc121f license/copyright change
glantau
parents: 350
diff changeset
6 * This library is free software; you can redistribute it and/or
718a22dc121f license/copyright change
glantau
parents: 350
diff changeset
7 * modify it under the terms of the GNU Lesser General Public
718a22dc121f license/copyright change
glantau
parents: 350
diff changeset
8 * License as published by the Free Software Foundation; either
718a22dc121f license/copyright change
glantau
parents: 350
diff changeset
9 * version 2 of the License, or (at your option) any later version.
718a22dc121f license/copyright change
glantau
parents: 350
diff changeset
10 *
718a22dc121f license/copyright change
glantau
parents: 350
diff changeset
11 * This library is distributed in the hope that it will be useful,
718a22dc121f license/copyright change
glantau
parents: 350
diff changeset
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
718a22dc121f license/copyright change
glantau
parents: 350
diff changeset
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
718a22dc121f license/copyright change
glantau
parents: 350
diff changeset
14 * Lesser General Public License for more details.
718a22dc121f license/copyright change
glantau
parents: 350
diff changeset
15 *
718a22dc121f license/copyright change
glantau
parents: 350
diff changeset
16 * You should have received a copy of the GNU Lesser General Public
718a22dc121f license/copyright change
glantau
parents: 350
diff changeset
17 * License along with this library; if not, write to the Free Software
718a22dc121f license/copyright change
glantau
parents: 350
diff changeset
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
718a22dc121f license/copyright change
glantau
parents: 350
diff changeset
19 */
220
0b234715e205 (commit by michael)
arpi_esp
parents:
diff changeset
20 #undef SPREADW
0b234715e205 (commit by michael)
arpi_esp
parents:
diff changeset
21 #undef PMAXW
0b234715e205 (commit by michael)
arpi_esp
parents:
diff changeset
22 #ifdef HAVE_MMX2
0b234715e205 (commit by michael)
arpi_esp
parents:
diff changeset
23 #define SPREADW(a) "pshufw $0, " #a ", " #a " \n\t"
0b234715e205 (commit by michael)
arpi_esp
parents:
diff changeset
24 #define PMAXW(a,b) "pmaxsw " #a ", " #b " \n\t"
0b234715e205 (commit by michael)
arpi_esp
parents:
diff changeset
25
0b234715e205 (commit by michael)
arpi_esp
parents:
diff changeset
26 #else
0b234715e205 (commit by michael)
arpi_esp
parents:
diff changeset
27 #define SPREADW(a) \
0b234715e205 (commit by michael)
arpi_esp
parents:
diff changeset
28 "punpcklwd " #a ", " #a " \n\t"\
0b234715e205 (commit by michael)
arpi_esp
parents:
diff changeset
29 "punpcklwd " #a ", " #a " \n\t"
0b234715e205 (commit by michael)
arpi_esp
parents:
diff changeset
30 #define PMAXW(a,b) \
0b234715e205 (commit by michael)
arpi_esp
parents:
diff changeset
31 "psubusw " #a ", " #b " \n\t"\
0b234715e205 (commit by michael)
arpi_esp
parents:
diff changeset
32 "paddw " #a ", " #b " \n\t"
0b234715e205 (commit by michael)
arpi_esp
parents:
diff changeset
33 #endif
0b234715e205 (commit by michael)
arpi_esp
parents:
diff changeset
34
0b234715e205 (commit by michael)
arpi_esp
parents:
diff changeset
35 static int RENAME(dct_quantize)(MpegEncContext *s,
0b234715e205 (commit by michael)
arpi_esp
parents:
diff changeset
36 DCTELEM *block, int n,
344
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
37 int qscale, int *overflow)
220
0b234715e205 (commit by michael)
arpi_esp
parents:
diff changeset
38 {
344
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
39 int level=0, last_non_zero_p1, q; // level=0 only silences gcc's "uninitialized" warning
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
40 const UINT16 *qmat, *bias;
220
0b234715e205 (commit by michael)
arpi_esp
parents:
diff changeset
41 static __align8 INT16 temp_block[64];
0b234715e205 (commit by michael)
arpi_esp
parents:
diff changeset
42
625
bb6a69f9d409 slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents: 599
diff changeset
43 //s->fdct (block);
bb6a69f9d409 slow but accurate integer dct from IJG (should be ok with the LGPL as the old DCT is the fast integer DCT from IJG)
michaelni
parents: 599
diff changeset
44 fdct_mmx (block); //can't be anything else ...
344
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
45
220
0b234715e205 (commit by michael)
arpi_esp
parents:
diff changeset
46 if (s->mb_intra) {
0b234715e205 (commit by michael)
arpi_esp
parents:
diff changeset
47 int dummy;
0b234715e205 (commit by michael)
arpi_esp
parents:
diff changeset
48 if (n < 4)
635
3e0f62e5eed6 dct cleanup
michaelni
parents: 625
diff changeset
49 q = s->y_dc_scale<<3;
220
0b234715e205 (commit by michael)
arpi_esp
parents:
diff changeset
50 else
635
3e0f62e5eed6 dct cleanup
michaelni
parents: 625
diff changeset
51 q = s->c_dc_scale<<3;
220
0b234715e205 (commit by michael)
arpi_esp
parents:
diff changeset
52 /* note: block[0] is assumed to be positive */
350
6ebbecc10063 - Advanced Intra Coding (AIC) support for H.263+ encoder, just DC by now.
pulento
parents: 344
diff changeset
53 if (!s->h263_aic) {
220
0b234715e205 (commit by michael)
arpi_esp
parents:
diff changeset
54 #if 1
344
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
55 asm volatile (
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
56 "xorl %%edx, %%edx \n\t"
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
57 "mul %%ecx \n\t"
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
58 : "=d" (level), "=a"(dummy)
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
59 : "a" (block[0] + (q >> 1)), "c" (inverse[q])
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
60 );
220
0b234715e205 (commit by michael)
arpi_esp
parents:
diff changeset
61 #else
344
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
62 asm volatile (
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
63 "xorl %%edx, %%edx \n\t"
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
64 "divw %%cx \n\t"
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
65 "movzwl %%ax, %%eax \n\t"
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
66 : "=a" (level)
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
67 : "a" (block[0] + (q >> 1)), "c" (q)
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
68 : "%edx"
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
69 );
220
0b234715e205 (commit by michael)
arpi_esp
parents:
diff changeset
70 #endif
350
6ebbecc10063 - Advanced Intra Coding (AIC) support for H.263+ encoder, just DC by now.
pulento
parents: 344
diff changeset
71 } else
6ebbecc10063 - Advanced Intra Coding (AIC) support for H.263+ encoder, just DC by now.
pulento
parents: 344
diff changeset
72 /* For AIC we skip quant/dequant of INTRADC */
635
3e0f62e5eed6 dct cleanup
michaelni
parents: 625
diff changeset
73 level = block[0]>>3;
350
6ebbecc10063 - Advanced Intra Coding (AIC) support for H.263+ encoder, just DC by now.
pulento
parents: 344
diff changeset
74
344
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
75 block[0]=0; //avoid fake overflow
220
0b234715e205 (commit by michael)
arpi_esp
parents:
diff changeset
76 // temp_block[0] = (block[0] + (q >> 1)) / q;
0b234715e205 (commit by michael)
arpi_esp
parents:
diff changeset
77 last_non_zero_p1 = 1;
344
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
78 bias = s->q_intra_matrix16_bias[qscale];
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
79 qmat = s->q_intra_matrix16[qscale];
220
0b234715e205 (commit by michael)
arpi_esp
parents:
diff changeset
80 } else {
0b234715e205 (commit by michael)
arpi_esp
parents:
diff changeset
81 last_non_zero_p1 = 0;
344
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
82 bias = s->q_inter_matrix16_bias[qscale];
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
83 qmat = s->q_inter_matrix16[qscale];
220
0b234715e205 (commit by michael)
arpi_esp
parents:
diff changeset
84 }
0b234715e205 (commit by michael)
arpi_esp
parents:
diff changeset
85
599
b1a191202f96 mpeg4 mpeg quantizer encoding
michaelni
parents: 429
diff changeset
86 if(s->out_format == FMT_H263 && s->mpeg_quant==0){
344
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
87
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
88 asm volatile(
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
89 "movd %%eax, %%mm3 \n\t" // last_non_zero_p1
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
90 SPREADW(%%mm3)
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
91 "pxor %%mm7, %%mm7 \n\t" // 0
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
92 "pxor %%mm4, %%mm4 \n\t" // 0
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
93 "movq (%2), %%mm5 \n\t" // qmat[0]
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
94 "pxor %%mm6, %%mm6 \n\t"
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
95 "psubw (%3), %%mm6 \n\t" // -bias[0]
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
96 "movl $-128, %%eax \n\t"
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
97 ".balign 16 \n\t"
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
98 "1: \n\t"
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
99 "pxor %%mm1, %%mm1 \n\t" // 0
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
100 "movq (%1, %%eax), %%mm0 \n\t" // block[i]
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
101 "pcmpgtw %%mm0, %%mm1 \n\t" // block[i] < 0 ? 0xFFFF : 0x0000
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
102 "pxor %%mm1, %%mm0 \n\t"
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
103 "psubw %%mm1, %%mm0 \n\t" // ABS(block[i])
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
104 "psubusw %%mm6, %%mm0 \n\t" // ABS(block[i]) + bias[0]
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
105 "pmulhw %%mm5, %%mm0 \n\t" // ((ABS(block[i]) + bias[0])*qmat[0])>>16
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
106 "por %%mm0, %%mm4 \n\t"
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
107 "pxor %%mm1, %%mm0 \n\t"
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
108 "psubw %%mm1, %%mm0 \n\t" // out=(((ABS(block[i]) + bias[0])*qmat[0])>>16)*sign(block[i])
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
109 "movq %%mm0, (%5, %%eax) \n\t"
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
110 "pcmpeqw %%mm7, %%mm0 \n\t" // out==0 ? 0xFF : 0x00
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
111 "movq (%4, %%eax), %%mm1 \n\t"
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
112 "movq %%mm7, (%1, %%eax) \n\t" // 0
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
113 "pandn %%mm1, %%mm0 \n\t"
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
114 PMAXW(%%mm0, %%mm3)
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
115 "addl $8, %%eax \n\t"
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
116 " js 1b \n\t"
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
117 "movq %%mm3, %%mm0 \n\t"
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
118 "psrlq $32, %%mm3 \n\t"
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
119 PMAXW(%%mm0, %%mm3)
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
120 "movq %%mm3, %%mm0 \n\t"
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
121 "psrlq $16, %%mm3 \n\t"
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
122 PMAXW(%%mm0, %%mm3)
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
123 "movd %%mm3, %%eax \n\t"
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
124 "movzbl %%al, %%eax \n\t" // last_non_zero_p1
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
125 : "+a" (last_non_zero_p1)
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
126 : "r" (block+64), "r" (qmat), "r" (bias),
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
127 "r" (inv_zigzag_direct16+64), "r" (temp_block+64)
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
128 );
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
129 // note: the asm is split because gcc doesn't like that many operands ...
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
130 asm volatile(
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
131 "movd %1, %%mm1 \n\t" // max_qcoeff
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
132 SPREADW(%%mm1)
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
133 "psubusw %%mm1, %%mm4 \n\t"
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
134 "packuswb %%mm4, %%mm4 \n\t"
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
135 "movd %%mm4, %0 \n\t" // *overflow
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
136 : "=g" (*overflow)
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
137 : "g" (s->max_qcoeff)
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
138 );
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
139 }else{ // not (FMT_H263 with mpeg_quant==0): per-coefficient qmat[i]/bias[i]
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
140 asm volatile(
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
141 "movd %%eax, %%mm3 \n\t" // last_non_zero_p1
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
142 SPREADW(%%mm3)
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
143 "pxor %%mm7, %%mm7 \n\t" // 0
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
144 "pxor %%mm4, %%mm4 \n\t" // 0
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
145 "movl $-128, %%eax \n\t"
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
146 ".balign 16 \n\t"
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
147 "1: \n\t"
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
148 "pxor %%mm1, %%mm1 \n\t" // 0
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
149 "movq (%1, %%eax), %%mm0 \n\t" // block[i]
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
150 "pcmpgtw %%mm0, %%mm1 \n\t" // block[i] < 0 ? 0xFFFF : 0x0000
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
151 "pxor %%mm1, %%mm0 \n\t"
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
152 "psubw %%mm1, %%mm0 \n\t" // ABS(block[i])
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
153 "movq (%3, %%eax), %%mm6 \n\t" // bias[i]
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
154 "paddusw %%mm6, %%mm0 \n\t" // ABS(block[i]) + bias[i]
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
155 "movq (%2, %%eax), %%mm5 \n\t" // qmat[i]
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
156 "pmulhw %%mm5, %%mm0 \n\t" // ((ABS(block[i]) + bias[i])*qmat[i])>>16
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
157 "por %%mm0, %%mm4 \n\t"
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
158 "pxor %%mm1, %%mm0 \n\t"
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
159 "psubw %%mm1, %%mm0 \n\t" // out=(((ABS(block[i]) + bias[i])*qmat[i])>>16)*sign(block[i])
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
160 "movq %%mm0, (%5, %%eax) \n\t"
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
161 "pcmpeqw %%mm7, %%mm0 \n\t" // out==0 ? 0xFF : 0x00
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
162 "movq (%4, %%eax), %%mm1 \n\t"
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
163 "movq %%mm7, (%1, %%eax) \n\t" // 0
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
164 "pandn %%mm1, %%mm0 \n\t"
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
165 PMAXW(%%mm0, %%mm3)
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
166 "addl $8, %%eax \n\t"
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
167 " js 1b \n\t"
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
168 "movq %%mm3, %%mm0 \n\t"
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
169 "psrlq $32, %%mm3 \n\t"
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
170 PMAXW(%%mm0, %%mm3)
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
171 "movq %%mm3, %%mm0 \n\t"
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
172 "psrlq $16, %%mm3 \n\t"
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
173 PMAXW(%%mm0, %%mm3)
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
174 "movd %%mm3, %%eax \n\t"
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
175 "movzbl %%al, %%eax \n\t" // last_non_zero_p1
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
176 : "+a" (last_non_zero_p1)
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
177 : "r" (block+64), "r" (qmat+64), "r" (bias+64),
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
178 "r" (inv_zigzag_direct16+64), "r" (temp_block+64)
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
179 );
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
180 // note: the asm is split because gcc doesn't like that many operands ...
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
181 asm volatile(
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
182 "movd %1, %%mm1 \n\t" // max_qcoeff
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
183 SPREADW(%%mm1)
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
184 "psubusw %%mm1, %%mm4 \n\t"
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
185 "packuswb %%mm4, %%mm4 \n\t"
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
186 "movd %%mm4, %0 \n\t" // *overflow
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
187 : "=g" (*overflow)
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
188 : "g" (s->max_qcoeff)
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
189 );
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
190 }
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
191
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
192 if(s->mb_intra) temp_block[0]= level; //FIXME move after permute
599
b1a191202f96 mpeg4 mpeg quantizer encoding
michaelni
parents: 429
diff changeset
193
220
0b234715e205 (commit by michael)
arpi_esp
parents:
diff changeset
194 // last_non_zero_p1=64;
0b234715e205 (commit by michael)
arpi_esp
parents:
diff changeset
195 /* permute for IDCT */
0b234715e205 (commit by michael)
arpi_esp
parents:
diff changeset
196 asm volatile(
344
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
197 "movl %0, %%eax \n\t"
220
0b234715e205 (commit by michael)
arpi_esp
parents:
diff changeset
198 "pushl %%ebp \n\t"
0b234715e205 (commit by michael)
arpi_esp
parents:
diff changeset
199 "movl %%esp, " MANGLE(esp_temp) "\n\t"
0b234715e205 (commit by michael)
arpi_esp
parents:
diff changeset
200 "1: \n\t"
0b234715e205 (commit by michael)
arpi_esp
parents:
diff changeset
201 "movzbl (%1, %%eax), %%ebx \n\t"
0b234715e205 (commit by michael)
arpi_esp
parents:
diff changeset
202 "movzbl 1(%1, %%eax), %%ebp \n\t"
0b234715e205 (commit by michael)
arpi_esp
parents:
diff changeset
203 "movw (%2, %%ebx, 2), %%cx \n\t"
0b234715e205 (commit by michael)
arpi_esp
parents:
diff changeset
204 "movw (%2, %%ebp, 2), %%sp \n\t"
0b234715e205 (commit by michael)
arpi_esp
parents:
diff changeset
205 "movzbl " MANGLE(permutation) "(%%ebx), %%ebx\n\t"
0b234715e205 (commit by michael)
arpi_esp
parents:
diff changeset
206 "movzbl " MANGLE(permutation) "(%%ebp), %%ebp\n\t"
0b234715e205 (commit by michael)
arpi_esp
parents:
diff changeset
207 "movw %%cx, (%3, %%ebx, 2) \n\t"
0b234715e205 (commit by michael)
arpi_esp
parents:
diff changeset
208 "movw %%sp, (%3, %%ebp, 2) \n\t"
0b234715e205 (commit by michael)
arpi_esp
parents:
diff changeset
209 "addl $2, %%eax \n\t"
0b234715e205 (commit by michael)
arpi_esp
parents:
diff changeset
210 " js 1b \n\t"
0b234715e205 (commit by michael)
arpi_esp
parents:
diff changeset
211 "movl " MANGLE(esp_temp) ", %%esp\n\t"
0b234715e205 (commit by michael)
arpi_esp
parents:
diff changeset
212 "popl %%ebp \n\t"
0b234715e205 (commit by michael)
arpi_esp
parents:
diff changeset
213 :
0b234715e205 (commit by michael)
arpi_esp
parents:
diff changeset
214 : "g" (-last_non_zero_p1), "d" (zigzag_direct_noperm+last_non_zero_p1), "S" (temp_block), "D" (block)
0b234715e205 (commit by michael)
arpi_esp
parents:
diff changeset
215 : "%eax", "%ebx", "%ecx"
0b234715e205 (commit by michael)
arpi_esp
parents:
diff changeset
216 );
0b234715e205 (commit by michael)
arpi_esp
parents:
diff changeset
217 /*
0b234715e205 (commit by michael)
arpi_esp
parents:
diff changeset
218 for(i=0; i<last_non_zero_p1; i++)
0b234715e205 (commit by michael)
arpi_esp
parents:
diff changeset
219 {
0b234715e205 (commit by michael)
arpi_esp
parents:
diff changeset
220 int j= zigzag_direct_noperm[i];
0b234715e205 (commit by michael)
arpi_esp
parents:
diff changeset
221 block[block_permute_op(j)]= temp_block[j];
0b234715e205 (commit by michael)
arpi_esp
parents:
diff changeset
222 }
0b234715e205 (commit by michael)
arpi_esp
parents:
diff changeset
223 */
0b234715e205 (commit by michael)
arpi_esp
parents:
diff changeset
224 //block_permute(block);
344
9f6071a87e17 fixed msmpeg4 infinite loop if buggy stream
michaelni
parents: 230
diff changeset
225
220
0b234715e205 (commit by michael)
arpi_esp
parents:
diff changeset
226 return last_non_zero_p1 - 1;
0b234715e205 (commit by michael)
arpi_esp
parents:
diff changeset
227 }