annotate x86/vp6dsp_mmx.c @ 10952:ea8f891d997d libavcodec

H264 DXVA2 implementation It allows VLD H264 decoding using DXVA2 (GPU assisted decoding API under VISTA and Windows 7). It is implemented by using AVHWAccel API. It has been tested successfully for some time in VLC using an nvidia card on Windows 7. To compile it, you need to have the system header dxva2api.h (either from microsoft or using http://downloads.videolan.org/pub/videolan/testing/contrib/dxva2api.h) The generated libavcodec.dll does not depend directly on any new lib as the necessary objects are given by the application using FFmpeg.
author fenrir
date Wed, 20 Jan 2010 18:54:51 +0000
parents 492f8911992c
children 7dd2a45249a9
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
8817
492f8911992c add MMX version of vp6_filter_diag
aurel
parents:
diff changeset
/**
 * @file libavcodec/x86/vp6dsp_mmx.c
 * MMX-optimized functions for the VP6 decoder
 *
 * Copyright (C) 2009 Sebastien Lucas <sebastien.lucas@gmail.com>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
492f8911992c add MMX version of vp6_filter_diag
aurel
parents:
diff changeset
23
492f8911992c add MMX version of vp6_filter_diag
aurel
parents:
diff changeset
24 #include "libavutil/x86_cpu.h"
492f8911992c add MMX version of vp6_filter_diag
aurel
parents:
diff changeset
25 #include "libavcodec/dsputil.h"
492f8911992c add MMX version of vp6_filter_diag
aurel
parents:
diff changeset
26 #include "dsputil_mmx.h"
492f8911992c add MMX version of vp6_filter_diag
aurel
parents:
diff changeset
27 #include "vp6dsp_mmx.h"
492f8911992c add MMX version of vp6_filter_diag
aurel
parents:
diff changeset
28
492f8911992c add MMX version of vp6_filter_diag
aurel
parents:
diff changeset
29
492f8911992c add MMX version of vp6_filter_diag
aurel
parents:
diff changeset
/*
 * DIAG4_MMX(in1, in2, in3, in4)
 *
 * Inline-asm fragment (AT&T syntax) that applies one 4-tap filter to 8
 * adjacent bytes and stores the 8 filtered bytes.
 *
 * in1..in4 are byte displacements from %0 selecting the four taps; each tap
 * is an 8-byte load.
 *
 * Contract with the enclosing __asm__ statement:
 *   %0   base address the taps are loaded from
 *   %1   address the 8 result bytes are stored to
 *   %2   32 bytes of 16-bit weights; the quadword at byte offset 8*k
 *        multiplies the word lanes of tap k (k = 0..3)
 *   mm7  must be zero on entry (used to widen bytes to words)
 *   mm6  rounding term added before the shift ("Add 64" below)
 * Overwrites mm0-mm5 and the 8 bytes at (%1).
 *
 * Per output byte:
 *   sat_u8((sat16(sat16(t0*w0 + t1*w1  +  t2*w2 + t3*w3) + mm6)) >> 7)
 * where the two pair sums use wrapping 16-bit adds (paddw) and only the
 * later additions saturate (paddsw).
 */
#define DIAG4_MMX(in1,in2,in3,in4)                                            \
    "movq      "#in1"(%0), %%mm0  \n\t" /* tap 0, 8 bytes                  */ \
    "movq      "#in2"(%0), %%mm1  \n\t" /* tap 1, 8 bytes                  */ \
    "movq      %%mm0, %%mm3       \n\t" /* keep copies for the high halves */ \
    "movq      %%mm1, %%mm4       \n\t"                                       \
    "punpcklbw %%mm7, %%mm0       \n\t" /* low  4 bytes -> 4 words         */ \
    "punpcklbw %%mm7, %%mm1       \n\t"                                       \
    "punpckhbw %%mm7, %%mm3       \n\t" /* high 4 bytes -> 4 words         */ \
    "punpckhbw %%mm7, %%mm4       \n\t"                                       \
    "pmullw    0(%2), %%mm0       \n\t" /* tap 0 (low)  * weight 0         */ \
    "pmullw    8(%2), %%mm1       \n\t" /* tap 1 (low)  * weight 1         */ \
    "pmullw    0(%2), %%mm3       \n\t" /* tap 0 (high) * weight 0         */ \
    "pmullw    8(%2), %%mm4       \n\t" /* tap 1 (high) * weight 1         */ \
    "paddw     %%mm1, %%mm0       \n\t" /* low : tap0 + tap1 products      */ \
    "paddw     %%mm4, %%mm3       \n\t" /* high: tap0 + tap1 products      */ \
    "movq      "#in3"(%0), %%mm1  \n\t" /* tap 2, 8 bytes                  */ \
    "movq      "#in4"(%0), %%mm2  \n\t" /* tap 3, 8 bytes                  */ \
    "movq      %%mm1, %%mm4       \n\t"                                       \
    "movq      %%mm2, %%mm5       \n\t"                                       \
    "punpcklbw %%mm7, %%mm1       \n\t"                                       \
    "punpcklbw %%mm7, %%mm2       \n\t"                                       \
    "punpckhbw %%mm7, %%mm4       \n\t"                                       \
    "punpckhbw %%mm7, %%mm5       \n\t"                                       \
    "pmullw    16(%2), %%mm1      \n\t" /* tap 2 (low)  * weight 2         */ \
    "pmullw    24(%2), %%mm2      \n\t" /* tap 3 (low)  * weight 3         */ \
    "pmullw    16(%2), %%mm4      \n\t" /* tap 2 (high) * weight 2         */ \
    "pmullw    24(%2), %%mm5      \n\t" /* tap 3 (high) * weight 3         */ \
    "paddw     %%mm2, %%mm1       \n\t" /* low : tap2 + tap3 products      */ \
    "paddw     %%mm5, %%mm4       \n\t" /* high: tap2 + tap3 products      */ \
    "paddsw    %%mm1, %%mm0       \n\t" /* low : all four taps (saturating)*/ \
    "paddsw    %%mm4, %%mm3       \n\t" /* high: all four taps (saturating)*/ \
    "paddsw    %%mm6, %%mm0       \n\t" /* Add 64 */                          \
    "paddsw    %%mm6, %%mm3       \n\t" /* Add 64 */                          \
    "psraw     $7, %%mm0          \n\t" /* arithmetic >> 7                 */ \
    "psraw     $7, %%mm3          \n\t"                                       \
    "packuswb  %%mm3, %%mm0       \n\t" /* 8 words -> 8 bytes, clamp 0..255*/ \
    "movq      %%mm0, (%1)        \n\t"
492f8911992c add MMX version of vp6_filter_diag
aurel
parents:
diff changeset
67
492f8911992c add MMX version of vp6_filter_diag
aurel
parents:
diff changeset
68 void ff_vp6_filter_diag4_mmx(uint8_t *dst, uint8_t *src, int stride,
492f8911992c add MMX version of vp6_filter_diag
aurel
parents:
diff changeset
69 const int16_t *h_weights, const int16_t *v_weights)
492f8911992c add MMX version of vp6_filter_diag
aurel
parents:
diff changeset
70 {
492f8911992c add MMX version of vp6_filter_diag
aurel
parents:
diff changeset
71 uint8_t tmp[8*11], *t = tmp;
492f8911992c add MMX version of vp6_filter_diag
aurel
parents:
diff changeset
72 int16_t weights[4*4];
492f8911992c add MMX version of vp6_filter_diag
aurel
parents:
diff changeset
73 int i;
492f8911992c add MMX version of vp6_filter_diag
aurel
parents:
diff changeset
74 src -= stride;
492f8911992c add MMX version of vp6_filter_diag
aurel
parents:
diff changeset
75
492f8911992c add MMX version of vp6_filter_diag
aurel
parents:
diff changeset
76 for (i=0; i<4*4; i++)
492f8911992c add MMX version of vp6_filter_diag
aurel
parents:
diff changeset
77 weights[i] = h_weights[i>>2];
492f8911992c add MMX version of vp6_filter_diag
aurel
parents:
diff changeset
78
492f8911992c add MMX version of vp6_filter_diag
aurel
parents:
diff changeset
79 __asm__ volatile(
492f8911992c add MMX version of vp6_filter_diag
aurel
parents:
diff changeset
80 "pxor %%mm7, %%mm7 \n\t"
492f8911992c add MMX version of vp6_filter_diag
aurel
parents:
diff changeset
81 "movq "MANGLE(ff_pw_64)", %%mm6 \n\t"
492f8911992c add MMX version of vp6_filter_diag
aurel
parents:
diff changeset
82 "1: \n\t"
492f8911992c add MMX version of vp6_filter_diag
aurel
parents:
diff changeset
83 DIAG4_MMX(-1,0,1,2)
492f8911992c add MMX version of vp6_filter_diag
aurel
parents:
diff changeset
84 "add $8, %1 \n\t"
492f8911992c add MMX version of vp6_filter_diag
aurel
parents:
diff changeset
85 "add %3, %0 \n\t"
492f8911992c add MMX version of vp6_filter_diag
aurel
parents:
diff changeset
86 "decl %4 \n\t"
492f8911992c add MMX version of vp6_filter_diag
aurel
parents:
diff changeset
87 "jnz 1b \n\t"
492f8911992c add MMX version of vp6_filter_diag
aurel
parents:
diff changeset
88 : "+r"(src), "+r"(t)
492f8911992c add MMX version of vp6_filter_diag
aurel
parents:
diff changeset
89 : "r"(weights), "r"((x86_reg)stride), "r"(11)
492f8911992c add MMX version of vp6_filter_diag
aurel
parents:
diff changeset
90 : "memory");
492f8911992c add MMX version of vp6_filter_diag
aurel
parents:
diff changeset
91
492f8911992c add MMX version of vp6_filter_diag
aurel
parents:
diff changeset
92 t = tmp + 8;
492f8911992c add MMX version of vp6_filter_diag
aurel
parents:
diff changeset
93 for (i=0; i<4*4; i++)
492f8911992c add MMX version of vp6_filter_diag
aurel
parents:
diff changeset
94 weights[i] = v_weights[i>>2];
492f8911992c add MMX version of vp6_filter_diag
aurel
parents:
diff changeset
95
492f8911992c add MMX version of vp6_filter_diag
aurel
parents:
diff changeset
96 __asm__ volatile(
492f8911992c add MMX version of vp6_filter_diag
aurel
parents:
diff changeset
97 "pxor %%mm7, %%mm7 \n\t"
492f8911992c add MMX version of vp6_filter_diag
aurel
parents:
diff changeset
98 "movq "MANGLE(ff_pw_64)", %%mm6 \n\t"
492f8911992c add MMX version of vp6_filter_diag
aurel
parents:
diff changeset
99 "1: \n\t"
492f8911992c add MMX version of vp6_filter_diag
aurel
parents:
diff changeset
100 DIAG4_MMX(-8,0,8,16)
492f8911992c add MMX version of vp6_filter_diag
aurel
parents:
diff changeset
101 "add $8, %0 \n\t"
492f8911992c add MMX version of vp6_filter_diag
aurel
parents:
diff changeset
102 "add %3, %1 \n\t"
492f8911992c add MMX version of vp6_filter_diag
aurel
parents:
diff changeset
103 "decl %4 \n\t"
492f8911992c add MMX version of vp6_filter_diag
aurel
parents:
diff changeset
104 "jnz 1b \n\t"
492f8911992c add MMX version of vp6_filter_diag
aurel
parents:
diff changeset
105 : "+r"(t), "+r"(dst)
492f8911992c add MMX version of vp6_filter_diag
aurel
parents:
diff changeset
106 : "r"(weights), "r"((x86_reg)stride), "r"(8)
492f8911992c add MMX version of vp6_filter_diag
aurel
parents:
diff changeset
107 : "memory");
492f8911992c add MMX version of vp6_filter_diag
aurel
parents:
diff changeset
108 }