annotate x86/vp6dsp_sse2.c @ 11460:a385968f8fb0 libavcodec

snow: remove unused stub functions. w53_32_c() and w97_32_c() are defined as stubs when the snow encoder is disabled. In this case, those functions are not referenced at all and thus do not need to be defined.
author mru
date Thu, 11 Mar 2010 02:32:05 +0000
parents 17cc6df384a6
children 7dd2a45249a9
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
8818
17cc6df384a6 add SSE2 version of vp6_filter_diag
aurel
parents:
diff changeset
1 /**
17cc6df384a6 add SSE2 version of vp6_filter_diag
aurel
parents:
diff changeset
2 * @file libavcodec/x86/vp6dsp_sse2.c
17cc6df384a6 add SSE2 version of vp6_filter_diag
aurel
parents:
diff changeset
3 * SSE2-optimized functions for the VP6 decoder
17cc6df384a6 add SSE2 version of vp6_filter_diag
aurel
parents:
diff changeset
4 *
17cc6df384a6 add SSE2 version of vp6_filter_diag
aurel
parents:
diff changeset
5 * Copyright (C) 2009 Zuxy Meng <zuxy.meng@gmail.com>
17cc6df384a6 add SSE2 version of vp6_filter_diag
aurel
parents:
diff changeset
6 *
17cc6df384a6 add SSE2 version of vp6_filter_diag
aurel
parents:
diff changeset
7 * This file is part of FFmpeg.
17cc6df384a6 add SSE2 version of vp6_filter_diag
aurel
parents:
diff changeset
8 *
17cc6df384a6 add SSE2 version of vp6_filter_diag
aurel
parents:
diff changeset
9 * FFmpeg is free software; you can redistribute it and/or
17cc6df384a6 add SSE2 version of vp6_filter_diag
aurel
parents:
diff changeset
10 * modify it under the terms of the GNU Lesser General Public
17cc6df384a6 add SSE2 version of vp6_filter_diag
aurel
parents:
diff changeset
11 * License as published by the Free Software Foundation; either
17cc6df384a6 add SSE2 version of vp6_filter_diag
aurel
parents:
diff changeset
12 * version 2.1 of the License, or (at your option) any later version.
17cc6df384a6 add SSE2 version of vp6_filter_diag
aurel
parents:
diff changeset
13 *
17cc6df384a6 add SSE2 version of vp6_filter_diag
aurel
parents:
diff changeset
14 * FFmpeg is distributed in the hope that it will be useful,
17cc6df384a6 add SSE2 version of vp6_filter_diag
aurel
parents:
diff changeset
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17cc6df384a6 add SSE2 version of vp6_filter_diag
aurel
parents:
diff changeset
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17cc6df384a6 add SSE2 version of vp6_filter_diag
aurel
parents:
diff changeset
17 * Lesser General Public License for more details.
17cc6df384a6 add SSE2 version of vp6_filter_diag
aurel
parents:
diff changeset
18 *
17cc6df384a6 add SSE2 version of vp6_filter_diag
aurel
parents:
diff changeset
19 * You should have received a copy of the GNU Lesser General Public
17cc6df384a6 add SSE2 version of vp6_filter_diag
aurel
parents:
diff changeset
20 * License along with FFmpeg; if not, write to the Free Software
17cc6df384a6 add SSE2 version of vp6_filter_diag
aurel
parents:
diff changeset
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17cc6df384a6 add SSE2 version of vp6_filter_diag
aurel
parents:
diff changeset
22 */
17cc6df384a6 add SSE2 version of vp6_filter_diag
aurel
parents:
diff changeset
23
17cc6df384a6 add SSE2 version of vp6_filter_diag
aurel
parents:
diff changeset
24 #include "libavutil/x86_cpu.h"
17cc6df384a6 add SSE2 version of vp6_filter_diag
aurel
parents:
diff changeset
25 #include "libavcodec/dsputil.h"
17cc6df384a6 add SSE2 version of vp6_filter_diag
aurel
parents:
diff changeset
26 #include "dsputil_mmx.h"
17cc6df384a6 add SSE2 version of vp6_filter_diag
aurel
parents:
diff changeset
27 #include "vp6dsp_sse2.h"
17cc6df384a6 add SSE2 version of vp6_filter_diag
aurel
parents:
diff changeset
28
17cc6df384a6 add SSE2 version of vp6_filter_diag
aurel
parents:
diff changeset
/*
 * Inline-asm fragment: 4-tap filter producing one row of 8 output bytes.
 *
 * in1..in4 are the byte displacements (relative to operand %0) of the four
 * taps; each tap loads 8 bytes.
 *
 * Register contract expected from the surrounding asm statement:
 *   %0            source pointer (read)
 *   %1            destination pointer (8 bytes are stored at (%1))
 *   xmm7          must be all zeros (used to widen bytes to 16-bit words)
 *   xmm4/5/6/3    weights for taps in1/in2/in3/in4, one 16-bit weight
 *                 replicated across all eight words
 *   xmm0-xmm2     scratch, overwritten
 *
 * The weighted taps are summed, 64 is added, the result is arithmetically
 * shifted right by 7 and packed back to bytes with unsigned saturation.
 */
#define DIAG4_SSE2(in1,in2,in3,in4)                                           \
    /* taps 1 and 2: load, widen to words, weight, accumulate in xmm0 */      \
    "movq "#in1"(%0), %%xmm0 \n\t"                                            \
    "movq "#in2"(%0), %%xmm1 \n\t"                                            \
    "punpcklbw %%xmm7, %%xmm0 \n\t"                                           \
    "punpcklbw %%xmm7, %%xmm1 \n\t"                                           \
    "pmullw %%xmm4, %%xmm0 \n\t"              /* src[in1] * weight[0] */      \
    "pmullw %%xmm5, %%xmm1 \n\t"              /* src[in2] * weight[1] */      \
    "paddw %%xmm1, %%xmm0 \n\t"                                               \
    /* taps 3 and 4: same treatment, accumulate in xmm1 */                    \
    "movq "#in3"(%0), %%xmm1 \n\t"                                            \
    "movq "#in4"(%0), %%xmm2 \n\t"                                            \
    "punpcklbw %%xmm7, %%xmm1 \n\t"                                           \
    "punpcklbw %%xmm7, %%xmm2 \n\t"                                           \
    "pmullw %%xmm6, %%xmm1 \n\t"              /* src[in3] * weight[2] */      \
    "pmullw %%xmm3, %%xmm2 \n\t"              /* src[in4] * weight[3] */      \
    "paddw %%xmm2, %%xmm1 \n\t"                                               \
    /* combine both halves (saturating), round, scale, pack and store */      \
    "paddsw %%xmm1, %%xmm0 \n\t"                                              \
    "paddsw "MANGLE(ff_pw_64)", %%xmm0 \n\t"  /* rounding bias: + 64 */       \
    "psraw $7, %%xmm0 \n\t"                                                   \
    "packuswb %%xmm0, %%xmm0 \n\t"                                            \
    "movq %%xmm0, (%1) \n\t"
17cc6df384a6 add SSE2 version of vp6_filter_diag
aurel
parents:
diff changeset
49
17cc6df384a6 add SSE2 version of vp6_filter_diag
aurel
parents:
diff changeset
50 void ff_vp6_filter_diag4_sse2(uint8_t *dst, uint8_t *src, int stride,
17cc6df384a6 add SSE2 version of vp6_filter_diag
aurel
parents:
diff changeset
51 const int16_t *h_weights,const int16_t *v_weights)
17cc6df384a6 add SSE2 version of vp6_filter_diag
aurel
parents:
diff changeset
52 {
17cc6df384a6 add SSE2 version of vp6_filter_diag
aurel
parents:
diff changeset
53 uint8_t tmp[8*11], *t = tmp;
17cc6df384a6 add SSE2 version of vp6_filter_diag
aurel
parents:
diff changeset
54 src -= stride;
17cc6df384a6 add SSE2 version of vp6_filter_diag
aurel
parents:
diff changeset
55
17cc6df384a6 add SSE2 version of vp6_filter_diag
aurel
parents:
diff changeset
56 __asm__ volatile(
17cc6df384a6 add SSE2 version of vp6_filter_diag
aurel
parents:
diff changeset
57 "pxor %%xmm7, %%xmm7 \n\t"
17cc6df384a6 add SSE2 version of vp6_filter_diag
aurel
parents:
diff changeset
58 "movq %4, %%xmm3 \n\t"
17cc6df384a6 add SSE2 version of vp6_filter_diag
aurel
parents:
diff changeset
59 "pshuflw $0, %%xmm3, %%xmm4 \n\t"
17cc6df384a6 add SSE2 version of vp6_filter_diag
aurel
parents:
diff changeset
60 "punpcklqdq %%xmm4, %%xmm4 \n\t"
17cc6df384a6 add SSE2 version of vp6_filter_diag
aurel
parents:
diff changeset
61 "pshuflw $85, %%xmm3, %%xmm5 \n\t"
17cc6df384a6 add SSE2 version of vp6_filter_diag
aurel
parents:
diff changeset
62 "punpcklqdq %%xmm5, %%xmm5 \n\t"
17cc6df384a6 add SSE2 version of vp6_filter_diag
aurel
parents:
diff changeset
63 "pshuflw $170, %%xmm3, %%xmm6 \n\t"
17cc6df384a6 add SSE2 version of vp6_filter_diag
aurel
parents:
diff changeset
64 "punpcklqdq %%xmm6, %%xmm6 \n\t"
17cc6df384a6 add SSE2 version of vp6_filter_diag
aurel
parents:
diff changeset
65 "pshuflw $255, %%xmm3, %%xmm3 \n\t"
17cc6df384a6 add SSE2 version of vp6_filter_diag
aurel
parents:
diff changeset
66 "punpcklqdq %%xmm3, %%xmm3 \n\t"
17cc6df384a6 add SSE2 version of vp6_filter_diag
aurel
parents:
diff changeset
67 "1: \n\t"
17cc6df384a6 add SSE2 version of vp6_filter_diag
aurel
parents:
diff changeset
68 DIAG4_SSE2(-1,0,1,2)
17cc6df384a6 add SSE2 version of vp6_filter_diag
aurel
parents:
diff changeset
69 "add $8, %1 \n\t"
17cc6df384a6 add SSE2 version of vp6_filter_diag
aurel
parents:
diff changeset
70 "add %2, %0 \n\t"
17cc6df384a6 add SSE2 version of vp6_filter_diag
aurel
parents:
diff changeset
71 "decl %3 \n\t"
17cc6df384a6 add SSE2 version of vp6_filter_diag
aurel
parents:
diff changeset
72 "jnz 1b \n\t"
17cc6df384a6 add SSE2 version of vp6_filter_diag
aurel
parents:
diff changeset
73 : "+r"(src), "+r"(t)
17cc6df384a6 add SSE2 version of vp6_filter_diag
aurel
parents:
diff changeset
74 : "g"((x86_reg)stride), "r"(11), "m"(*(const int64_t*)h_weights)
17cc6df384a6 add SSE2 version of vp6_filter_diag
aurel
parents:
diff changeset
75 : "memory");
17cc6df384a6 add SSE2 version of vp6_filter_diag
aurel
parents:
diff changeset
76
17cc6df384a6 add SSE2 version of vp6_filter_diag
aurel
parents:
diff changeset
77 t = tmp + 8;
17cc6df384a6 add SSE2 version of vp6_filter_diag
aurel
parents:
diff changeset
78
17cc6df384a6 add SSE2 version of vp6_filter_diag
aurel
parents:
diff changeset
79 __asm__ volatile(
17cc6df384a6 add SSE2 version of vp6_filter_diag
aurel
parents:
diff changeset
80 "movq %4, %%xmm3 \n\t"
17cc6df384a6 add SSE2 version of vp6_filter_diag
aurel
parents:
diff changeset
81 "pshuflw $0, %%xmm3, %%xmm4 \n\t"
17cc6df384a6 add SSE2 version of vp6_filter_diag
aurel
parents:
diff changeset
82 "punpcklqdq %%xmm4, %%xmm4 \n\t"
17cc6df384a6 add SSE2 version of vp6_filter_diag
aurel
parents:
diff changeset
83 "pshuflw $85, %%xmm3, %%xmm5 \n\t"
17cc6df384a6 add SSE2 version of vp6_filter_diag
aurel
parents:
diff changeset
84 "punpcklqdq %%xmm5, %%xmm5 \n\t"
17cc6df384a6 add SSE2 version of vp6_filter_diag
aurel
parents:
diff changeset
85 "pshuflw $170, %%xmm3, %%xmm6 \n\t"
17cc6df384a6 add SSE2 version of vp6_filter_diag
aurel
parents:
diff changeset
86 "punpcklqdq %%xmm6, %%xmm6 \n\t"
17cc6df384a6 add SSE2 version of vp6_filter_diag
aurel
parents:
diff changeset
87 "pshuflw $255, %%xmm3, %%xmm3 \n\t"
17cc6df384a6 add SSE2 version of vp6_filter_diag
aurel
parents:
diff changeset
88 "punpcklqdq %%xmm3, %%xmm3 \n\t"
17cc6df384a6 add SSE2 version of vp6_filter_diag
aurel
parents:
diff changeset
89 "1: \n\t"
17cc6df384a6 add SSE2 version of vp6_filter_diag
aurel
parents:
diff changeset
90 DIAG4_SSE2(-8,0,8,16)
17cc6df384a6 add SSE2 version of vp6_filter_diag
aurel
parents:
diff changeset
91 "add $8, %0 \n\t"
17cc6df384a6 add SSE2 version of vp6_filter_diag
aurel
parents:
diff changeset
92 "add %2, %1 \n\t"
17cc6df384a6 add SSE2 version of vp6_filter_diag
aurel
parents:
diff changeset
93 "decl %3 \n\t"
17cc6df384a6 add SSE2 version of vp6_filter_diag
aurel
parents:
diff changeset
94 "jnz 1b \n\t"
17cc6df384a6 add SSE2 version of vp6_filter_diag
aurel
parents:
diff changeset
95 : "+r"(t), "+r"(dst)
17cc6df384a6 add SSE2 version of vp6_filter_diag
aurel
parents:
diff changeset
96 : "g"((x86_reg)stride), "r"(8), "m"(*(const int64_t*)v_weights)
17cc6df384a6 add SSE2 version of vp6_filter_diag
aurel
parents:
diff changeset
97 : "memory");
17cc6df384a6 add SSE2 version of vp6_filter_diag
aurel
parents:
diff changeset
98 }