Mercurial > libavcodec.hg
annotate x86/vp56dsp_init.c @ 12492:58a960d6e34c libavcodec
Rename h264_idct_sse2.asm to h264_idct.asm; move inline IDCT asm from
h264dsp_mmx.c to h264_idct.asm (as yasm code). Because the loops are now
coded in asm instead of C, this is (depending on the function) up to 50%
faster for cases where gcc didn't do a great job at looping.
Since h264_idct_add8() is now faster than the manual loop setup in h264.c,
in-asm idct calling can now be enabled for chroma as well (see r16207). For
MMX, this is 5% faster. For SSE2 (which isn't done for chroma if h264.c does
the looping), this makes it up to 50% faster. Speed gain overall is ~0.5-1.0%.
author | rbultje |
---|---|
date | Tue, 14 Sep 2010 13:36:26 +0000 |
parents | 9fef0a8ddd63 |
children |
rev | line source |
---|---|
12417
9f06475db098
Move vp6_filter_diag4() from DSPContext to VP56DSPContext.
rbultje
parents:
diff
changeset
|
1 /* |
9f06475db098
Move vp6_filter_diag4() from DSPContext to VP56DSPContext.
rbultje
parents:
diff
changeset
|
2 * VP6 MMX/SSE2 optimizations |
9f06475db098
Move vp6_filter_diag4() from DSPContext to VP56DSPContext.
rbultje
parents:
diff
changeset
|
3 * Copyright (C) 2009 Sebastien Lucas <sebastien.lucas@gmail.com> |
9f06475db098
Move vp6_filter_diag4() from DSPContext to VP56DSPContext.
rbultje
parents:
diff
changeset
|
4 * Copyright (C) 2009 Zuxy Meng <zuxy.meng@gmail.com> |
9f06475db098
Move vp6_filter_diag4() from DSPContext to VP56DSPContext.
rbultje
parents:
diff
changeset
|
5 * |
9f06475db098
Move vp6_filter_diag4() from DSPContext to VP56DSPContext.
rbultje
parents:
diff
changeset
|
6 * This file is part of FFmpeg. |
9f06475db098
Move vp6_filter_diag4() from DSPContext to VP56DSPContext.
rbultje
parents:
diff
changeset
|
7 * |
9f06475db098
Move vp6_filter_diag4() from DSPContext to VP56DSPContext.
rbultje
parents:
diff
changeset
|
8 * FFmpeg is free software; you can redistribute it and/or |
9f06475db098
Move vp6_filter_diag4() from DSPContext to VP56DSPContext.
rbultje
parents:
diff
changeset
|
9 * modify it under the terms of the GNU Lesser General Public |
9f06475db098
Move vp6_filter_diag4() from DSPContext to VP56DSPContext.
rbultje
parents:
diff
changeset
|
10 * License as published by the Free Software Foundation; either |
9f06475db098
Move vp6_filter_diag4() from DSPContext to VP56DSPContext.
rbultje
parents:
diff
changeset
|
11 * version 2.1 of the License, or (at your option) any later version. |
9f06475db098
Move vp6_filter_diag4() from DSPContext to VP56DSPContext.
rbultje
parents:
diff
changeset
|
12 * |
9f06475db098
Move vp6_filter_diag4() from DSPContext to VP56DSPContext.
rbultje
parents:
diff
changeset
|
13 * FFmpeg is distributed in the hope that it will be useful, |
9f06475db098
Move vp6_filter_diag4() from DSPContext to VP56DSPContext.
rbultje
parents:
diff
changeset
|
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
9f06475db098
Move vp6_filter_diag4() from DSPContext to VP56DSPContext.
rbultje
parents:
diff
changeset
|
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
9f06475db098
Move vp6_filter_diag4() from DSPContext to VP56DSPContext.
rbultje
parents:
diff
changeset
|
16 * Lesser General Public License for more details. |
9f06475db098
Move vp6_filter_diag4() from DSPContext to VP56DSPContext.
rbultje
parents:
diff
changeset
|
17 * |
9f06475db098
Move vp6_filter_diag4() from DSPContext to VP56DSPContext.
rbultje
parents:
diff
changeset
|
18 * You should have received a copy of the GNU Lesser General Public |
9f06475db098
Move vp6_filter_diag4() from DSPContext to VP56DSPContext.
rbultje
parents:
diff
changeset
|
19 * License along with FFmpeg; if not, write to the Free Software |
9f06475db098
Move vp6_filter_diag4() from DSPContext to VP56DSPContext.
rbultje
parents:
diff
changeset
|
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
9f06475db098
Move vp6_filter_diag4() from DSPContext to VP56DSPContext.
rbultje
parents:
diff
changeset
|
21 */ |
9f06475db098
Move vp6_filter_diag4() from DSPContext to VP56DSPContext.
rbultje
parents:
diff
changeset
|
22 |
12475
9fef0a8ddd63
Move mm_support() from libavcodec to libavutil, make it a public
stefano
parents:
12456
diff
changeset
|
23 #include "libavutil/cpu.h" |
12417
9f06475db098
Move vp6_filter_diag4() from DSPContext to VP56DSPContext.
rbultje
parents:
diff
changeset
|
24 #include "libavutil/x86_cpu.h" |
9f06475db098
Move vp6_filter_diag4() from DSPContext to VP56DSPContext.
rbultje
parents:
diff
changeset
|
25 #include "libavcodec/dsputil.h" |
9f06475db098
Move vp6_filter_diag4() from DSPContext to VP56DSPContext.
rbultje
parents:
diff
changeset
|
26 #include "libavcodec/vp56dsp.h" |
12418
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
12417
diff
changeset
|
27 |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
12417
diff
changeset
|
28 void ff_vp6_filter_diag4_mmx(uint8_t *dst, uint8_t *src, int stride, |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
12417
diff
changeset
|
29 const int16_t *h_weights,const int16_t *v_weights); |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
12417
diff
changeset
|
30 void ff_vp6_filter_diag4_sse2(uint8_t *dst, uint8_t *src, int stride, |
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
12417
diff
changeset
|
31 const int16_t *h_weights,const int16_t *v_weights); |
12417
9f06475db098
Move vp6_filter_diag4() from DSPContext to VP56DSPContext.
rbultje
parents:
diff
changeset
|
32 |
9f06475db098
Move vp6_filter_diag4() from DSPContext to VP56DSPContext.
rbultje
parents:
diff
changeset
|
33 av_cold void ff_vp56dsp_init_x86(VP56DSPContext* c, enum CodecID codec) |
9f06475db098
Move vp6_filter_diag4() from DSPContext to VP56DSPContext.
rbultje
parents:
diff
changeset
|
34 { |
12418
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
12417
diff
changeset
|
35 #if HAVE_YASM |
12475
9fef0a8ddd63
Move mm_support() from libavcodec to libavutil, make it a public
stefano
parents:
12456
diff
changeset
|
36 int mm_flags = av_get_cpu_flags(); |
12417
9f06475db098
Move vp6_filter_diag4() from DSPContext to VP56DSPContext.
rbultje
parents:
diff
changeset
|
37 |
9f06475db098
Move vp6_filter_diag4() from DSPContext to VP56DSPContext.
rbultje
parents:
diff
changeset
|
38 if (CONFIG_VP6_DECODER && codec == CODEC_ID_VP6) { |
12456
a5ddb39627fd
Rename FF_MM_ symbols related to CPU features flags as AV_CPU_FLAG_
stefano
parents:
12418
diff
changeset
|
39 if (mm_flags & AV_CPU_FLAG_MMX) { |
12417
9f06475db098
Move vp6_filter_diag4() from DSPContext to VP56DSPContext.
rbultje
parents:
diff
changeset
|
40 c->vp6_filter_diag4 = ff_vp6_filter_diag4_mmx; |
9f06475db098
Move vp6_filter_diag4() from DSPContext to VP56DSPContext.
rbultje
parents:
diff
changeset
|
41 } |
9f06475db098
Move vp6_filter_diag4() from DSPContext to VP56DSPContext.
rbultje
parents:
diff
changeset
|
42 |
12456
a5ddb39627fd
Rename FF_MM_ symbols related to CPU features flags as AV_CPU_FLAG_
stefano
parents:
12418
diff
changeset
|
43 if (mm_flags & AV_CPU_FLAG_SSE2) { |
12417
9f06475db098
Move vp6_filter_diag4() from DSPContext to VP56DSPContext.
rbultje
parents:
diff
changeset
|
44 c->vp6_filter_diag4 = ff_vp6_filter_diag4_sse2; |
9f06475db098
Move vp6_filter_diag4() from DSPContext to VP56DSPContext.
rbultje
parents:
diff
changeset
|
45 } |
9f06475db098
Move vp6_filter_diag4() from DSPContext to VP56DSPContext.
rbultje
parents:
diff
changeset
|
46 } |
12418
e17840120b80
Move vp6_filter_diag4() x86 SIMD code from inline ASM to YASM. This should
rbultje
parents:
12417
diff
changeset
|
47 #endif |
12417
9f06475db098
Move vp6_filter_diag4() from DSPContext to VP56DSPContext.
rbultje
parents:
diff
changeset
|
48 } |