Mercurial > libavcodec.hg
annotate ppc/int_altivec.c @ 6323:e6da66f378c7 libavcodec
mpegvideo.h has two function declarations with the 'inline' specifier
but no definition for those functions. The C standard requires a
definition to appear in the same translation unit for any function
declared with 'inline'. Most of the files including mpegvideo.h do not
define those functions. Fix this by removing the 'inline' specifiers
from the header.
patch by Uoti Urpala
author | diego |
---|---|
date | Sun, 03 Feb 2008 17:54:30 +0000 |
parents | 669a97223dc7 |
children | f7cbb7733146 |
rev | line source |
---|---|
4838
eeac11145c4e
ssd_int8_vs_int16_altivec, not completely benchmarked with svq1
lu_zero
parents:
diff
changeset
|
1 /* |
eeac11145c4e
ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff
changeset
|
2 * Copyright (c) 2007 Luca Barbato <lu_zero@gentoo.org> |
eeac11145c4e
ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff
changeset
|
3 * |
eeac11145c4e
ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff
changeset
|
4 * This file is part of FFmpeg. |
eeac11145c4e
ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff
changeset
|
5 * |
eeac11145c4e
ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff
changeset
|
6 * FFmpeg is free software; you can redistribute it and/or |
eeac11145c4e
ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff
changeset
|
7 * modify it under the terms of the GNU Lesser General Public |
eeac11145c4e
ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff
changeset
|
8 * License as published by the Free Software Foundation; either |
eeac11145c4e
ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff
changeset
|
9 * version 2.1 of the License, or (at your option) any later version. |
eeac11145c4e
ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff
changeset
|
10 * |
eeac11145c4e
ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff
changeset
|
11 * FFmpeg is distributed in the hope that it will be useful, |
eeac11145c4e
ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff
changeset
|
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
eeac11145c4e
ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff
changeset
|
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
eeac11145c4e
ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff
changeset
|
14 * Lesser General Public License for more details. |
eeac11145c4e
ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff
changeset
|
15 * |
eeac11145c4e
ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff
changeset
|
16 * You should have received a copy of the GNU Lesser General Public |
eeac11145c4e
ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff
changeset
|
17 * License along with FFmpeg; if not, write to the Free Software |
eeac11145c4e
ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff
changeset
|
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
eeac11145c4e
ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff
changeset
|
19 */ |
eeac11145c4e
ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff
changeset
|
20 |
eeac11145c4e
ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff
changeset
|
21 /** |
eeac11145c4e
ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff
changeset
|
22 ** @file int_altivec.c |
eeac11145c4e
ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff
changeset
|
23 ** integer misc ops. |
eeac11145c4e
ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff
changeset
|
24 **/ |
eeac11145c4e
ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff
changeset
|
25 |
5010
d5ba514e3f4a
Add libavcodec to compiler include flags in order to simplify header
diego
parents:
4838
diff
changeset
|
26 #include "dsputil.h" |
4838
eeac11145c4e
ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff
changeset
|
27 |
eeac11145c4e
ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff
changeset
|
28 #include "gcc_fixes.h" |
eeac11145c4e
ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff
changeset
|
29 |
eeac11145c4e
ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff
changeset
|
30 #include "dsputil_altivec.h" |
eeac11145c4e
ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff
changeset
|
31 |
5255 | 32 static int ssd_int8_vs_int16_altivec(const int8_t *pix1, const int16_t *pix2, |
33 int size) { | |
4838
eeac11145c4e
ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff
changeset
|
34 int i, size16; |
eeac11145c4e
ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff
changeset
|
35 vector signed char vpix1; |
eeac11145c4e
ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff
changeset
|
36 vector signed short vpix2, vdiff, vpix1l,vpix1h; |
eeac11145c4e
ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff
changeset
|
37 union { vector signed int vscore; |
eeac11145c4e
ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff
changeset
|
38 int32_t score[4]; |
eeac11145c4e
ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff
changeset
|
39 } u; |
eeac11145c4e
ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff
changeset
|
40 u.vscore = vec_splat_s32(0); |
eeac11145c4e
ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff
changeset
|
41 // |
eeac11145c4e
ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff
changeset
|
42 //XXX lazy way, fix it later |
eeac11145c4e
ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff
changeset
|
43 |
eeac11145c4e
ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff
changeset
|
44 #define vec_unaligned_load(b) \ |
eeac11145c4e
ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff
changeset
|
45 vec_perm(vec_ld(0,b),vec_ld(15,b),vec_lvsl(0, b)); |
eeac11145c4e
ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff
changeset
|
46 |
eeac11145c4e
ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff
changeset
|
47 size16 = size >> 4; |
eeac11145c4e
ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff
changeset
|
48 while(size16) { |
eeac11145c4e
ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff
changeset
|
49 // score += (pix1[i]-pix2[i])*(pix1[i]-pix2[i]); |
eeac11145c4e
ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff
changeset
|
50 //load pix1 and the first batch of pix2 |
eeac11145c4e
ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff
changeset
|
51 |
eeac11145c4e
ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff
changeset
|
52 vpix1 = vec_unaligned_load(pix1); |
eeac11145c4e
ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff
changeset
|
53 vpix2 = vec_unaligned_load(pix2); |
eeac11145c4e
ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff
changeset
|
54 pix2 += 8; |
eeac11145c4e
ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff
changeset
|
55 //unpack |
eeac11145c4e
ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff
changeset
|
56 vpix1h = vec_unpackh(vpix1); |
eeac11145c4e
ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff
changeset
|
57 vdiff = vec_sub(vpix1h, vpix2); |
eeac11145c4e
ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff
changeset
|
58 vpix1l = vec_unpackl(vpix1); |
eeac11145c4e
ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff
changeset
|
59 // load another batch from pix2 |
eeac11145c4e
ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff
changeset
|
60 vpix2 = vec_unaligned_load(pix2); |
eeac11145c4e
ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff
changeset
|
61 u.vscore = vec_msum(vdiff, vdiff, u.vscore); |
eeac11145c4e
ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff
changeset
|
62 vdiff = vec_sub(vpix1l, vpix2); |
eeac11145c4e
ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff
changeset
|
63 u.vscore = vec_msum(vdiff, vdiff, u.vscore); |
eeac11145c4e
ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff
changeset
|
64 pix1 += 16; |
eeac11145c4e
ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff
changeset
|
65 pix2 += 8; |
eeac11145c4e
ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff
changeset
|
66 size16--; |
eeac11145c4e
ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff
changeset
|
67 } |
eeac11145c4e
ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff
changeset
|
68 u.vscore = vec_sums(u.vscore, vec_splat_s32(0)); |
eeac11145c4e
ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff
changeset
|
69 |
eeac11145c4e
ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff
changeset
|
70 size %= 16; |
eeac11145c4e
ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff
changeset
|
71 for (i = 0; i < size; i++) { |
eeac11145c4e
ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff
changeset
|
72 u.score[3] += (pix1[i]-pix2[i])*(pix1[i]-pix2[i]); |
eeac11145c4e
ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff
changeset
|
73 } |
eeac11145c4e
ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff
changeset
|
74 return u.score[3]; |
eeac11145c4e
ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff
changeset
|
75 } |
eeac11145c4e
ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff
changeset
|
76 |
eeac11145c4e
ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff
changeset
|
77 void int_init_altivec(DSPContext* c, AVCodecContext *avctx) |
eeac11145c4e
ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff
changeset
|
78 { |
eeac11145c4e
ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff
changeset
|
79 c->ssd_int8_vs_int16 = ssd_int8_vs_int16_altivec; |
eeac11145c4e
ssd_int8_vs_int16_altivec, not completely benchmarkedwith svq1
lu_zero
parents:
diff
changeset
|
80 } |