comparison libmpcodecs/cmmx.h @ 11600:5eb66d37d539

Yet another inverse telecine filter by Zoltan Hidvegi <mplayer@hzoli.2y.net>. Also heavily MMX centric.
author alex
date Mon, 08 Dec 2003 22:57:47 +0000
parents
children 3f0d00abc073
comparison
equal deleted inserted replaced
11599:ad9216814665 11600:5eb66d37d539
1 /*
2 * x86 MMX and MMX2 packed byte operations in portable C.
3 * Extra instructions: pdiffub, pcmpzb, psumbw, pcmpgtub
4 * Author: Zoltan Hidvegi
5 */
6
7 #ifndef __CMMX_H
8 #define __CMMX_H
9
/*
 * A cmmx_t holds sizeof(cmmx_t) unsigned bytes packed into one machine
 * word; every operation in this file works on all of those bytes at once.
 */
typedef unsigned long cmmx_t;

/* 0x01 replicated into every byte of a cmmx_t. */
#define ONE_BYTES  ((cmmx_t)~(cmmx_t)0 / 0xff)
/* 0x80 replicated into every byte: the top bit of each byte lane. */
#define SIGN_BITS  (ONE_BYTES * 0x80)
/* 0x00ff replicated into every 16-bit word: the low byte of each pair. */
#define LOWBW_MASK ((cmmx_t)~(cmmx_t)0 / 0x101)
15
16 static inline cmmx_t
17 paddb(cmmx_t a, cmmx_t b)
18 {
19 return ((a & ~SIGN_BITS) + (b & ~SIGN_BITS)) ^ ((a^b) & SIGN_BITS);
20 }
21
22 static inline cmmx_t
23 psubb(cmmx_t a, cmmx_t b)
24 {
25 return ((a | SIGN_BITS) - (b & ~SIGN_BITS)) ^ (~(a^b) & SIGN_BITS);
26 }
27
28 static inline cmmx_t
29 paddusb(cmmx_t a, cmmx_t b)
30 {
31 cmmx_t s = (a & ~SIGN_BITS) + (b & ~SIGN_BITS);
32 cmmx_t abs = (a | b) & SIGN_BITS;
33 cmmx_t c = abs & (s | (a & b));
34 return s | abs | (abs - (c >> 7));
35 }
36
37 static inline cmmx_t
38 paddusb_s(cmmx_t a, cmmx_t b)
39 {
40 cmmx_t sum = a+b;
41 cmmx_t ov = sum & SIGN_BITS;
42 return sum + (sum ^ (ov - (ov>>7)));
43 }
44
45 static inline cmmx_t
46 psubusb(cmmx_t a, cmmx_t b)
47 {
48 cmmx_t s = (a | SIGN_BITS) - (b & ~SIGN_BITS);
49 cmmx_t anb = a & ~b;
50 cmmx_t c = (anb | (s & ~(a^b))) & SIGN_BITS;
51 return s & ((c & anb) | (c - (c >> 7)));
52 }
53
54 static inline cmmx_t
55 psubusb_s(cmmx_t a, cmmx_t b)
56 {
57 cmmx_t d = (a|SIGN_BITS) - b;
58 cmmx_t m = d & SIGN_BITS;
59 return d & (m - (m>>7));
60 }
61
62 static inline cmmx_t
63 pcmpgtub(cmmx_t b, cmmx_t a)
64 {
65 cmmx_t s = (a | SIGN_BITS) - (b & ~SIGN_BITS);
66 cmmx_t ret = ((~a & b) | (~s & ~(a ^ b))) & SIGN_BITS;
67 return ret | (ret - (ret >> 7));
68 }
69
70 static inline cmmx_t
71 pdiffub(cmmx_t a, cmmx_t b)
72 {
73 cmmx_t xs = (~a ^ b) & SIGN_BITS;
74 cmmx_t s = ((a | SIGN_BITS) - (b & ~SIGN_BITS)) ^ xs;
75 cmmx_t gt = ((~a & b) | (s & xs)) & SIGN_BITS;
76 cmmx_t gt7 = gt >> 7;
77 return (s ^ gt ^ (gt - gt7)) + gt7;
78 }
79
80 static inline cmmx_t
81 pdiffub_s(cmmx_t a, cmmx_t b)
82 {
83 cmmx_t d = (a|SIGN_BITS) - b;
84 cmmx_t g = (~d & SIGN_BITS) >> 7;
85 return (d ^ (SIGN_BITS-g)) + g;
86 }
87
88 static inline cmmx_t
89 pmaxub(cmmx_t a, cmmx_t b)
90 {
91 return psubusb(a,b) + b;
92 }
93
94 static inline cmmx_t
95 pminub(cmmx_t a, cmmx_t b)
96 {
97 return paddusb(a,~b) - ~b;
98 }
99
100 static inline cmmx_t
101 pminub_s(cmmx_t a, cmmx_t b)
102 {
103 cmmx_t d = (a|SIGN_BITS) - b;
104 cmmx_t m = ~SIGN_BITS + ((d&SIGN_BITS)>>7);
105 return ((d&m) + b) & ~SIGN_BITS;
106 }
107
108 static inline cmmx_t
109 pavgb(cmmx_t a, cmmx_t b)
110 {
111 cmmx_t ao = a & ONE_BYTES;
112 cmmx_t bo = b & ONE_BYTES;
113 return ((a^ao)>>1) + ((b^bo)>>1) + (ao|bo);
114 }
115
116 static inline cmmx_t
117 pavgb_s(cmmx_t a, cmmx_t b)
118 {
119 return ((a+b+ONE_BYTES)>>1) & ~SIGN_BITS;
120 }
121
122 static inline cmmx_t
123 p31avgb(cmmx_t a, cmmx_t b)
124 {
125 cmmx_t ao = a & (3*ONE_BYTES);
126 cmmx_t bo = b & (3*ONE_BYTES);
127 return 3*((a^ao)>>2) + ((b^bo)>>2) +
128 (((3*ao+bo+2*ONE_BYTES)>>2) & (3*ONE_BYTES));
129 }
130
131 static inline cmmx_t
132 p31avgb_s(cmmx_t a, cmmx_t b)
133 {
134 cmmx_t avg = ((a+b)>>1) & ~SIGN_BITS;
135 return pavgb_s(avg, a);
136 }
137
138 static inline unsigned long
139 psumbw(cmmx_t a)
140 {
141 cmmx_t t = (a & LOWBW_MASK) + ((a>>8) & LOWBW_MASK);
142 unsigned long ret =
143 (unsigned long)t + (unsigned long)(t >> (4*sizeof(cmmx_t)));
144 if (sizeof(cmmx_t) > 4)
145 ret += ret >> 16;
146 return ret & 0xffff;
147 }
148
149 static inline unsigned long
150 psumbw_s(cmmx_t a)
151 {
152 unsigned long ret =
153 (unsigned long)a + (unsigned long)(a >> (4*sizeof(cmmx_t)));
154 if (sizeof(cmmx_t) <= 4)
155 return (ret & 0xff) + ((ret>>8) & 0xff);
156 ret = (ret & 0xff00ff) + ((ret>>8) & 0xff00ff);
157 ret += ret >> 16;
158 return ret & 0xffff;
159 }
160
161 static inline unsigned long
162 psadbw(cmmx_t a, cmmx_t b)
163 {
164 return psumbw(pdiffub(a,b));
165 }
166
167 static inline unsigned long
168 psadbw_s(cmmx_t a, cmmx_t b)
169 {
170 return psumbw_s(pdiffub_s(a,b));
171 }
172
173 static inline cmmx_t
174 pcmpzb(cmmx_t a)
175 {
176 cmmx_t ret = (((a | SIGN_BITS) - ONE_BYTES) | a) & SIGN_BITS;
177 return ~(ret | (ret - (ret >> 7)));
178 }
179
180 static inline cmmx_t
181 pcmpeqb(cmmx_t a, cmmx_t b)
182 {
183 return pcmpzb(a ^ b);
184 }
185
186 #endif