1 module carbon.simd;
2 
3 import std.stdio;
4 import core.simd;
5 import carbon.math;
6 import std.math;
7 
8 version(D_SIMD):
9 
10 /**
11 */
12 float dotProduct(in Vector!(float[4])[] a, in Vector!(float[4])[] b) pure nothrow @trusted @nogc
13 in{
14     assert(a.length == b.length);
15 }
16 body{
17     alias V = Vector!(float[4]);
18 
19     V sum;
20     sum = 0;
21 
22     V* px = cast(V*)(a.ptr),
23        ph = cast(V*)(b.ptr),
24        qx = cast(V*)(px + a.length);
25 
26     while(px != qx)
27     {
28         sum += *px * *ph;
29         ++px;
30         ++ph;
31     }
32 
33     V ones;
34     ones.array = [1.0f, 1.0f, 1.0f, 1.0f];
35     sum = __simd(XMM.DPPS, sum, ones, 0b11111111);
36     return sum.array[0];
37 }
38 
39 unittest
40 {
41     scope(failure) {writefln("Unittest failure :%s(%s)", __FILE__, __LINE__); stdout.flush();}
42     scope(success) {writefln("Unittest success :%s(%s)", __FILE__, __LINE__); stdout.flush();}
43 
44     Vector!(float[4])[4] a;
45     Vector!(float[4])[4] b;
46 
47     float[] as = a[0].ptr[0 .. 16],
48             bs = b[0].ptr[0 .. 16];
49 
50     float check = 0;
51     foreach(i; 0 .. 16){
52         as[i] = i;
53         bs[i] = i+1;
54         check += i * (i+1);
55     }
56 
57     assert(approxEqual(check, dotProduct(a, b)));
58 }
59 
60 
61 /**
62 a[0] <- [x.re, x.im, y.re, y.im]
63 b[0] <- [z.re, z.im, w.re, w.im]
64 
65 return: sum of (x*z + y*w)
66 */
67 cfloat cpxDotProduct(in Vector!(float[4])[] a, in Vector!(float[4])[] b) pure nothrow @trusted @nogc
68 in{
69     assert(a.length == b.length);
70 }
71 body{
72     Vector!(float[4]) r, q;
73     r = 0;
74     q = 0;
75 
76     auto px = a.ptr,
77          ph = b.ptr,
78          qx = a.ptr + a.length;
79 
80     while(px != qx)
81     {
82         Vector!(float[4]) x = *px,
83                           h = *ph;
84         r += x * h;
85 
86         x = __simd(XMM.SHUFPS, x, x, 0b10_11_00_01);
87 
88         q += x * h;
89 
90         ++px;
91         ++ph;
92     }
93 
94     Vector!(float[4]) sign, ones;
95     sign.array = [1.0f, -1.0f, 1.0f, -1.0f];
96     ones.array = [1.0f, 1.0f, 1.0f, 1.0f];
97 
98     r = __simd(XMM.DPPS, r, sign, 0b11111111);
99     q = __simd(XMM.DPPS, q, ones, 0b11111111);
100 
101     return r.array[0] + q.array[0]*1i;
102 }
103 
104 unittest
105 {
106     scope(failure) {writefln("Unittest failure :%s(%s)", __FILE__, __LINE__); stdout.flush();}
107     scope(success) {writefln("Unittest success :%s(%s)", __FILE__, __LINE__); stdout.flush();}
108 
109     Vector!(float[4])[4] a;
110     Vector!(float[4])[4] b;
111 
112     cfloat[] as = (cast(cfloat*)a[0].ptr)[0 .. 8],
113             bs = (cast(cfloat*)b[0].ptr)[0 .. 8];
114 
115     cfloat check = 0+0i;
116     foreach(i; 0 .. 8){
117         as[i] = i + (i+1)*1i;
118         bs[i] = (i+1) + (i+2)*1i;
119         check += as[i] * bs[i];
120     }
121 
122     cfloat res = cpxDotProduct(a, b);
123     assert(approxEqual(check.re, res.re));
124     assert(approxEqual(check.im, res.im));
125 }
126 
127 /*
128 import core.bitop;
129 import std.traits;
130 import carbon.traits;
131 enum bool isSIMDArray(T) = is(T : SIMDArray!(E, N), E, size_t N);
132 
133 
134 struct SIMDArray(E, size_t N)
135 if(N > 0)
136 {
137     private alias V = core.simd.Vector!(E[N]);
138 
139 
140     enum N_log2 = core.bitop.bsr(N);
141     alias ElementType = E;
142 
143     this(inout(V)[] vec, size_t size) inout
144     {
145         _arr = vec;
146         _size = size;
147     }
148 
149 
150     this(V[] vec)
151     {
152         this(vec, vec.length * N);
153     }
154 
155 
156     this(size_t n)
157     {
158         immutable oldN = n;
159 
160         if(n % N != 0)
161             n = n / N + 1;
162         else
163             n /= N;
164 
165         this(new V[n], oldN);
166     }
167 
168 
169     @property
170     inout(ElementType)[] array() inout pure nothrow @nogc { return (cast(E[])_arr)[0 .. _size]; }
171 
172 
173     @property
174     inout(V)[] opSlice() inout pure nothrow @safe @nogc { return _arr; }
175 
176     @property
177     inout(typeof(this)) opSlice(size_t i, size_t j) inout pure nothrow @safe @nogc
178     in{
179         assert(i == 0);
180     }
181     body {
182         return typeof(return)(this._arr, j);
183     }
184 
185 
186     void opOpAssign(string op)(in SIMDArray!(E, N) rhs)
187     if(is(typeof((V a, V b){ mixin(`a` ~ op ~ "=b"); })))
188     in {
189         assert(this.length == rhs.length);
190     }
191     body {
192         mixin(`_arr[] ` ~ op ~ "= rhs._arr[];");
193     }
194 
195 
196     @property
197     size_t length() const pure nothrow @safe @nogc
198     {
199         return _size;
200     }
201 
202 
203     ref ElementType opIndex(size_t i) pure nothrow @safe @nogc
204     in {
205         assert(i < _size);
206     }
207     body {
208         enum size_t mask = (1 << N_log2) -1;
209 
210         return _arr[i >> N_log2].array[i & mask];
211     }
212 
213 
214     void storeEachResult(alias fn, T...)(in SIMDArray!(E, N) lhs, in SIMDArray!(E, N), rhs, T values)
215     if(is(typeof(fn(lhs._arr[0], rhs._arr[0], values))))
216     in {
217         assert(this.length == lhs.length && this.length == rhs.length);
218     }
219     body {
220         V* plhs = () @trusted { return lhs._arr.ptr; }(),
221            prhs = () @trusted { return rhs._arr.ptr; }();
222 
223         foreach(i, ref e; _arr){
224             e = fn(*plhs, *prhs, values);
225             ++plhs; ++prhs;
226         }
227     }
228 
229 
230     void storeEachResult(alias fn)(in SIMDArray!(E, N) arr, T values)
231     if(is(typeof(fn(arr._arr[0], values))))
232     in {
233         assert(this.length == arr.length);
234     }
235     body {
236         V* parr = () @trusted { return arr._arr.ptr; }();
237 
238         foreach(i, ref e; _arr){
239             e = fn(*parr, values);
240             ++parr;
241         }
242     }
243 
244 
245     void storeEachResult(alias fn, T...)(T values)
246     if(is(typeof(fn(values))))
247     in {
248 
249     }
250     body {
251         foreach(ref e; _arr)
252             e = fn(values);
253     }
254 
255 
256   private:
257     V[] _arr;
258     size_t _size;
259 }
260 
261 
262 alias SSEArray(E) = SIMDArray!(E, 16 / E.sizeof);
263 alias SSEImaginary(E) = SIMDImaginary!(E, 16 / E.sizeof);
264 alias SSEComplex(E) = SIMDComplex!(E, 16 / E.sizeof);
265 
266 alias AVXArray(E) = SIMDArray!(E, 32 / E.sizeof);
267 alias AVXImaginary(E) = SIMDImaginary!(E, 32 / E.sizeof);
268 alias AVXComplex(E) = SIMDComplex!(E, 32 / E.sizeof);
269 
270 alias FastSIMDArray(E) = Select!(is(AVXArray!E), AVXArray!E, SSEArray!E);
271 alias FastSIMDImaginary(E) = Select!(is(AVXImaginary!E), AVXImaginary!E, SSEImaginary!E);
272 alias FastSIMDComplex(E) = Select!(is(AVXComplex!E), AVXComplex!E, SSEComplex!E);
273 
274 
275 struct SIMDImaginary(E, N)
276 {
277     Vector!(E, N) im;
278 
279     Vector!(E, N) re() const @property { Vector!(E, N) dst = 0; return dst; }
280 
281     void opAssign(SIMDImaginary!(E, N) img) { im = img.im; }
282     void opAssign(IMG)(IMG e) if(isBuiltInImaginary!IMG) { im = e/1i; }
283 
284     SIMDImaginary opBinary(string op: "+")(SIMDImaginary rhs) const { return SIMDImaginary(im + rhs.im); }
285     SIMDImaginary opBinary(string op: "+", IMG)(IMG e) const if(isBuiltInImaginary!IMG) { return SIMDImaginary(im + (e*-1i)); }
286     SIMDComplex!(E, N) opBinary(string op: "+")(E e) const { Vector!(E, N) re = e; return SIMDComplex!(E, N)(re, im); }
287     SIMDComplex!(E, N) opBinary(string op: "+")(Vector!(E, N) e) const { Vector!(E, N) re = e; return SIMDComplex!(E, N)(re, im); }
288     SIMDComplex!(E, N) opBinary(string op: "+", CPX)(CPX cpx) const if(isBuiltInComplex!CPX) { Vector!(E, N) re = cpx.re; return SIMDComplex!(E, N)(re, im + cpx.im); }
289     SIMDComplex!(E, N) opBinary(string op: "+")(Complex!E cpx) const { Vector!(E, N) re = cpx.re; return SIMDComplex!(E, N)(re, im + cpx.im); }
290     SIMDComplex!(E, N) opBinary(string op: "+")(SIMDComplex!(E, N) cpx) const { return SIMDComplex!(E, N)(cpx.re, im + cpx.im); }
291 
292     SIMDImaginary opBinary(string op: "-")(SIMDImaginary rhs) const { return SIMDImaginary(im - rhs.im); }
293     SIMDImaginary opBinary(string op: "-", IMG)(IMG e) const if(isBuiltInImaginary!IMG) { return SIMDImaginary(im - e*(-1i)); }
294     SIMDComplex!(E, N) opBinary(string op: "-")(E e) const { Vector!(E, N) re = e; return SIMDComplex!(E, N)(re, im); }
295     SIMDComplex!(E, N) opBinary(string op: "-")(Vector!(E, N) e) const { Vector!(E, N) re = e; return SIMDComplex!(E, N)(re, im); }
296     SIMDComplex!(E, N) opBinary(string op: "-", CPX)(CPX cpx) const if(isBuiltInComplex!CPX) { Vector!(E, N) re = cpx.re; return SIMDComplex!(E, N)(re, im - cpx.im); }
297     SIMDComplex!(E, N) opBinary(string op: "-")(Complex!E cpx) const { Vector!(E, N) re = cpx.re; return SIMDComplex!(E, N)(re, im - cpx.im); }
298     SIMDComplex!(E, N) opBinary(string op: "-")(SIMDComplex!(E, N) cpx) const { return SIMDComplex!(E, N)(cpx.re, im - cpx.im); }
299 
300     Vector!(E, N) opBinary(string op: "*")(SIMDImaginary rhs) const { return -(im * rhs.im); }
301     Vector!(E, N) opBinary(string op: "*", IMG)(IMG e) const if(isBuiltInImaginary!IMG) { return im * (e*1i); }
302     SIMDImaginary opBinary(string op: "*")(E e) const { return SIMDImaginary(im * e); }
303     SIMDImaginary opBinary(string op: "*")(Vector!(E, N) e) const { return SIMDImaginary(im * e); }
304     SIMDComplex!(E, N) opBinary(string op: "*", CPX)(CPX cpx) const if(isBuiltInComplex!CPX) { return SIMDComplex!(E, N)(-im * cpx.im, im * cpx.re); }
305     SIMDComplex!(E, N) opBinary(string op: "*")(Complex!E cpx) const { return SIMDComplex!(E, N)(-im*cpx.im, im * cpx.re); }
306     SIMDComplex!(E, N) opBinary(string op: "*")(SIMDComplex!(E, N) cpx) const { return SIMDComplex!(E, N)(-im * cpx.im, im * cpx.re); }
307 }
308 
309 
310 struct SIMDComplex(E, N)
311 {
312     Vector!(E, N) re;
313     Vector!(E, N) im;
314 
315     void opAssign(SIMDComplex rhs) { re = rhs.re; im = rhs.im; }
316     void opAssign(E e) { re = e; im = 0; }
317     void opAssign(Vector!(E, N) r) { re = r; im = 0; }
318     void opAssign(IMG)(IMG e) if(isBuiltInImaginary!IMG) { re = 0; im = e; }
319     void opAssign(SIMDImaginary!(E, N) e) { re = 0; im = e; }
320     void opAssign(Complex!E cpx) { re = cpx.re; im = cpx.im; }
321     void opAssign(CPX)(CPX cpx) if(isBuiltInComplex!CPX) { re = cpx.re; im = cpx.im; }
322 
323     SIMDComplex opBinary(string op: "+")(SIMDComplex rhs) const { return SIMDComplex(re + rhs.re, im + rhs.im); }
324     SIMDComplex opBinary(string op: "+")(E e) const { return SIMDComplex(re + e, im); }
325     SIMDComplex opBinary(string op: "+")(Vector!(E, N) r) const { return SIMDComplex(re + r, im); }
326     SIMDComplex opBinary(string op: "+", IMG)(IMG e) const if(isBuiltInImaginary!IMG) { return SIMDComplex(re, im + e/1i); }
327     SIMDComplex opBinary(string op: "+")(SIMDImaginary!(E, N) rhs) const { return SIMDComplex(re, im + rhs.im); }
328     SIMDComplex opBinary(string op: "+")(Complex!E cpx) const { return SIMDComplex(re + cpx.re, im + cpx.im); }
329     SIMDComplex opBinary(string op: "+", CPX)(CPX cpx) const if(isBuiltInComplex!CPX) { return SIMDComplex(re + cpx.re, im + cpx.im); }
330 
331     SIMDComplex opBinary(string op: "-")(SIMDComplex rhs) const { return SIMDComplex(re - rhs.re, im - rhs.im); }
332     SIMDComplex opBinary(string op: "-")(E e) const { return SIMDComplex(re - e, im); }
333     SIMDComplex opBinary(string op: "-")(Vector!(E, N) r) const { return SIMDComplex(re - r, im); }
334     SIMDComplex opBinary(string op: "-", IMG)(IMG e) const if(isBuiltInImaginary!IMG) { return SIMDComplex(re, im - e/1i); }
335     SIMDComplex opBinary(string op: "-")(SIMDImaginary!(E, N) rhs) const { return SIMDComplex(re, im - rhs.im); }
336     SIMDComplex opBinary(string op: "-")(Complex!E cpx) const { return SIMDComplex(re - cpx.re, im - cpx.im); }
337     SIMDComplex opBinary(string op: "-", CPX)(CPX cpx) const if(isBuiltInComplex!CPX) { return SIMDComplex(re - cpx.re, im - cpx.im); }
338 
339     SIMDComplex opBinary(string op: "*")(SIMDComplex rhs) const { return SIMDComplex(re * rhs.re - im * rhs.im, re * rhs.im + im * rhs.re); }
340     SIMDComplex opBinary(string op: "*")(E e) const { return SIMDComplex(re * e, im * e); }
341     SIMDComplex opBinary(string op: "*")(Vector!(E, N) r) const { return SIMDComplex(re * r, im * r); }
342     SIMDComplex opBinary(string op: "*", IMG)(IMG e) const if(isBuiltInImaginary!IMG) { return SIMDComplex(im * (e*1i), re * (e/1i)); }
343     SIMDComplex opBinary(string op: "*")(SIMDImaginary!(E, N) rhs) const { return SIMDComplex(-im * rhs.im, re * rhs.im); }
344     SIMDComplex opBinary(string op: "*")(Complex!E cpx) const { return SIMDComplex(re * cpx.re - im * cpx.im, re * cpx.im + im * cpx.re); }
345     SIMDComplex opBinary(string op: "*", CPX)(CPX cpx) const if(isBuiltInComplex!CPX) { return SIMDComplex(re * cpx.re - im * cpx.im, re * cpx.im + im * cpx.re); }
346 }
347 */