1 module carbon.simd;
2 
3 import std.stdio;
4 import core.simd;
5 import carbon.math;
6 import std.math;
7 
8 version(D_SIMD):
9 
10 /**
11 */
12 float dotProduct(in Vector!(float[4])[] a, in Vector!(float[4])[] b) pure nothrow @trusted @nogc
13 in{
14     assert(a.length == b.length);
15 }
16 body{
17     alias V = Vector!(float[4]);
18 
19     V sum;
20     sum = 0;
21 
22     V* px = cast(V*)(a.ptr),
23        ph = cast(V*)(b.ptr),
24        qx = cast(V*)(px + a.length);
25 
26     while(px != qx)
27     {
28         sum += *px * *ph;
29         ++px;
30         ++ph;
31     }
32 
33     V ones;
34     ones.array = [1.0f, 1.0f, 1.0f, 1.0f];
35     sum = __simd(XMM.DPPS, sum, ones, 0b11111111);
36     return sum.array[0];
37 }
38 
39 unittest
40 {
41     scope(failure) {writefln("Unittest failure :%s(%s)", __FILE__, __LINE__); stdout.flush();}
42     scope(success) {writefln("Unittest success :%s(%s)", __FILE__, __LINE__); stdout.flush();}
43 
44     Vector!(float[4])[4] a;
45     Vector!(float[4])[4] b;
46 
47     float[] as = a[0].ptr[0 .. 16],
48             bs = b[0].ptr[0 .. 16];
49 
50     float check = 0;
51     foreach(i; 0 .. 16){
52         as[i] = i;
53         bs[i] = i+1;
54         check += i * (i+1);
55     }
56 
57     assert(approxEqual(check, dotProduct(a, b)));
58 }
59 
60 
61 /**
62 a[0] <- [x.re, x.im, y.re, y.im]
63 b[0] <- [z.re, z.im, w.re, w.im]
64 
65 return: sum of (x*z + y*w)
66 */
67 cfloat cpxDotProduct(in Vector!(float[4])[] a, in Vector!(float[4])[] b) pure nothrow @trusted @nogc
68 in{
69     assert(a.length == b.length);
70 }
71 body{
72     Vector!(float[4]) r, q;
73     r = 0;
74     q = 0;
75 
76     auto px = a.ptr,
77          ph = b.ptr,
78          qx = a.ptr + a.length;
79 
80     while(px != qx)
81     {
82         Vector!(float[4]) x = *px,
83                           h = *ph;
84         r += x * h;
85 
86         x = __simd(XMM.SHUFPS, x, x, 0b10_11_00_01);
87 
88         q += x * h;
89 
90         ++px;
91         ++ph;
92     }
93 
94     Vector!(float[4]) sign, ones;
95     sign.array = [1.0f, -1.0f, 1.0f, -1.0f];
96     ones.array = [1.0f, 1.0f, 1.0f, 1.0f];
97 
98     r = __simd(XMM.DPPS, r, sign, 0b11111111);
99     q = __simd(XMM.DPPS, q, ones, 0b11111111);
100 
101     return r.array[0] + q.array[0]*1i;
102 }
103 
104 unittest
105 {
106     scope(failure) {writefln("Unittest failure :%s(%s)", __FILE__, __LINE__); stdout.flush();}
107     scope(success) {writefln("Unittest success :%s(%s)", __FILE__, __LINE__); stdout.flush();}
108 
109     Vector!(float[4])[4] a;
110     Vector!(float[4])[4] b;
111 
112     cfloat[] as = (cast(cfloat*)a[0].ptr)[0 .. 8],
113             bs = (cast(cfloat*)b[0].ptr)[0 .. 8];
114 
115     cfloat check = 0+0i;
116     foreach(i; 0 .. 8){
117         as[i] = i + (i+1)*1i;
118         bs[i] = (i+1) + (i+2)*1i;
119         check += as[i] * bs[i];
120     }
121 
122     cfloat res = cpxDotProduct(a, b);
123     assert(approxEqual(check.re, res.re));
124     assert(approxEqual(check.im, res.im));
125 }
126 
127 
128 /**
129 */
130 cfloat cpxDotProduct(FastComplexArray!(float, 4) a, FastComplexArray!(float, 4) b) pure nothrow @trusted @nogc
131 {
132     Vector!(float[4]) pv = 0, qv = 0;
133     size_t len = a.length;
134     auto a_r_b = a.re.ptr,
135          a_r_e = a_r_b + len,
136          a_i_b = a.im.ptr,
137          a_i_e = a_i_b + len,
138          b_r_b = b.re.ptr,
139          b_r_e = b_r_b + len,
140          b_i_b = b.im.ptr,
141          b_i_e = b_i_b + len;
142 
143     while(a_r_b != a_r_e){
144         pv += (*a_r_b * *b_r_b) - (*a_i_b * *b_i_b);
145         qv += (*a_i_b * *b_r_b) + (*a_r_b * *b_i_b);
146 
147         ++a_r_b;
148         ++a_i_b;
149         ++b_r_b;
150         ++b_i_b;
151     }
152 
153 
154     Vector!(float[4]) ones;
155     ones.array = [1.0f, 1.0f, 1.0f, 1.0f];
156 
157     pv = __simd(XMM.DPPS, pv, ones, 0b11111111);
158     qv = __simd(XMM.DPPS, qv, ones, 0b11111111);
159 
160     return pv.array[0] + qv.array[0]*1i;
161 }
162 
163 
164 struct FastComplexArray(E = float, size_t N = 4)
165 {
166     Vector!(E[N])[] re;
167     Vector!(E[N])[] im;
168 
169 
170     size_t length() const pure nothrow @safe @nogc @property
171     {
172         return re.length;
173     }
174 
175 
176     void opOpAssign(string op)(R r)
177     if(op == "+" || op == "-")
178     {
179         iterate!(`*a `~op~`= b;`)(re, r);
180     }
181 
182 
183     void opOpAssign(string op)(R r)
184     if(op == "*" || op == "/")
185     {
186         iterate!(`*a `~op~`= c; *b `~op~`= c;`)(re, im, r);
187     }
188 
189 
190     void opOpAssign(string op)(I r)
191     if(op == "+" || op == "-")
192     {
193         iterate!(`*a `~op~`= b;`)(im, r);
194     }
195 
196 
197     void opOpAssign(string op)(I r)
198     if(op == "*" || op == "/")
199     {
200         swap(re, im);
201         iterate!(`*a `~op~`= d; *b `~op~`= c;`)(re, im, r, -r);
202     }
203 
204 
205     void opOpAssign(string op)(C r)
206     if(op == "+" || op == "-")
207     {
208         iterate!(`*a `~op~`= c; *b `~op~`= d;`)(re, im, r.re, r.im);
209     }
210 
211 
212     void opOpAssign(string op)(C r)
213     if(op == "*")
214     {
215         iterate!(`auto _temp = *a * c - *b * d; *b = *a * d + *b * c; *a = _temp;`)(re, im, r.re, r.im);
216     }
217 }
218 
219 /*
220 import core.bitop;
221 import std.traits;
222 import carbon.traits;
223 enum bool isSIMDArray(T) = is(T : SIMDArray!(E, N), E, size_t N);
224 
225 
226 struct SIMDArray(E, size_t N)
227 if(N > 0)
228 {
229     private alias V = core.simd.Vector!(E[N]);
230 
231 
232     enum N_log2 = core.bitop.bsr(N);
233     alias ElementType = E;
234 
235     this(inout(V)[] vec, size_t size) inout
236     {
237         _arr = vec;
238         _size = size;
239     }
240 
241 
242     this(V[] vec)
243     {
244         this(vec, vec.length * N);
245     }
246 
247 
248     this(size_t n)
249     {
250         immutable oldN = n;
251 
252         if(n % N != 0)
253             n = n / N + 1;
254         else
255             n /= N;
256 
257         this(new V[n], oldN);
258     }
259 
260 
261     @property
262     inout(ElementType)[] array() inout pure nothrow @nogc { return (cast(E[])_arr)[0 .. _size]; }
263 
264 
265     @property
266     inout(V)[] opSlice() inout pure nothrow @safe @nogc { return _arr; }
267 
268     @property
269     inout(typeof(this)) opSlice(size_t i, size_t j) inout pure nothrow @safe @nogc
270     in{
271         assert(i == 0);
272     }
273     body {
274         return typeof(return)(this._arr, j);
275     }
276 
277 
278     void opOpAssign(string op)(in SIMDArray!(E, N) rhs)
279     if(is(typeof((V a, V b){ mixin(`a` ~ op ~ "=b"); })))
280     in {
281         assert(this.length == rhs.length);
282     }
283     body {
284         mixin(`_arr[] ` ~ op ~ "= rhs._arr[];");
285     }
286 
287 
288     @property
289     size_t length() const pure nothrow @safe @nogc
290     {
291         return _size;
292     }
293 
294 
295     ref ElementType opIndex(size_t i) pure nothrow @safe @nogc
296     in {
297         assert(i < _size);
298     }
299     body {
300         enum size_t mask = (1 << N_log2) -1;
301 
302         return _arr[i >> N_log2].array[i & mask];
303     }
304 
305 
306     void storeEachResult(alias fn, T...)(in SIMDArray!(E, N) lhs, in SIMDArray!(E, N), rhs, T values)
307     if(is(typeof(fn(lhs._arr[0], rhs._arr[0], values))))
308     in {
309         assert(this.length == lhs.length && this.length == rhs.length);
310     }
311     body {
312         V* plhs = () @trusted { return lhs._arr.ptr; }(),
313            prhs = () @trusted { return rhs._arr.ptr; }();
314 
315         foreach(i, ref e; _arr){
316             e = fn(*plhs, *prhs, values);
317             ++plhs; ++prhs;
318         }
319     }
320 
321 
322     void storeEachResult(alias fn)(in SIMDArray!(E, N) arr, T values)
323     if(is(typeof(fn(arr._arr[0], values))))
324     in {
325         assert(this.length == arr.length);
326     }
327     body {
328         V* parr = () @trusted { return arr._arr.ptr; }();
329 
330         foreach(i, ref e; _arr){
331             e = fn(*parr, values);
332             ++parr;
333         }
334     }
335 
336 
337     void storeEachResult(alias fn, T...)(T values)
338     if(is(typeof(fn(values))))
339     in {
340 
341     }
342     body {
343         foreach(ref e; _arr)
344             e = fn(values);
345     }
346 
347 
348   private:
349     V[] _arr;
350     size_t _size;
351 }
352 
353 
354 alias SSEArray(E) = SIMDArray!(E, 16 / E.sizeof);
355 alias SSEImaginary(E) = SIMDImaginary!(E, 16 / E.sizeof);
356 alias SSEComplex(E) = SIMDComplex!(E, 16 / E.sizeof);
357 
358 alias AVXArray(E) = SIMDArray!(E, 32 / E.sizeof);
359 alias AVXImaginary(E) = SIMDImaginary!(E, 32 / E.sizeof);
360 alias AVXComplex(E) = SIMDComplex!(E, 32 / E.sizeof);
361 
362 alias FastSIMDArray(E) = Select!(is(AVXArray!E), AVXArray!E, SSEArray!E);
363 alias FastSIMDImaginary(E) = Select!(is(AVXImaginary!E), AVXImaginary!E, SSEImaginary!E);
364 alias FastSIMDComplex(E) = Select!(is(AVXComplex!E), AVXComplex!E, SSEComplex!E);
365 
366 
367 struct SIMDImaginary(E, N)
368 {
369     Vector!(E, N) im;
370 
371     Vector!(E, N) re() const @property { Vector!(E, N) dst = 0; return dst; }
372 
373     void opAssign(SIMDImaginary!(E, N) img) { im = img.im; }
374     void opAssign(IMG)(IMG e) if(isBuiltInImaginary!IMG) { im = e/1i; }
375 
376     SIMDImaginary opBinary(string op: "+")(SIMDImaginary rhs) const { return SIMDImaginary(im + rhs.im); }
377     SIMDImaginary opBinary(string op: "+", IMG)(IMG e) const if(isBuiltInImaginary!IMG) { return SIMDImaginary(im + (e*-1i)); }
378     SIMDComplex!(E, N) opBinary(string op: "+")(E e) const { Vector!(E, N) re = e; return SIMDComplex!(E, N)(re, im); }
379     SIMDComplex!(E, N) opBinary(string op: "+")(Vector!(E, N) e) const { Vector!(E, N) re = e; return SIMDComplex!(E, N)(re, im); }
380     SIMDComplex!(E, N) opBinary(string op: "+", CPX)(CPX cpx) const if(isBuiltInComplex!CPX) { Vector!(E, N) re = cpx.re; return SIMDComplex!(E, N)(re, im + cpx.im); }
381     SIMDComplex!(E, N) opBinary(string op: "+")(Complex!E cpx) const { Vector!(E, N) re = cpx.re; return SIMDComplex!(E, N)(re, im + cpx.im); }
382     SIMDComplex!(E, N) opBinary(string op: "+")(SIMDComplex!(E, N) cpx) const { return SIMDComplex!(E, N)(cpx.re, im + cpx.im); }
383 
384     SIMDImaginary opBinary(string op: "-")(SIMDImaginary rhs) const { return SIMDImaginary(im - rhs.im); }
385     SIMDImaginary opBinary(string op: "-", IMG)(IMG e) const if(isBuiltInImaginary!IMG) { return SIMDImaginary(im - e*(-1i)); }
386     SIMDComplex!(E, N) opBinary(string op: "-")(E e) const { Vector!(E, N) re = e; return SIMDComplex!(E, N)(re, im); }
387     SIMDComplex!(E, N) opBinary(string op: "-")(Vector!(E, N) e) const { Vector!(E, N) re = e; return SIMDComplex!(E, N)(re, im); }
388     SIMDComplex!(E, N) opBinary(string op: "-", CPX)(CPX cpx) const if(isBuiltInComplex!CPX) { Vector!(E, N) re = cpx.re; return SIMDComplex!(E, N)(re, im - cpx.im); }
389     SIMDComplex!(E, N) opBinary(string op: "-")(Complex!E cpx) const { Vector!(E, N) re = cpx.re; return SIMDComplex!(E, N)(re, im - cpx.im); }
390     SIMDComplex!(E, N) opBinary(string op: "-")(SIMDComplex!(E, N) cpx) const { return SIMDComplex!(E, N)(cpx.re, im - cpx.im); }
391 
392     Vector!(E, N) opBinary(string op: "*")(SIMDImaginary rhs) const { return -(im * rhs.im); }
393     Vector!(E, N) opBinary(string op: "*", IMG)(IMG e) const if(isBuiltInImaginary!IMG) { return im * (e*1i); }
394     SIMDImaginary opBinary(string op: "*")(E e) const { return SIMDImaginary(im * e); }
395     SIMDImaginary opBinary(string op: "*")(Vector!(E, N) e) const { return SIMDImaginary(im * e); }
396     SIMDComplex!(E, N) opBinary(string op: "*", CPX)(CPX cpx) const if(isBuiltInComplex!CPX) { return SIMDComplex!(E, N)(-im * cpx.im, im * cpx.re); }
397     SIMDComplex!(E, N) opBinary(string op: "*")(Complex!E cpx) const { return SIMDComplex!(E, N)(-im*cpx.im, im * cpx.re); }
398     SIMDComplex!(E, N) opBinary(string op: "*")(SIMDComplex!(E, N) cpx) const { return SIMDComplex!(E, N)(-im * cpx.im, im * cpx.re); }
399 }
400 
401 
402 struct SIMDComplex(E, N)
403 {
404     Vector!(E, N) re;
405     Vector!(E, N) im;
406 
407     void opAssign(SIMDComplex rhs) { re = rhs.re; im = rhs.im; }
408     void opAssign(E e) { re = e; im = 0; }
409     void opAssign(Vector!(E, N) r) { re = r; im = 0; }
410     void opAssign(IMG)(IMG e) if(isBuiltInImaginary!IMG) { re = 0; im = e; }
411     void opAssign(SIMDImaginary!(E, N) e) { re = 0; im = e; }
412     void opAssign(Complex!E cpx) { re = cpx.re; im = cpx.im; }
413     void opAssign(CPX)(CPX cpx) if(isBuiltInComplex!CPX) { re = cpx.re; im = cpx.im; }
414 
415     SIMDComplex opBinary(string op: "+")(SIMDComplex rhs) const { return SIMDComplex(re + rhs.re, im + rhs.im); }
416     SIMDComplex opBinary(string op: "+")(E e) const { return SIMDComplex(re + e, im); }
417     SIMDComplex opBinary(string op: "+")(Vector!(E, N) r) const { return SIMDComplex(re + r, im); }
418     SIMDComplex opBinary(string op: "+", IMG)(IMG e) const if(isBuiltInImaginary!IMG) { return SIMDComplex(re, im + e/1i); }
419     SIMDComplex opBinary(string op: "+")(SIMDImaginary!(E, N) rhs) const { return SIMDComplex(re, im + rhs.im); }
420     SIMDComplex opBinary(string op: "+")(Complex!E cpx) const { return SIMDComplex(re + cpx.re, im + cpx.im); }
421     SIMDComplex opBinary(string op: "+", CPX)(CPX cpx) const if(isBuiltInComplex!CPX) { return SIMDComplex(re + cpx.re, im + cpx.im); }
422 
423     SIMDComplex opBinary(string op: "-")(SIMDComplex rhs) const { return SIMDComplex(re - rhs.re, im - rhs.im); }
424     SIMDComplex opBinary(string op: "-")(E e) const { return SIMDComplex(re - e, im); }
425     SIMDComplex opBinary(string op: "-")(Vector!(E, N) r) const { return SIMDComplex(re - r, im); }
426     SIMDComplex opBinary(string op: "-", IMG)(IMG e) const if(isBuiltInImaginary!IMG) { return SIMDComplex(re, im - e/1i); }
427     SIMDComplex opBinary(string op: "-")(SIMDImaginary!(E, N) rhs) const { return SIMDComplex(re, im - rhs.im); }
428     SIMDComplex opBinary(string op: "-")(Complex!E cpx) const { return SIMDComplex(re - cpx.re, im - cpx.im); }
429     SIMDComplex opBinary(string op: "-", CPX)(CPX cpx) const if(isBuiltInComplex!CPX) { return SIMDComplex(re - cpx.re, im - cpx.im); }
430 
431     SIMDComplex opBinary(string op: "*")(SIMDComplex rhs) const { return SIMDComplex(re * rhs.re - im * rhs.im, re * rhs.im + im * rhs.re); }
432     SIMDComplex opBinary(string op: "*")(E e) const { return SIMDComplex(re * e, im * e); }
433     SIMDComplex opBinary(string op: "*")(Vector!(E, N) r) const { return SIMDComplex(re * r, im * r); }
434     SIMDComplex opBinary(string op: "*", IMG)(IMG e) const if(isBuiltInImaginary!IMG) { return SIMDComplex(im * (e*1i), re * (e/1i)); }
435     SIMDComplex opBinary(string op: "*")(SIMDImaginary!(E, N) rhs) const { return SIMDComplex(-im * rhs.im, re * rhs.im); }
436     SIMDComplex opBinary(string op: "*")(Complex!E cpx) const { return SIMDComplex(re * cpx.re - im * cpx.im, re * cpx.im + im * cpx.re); }
437     SIMDComplex opBinary(string op: "*", CPX)(CPX cpx) const if(isBuiltInComplex!CPX) { return SIMDComplex(re * cpx.re - im * cpx.im, re * cpx.im + im * cpx.re); }
438 }
439 */