1 module carbon.simd; 2 3 import std.stdio; 4 import core.simd; 5 import carbon.math; 6 import std.math; 7 8 version(D_SIMD): 9 10 /** 11 */ 12 float dotProduct(in Vector!(float[4])[] a, in Vector!(float[4])[] b) pure nothrow @trusted @nogc 13 in{ 14 assert(a.length == b.length); 15 } 16 body{ 17 alias V = Vector!(float[4]); 18 19 V sum; 20 sum = 0; 21 22 V* px = cast(V*)(a.ptr), 23 ph = cast(V*)(b.ptr), 24 qx = cast(V*)(px + a.length); 25 26 while(px != qx) 27 { 28 sum += *px * *ph; 29 ++px; 30 ++ph; 31 } 32 33 V ones; 34 ones.array = [1.0f, 1.0f, 1.0f, 1.0f]; 35 sum = __simd(XMM.DPPS, sum, ones, 0b11111111); 36 return sum.array[0]; 37 } 38 39 unittest 40 { 41 scope(failure) {writefln("Unittest failure :%s(%s)", __FILE__, __LINE__); stdout.flush();} 42 scope(success) {writefln("Unittest success :%s(%s)", __FILE__, __LINE__); stdout.flush();} 43 44 Vector!(float[4])[4] a; 45 Vector!(float[4])[4] b; 46 47 float[] as = a[0].ptr[0 .. 16], 48 bs = b[0].ptr[0 .. 16]; 49 50 float check = 0; 51 foreach(i; 0 .. 16){ 52 as[i] = i; 53 bs[i] = i+1; 54 check += i * (i+1); 55 } 56 57 assert(approxEqual(check, dotProduct(a, b))); 58 } 59 60 61 /** 62 a[0] <- [x.re, x.im, y.re, y.im] 63 b[0] <- [z.re, z.im, w.re, w.im] 64 65 return: sum of (x*z + y*w) 66 */ 67 cfloat cpxDotProduct(in Vector!(float[4])[] a, in Vector!(float[4])[] b) pure nothrow @trusted @nogc 68 in{ 69 assert(a.length == b.length); 70 } 71 body{ 72 Vector!(float[4]) r, q; 73 r = 0; 74 q = 0; 75 76 auto px = a.ptr, 77 ph = b.ptr, 78 qx = a.ptr + a.length; 79 80 while(px != qx) 81 { 82 Vector!(float[4]) x = *px, 83 h = *ph; 84 r += x * h; 85 86 x = __simd(XMM.SHUFPS, x, x, 0b10_11_00_01); 87 88 q += x * h; 89 90 ++px; 91 ++ph; 92 } 93 94 Vector!(float[4]) sign, ones; 95 sign.array = [1.0f, -1.0f, 1.0f, -1.0f]; 96 ones.array = [1.0f, 1.0f, 1.0f, 1.0f]; 97 98 r = __simd(XMM.DPPS, r, sign, 0b11111111); 99 q = __simd(XMM.DPPS, q, ones, 0b11111111); 100 101 return r.array[0] + q.array[0]*1i; 102 } 103 104 unittest 105 { 106 scope(failure) {writefln("Unittest failure :%s(%s)", __FILE__, __LINE__); stdout.flush();} 107 scope(success) {writefln("Unittest success :%s(%s)", __FILE__, __LINE__); stdout.flush();} 108 109 Vector!(float[4])[4] a; 110 Vector!(float[4])[4] b; 111 112 cfloat[] as = (cast(cfloat*)a[0].ptr)[0 .. 8], 113 bs = (cast(cfloat*)b[0].ptr)[0 .. 8]; 114 115 cfloat check = 0+0i; 116 foreach(i; 0 .. 8){ 117 as[i] = i + (i+1)*1i; 118 bs[i] = (i+1) + (i+2)*1i; 119 check += as[i] * bs[i]; 120 } 121 122 cfloat res = cpxDotProduct(a, b); 123 assert(approxEqual(check.re, res.re)); 124 assert(approxEqual(check.im, res.im)); 125 } 126 127 128 /** 129 */ 130 cfloat cpxDotProduct(FastComplexArray!(float, 4) a, FastComplexArray!(float, 4) b) pure nothrow @trusted @nogc 131 { 132 Vector!(float[4]) pv = 0, qv = 0; 133 size_t len = a.length; 134 auto a_r_b = a.re.ptr, 135 a_r_e = a_r_b + len, 136 a_i_b = a.im.ptr, 137 a_i_e = a_i_b + len, 138 b_r_b = b.re.ptr, 139 b_r_e = b_r_b + len, 140 b_i_b = b.im.ptr, 141 b_i_e = b_i_b + len; 142 143 while(a_r_b != a_r_e){ 144 pv += (*a_r_b * *b_r_b) - (*a_i_b * *b_i_b); 145 qv += (*a_i_b * *b_r_b) + (*a_r_b * *b_i_b); 146 147 ++a_r_b; 148 ++a_i_b; 149 ++b_r_b; 150 ++b_i_b; 151 } 152 153 154 Vector!(float[4]) ones; 155 ones.array = [1.0f, 1.0f, 1.0f, 1.0f]; 156 157 pv = __simd(XMM.DPPS, pv, ones, 0b11111111); 158 qv = __simd(XMM.DPPS, qv, ones, 0b11111111); 159 160 return pv.array[0] + qv.array[0]*1i; 161 } 162 163 164 struct FastComplexArray(E = float, size_t N = 4) 165 { 166 Vector!(E[N])[] re; 167 Vector!(E[N])[] im; 168 169 170 size_t length() const pure nothrow @safe @nogc @property 171 { 172 return re.length; 173 } 174 175 176 void opOpAssign(string op)(R r) 177 if(op == "+" || op == "-") 178 { 179 iterate!(`*a `~op~`= b;`)(re, r); 180 } 181 182 183 void opOpAssign(string op)(R r) 184 if(op == "*" || op == "/") 185 { 186 iterate!(`*a `~op~`= c; *b `~op~`= c;`)(re, im, r); 187 } 188 189 190 void opOpAssign(string op)(I r) 191 if(op == "+" || op == "-") 192 { 193 iterate!(`*a `~op~`= b;`)(im, r); 194 } 195 196 197 void opOpAssign(string op)(I r) 198 if(op == "*" || op == "/") 199 { 200 swap(re, im); 201 iterate!(`*a `~op~`= d; *b `~op~`= c;`)(re, im, r, -r); 202 } 203 204 205 void opOpAssign(string op)(C r) 206 if(op == "+" || op == "-") 207 { 208 iterate!(`*a `~op~`= c; *b `~op~`= d;`)(re, im, r.re, r.im); 209 } 210 211 212 void opOpAssign(string op)(C r) 213 if(op == "*") 214 { 215 iterate!(`auto _temp = *a * c - *b * d; *b = *a * d + *b * c; *a = _temp;`)(re, im, r.re, r.im); 216 } 217 } 218 219 /* 220 import core.bitop; 221 import std.traits; 222 import carbon.traits; 223 enum bool isSIMDArray(T) = is(T : SIMDArray!(E, N), E, size_t N); 224 225 226 struct SIMDArray(E, size_t N) 227 if(N > 0) 228 { 229 private alias V = core.simd.Vector!(E[N]); 230 231 232 enum N_log2 = core.bitop.bsr(N); 233 alias ElementType = E; 234 235 this(inout(V)[] vec, size_t size) inout 236 { 237 _arr = vec; 238 _size = size; 239 } 240 241 242 this(V[] vec) 243 { 244 this(vec, vec.length * N); 245 } 246 247 248 this(size_t n) 249 { 250 immutable oldN = n; 251 252 if(n % N != 0) 253 n = n / N + 1; 254 else 255 n /= N; 256 257 this(new V[n], oldN); 258 } 259 260 261 @property 262 inout(ElementType)[] array() inout pure nothrow @nogc { return (cast(E[])_arr)[0 .. _size]; } 263 264 265 @property 266 inout(V)[] opSlice() inout pure nothrow @safe @nogc { return _arr; } 267 268 @property 269 inout(typeof(this)) opSlice(size_t i, size_t j) inout pure nothrow @safe @nogc 270 in{ 271 assert(i == 0); 272 } 273 body { 274 return typeof(return)(this._arr, j); 275 } 276 277 278 void opOpAssign(string op)(in SIMDArray!(E, N) rhs) 279 if(is(typeof((V a, V b){ mixin(`a` ~ op ~ "=b"); }))) 280 in { 281 assert(this.length == rhs.length); 282 } 283 body { 284 mixin(`_arr[] ` ~ op ~ "= rhs._arr[];"); 285 } 286 287 288 @property 289 size_t length() const pure nothrow @safe @nogc 290 { 291 return _size; 292 } 293 294 295 ref ElementType opIndex(size_t i) pure nothrow @safe @nogc 296 in { 297 assert(i < _size); 298 } 299 body { 300 enum size_t mask = (1 << N_log2) -1; 301 302 return _arr[i >> N_log2].array[i & mask]; 303 } 304 305 306 void storeEachResult(alias fn, T...)(in SIMDArray!(E, N) lhs, in SIMDArray!(E, N), rhs, T values) 307 if(is(typeof(fn(lhs._arr[0], rhs._arr[0], values)))) 308 in { 309 assert(this.length == lhs.length && this.length == rhs.length); 310 } 311 body { 312 V* plhs = () @trusted { return lhs._arr.ptr; }(), 313 prhs = () @trusted { return rhs._arr.ptr; }(); 314 315 foreach(i, ref e; _arr){ 316 e = fn(*plhs, *prhs, values); 317 ++plhs; ++prhs; 318 } 319 } 320 321 322 void storeEachResult(alias fn)(in SIMDArray!(E, N) arr, T values) 323 if(is(typeof(fn(arr._arr[0], values)))) 324 in { 325 assert(this.length == arr.length); 326 } 327 body { 328 V* parr = () @trusted { return arr._arr.ptr; }(); 329 330 foreach(i, ref e; _arr){ 331 e = fn(*parr, values); 332 ++parr; 333 } 334 } 335 336 337 void storeEachResult(alias fn, T...)(T values) 338 if(is(typeof(fn(values)))) 339 in { 340 341 } 342 body { 343 foreach(ref e; _arr) 344 e = fn(values); 345 } 346 347 348 private: 349 V[] _arr; 350 size_t _size; 351 } 352 353 354 alias SSEArray(E) = SIMDArray!(E, 16 / E.sizeof); 355 alias SSEImaginary(E) = SIMDImaginary!(E, 16 / E.sizeof); 356 alias SSEComplex(E) = SIMDComplex!(E, 16 / E.sizeof); 357 358 alias AVXArray(E) = SIMDArray!(E, 32 / E.sizeof); 359 alias AVXImaginary(E) = SIMDImaginary!(E, 32 / E.sizeof); 360 alias AVXComplex(E) = SIMDComplex!(E, 32 / E.sizeof); 361 362 alias FastSIMDArray(E) = Select!(is(AVXArray!E), AVXArray!E, SSEArray!E); 363 alias FastSIMDImaginary(E) = Select!(is(AVXImaginary!E), AVXImaginary!E, SSEImaginary!E); 364 alias FastSIMDComplex(E) = Select!(is(AVXComplex!E), AVXComplex!E, SSEComplex!E); 365 366 367 struct SIMDImaginary(E, N) 368 { 369 Vector!(E, N) im; 370 371 Vector!(E, N) re() const @property { Vector!(E, N) dst = 0; return dst; } 372 373 void opAssign(SIMDImaginary!(E, N) img) { im = img.im; } 374 void opAssign(IMG)(IMG e) if(isBuiltInImaginary!IMG) { im = e/1i; } 375 376 SIMDImaginary opBinary(string op: "+")(SIMDImaginary rhs) const { return SIMDImaginary(im + rhs.im); } 377 SIMDImaginary opBinary(string op: "+", IMG)(IMG e) const if(isBuiltInImaginary!IMG) { return SIMDImaginary(im + (e*-1i)); } 378 SIMDComplex!(E, N) opBinary(string op: "+")(E e) const { Vector!(E, N) re = e; return SIMDComplex!(E, N)(re, im); } 379 SIMDComplex!(E, N) opBinary(string op: "+")(Vector!(E, N) e) const { Vector!(E, N) re = e; return SIMDComplex!(E, N)(re, im); } 380 SIMDComplex!(E, N) opBinary(string op: "+", CPX)(CPX cpx) const if(isBuiltInComplex!CPX) { Vector!(E, N) re = cpx.re; return SIMDComplex!(E, N)(re, im + cpx.im); } 381 SIMDComplex!(E, N) opBinary(string op: "+")(Complex!E cpx) const { Vector!(E, N) re = cpx.re; return SIMDComplex!(E, N)(re, im + cpx.im); } 382 SIMDComplex!(E, N) opBinary(string op: "+")(SIMDComplex!(E, N) cpx) const { return SIMDComplex!(E, N)(cpx.re, im + cpx.im); } 383 384 SIMDImaginary opBinary(string op: "-")(SIMDImaginary rhs) const { return SIMDImaginary(im - rhs.im); } 385 SIMDImaginary opBinary(string op: "-", IMG)(IMG e) const if(isBuiltInImaginary!IMG) { return SIMDImaginary(im - e*(-1i)); } 386 SIMDComplex!(E, N) opBinary(string op: "-")(E e) const { Vector!(E, N) re = e; return SIMDComplex!(E, N)(re, im); } 387 SIMDComplex!(E, N) opBinary(string op: "-")(Vector!(E, N) e) const { Vector!(E, N) re = e; return SIMDComplex!(E, N)(re, im); } 388 SIMDComplex!(E, N) opBinary(string op: "-", CPX)(CPX cpx) const if(isBuiltInComplex!CPX) { Vector!(E, N) re = cpx.re; return SIMDComplex!(E, N)(re, im - cpx.im); } 389 SIMDComplex!(E, N) opBinary(string op: "-")(Complex!E cpx) const { Vector!(E, N) re = cpx.re; return SIMDComplex!(E, N)(re, im - cpx.im); } 390 SIMDComplex!(E, N) opBinary(string op: "-")(SIMDComplex!(E, N) cpx) const { return SIMDComplex!(E, N)(cpx.re, im - cpx.im); } 391 392 Vector!(E, N) opBinary(string op: "*")(SIMDImaginary rhs) const { return -(im * rhs.im); } 393 Vector!(E, N) opBinary(string op: "*", IMG)(IMG e) const if(isBuiltInImaginary!IMG) { return im * (e*1i); } 394 SIMDImaginary opBinary(string op: "*")(E e) const { return SIMDImaginary(im * e); } 395 SIMDImaginary opBinary(string op: "*")(Vector!(E, N) e) const { return SIMDImaginary(im * e); } 396 SIMDComplex!(E, N) opBinary(string op: "*", CPX)(CPX cpx) const if(isBuiltInComplex!CPX) { return SIMDComplex!(E, N)(-im * cpx.im, im * cpx.re); } 397 SIMDComplex!(E, N) opBinary(string op: "*")(Complex!E cpx) const { return SIMDComplex!(E, N)(-im*cpx.im, im * cpx.re); } 398 SIMDComplex!(E, N) opBinary(string op: "*")(SIMDComplex!(E, N) cpx) const { return SIMDComplex!(E, N)(-im * cpx.im, im * cpx.re); } 399 } 400 401 402 struct SIMDComplex(E, N) 403 { 404 Vector!(E, N) re; 405 Vector!(E, N) im; 406 407 void opAssign(SIMDComplex rhs) { re = rhs.re; im = rhs.im; } 408 void opAssign(E e) { re = e; im = 0; } 409 void opAssign(Vector!(E, N) r) { re = r; im = 0; } 410 void opAssign(IMG)(IMG e) if(isBuiltInImaginary!IMG) { re = 0; im = e; } 411 void opAssign(SIMDImaginary!(E, N) e) { re = 0; im = e; } 412 void opAssign(Complex!E cpx) { re = cpx.re; im = cpx.im; } 413 void opAssign(CPX)(CPX cpx) if(isBuiltInComplex!CPX) { re = cpx.re; im = cpx.im; } 414 415 SIMDComplex opBinary(string op: "+")(SIMDComplex rhs) const { return SIMDComplex(re + rhs.re, im + rhs.im); } 416 SIMDComplex opBinary(string op: "+")(E e) const { return SIMDComplex(re + e, im); } 417 SIMDComplex opBinary(string op: "+")(Vector!(E, N) r) const { return SIMDComplex(re + r, im); } 418 SIMDComplex opBinary(string op: "+", IMG)(IMG e) const if(isBuiltInImaginary!IMG) { return SIMDComplex(re, im + e/1i); } 419 SIMDComplex opBinary(string op: "+")(SIMDImaginary!(E, N) rhs) const { return SIMDComplex(re, im + rhs.im); } 420 SIMDComplex opBinary(string op: "+")(Complex!E cpx) const { return SIMDComplex(re + cpx.re, im + cpx.im); } 421 SIMDComplex opBinary(string op: "+", CPX)(CPX cpx) const if(isBuiltInComplex!CPX) { return SIMDComplex(re + cpx.re, im + cpx.im); } 422 423 SIMDComplex opBinary(string op: "-")(SIMDComplex rhs) const { return SIMDComplex(re - rhs.re, im - rhs.im); } 424 SIMDComplex opBinary(string op: "-")(E e) const { return SIMDComplex(re - e, im); } 425 SIMDComplex opBinary(string op: "-")(Vector!(E, N) r) const { return SIMDComplex(re - r, im); } 426 SIMDComplex opBinary(string op: "-", IMG)(IMG e) const if(isBuiltInImaginary!IMG) { return SIMDComplex(re, im - e/1i); } 427 SIMDComplex opBinary(string op: "-")(SIMDImaginary!(E, N) rhs) const { return SIMDComplex(re, im - rhs.im); } 428 SIMDComplex opBinary(string op: "-")(Complex!E cpx) const { return SIMDComplex(re - cpx.re, im - cpx.im); } 429 SIMDComplex opBinary(string op: "-", CPX)(CPX cpx) const if(isBuiltInComplex!CPX) { return SIMDComplex(re - cpx.re, im - cpx.im); } 430 431 SIMDComplex opBinary(string op: "*")(SIMDComplex rhs) const { return SIMDComplex(re * rhs.re - im * rhs.im, re * rhs.im + im * rhs.re); } 432 SIMDComplex opBinary(string op: "*")(E e) const { return SIMDComplex(re * e, im * e); } 433 SIMDComplex opBinary(string op: "*")(Vector!(E, N) r) const { return SIMDComplex(re * r, im * r); } 434 SIMDComplex opBinary(string op: "*", IMG)(IMG e) const if(isBuiltInImaginary!IMG) { return SIMDComplex(im * (e*1i), re * (e/1i)); } 435 SIMDComplex opBinary(string op: "*")(SIMDImaginary!(E, N) rhs) const { return SIMDComplex(-im * rhs.im, re * rhs.im); } 436 SIMDComplex opBinary(string op: "*")(Complex!E cpx) const { return SIMDComplex(re * cpx.re - im * cpx.im, re * cpx.im + im * cpx.re); } 437 SIMDComplex opBinary(string op: "*", CPX)(CPX cpx) const if(isBuiltInComplex!CPX) { return SIMDComplex(re * cpx.re - im * cpx.im, re * cpx.im + im * cpx.re); } 438 } 439 */