1 module carbon.simd; 2 3 import std.stdio; 4 import core.simd; 5 import carbon.math; 6 import std.math; 7 8 version(D_SIMD): 9 10 /** 11 */ 12 float dotProduct(in Vector!(float[4])[] a, in Vector!(float[4])[] b) pure nothrow @trusted @nogc 13 in{ 14 assert(a.length == b.length); 15 } 16 body{ 17 alias V = Vector!(float[4]); 18 19 V sum; 20 sum = 0; 21 22 V* px = cast(V*)(a.ptr), 23 ph = cast(V*)(b.ptr), 24 qx = cast(V*)(px + a.length); 25 26 while(px != qx) 27 { 28 sum += *px * *ph; 29 ++px; 30 ++ph; 31 } 32 33 V ones; 34 ones.array = [1.0f, 1.0f, 1.0f, 1.0f]; 35 sum = __simd(XMM.DPPS, sum, ones, 0b11111111); 36 return sum.array[0]; 37 } 38 39 unittest 40 { 41 scope(failure) {writefln("Unittest failure :%s(%s)", __FILE__, __LINE__); stdout.flush();} 42 scope(success) {writefln("Unittest success :%s(%s)", __FILE__, __LINE__); stdout.flush();} 43 44 Vector!(float[4])[4] a; 45 Vector!(float[4])[4] b; 46 47 float[] as = a[0].ptr[0 .. 16], 48 bs = b[0].ptr[0 .. 16]; 49 50 float check = 0; 51 foreach(i; 0 .. 16){ 52 as[i] = i; 53 bs[i] = i+1; 54 check += i * (i+1); 55 } 56 57 assert(approxEqual(check, dotProduct(a, b))); 58 } 59 60 61 /** 62 a[0] <- [x.re, x.im, y.re, y.im] 63 b[0] <- [z.re, z.im, w.re, w.im] 64 65 return: sum of (x*z + y*w) 66 */ 67 cfloat cpxDotProduct(in Vector!(float[4])[] a, in Vector!(float[4])[] b) pure nothrow @trusted @nogc 68 in{ 69 assert(a.length == b.length); 70 } 71 body{ 72 Vector!(float[4]) r, q; 73 r = 0; 74 q = 0; 75 76 auto px = a.ptr, 77 ph = b.ptr, 78 qx = a.ptr + a.length; 79 80 while(px != qx) 81 { 82 Vector!(float[4]) x = *px, 83 h = *ph; 84 r += x * h; 85 86 x = __simd(XMM.SHUFPS, x, x, 0b10_11_00_01); 87 88 q += x * h; 89 90 ++px; 91 ++ph; 92 } 93 94 Vector!(float[4]) sign, ones; 95 sign.array = [1.0f, -1.0f, 1.0f, -1.0f]; 96 ones.array = [1.0f, 1.0f, 1.0f, 1.0f]; 97 98 r = __simd(XMM.DPPS, r, sign, 0b11111111); 99 q = __simd(XMM.DPPS, q, ones, 0b11111111); 100 101 return r.array[0] + q.array[0]*1i; 102 } 103 104 unittest 105 { 106 scope(failure) {writefln("Unittest failure :%s(%s)", __FILE__, __LINE__); stdout.flush();} 107 scope(success) {writefln("Unittest success :%s(%s)", __FILE__, __LINE__); stdout.flush();} 108 109 Vector!(float[4])[4] a; 110 Vector!(float[4])[4] b; 111 112 cfloat[] as = (cast(cfloat*)a[0].ptr)[0 .. 8], 113 bs = (cast(cfloat*)b[0].ptr)[0 .. 8]; 114 115 cfloat check = 0+0i; 116 foreach(i; 0 .. 8){ 117 as[i] = i + (i+1)*1i; 118 bs[i] = (i+1) + (i+2)*1i; 119 check += as[i] * bs[i]; 120 } 121 122 cfloat res = cpxDotProduct(a, b); 123 assert(approxEqual(check.re, res.re)); 124 assert(approxEqual(check.im, res.im)); 125 } 126 127 /* 128 import core.bitop; 129 import std.traits; 130 import carbon.traits; 131 enum bool isSIMDArray(T) = is(T : SIMDArray!(E, N), E, size_t N); 132 133 134 struct SIMDArray(E, size_t N) 135 if(N > 0) 136 { 137 private alias V = core.simd.Vector!(E[N]); 138 139 140 enum N_log2 = core.bitop.bsr(N); 141 alias ElementType = E; 142 143 this(inout(V)[] vec, size_t size) inout 144 { 145 _arr = vec; 146 _size = size; 147 } 148 149 150 this(V[] vec) 151 { 152 this(vec, vec.length * N); 153 } 154 155 156 this(size_t n) 157 { 158 immutable oldN = n; 159 160 if(n % N != 0) 161 n = n / N + 1; 162 else 163 n /= N; 164 165 this(new V[n], oldN); 166 } 167 168 169 @property 170 inout(ElementType)[] array() inout pure nothrow @nogc { return (cast(E[])_arr)[0 .. _size]; } 171 172 173 @property 174 inout(V)[] opSlice() inout pure nothrow @safe @nogc { return _arr; } 175 176 @property 177 inout(typeof(this)) opSlice(size_t i, size_t j) inout pure nothrow @safe @nogc 178 in{ 179 assert(i == 0); 180 } 181 body { 182 return typeof(return)(this._arr, j); 183 } 184 185 186 void opOpAssign(string op)(in SIMDArray!(E, N) rhs) 187 if(is(typeof((V a, V b){ mixin(`a` ~ op ~ "=b"); }))) 188 in { 189 assert(this.length == rhs.length); 190 } 191 body { 192 mixin(`_arr[] ` ~ op ~ "= rhs._arr[];"); 193 } 194 195 196 @property 197 size_t length() const pure nothrow @safe @nogc 198 { 199 return _size; 200 } 201 202 203 ref ElementType opIndex(size_t i) pure nothrow @safe @nogc 204 in { 205 assert(i < _size); 206 } 207 body { 208 enum size_t mask = (1 << N_log2) -1; 209 210 return _arr[i >> N_log2].array[i & mask]; 211 } 212 213 214 void storeEachResult(alias fn, T...)(in SIMDArray!(E, N) lhs, in SIMDArray!(E, N), rhs, T values) 215 if(is(typeof(fn(lhs._arr[0], rhs._arr[0], values)))) 216 in { 217 assert(this.length == lhs.length && this.length == rhs.length); 218 } 219 body { 220 V* plhs = () @trusted { return lhs._arr.ptr; }(), 221 prhs = () @trusted { return rhs._arr.ptr; }(); 222 223 foreach(i, ref e; _arr){ 224 e = fn(*plhs, *prhs, values); 225 ++plhs; ++prhs; 226 } 227 } 228 229 230 void storeEachResult(alias fn)(in SIMDArray!(E, N) arr, T values) 231 if(is(typeof(fn(arr._arr[0], values)))) 232 in { 233 assert(this.length == arr.length); 234 } 235 body { 236 V* parr = () @trusted { return arr._arr.ptr; }(); 237 238 foreach(i, ref e; _arr){ 239 e = fn(*parr, values); 240 ++parr; 241 } 242 } 243 244 245 void storeEachResult(alias fn, T...)(T values) 246 if(is(typeof(fn(values)))) 247 in { 248 249 } 250 body { 251 foreach(ref e; _arr) 252 e = fn(values); 253 } 254 255 256 private: 257 V[] _arr; 258 size_t _size; 259 } 260 261 262 alias SSEArray(E) = SIMDArray!(E, 16 / E.sizeof); 263 alias SSEImaginary(E) = SIMDImaginary!(E, 16 / E.sizeof); 264 alias SSEComplex(E) = SIMDComplex!(E, 16 / E.sizeof); 265 266 alias AVXArray(E) = SIMDArray!(E, 32 / E.sizeof); 267 alias AVXImaginary(E) = SIMDImaginary!(E, 32 / E.sizeof); 268 alias AVXComplex(E) = SIMDComplex!(E, 32 / E.sizeof); 269 270 alias FastSIMDArray(E) = Select!(is(AVXArray!E), AVXArray!E, SSEArray!E); 271 alias FastSIMDImaginary(E) = Select!(is(AVXImaginary!E), AVXImaginary!E, SSEImaginary!E); 272 alias FastSIMDComplex(E) = Select!(is(AVXComplex!E), AVXComplex!E, SSEComplex!E); 273 274 275 struct SIMDImaginary(E, N) 276 { 277 Vector!(E, N) im; 278 279 Vector!(E, N) re() const @property { Vector!(E, N) dst = 0; return dst; } 280 281 void opAssign(SIMDImaginary!(E, N) img) { im = img.im; } 282 void opAssign(IMG)(IMG e) if(isBuiltInImaginary!IMG) { im = e/1i; } 283 284 SIMDImaginary opBinary(string op: "+")(SIMDImaginary rhs) const { return SIMDImaginary(im + rhs.im); } 285 SIMDImaginary opBinary(string op: "+", IMG)(IMG e) const if(isBuiltInImaginary!IMG) { return SIMDImaginary(im + (e*-1i)); } 286 SIMDComplex!(E, N) opBinary(string op: "+")(E e) const { Vector!(E, N) re = e; return SIMDComplex!(E, N)(re, im); } 287 SIMDComplex!(E, N) opBinary(string op: "+")(Vector!(E, N) e) const { Vector!(E, N) re = e; return SIMDComplex!(E, N)(re, im); } 288 SIMDComplex!(E, N) opBinary(string op: "+", CPX)(CPX cpx) const if(isBuiltInComplex!CPX) { Vector!(E, N) re = cpx.re; return SIMDComplex!(E, N)(re, im + cpx.im); } 289 SIMDComplex!(E, N) opBinary(string op: "+")(Complex!E cpx) const { Vector!(E, N) re = cpx.re; return SIMDComplex!(E, N)(re, im + cpx.im); } 290 SIMDComplex!(E, N) opBinary(string op: "+")(SIMDComplex!(E, N) cpx) const { return SIMDComplex!(E, N)(cpx.re, im + cpx.im); } 291 292 SIMDImaginary opBinary(string op: "-")(SIMDImaginary rhs) const { return SIMDImaginary(im - rhs.im); } 293 SIMDImaginary opBinary(string op: "-", IMG)(IMG e) const if(isBuiltInImaginary!IMG) { return SIMDImaginary(im - e*(-1i)); } 294 SIMDComplex!(E, N) opBinary(string op: "-")(E e) const { Vector!(E, N) re = e; return SIMDComplex!(E, N)(re, im); } 295 SIMDComplex!(E, N) opBinary(string op: "-")(Vector!(E, N) e) const { Vector!(E, N) re = e; return SIMDComplex!(E, N)(re, im); } 296 SIMDComplex!(E, N) opBinary(string op: "-", CPX)(CPX cpx) const if(isBuiltInComplex!CPX) { Vector!(E, N) re = cpx.re; return SIMDComplex!(E, N)(re, im - cpx.im); } 297 SIMDComplex!(E, N) opBinary(string op: "-")(Complex!E cpx) const { Vector!(E, N) re = cpx.re; return SIMDComplex!(E, N)(re, im - cpx.im); } 298 SIMDComplex!(E, N) opBinary(string op: "-")(SIMDComplex!(E, N) cpx) const { return SIMDComplex!(E, N)(cpx.re, im - cpx.im); } 299 300 Vector!(E, N) opBinary(string op: "*")(SIMDImaginary rhs) const { return -(im * rhs.im); } 301 Vector!(E, N) opBinary(string op: "*", IMG)(IMG e) const if(isBuiltInImaginary!IMG) { return im * (e*1i); } 302 SIMDImaginary opBinary(string op: "*")(E e) const { return SIMDImaginary(im * e); } 303 SIMDImaginary opBinary(string op: "*")(Vector!(E, N) e) const { return SIMDImaginary(im * e); } 304 SIMDComplex!(E, N) opBinary(string op: "*", CPX)(CPX cpx) const if(isBuiltInComplex!CPX) { return SIMDComplex!(E, N)(-im * cpx.im, im * cpx.re); } 305 SIMDComplex!(E, N) opBinary(string op: "*")(Complex!E cpx) const { return SIMDComplex!(E, N)(-im*cpx.im, im * cpx.re); } 306 SIMDComplex!(E, N) opBinary(string op: "*")(SIMDComplex!(E, N) cpx) const { return SIMDComplex!(E, N)(-im * cpx.im, im * cpx.re); } 307 } 308 309 310 struct SIMDComplex(E, N) 311 { 312 Vector!(E, N) re; 313 Vector!(E, N) im; 314 315 void opAssign(SIMDComplex rhs) { re = rhs.re; im = rhs.im; } 316 void opAssign(E e) { re = e; im = 0; } 317 void opAssign(Vector!(E, N) r) { re = r; im = 0; } 318 void opAssign(IMG)(IMG e) if(isBuiltInImaginary!IMG) { re = 0; im = e; } 319 void opAssign(SIMDImaginary!(E, N) e) { re = 0; im = e; } 320 void opAssign(Complex!E cpx) { re = cpx.re; im = cpx.im; } 321 void opAssign(CPX)(CPX cpx) if(isBuiltInComplex!CPX) { re = cpx.re; im = cpx.im; } 322 323 SIMDComplex opBinary(string op: "+")(SIMDComplex rhs) const { return SIMDComplex(re + rhs.re, im + rhs.im); } 324 SIMDComplex opBinary(string op: "+")(E e) const { return SIMDComplex(re + e, im); } 325 SIMDComplex opBinary(string op: "+")(Vector!(E, N) r) const { return SIMDComplex(re + r, im); } 326 SIMDComplex opBinary(string op: "+", IMG)(IMG e) const if(isBuiltInImaginary!IMG) { return SIMDComplex(re, im + e/1i); } 327 SIMDComplex opBinary(string op: "+")(SIMDImaginary!(E, N) rhs) const { return SIMDComplex(re, im + rhs.im); } 328 SIMDComplex opBinary(string op: "+")(Complex!E cpx) const { return SIMDComplex(re + cpx.re, im + cpx.im); } 329 SIMDComplex opBinary(string op: "+", CPX)(CPX cpx) const if(isBuiltInComplex!CPX) { return SIMDComplex(re + cpx.re, im + cpx.im); } 330 331 SIMDComplex opBinary(string op: "-")(SIMDComplex rhs) const { return SIMDComplex(re - rhs.re, im - rhs.im); } 332 SIMDComplex opBinary(string op: "-")(E e) const { return SIMDComplex(re - e, im); } 333 SIMDComplex opBinary(string op: "-")(Vector!(E, N) r) const { return SIMDComplex(re - r, im); } 334 SIMDComplex opBinary(string op: "-", IMG)(IMG e) const if(isBuiltInImaginary!IMG) { return SIMDComplex(re, im - e/1i); } 335 SIMDComplex opBinary(string op: "-")(SIMDImaginary!(E, N) rhs) const { return SIMDComplex(re, im - rhs.im); } 336 SIMDComplex opBinary(string op: "-")(Complex!E cpx) const { return SIMDComplex(re - cpx.re, im - cpx.im); } 337 SIMDComplex opBinary(string op: "-", CPX)(CPX cpx) const if(isBuiltInComplex!CPX) { return SIMDComplex(re - cpx.re, im - cpx.im); } 338 339 SIMDComplex opBinary(string op: "*")(SIMDComplex rhs) const { return SIMDComplex(re * rhs.re - im * rhs.im, re * rhs.im + im * rhs.re); } 340 SIMDComplex opBinary(string op: "*")(E e) const { return SIMDComplex(re * e, im * e); } 341 SIMDComplex opBinary(string op: "*")(Vector!(E, N) r) const { return SIMDComplex(re * r, im * r); } 342 SIMDComplex opBinary(string op: "*", IMG)(IMG e) const if(isBuiltInImaginary!IMG) { return SIMDComplex(im * (e*1i), re * (e/1i)); } 343 SIMDComplex opBinary(string op: "*")(SIMDImaginary!(E, N) rhs) const { return SIMDComplex(-im * rhs.im, re * rhs.im); } 344 SIMDComplex opBinary(string op: "*")(Complex!E cpx) const { return SIMDComplex(re * cpx.re - im * cpx.im, re * cpx.im + im * cpx.re); } 345 SIMDComplex opBinary(string op: "*", CPX)(CPX cpx) const if(isBuiltInComplex!CPX) { return SIMDComplex(re * cpx.re - im * cpx.im, re * cpx.im + im * cpx.re); } 346 } 347 */