/**
 * The core.internal.atomic module contains the low-level atomic features available in hardware.
 * This module may be a routing layer for compiler intrinsics.
 *
 * Copyright: Copyright Manu Evans 2019.
 * License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
 * Authors: Sean Kelly, Alex Rønne Petersen, Manu Evans
 * Source: $(DRUNTIMESRC core/internal/_atomic.d)
 */

module core.internal.atomic;

import core.atomic : MemoryOrder, has128BitCAS;
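
// A minimal usage sketch of the interface each compiler-specific branch below
// provides. Illustrative only: it assumes a target where these primitives
// compile (e.g. x86/x86_64) and is not part of the public core.atomic API.
unittest
{
    int x = 5;
    assert(atomicLoad(&x) == 5);                    // seq-cst load by default
    atomicStore(&x, 7);                             // seq-cst store
    assert(atomicFetchAdd(&x, 3) == 7 && x == 10);  // returns the previous value
    int expected = 10;
    assert(atomicCompareExchangeStrong(&x, &expected, 42));
    assert(atomicLoad(&x) == 42);
}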

version (LDC)
{
    import ldc.intrinsics;

    pragma(inline, true):

    inout(T) atomicLoad(MemoryOrder order = MemoryOrder.seq, T)(inout(T)* src) pure nothrow @nogc @trusted
    {
        alias A = _AtomicType!T;
        A result = llvm_atomic_load!A(cast(shared A*) src, _ordering!(order));
        return *cast(inout(T)*) &result;
    }

    void atomicStore(MemoryOrder order = MemoryOrder.seq, T)(T* dest, T value) pure nothrow @nogc @trusted
    {
        alias A = _AtomicType!T;
        llvm_atomic_store!A(*cast(A*) &value, cast(shared A*) dest, _ordering!(order));
    }

    T atomicFetchAdd(MemoryOrder order = MemoryOrder.seq, T)(T* dest, T value) pure nothrow @nogc @trusted
    {
        alias A = _AtomicType!T;
        return llvm_atomic_rmw_add!A(cast(shared A*) dest, value, _ordering!(order));
    }

    T atomicFetchSub(MemoryOrder order = MemoryOrder.seq, T)(T* dest, T value) pure nothrow @nogc @trusted
    {
        alias A = _AtomicType!T;
        return llvm_atomic_rmw_sub!A(cast(shared A*) dest, value, _ordering!(order));
    }

    T atomicExchange(MemoryOrder order = MemoryOrder.seq, bool result = true, T)(T* dest, T value) pure nothrow @nogc @trusted
    {
        alias A = _AtomicType!T;
        // `res` rather than `result`, to avoid shadowing the `result` template parameter
        A res = llvm_atomic_rmw_xchg!A(cast(shared A*) dest, *cast(A*) &value, _ordering!(order));
        return *cast(T*) &res;
    }

    bool atomicCompareExchange(bool weak = false, MemoryOrder succ = MemoryOrder.seq, MemoryOrder fail = MemoryOrder.seq, T)(T* dest, T* compare, T value) pure nothrow @nogc @trusted
    {
        alias A = _AtomicType!T;
        auto result = llvm_atomic_cmp_xchg!A(cast(shared A*) dest, *cast(A*) compare, *cast(A*) &value,
            _ordering!(succ), _ordering!(fail), weak);
        *compare = *cast(T*) &result.previousValue;
        return result.exchanged;
    }

    bool atomicCompareExchangeWeak(MemoryOrder succ = MemoryOrder.seq, MemoryOrder fail = MemoryOrder.seq, T)(T* dest, T* compare, T value) pure nothrow @nogc @trusted
    {
        return atomicCompareExchange!(true, succ, fail, T)(dest, compare, value);
    }

    bool atomicCompareExchangeStrong(MemoryOrder succ = MemoryOrder.seq, MemoryOrder fail = MemoryOrder.seq, T)(T* dest, T* compare, T value) pure nothrow @nogc @trusted
    {
        return atomicCompareExchange!(false, succ, fail, T)(dest, compare, value);
    }

    bool atomicCompareExchangeNoResult(bool weak = false, MemoryOrder succ = MemoryOrder.seq, MemoryOrder fail = MemoryOrder.seq, T)(T* dest, const T compare, T value) pure nothrow @nogc @trusted
    {
        alias A = _AtomicType!T;
        auto result = llvm_atomic_cmp_xchg!A(cast(shared A*) dest, *cast(A*) &compare, *cast(A*) &value,
            _ordering!(succ), _ordering!(fail), weak);
        return result.exchanged;
    }

    bool atomicCompareExchangeWeakNoResult(MemoryOrder succ = MemoryOrder.seq, MemoryOrder fail = MemoryOrder.seq, T)(T* dest, const T compare, T value) pure nothrow @nogc @trusted
    {
        return atomicCompareExchangeNoResult!(true, succ, fail, T)(dest, compare, value);
    }

    bool atomicCompareExchangeStrongNoResult(MemoryOrder succ = MemoryOrder.seq, MemoryOrder fail = MemoryOrder.seq, T)(T* dest, const T compare, T value) pure nothrow @nogc @trusted
    {
        return atomicCompareExchangeNoResult!(false, succ, fail, T)(dest, compare, value);
    }

    void atomicFence(MemoryOrder order = MemoryOrder.seq)() pure nothrow @nogc @trusted
    {
        llvm_memory_fence(_ordering!(order));
    }

    void pause() pure nothrow @nogc @trusted
    {
        version (X86)
            enum inst = "pause";
        else version (X86_64)
            enum inst = "pause";
        else version (ARM)
        {
            // requires v6k+ (e.g., -mtriple=armv6k-linux-gnueabihf)
            static if (__traits(targetHasFeature, "v6k"))
                enum inst = "yield";
            else
                enum inst = null;
        }
        else version (AArch64)
            enum inst = "yield";
        else version (MIPS32)
        {
            // requires ISA r2+ (e.g., -mcpu=mips32r2)
            static if (__traits(targetHasFeature, "mips32r2"))
                enum inst = "pause";
            else
                enum inst = null;
        }
        else version (MIPS64)
        {
            // requires ISA r2+ (e.g., -mcpu=mips64r2)
            static if (__traits(targetHasFeature, "mips64r2"))
                enum inst = "pause";
            else
                enum inst = null;
        }
        else
            enum inst = null; // TODO?

        static if (inst !is null)
            asm pure nothrow @nogc @trusted { (inst); }
    }

    template _ordering(MemoryOrder ms)
    {
        static if (ms == MemoryOrder.acq)
            enum _ordering = AtomicOrdering.Acquire;
        else static if (ms == MemoryOrder.rel)
            enum _ordering = AtomicOrdering.Release;
        else static if (ms == MemoryOrder.acq_rel)
            enum _ordering = AtomicOrdering.AcquireRelease;
        else static if (ms == MemoryOrder.seq)
            enum _ordering = AtomicOrdering.SequentiallyConsistent;
        else static if (ms == MemoryOrder.raw)
        {
            // Note that C/C++ 'relaxed' is not the same as NoAtomic/Unordered,
            // but Monotonic.
            enum _ordering = AtomicOrdering.Monotonic;
        }
        else
            static assert(0);
    }

    private template _AtomicType(T)
    {
        static if (T.sizeof == ubyte.sizeof)
            alias _AtomicType = ubyte;
        else static if (T.sizeof == ushort.sizeof)
            alias _AtomicType = ushort;
        else static if (T.sizeof == uint.sizeof)
            alias _AtomicType = uint;
        else static if (T.sizeof == ulong.sizeof)
            alias _AtomicType = ulong;
        else static if (T.sizeof == 2*ulong.sizeof && has128BitCAS)
        {
            struct UCent
            {
                ulong value1;
                ulong value2;
            }

            alias _AtomicType = UCent;
        }
        else
            static assert(is(_AtomicType!T),
                "Cannot atomically load/store type of size " ~ T.sizeof.stringof);
    }
}
else: // !LDC

version (DigitalMars)
{
    private
    {
        enum : int
        {
            AX, BX, CX, DX, DI, SI, R8, R9
        }

        // Sub-register names for each GP register above, indexed by operand size (1, 2, 4, 8 bytes).
        immutable string[4][8] registerNames = [
            [ "AL", "AX", "EAX", "RAX" ],
            [ "BL", "BX", "EBX", "RBX" ],
            [ "CL", "CX", "ECX", "RCX" ],
            [ "DL", "DX", "EDX", "RDX" ],
            [ "DIL", "DI", "EDI", "RDI" ],
            [ "SIL", "SI", "ESI", "RSI" ],
            [ "R8B", "R8W", "R8D", "R8" ],
            [ "R9B", "R9W", "R9D", "R9" ],
        ];

        template RegIndex(T)
        {
            static if (T.sizeof == 1)
                enum RegIndex = 0;
            else static if (T.sizeof == 2)
                enum RegIndex = 1;
            else static if (T.sizeof == 4)
                enum RegIndex = 2;
            else static if (T.sizeof == 8)
                enum RegIndex = 3;
            else
                static assert(false, "Invalid type");
        }

        // e.g. SizedReg!CX is "ECX"/"RCX" (size_t-sized), SizedReg!(AX, ubyte) is "AL".
        enum SizedReg(int reg, T = size_t) = registerNames[reg][RegIndex!T];
    }

    inout(T) atomicLoad(MemoryOrder order = MemoryOrder.seq, T)(inout(T)* src) pure nothrow @nogc @trusted
        if (CanCAS!T)
    {
        static assert(order != MemoryOrder.rel, "invalid MemoryOrder for atomicLoad()");

        static if (T.sizeof == size_t.sizeof * 2)
        {
            version (D_InlineAsm_X86)
            {
                asm pure nothrow @nogc @trusted
                {
                    push EDI;
                    push EBX;
                    mov EBX, 0;
                    mov ECX, 0;
                    mov EAX, 0;
                    mov EDX, 0;
                    mov EDI, src;
                    lock; cmpxchg8b [EDI];
                    pop EBX;
                    pop EDI;
                }
            }
            else version (D_InlineAsm_X86_64)
            {
                version (Windows)
                {
                    static if (RegisterReturn!T)
                    {
                        enum SrcPtr = SizedReg!CX;
                        enum RetPtr = null;
                    }
                    else
                    {
                        enum SrcPtr = SizedReg!DX;
                        enum RetPtr = SizedReg!CX;
                    }

                    mixin (simpleFormat(q{
                        asm pure nothrow @nogc @trusted
                        {
                            naked;
                            push RBX;
                            mov R8, %0;
                            ?1 mov R9, %1;
                            mov RBX, 0;
                            mov RCX, 0;
                            mov RAX, 0;
                            mov RDX, 0;
                            lock; cmpxchg16b [R8];
                            ?1 mov [R9], RAX;
                            ?1 mov 8[R9], RDX;
                            pop RBX;
                            ret;
                        }
                    }, [SrcPtr, RetPtr]));
                }
                else
                {
                    asm pure nothrow @nogc @trusted
                    {
                        naked;
                        push RBX;
                        mov RBX, 0;
                        mov RCX, 0;
                        mov RAX, 0;
                        mov RDX, 0;
                        lock; cmpxchg16b [RDI];
                        pop RBX;
                        ret;
                    }
                }
            }
        }
        else static if (needsLoadBarrier!order)
        {
            version (D_InlineAsm_X86)
            {
                enum SrcReg = SizedReg!CX;
                enum ZeroReg = SizedReg!(DX, T);
                enum ResReg = SizedReg!(AX, T);

                mixin (simpleFormat(q{
                    asm pure nothrow @nogc @trusted
                    {
                        mov %1, 0;
                        mov %2, 0;
                        mov %0, src;
                        lock; cmpxchg [%0], %1;
                    }
                }, [SrcReg, ZeroReg, ResReg]));
            }
            else version (D_InlineAsm_X86_64)
            {
                version (Windows)
                    enum SrcReg = SizedReg!CX;
                else
                    enum SrcReg = SizedReg!DI;
                enum ZeroReg = SizedReg!(DX, T);
                enum ResReg = SizedReg!(AX, T);

                mixin (simpleFormat(q{
                    asm pure nothrow @nogc @trusted
                    {
                        naked;
                        mov %1, 0;
                        mov %2, 0;
                        lock; cmpxchg [%0], %1;
                        ret;
                    }
                }, [SrcReg, ZeroReg, ResReg]));
            }
        }
        else
            return *src;
    }

    void atomicStore(MemoryOrder order = MemoryOrder.seq, T)(T* dest, T value) pure nothrow @nogc @trusted
        if (CanCAS!T)
    {
        static assert(order != MemoryOrder.acq, "Invalid MemoryOrder for atomicStore()");

        static if (T.sizeof == size_t.sizeof * 2)
        {
            version (D_InlineAsm_X86)
            {
                asm pure nothrow @nogc @trusted
                {
                    push EDI;
                    push EBX;
                    lea EDI, value;
                    mov EBX, [EDI];
                    mov ECX, 4[EDI];
                    mov EDI, dest;
                    mov EAX, [EDI];
                    mov EDX, 4[EDI];
                L1: lock; cmpxchg8b [EDI];
                    jne L1;
                    pop EBX;
                    pop EDI;
                }
            }
            else version (D_InlineAsm_X86_64)
            {
                version (Windows)
                {
                    asm pure nothrow @nogc @trusted
                    {
                        naked;
                        push RBX;
                        mov R8, RDX;
                        mov RAX, [RDX];
                        mov RDX, 8[RDX];
                        mov RBX, [RCX];
                        mov RCX, 8[RCX];
                    L1: lock; cmpxchg16b [R8];
                        jne L1;
                        pop RBX;
                        ret;
                    }
                }
                else
                {
                    asm pure nothrow @nogc @trusted
                    {
                        naked;
                        push RBX;
                        mov RBX, RDI;
                        mov RCX, RSI;
                        mov RDI, RDX;
                        mov RAX, [RDX];
                        mov RDX, 8[RDX];
                    L1: lock; cmpxchg16b [RDI];
                        jne L1;
                        pop RBX;
                        ret;
                    }
                }
            }
        }
        else static if (needsStoreBarrier!order)
            atomicExchange!(order, false)(dest, value);
        else
            *dest = value;
    }

    T atomicFetchAdd(MemoryOrder order = MemoryOrder.seq, bool result = true, T)(T* dest, T value) pure nothrow @nogc @trusted
        if (is(T : ulong))
    {
        version (D_InlineAsm_X86)
        {
            static assert(T.sizeof <= 4, "64bit atomicFetchAdd not supported on 32bit target.");

            enum DestReg = SizedReg!DX;
            enum ValReg = SizedReg!(AX, T);

            mixin (simpleFormat(q{
                asm pure nothrow @nogc @trusted
                {
                    mov %1, value;
                    mov %0, dest;
                    lock; xadd[%0], %1;
                }
            }, [DestReg, ValReg]));
        }
        else version (D_InlineAsm_X86_64)
        {
            version (Windows)
            {
                enum DestReg = SizedReg!DX;
                enum ValReg = SizedReg!(CX, T);
            }
            else
            {
                enum DestReg = SizedReg!SI;
                enum ValReg = SizedReg!(DI, T);
            }
            enum ResReg = result ? SizedReg!(AX, T) : null;

            mixin (simpleFormat(q{
                asm pure nothrow @nogc @trusted
                {
                    naked;
                    lock; xadd[%0], %1;
                    ?2 mov %2, %1;
                    ret;
                }
            }, [DestReg, ValReg, ResReg]));
        }
        else
            static assert (false, "Unsupported architecture.");
    }

    T atomicFetchSub(MemoryOrder order = MemoryOrder.seq, bool result = true, T)(T* dest, T value) pure nothrow @nogc @trusted
        if (is(T : ulong))
    {
        return atomicFetchAdd(dest, cast(T)-cast(IntOrLong!T)value);
    }

    T atomicExchange(MemoryOrder order = MemoryOrder.seq, bool result = true, T)(T* dest, T value) pure nothrow @nogc @trusted
        if (CanCAS!T)
    {
        version (D_InlineAsm_X86)
        {
            static assert(T.sizeof <= 4, "64bit atomicExchange not supported on 32bit target.");

            enum DestReg = SizedReg!CX;
            enum ValReg = SizedReg!(AX, T);

            mixin (simpleFormat(q{
                asm pure nothrow @nogc @trusted
                {
                    mov %1, value;
                    mov %0, dest;
                    xchg [%0], %1;
                }
            }, [DestReg, ValReg]));
        }
        else version (D_InlineAsm_X86_64)
        {
            version (Windows)
            {
                enum DestReg = SizedReg!DX;
                enum ValReg = SizedReg!(CX, T);
            }
            else
            {
                enum DestReg = SizedReg!SI;
                enum ValReg = SizedReg!(DI, T);
            }
            enum ResReg = result ? SizedReg!(AX, T) : null;

            mixin (simpleFormat(q{
                asm pure nothrow @nogc @trusted
                {
                    naked;
                    xchg [%0], %1;
                    ?2 mov %2, %1;
                    ret;
                }
            }, [DestReg, ValReg, ResReg]));
        }
        else
            static assert (false, "Unsupported architecture.");
    }

    alias atomicCompareExchangeWeak = atomicCompareExchangeStrong;

    bool atomicCompareExchangeStrong(MemoryOrder succ = MemoryOrder.seq, MemoryOrder fail = MemoryOrder.seq, T)(T* dest, T* compare, T value) pure nothrow @nogc @trusted
        if (CanCAS!T)
    {
        version (D_InlineAsm_X86)
        {
            static if (T.sizeof <= 4)
            {
                enum DestAddr = SizedReg!CX;
                enum CmpAddr = SizedReg!DI;
                enum Val = SizedReg!(DX, T);
                enum Cmp = SizedReg!(AX, T);

                mixin (simpleFormat(q{
                    asm pure nothrow @nogc @trusted
                    {
                        push %1;
                        mov %2, value;
                        mov %1, compare;
                        mov %3, [%1];
                        mov %0, dest;
                        lock; cmpxchg [%0], %2;
                        mov [%1], %3;
                        setz AL;
                        pop %1;
                    }
                }, [DestAddr, CmpAddr, Val, Cmp]));
            }
            else static if (T.sizeof == 8)
            {
                asm pure nothrow @nogc @trusted
                {
                    push EDI;
                    push EBX;
                    lea EDI, value;
                    mov EBX, [EDI];
                    mov ECX, 4[EDI];
                    mov EDI, compare;
                    mov EAX, [EDI];
                    mov EDX, 4[EDI];
                    mov EDI, dest;
                    lock; cmpxchg8b [EDI];
                    mov EDI, compare;
                    mov [EDI], EAX;
                    mov 4[EDI], EDX;
                    setz AL;
                    pop EBX;
                    pop EDI;
                }
            }
            else
                static assert(T.sizeof <= 8, "128bit atomicCompareExchangeStrong not supported on 32bit target.");
        }
        else version (D_InlineAsm_X86_64)
        {
            static if (T.sizeof <= 8)
            {
                version (Windows)
                {
                    enum DestAddr = SizedReg!R8;
                    enum CmpAddr = SizedReg!DX;
                    enum Val = SizedReg!(CX, T);
                }
                else
                {
                    enum DestAddr = SizedReg!DX;
                    enum CmpAddr = SizedReg!SI;
                    enum Val = SizedReg!(DI, T);
                }
                enum Res = SizedReg!(AX, T);

                mixin (simpleFormat(q{
                    asm pure nothrow @nogc @trusted
                    {
                        naked;
                        mov %3, [%1];
                        lock; cmpxchg [%0], %2;
                        jne compare_fail;
                        mov AL, 1;
                        ret;
                    compare_fail:
                        mov [%1], %3;
                        xor AL, AL;
                        ret;
                    }
                }, [DestAddr, CmpAddr, Val, Res]));
            }
            else
            {
                version (Windows)
                {
                    asm pure nothrow @nogc @trusted
                    {
                        naked;
                        push RBX;
                        mov R9, RDX;
                        mov RAX, [RDX];
                        mov RDX, 8[RDX];
                        mov RBX, [RCX];
                        mov RCX, 8[RCX];
                        lock; cmpxchg16b [R8];
                        pop RBX;
                        jne compare_fail;
                        mov AL, 1;
                        ret;
                    compare_fail:
                        mov [R9], RAX;
                        mov 8[R9], RDX;
                        xor AL, AL;
                        ret;
                    }
                }
                else
                {
                    asm pure nothrow @nogc @trusted
                    {
                        naked;
                        push RBX;
                        mov R8, RCX;
                        mov R9, RDX;
                        mov RAX, [RDX];
                        mov RDX, 8[RDX];
                        mov RBX, RDI;
                        mov RCX, RSI;
                        lock; cmpxchg16b [R8];
                        pop RBX;
                        jne compare_fail;
                        mov AL, 1;
                        ret;
                    compare_fail:
                        mov [R9], RAX;
                        mov 8[R9], RDX;
                        xor AL, AL;
                        ret;
                    }
                }
            }
        }
        else
            static assert (false, "Unsupported architecture.");
    }

    alias atomicCompareExchangeWeakNoResult = atomicCompareExchangeStrongNoResult;

    bool atomicCompareExchangeStrongNoResult(MemoryOrder succ = MemoryOrder.seq, MemoryOrder fail = MemoryOrder.seq, T)(T* dest, const T compare, T value) pure nothrow @nogc @trusted
        if (CanCAS!T)
    {
        version (D_InlineAsm_X86)
        {
            static if (T.sizeof <= 4)
            {
                enum DestAddr = SizedReg!CX;
                enum Cmp = SizedReg!(AX, T);
                enum Val = SizedReg!(DX, T);

                mixin (simpleFormat(q{
                    asm pure nothrow @nogc @trusted
                    {
                        mov %2, value;
                        mov %1, compare;
                        mov %0, dest;
                        lock; cmpxchg [%0], %2;
                        setz AL;
                    }
                }, [DestAddr, Cmp, Val]));
            }
            else static if (T.sizeof == 8)
            {
                asm pure nothrow @nogc @trusted
                {
                    push EDI;
                    push EBX;
                    lea EDI, value;
                    mov EBX, [EDI];
                    mov ECX, 4[EDI];
                    lea EDI, compare;
                    mov EAX, [EDI];
                    mov EDX, 4[EDI];
                    mov EDI, dest;
                    lock; cmpxchg8b [EDI];
                    setz AL;
                    pop EBX;
                    pop EDI;
                }
            }
            else
                static assert(T.sizeof <= 8, "128bit atomicCompareExchangeStrong not supported on 32bit target.");
        }
        else version (D_InlineAsm_X86_64)
        {
            static if (T.sizeof <= 8)
            {
                version (Windows)
                {
                    enum DestAddr = SizedReg!R8;
                    enum Cmp = SizedReg!(DX, T);
                    enum Val = SizedReg!(CX, T);
                }
                else
                {
                    enum DestAddr = SizedReg!DX;
                    enum Cmp = SizedReg!(SI, T);
                    enum Val = SizedReg!(DI, T);
                }
                enum AXReg = SizedReg!(AX, T);

                mixin (simpleFormat(q{
                    asm pure nothrow @nogc @trusted
                    {
                        naked;
                        mov %3, %1;
                        lock; cmpxchg [%0], %2;
                        setz AL;
                        ret;
                    }
                }, [DestAddr, Cmp, Val, AXReg]));
            }
            else
            {
                version (Windows)
                {
                    asm pure nothrow @nogc @trusted
                    {
                        naked;
                        push RBX;
                        mov RAX, [RDX];
                        mov RDX, 8[RDX];
                        mov RBX, [RCX];
                        mov RCX, 8[RCX];
                        lock; cmpxchg16b [R8];
                        setz AL;
                        pop RBX;
                        ret;
                    }
                }
                else
                {
                    asm pure nothrow @nogc @trusted
                    {
                        naked;
                        push RBX;
                        mov RAX, RDX;
                        mov RDX, RCX;
                        mov RBX, RDI;
                        mov RCX, RSI;
                        lock; cmpxchg16b [R8];
                        setz AL;
                        pop RBX;
                        ret;
                    }
                }
            }
        }
        else
            static assert (false, "Unsupported architecture.");
    }

    void atomicFence(MemoryOrder order = MemoryOrder.seq)() pure nothrow @nogc @trusted
    {
        // TODO: `mfence` should only be required for seq_cst operations, but this depends on
        //       the compiler's backend knowledge to not reorder code inappropriately,
        //       so we'll apply it conservatively.
        static if (order != MemoryOrder.raw)
        {
            version (D_InlineAsm_X86)
            {
                import core.cpuid;

                // TODO: review this implementation; it seems way overly complicated
                asm pure nothrow @nogc @trusted
                {
                    naked;

                    call sse2;
                    test AL, AL;
                    jne Lcpuid;

                    // Fast path: We have SSE2, so just use mfence.
                    mfence;
                    jmp Lend;

                Lcpuid:

                    // Slow path: We use cpuid to serialize. This is
                    // significantly slower than mfence, but is the
                    // only serialization facility we have available
                    // on older non-SSE2 chips.
                    push EBX;

                    mov EAX, 0;
                    cpuid;

                    pop EBX;

                Lend:

                    ret;
                }
            }
            else version (D_InlineAsm_X86_64)
            {
                asm pure nothrow @nogc @trusted
                {
                    naked;
                    mfence;
                    ret;
                }
            }
            else
                static assert (false, "Unsupported architecture.");
        }
    }

    void pause() pure nothrow @nogc @trusted
    {
        version (D_InlineAsm_X86)
        {
            asm pure nothrow @nogc @trusted
            {
                naked;
                rep; nop;
                ret;
            }
        }
        else version (D_InlineAsm_X86_64)
        {
            asm pure nothrow @nogc @trusted
            {
                naked;
                // pause; // TODO: DMD should add this opcode to its inline asm
                rep; nop;
                ret;
            }
        }
        else
        {
            // ARM should `yield`
            // other architectures? otherwise some sort of nop...
        }
    }
}
else version (GNU)
{
    import gcc.builtins;
    import gcc.config;

    inout(T) atomicLoad(MemoryOrder order = MemoryOrder.seq, T)(inout(T)* src) pure nothrow @nogc @trusted
        if (CanCAS!T)
    {
        static assert(order != MemoryOrder.rel, "invalid MemoryOrder for atomicLoad()");

        static if (GNU_Have_Atomics || GNU_Have_LibAtomic)
        {
            static if (T.sizeof == ubyte.sizeof)
            {
                ubyte value = __atomic_load_1(cast(shared)src, order);
                return *cast(typeof(return)*)&value;
            }
            else static if (T.sizeof == ushort.sizeof)
            {
                ushort value = __atomic_load_2(cast(shared)src, order);
                return *cast(typeof(return)*)&value;
            }
            else static if (T.sizeof == uint.sizeof)
            {
                uint value = __atomic_load_4(cast(shared)src, order);
                return *cast(typeof(return)*)&value;
            }
            else static if (T.sizeof == ulong.sizeof && GNU_Have_64Bit_Atomics)
            {
                ulong value = __atomic_load_8(cast(shared)src, order);
                return *cast(typeof(return)*)&value;
            }
            else static if (GNU_Have_LibAtomic)
            {
                T value;
                __atomic_load(T.sizeof, cast(shared)src, &value, order);
                return *cast(typeof(return)*)&value;
            }
            else
                static assert(0, "Invalid template type specified.");
        }
        else
        {
            getAtomicMutex.lock();
            scope(exit) getAtomicMutex.unlock();
            // dereference the source, not the pointer variable itself
            return *cast(typeof(return)*)src;
        }
    }

    void atomicStore(MemoryOrder order = MemoryOrder.seq, T)(T* dest, T value) pure nothrow @nogc @trusted
        if (CanCAS!T)
    {
        static assert(order != MemoryOrder.acq, "Invalid MemoryOrder for atomicStore()");

        static if (GNU_Have_Atomics || GNU_Have_LibAtomic)
        {
            static if (T.sizeof == ubyte.sizeof)
                __atomic_store_1(cast(shared)dest, *cast(ubyte*)&value, order);
            else static if (T.sizeof == ushort.sizeof)
                __atomic_store_2(cast(shared)dest, *cast(ushort*)&value, order);
            else static if (T.sizeof == uint.sizeof)
                __atomic_store_4(cast(shared)dest, *cast(uint*)&value, order);
            else static if (T.sizeof == ulong.sizeof && GNU_Have_64Bit_Atomics)
                __atomic_store_8(cast(shared)dest, *cast(ulong*)&value, order);
            else static if (GNU_Have_LibAtomic)
                __atomic_store(T.sizeof, cast(shared)dest, cast(void*)&value, order);
            else
                static assert(0, "Invalid template type specified.");
        }
        else
        {
            getAtomicMutex.lock();
            *dest = value;
            getAtomicMutex.unlock();
        }
    }

    T atomicFetchAdd(MemoryOrder order = MemoryOrder.seq, bool result = true, T)(T* dest, T value) pure nothrow @nogc @trusted
        if (is(T : ulong))
    {
        static if (GNU_Have_Atomics || GNU_Have_LibAtomic)
        {
            static if (T.sizeof == ubyte.sizeof)
                return __atomic_fetch_add_1(cast(shared)dest, value, order);
            else static if (T.sizeof == ushort.sizeof)
                return __atomic_fetch_add_2(cast(shared)dest, value, order);
            else static if (T.sizeof == uint.sizeof)
                return __atomic_fetch_add_4(cast(shared)dest, value, order);
            else static if (T.sizeof == ulong.sizeof && GNU_Have_64Bit_Atomics)
                return __atomic_fetch_add_8(cast(shared)dest, value, order);
            else static if (GNU_Have_LibAtomic)
                return __atomic_fetch_add(T.sizeof, cast(shared)dest, cast(void*)&value, order);
            else
                static assert(0, "Invalid template type specified.");
        }
        else
        {
            getAtomicMutex.lock();
            scope(exit) getAtomicMutex.unlock();
            T tmp = *dest;
            *dest += value;
            return tmp;
        }
    }

    T atomicFetchSub(MemoryOrder order = MemoryOrder.seq, bool result = true, T)(T* dest, T value) pure nothrow @nogc @trusted
        if (is(T : ulong))
    {
        static if (GNU_Have_Atomics || GNU_Have_LibAtomic)
        {
            static if (T.sizeof == ubyte.sizeof)
                return __atomic_fetch_sub_1(cast(shared)dest, value, order);
            else static if (T.sizeof == ushort.sizeof)
                return __atomic_fetch_sub_2(cast(shared)dest, value, order);
            else static if (T.sizeof == uint.sizeof)
                return __atomic_fetch_sub_4(cast(shared)dest, value, order);
            else static if (T.sizeof == ulong.sizeof && GNU_Have_64Bit_Atomics)
                return __atomic_fetch_sub_8(cast(shared)dest, value, order);
            else static if (GNU_Have_LibAtomic)
                return __atomic_fetch_sub(T.sizeof, cast(shared)dest, cast(void*)&value, order);
            else
                static assert(0, "Invalid template type specified.");
        }
        else
        {
            getAtomicMutex.lock();
            scope(exit) getAtomicMutex.unlock();
            T tmp = *dest;
            *dest -= value;
            return tmp;
        }
    }

    T atomicExchange(MemoryOrder order = MemoryOrder.seq, bool result = true, T)(T* dest, T value) pure nothrow @nogc @trusted
        if (is(T : ulong) || is(T == class) || is(T == interface) || is(T U : U*))
    {
        static if (GNU_Have_Atomics || GNU_Have_LibAtomic)
        {
            static if (T.sizeof == byte.sizeof)
            {
                ubyte res = __atomic_exchange_1(cast(shared)dest, *cast(ubyte*)&value, order);
                return *cast(typeof(return)*)&res;
            }
            else static if (T.sizeof == short.sizeof)
            {
                ushort res = __atomic_exchange_2(cast(shared)dest, *cast(ushort*)&value, order);
                return *cast(typeof(return)*)&res;
            }
            else static if (T.sizeof == int.sizeof)
            {
                uint res = __atomic_exchange_4(cast(shared)dest, *cast(uint*)&value, order);
                return *cast(typeof(return)*)&res;
            }
            else static if (T.sizeof == long.sizeof && GNU_Have_64Bit_Atomics)
            {
                ulong res = __atomic_exchange_8(cast(shared)dest, *cast(ulong*)&value, order);
                return *cast(typeof(return)*)&res;
            }
            else static if (GNU_Have_LibAtomic)
            {
                T res = void;
                __atomic_exchange(T.sizeof, cast(shared)dest, cast(void*)&value, &res, order);
                return res;
            }
            else
                static assert(0, "Invalid template type specified.");
        }
        else
        {
            getAtomicMutex.lock();
            scope(exit) getAtomicMutex.unlock();

            T res = *dest;
            *dest = value;
            return res;
        }
    }

    bool atomicCompareExchangeWeak(MemoryOrder succ = MemoryOrder.seq, MemoryOrder fail = MemoryOrder.seq, T)(T* dest, T* compare, T value) pure nothrow @nogc @trusted
        if (CanCAS!T)
    {
        return atomicCompareExchangeImpl!(succ, fail, true)(dest, compare, value);
    }

    bool atomicCompareExchangeStrong(MemoryOrder succ = MemoryOrder.seq, MemoryOrder fail = MemoryOrder.seq, T)(T* dest, T* compare, T value) pure nothrow @nogc @trusted
        if (CanCAS!T)
    {
        return atomicCompareExchangeImpl!(succ, fail, false)(dest, compare, value);
    }

    bool atomicCompareExchangeStrongNoResult(MemoryOrder succ = MemoryOrder.seq, MemoryOrder fail = MemoryOrder.seq, T)(T* dest, const T compare, T value) pure nothrow @nogc @trusted
        if (CanCAS!T)
    {
        return atomicCompareExchangeImpl!(succ, fail, false)(dest, cast(T*)&compare, value);
    }

    bool atomicCompareExchangeWeakNoResult(MemoryOrder succ = MemoryOrder.seq, MemoryOrder fail = MemoryOrder.seq, T)(T* dest, const T compare, T value) pure nothrow @nogc @trusted
        if (CanCAS!T)
    {
        return atomicCompareExchangeImpl!(succ, fail, true)(dest, cast(T*)&compare, value);
    }

    private bool atomicCompareExchangeImpl(MemoryOrder succ = MemoryOrder.seq, MemoryOrder fail = MemoryOrder.seq, bool weak, T)(T* dest, T* compare, T value) pure nothrow @nogc @trusted
        if (CanCAS!T)
    {
        bool res = void;

        static if (GNU_Have_Atomics || GNU_Have_LibAtomic)
        {
            static if (T.sizeof == byte.sizeof)
                res = __atomic_compare_exchange_1(cast(shared)dest, compare, *cast(ubyte*)&value,
                                                  weak, succ, fail);
            else static if (T.sizeof == short.sizeof)
                res = __atomic_compare_exchange_2(cast(shared)dest, compare, *cast(ushort*)&value,
                                                  weak, succ, fail);
            else static if (T.sizeof == int.sizeof)
                res = __atomic_compare_exchange_4(cast(shared)dest, compare, *cast(uint*)&value,
                                                  weak, succ, fail);
            else static if (T.sizeof == long.sizeof && GNU_Have_64Bit_Atomics)
                res = __atomic_compare_exchange_8(cast(shared)dest, compare, *cast(ulong*)&value,
                                                  weak, succ, fail);
            else static if (GNU_Have_LibAtomic)
                res = __atomic_compare_exchange(T.sizeof, cast(shared)dest, compare, cast(void*)&value,
                                                succ, fail);
            else
                static assert(0, "Invalid template type specified.");
        }
        else
        {
            static if (T.sizeof == byte.sizeof)
                alias U = byte;
            else static if (T.sizeof == short.sizeof)
                alias U = short;
            else static if (T.sizeof == int.sizeof)
                alias U = int;
            else static if (T.sizeof == long.sizeof)
                alias U = long;
            else
                static assert(0, "Invalid template type specified.");

            getAtomicMutex.lock();
            scope(exit) getAtomicMutex.unlock();

            // compare against the value pointed to by `compare`
            if (*cast(U*)dest == *cast(U*)compare)
            {
                *dest = value;
                res = true;
            }
            else
            {
                *compare = *dest;
                res = false;
            }
        }

        return res;
    }

    void atomicFence(MemoryOrder order = MemoryOrder.seq)() pure nothrow @nogc @trusted
    {
        static if (GNU_Have_Atomics || GNU_Have_LibAtomic)
            __atomic_thread_fence(order);
        else
        {
            getAtomicMutex.lock();
            getAtomicMutex.unlock();
        }
    }

    void pause() pure nothrow @nogc @trusted
    {
        version (X86)
        {
            __builtin_ia32_pause();
        }
        else version (X86_64)
        {
            __builtin_ia32_pause();
        }
        else
        {
            // Other architectures? Some sort of nop or barrier.
        }
    }

    static if (!GNU_Have_Atomics && !GNU_Have_LibAtomic)
    {
        // Use system mutex for atomics, faking the purity of the functions so
        // that they can be used in pure/nothrow/@safe code.
        extern (C) private pure @trusted @nogc nothrow
        {
            static if (GNU_Thread_Model == ThreadModel.Posix)
            {
                import core.sys.posix.pthread;
                alias atomicMutexHandle = pthread_mutex_t;

                pragma(mangle, "pthread_mutex_init") int fakePureMutexInit(pthread_mutex_t*, pthread_mutexattr_t*);
                pragma(mangle, "pthread_mutex_lock") int fakePureMutexLock(pthread_mutex_t*);
                pragma(mangle, "pthread_mutex_unlock") int fakePureMutexUnlock(pthread_mutex_t*);
            }
            else static if (GNU_Thread_Model == ThreadModel.Win32)
            {
                import core.sys.windows.winbase;
                alias atomicMutexHandle = CRITICAL_SECTION;

                pragma(mangle, "InitializeCriticalSection") int fakePureMutexInit(CRITICAL_SECTION*);
                pragma(mangle, "EnterCriticalSection") void fakePureMutexLock(CRITICAL_SECTION*);
                pragma(mangle, "LeaveCriticalSection") int fakePureMutexUnlock(CRITICAL_SECTION*);
            }
            else
            {
                alias atomicMutexHandle = int;
            }
        }

        // Implements lock/unlock operations.
        private struct AtomicMutex
        {
            int lock() pure @trusted @nogc nothrow
            {
                static if (GNU_Thread_Model == ThreadModel.Posix)
                {
                    if (!_inited)
                    {
                        fakePureMutexInit(&_handle, null);
                        _inited = true;
                    }
                    return fakePureMutexLock(&_handle);
                }
                else
                {
                    static if (GNU_Thread_Model == ThreadModel.Win32)
                    {
                        if (!_inited)
                        {
                            fakePureMutexInit(&_handle);
                            _inited = true;
                        }
                        fakePureMutexLock(&_handle);
                    }
                    return 0;
                }
            }

            int unlock() pure @trusted @nogc nothrow
            {
                static if (GNU_Thread_Model == ThreadModel.Posix)
                    return fakePureMutexUnlock(&_handle);
                else
                {
                    static if (GNU_Thread_Model == ThreadModel.Win32)
                        fakePureMutexUnlock(&_handle);
                    return 0;
                }
            }

        private:
            atomicMutexHandle _handle;
            bool _inited;
        }

        // Internal static mutex reference.
        private AtomicMutex* _getAtomicMutex() @trusted @nogc nothrow
        {
            __gshared static AtomicMutex mutex;
            return &mutex;
        }

        // Pure alias for _getAtomicMutex.
        pragma(mangle, _getAtomicMutex.mangleof)
        private AtomicMutex* getAtomicMutex() pure @trusted @nogc nothrow @property;
    }
}

private:

version (Windows)
{
    enum RegisterReturn(T) = is(T : U[], U) || is(T : R delegate(A), R, A...);
}

enum CanCAS(T) = is(T : ulong) ||
                 is(T == class) ||
                 is(T == interface) ||
                 is(T : U*, U) ||
                 is(T : U[], U) ||
                 is(T : R delegate(A), R, A...) ||
                 (is(T == struct) && __traits(isPOD, T) &&
                  (T.sizeof <= size_t.sizeof*2 ||       // no more than 2 words
                   (T.sizeof == 16 && has128BitCAS)) && // or supports 128-bit CAS
                  (T.sizeof & (T.sizeof - 1)) == 0      // is power of 2
                 );

template IntOrLong(T)
{
    static if (T.sizeof > 4)
        alias IntOrLong = long;
    else
        alias IntOrLong = int;
}
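
// Illustrative sketch of what the CanCAS constraint and the IntOrLong helper
// evaluate to for a few representative types; `S24` is a hypothetical struct
// used only for the negative case.
unittest
{
    static assert( CanCAS!int);
    static assert( CanCAS!(void*));
    static assert( CanCAS!(int[]));
    static assert( CanCAS!Object);

    static struct S24 { ubyte[24] data; }
    static assert(!CanCAS!S24); // larger than 2 words and not a 16-byte POD

    static assert(is(IntOrLong!short == int));
    static assert(is(IntOrLong!ulong == long));
}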

// NOTE: x86 loads implicitly have acquire semantics so a memory
//       barrier is only necessary on releases.
template needsLoadBarrier( MemoryOrder ms )
{
    enum bool needsLoadBarrier = ms == MemoryOrder.seq;
}


// NOTE: x86 stores implicitly have release semantics so a memory
//       barrier is only necessary on acquires.
template needsStoreBarrier( MemoryOrder ms )
{
    enum bool needsStoreBarrier = ms == MemoryOrder.seq;
}

// this is a helper to build asm blocks
string simpleFormat(string format, scope string[] args)
{
    string result;
    outer: while (format.length)
    {
        foreach (i; 0 .. format.length)
        {
            if (format[i] == '%' || format[i] == '?')
            {
                bool isQ = format[i] == '?';
                result ~= format[0 .. i++];
                assert (i < format.length, "Invalid format string");
                if (format[i] == '%' || format[i] == '?')
                {
                    assert(!isQ, "Invalid format string");
                    result ~= format[i++];
                }
                else
                {
                    int index = 0;
                    assert (format[i] >= '0' && format[i] <= '9', "Invalid format string");
                    while (i < format.length && format[i] >= '0' && format[i] <= '9')
                        index = index * 10 + (ubyte(format[i++]) - ubyte('0'));
                    if (!isQ)
                        result ~= args[index];
                    else if (!args[index])
                    {
                        size_t j = i;
                        for (; j < format.length;)
                        {
                            if (format[j++] == '\n')
                                break;
                        }
                        i = j;
                    }
                }
                format = format[i .. $];
                continue outer;
            }
        }
        result ~= format;
        break;
    }
    return result;
}
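
// Illustrative sketch of simpleFormat's placeholder expansion: `%N` substitutes
// args[N] into the output, while a `?N` marker keeps the remainder of its line
// only when args[N] is non-null (the whole line is dropped otherwise).
unittest
{
    assert(simpleFormat("mov %0, 1;\n?1 mov %1, 2;\n", ["EAX", null]) ==
           "mov EAX, 1;\n");
    assert(simpleFormat("mov %0, 1;\n?1 mov %1, 2;\n", ["EAX", "EBX"]) ==
           "mov EAX, 1;\n mov EBX, 2;\n");
}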