6 #ifndef ABTD_ASM_INT128_CAS_H_INCLUDED
7 #define ABTD_ASM_INT128_CAS_H_INCLUDED
14 #if defined(__x86_64__)
/* x86-64 path: 128-bit compare-and-exchange built on lock cmpxchg16b.
 * NOTE(review): this view is an excerpt; part of the asm template, at least
 * one operand and the clobber list are not visible here. */
/* Two u128_union temporaries; the asm operands below address their 64-bit
 * halves through .s.hi and .s.lo. */
26 union u128_union cmp, with;
/* The lock prefix makes the 16-byte compare-and-exchange on operand %1
 * (the memory behind var) atomic. */
30 __asm__ __volatile__(
"lock cmpxchg16b %1\n"
32 :
/* Outputs. %0: write-only, early-clobber result.
 * %1: the target memory, read and written in place. */
"=&q"(result),
"+m"(*var),
/* High half of cmp, read-write in the d register (rdx), which cmpxchg16b
 * uses for the upper 64 bits of the value compared against memory. */
"+d"(cmp.s.hi),
34 :
/* Inputs. High and low halves of with in the c and b registers (rcx, rbx),
 * the pair cmpxchg16b stores to memory when the comparison succeeds. */
"c"(with.s.hi),
"b"(with.s.lo)
38 #elif defined(__aarch64__)
/* AArch64 path. The visible fragments show two strategies: a single caspal
 * (compare-and-swap pair, acquire + release) and an exclusive load/store
 * pair (ldaxp + stlxp).
 * NOTE(review): this view is an excerpt; the preprocessor conditions that
 * select between these fragments, and most operand lists, are not visible. */
/* caspal variant using positional operands; %H0 and %H1 presumably print the
 * second register of each 128-bit register pair -- TODO confirm against the
 * compiler's operand-modifier documentation. */
52 __asm__ __volatile__(
"caspal %0, %H0, %1, %H1, [%2]"
/* Reassemble a 128-bit value: x0 supplies bits 0-63, x1 bits 64-127. */
56 prev = x0 | ((__int128)x1 << 64);
/* Empty asm with a memory clobber: emits no instruction, but stops the
 * compiler from caching or reordering memory accesses across this point. */
58 __asm__ __volatile__(
"" :::
"memory");
/* Pin the four 64-bit halves to fixed registers: x0/x1 hold the low/high
 * halves of oldv, x2/x3 the low/high halves of newv. */
59 register uint64_t x0 __asm__(
"x0") = (uint64_t)oldv;
60 register uint64_t x1 __asm__(
"x1") = (uint64_t)(oldv >> 64);
61 register uint64_t x2 __asm__(
"x2") = (uint64_t)newv;
62 register uint64_t x3 __asm__(
"x3") = (uint64_t)(newv >> 64);
/* caspal variant using named operands. The old-value pair is bound
 * read-write, so after the instruction x0/x1 carry the value that is read
 * back into prev below.
 * NOTE(review): the first template operand is written literally as x0 while
 * the named operand old1 is declared but never referenced; this matches only
 * because the variable x0 is pinned to register x0 above. */
63 __asm__ __volatile__(
"caspal x0, %[old2], %[newv1], %[newv2], [%[v]]"
64 : [old1]
"+r"(x0), [old2]
"+r"(x1)
65 : [newv1]
"r"(x2), [newv2]
"r"(x3), [v]
"r"(var)
/* Combine the two halves left in x0/x1 into the 128-bit prev. */
67 prev = x0 | ((__int128)x1 << 64);
/* Exclusive-access variant, step 1: load-acquire exclusive of a register
 * pair from the address in operand %1. */
76 __asm__ __volatile__(
"ldaxp %0, %H0, [%1]"
/* Step 2: store-release exclusive of the pair in operand %1 to the address
 * in operand %2; %w0 is the 32-bit view of operand 0, presumably the
 * success/failure status of the store -- TODO confirm. */
85 __asm__ __volatile__(
"stlxp %w0, %1, %H1, [%2]"
92 #elif defined(__ppc64__) || defined(__PPC64__)
/* 64-bit PowerPC path: quadword load-and-reserve (lqarx) followed by a
 * quadword store-conditional (stqcx.).
 * NOTE(review): this view is an excerpt; the branch/label lines of the asm
 * template and its clobber list are not visible here. */
/* Holders for the loaded value, pinned to r10 and r11.
 * NOTE(review): they have no initializer yet are bound below as read-write
 * operands -- confirm that is intended. */
97 register volatile uint64_t prev0 __asm__(
"r10");
98 register volatile uint64_t prev1 __asm__(
"r11");
/* New value split across r8 (high 64 bits) and r9 (low 64 bits, by implicit
 * truncation of newv). */
99 register volatile uint64_t newv0 __asm__(
"r8") = (newv >> 64);
100 register volatile uint64_t newv1 __asm__(
"r9") = newv;
/* Expected value split the same way (high, then low); these two are not
 * pinned to specific registers. */
101 uint64_t oldv0 = (oldv >> 64);
102 uint64_t oldv1 = oldv;
/* lqarx and stqcx. name only the first register of their operand (pv0, nv0);
 * presumably the instructions act on that register and the next one as a
 * pair, which would be why r10/r11 and r8/r9 are pinned -- TODO confirm
 * against the Power ISA. The two cmpd lines compare each loaded half with
 * the matching half of the expected value. */
103 __asm__ __volatile__(
"\n"
105 "\tlqarx %[pv0], 0, %[ptr]\n"
106 "\tcmpd %[pv0], %[ov0]\n"
108 "\tcmpd %[pv1], %[ov1]\n"
110 "\tstqcx. %[nv0], 0, %[ptr]\n"
/* Outputs: both halves of the loaded value, read-write and early-clobber. */
115 : [pv0]
"+&r"(prev0), [pv1]
"+&r"(prev1),
/* Inputs: target address, expected halves, new halves. */
117 : [ptr]
"r"(var), [ov0]
"r"(oldv0), [ov1]
"r"(oldv1),
118 [nv0]
"r"(newv0), [nv1]
"r"(newv1)
124 #error "Argobots does not support 128-bit CAS for this architecture."