>Does it fix the behaviour to make value = *ptr an atomic read? Replace that line with "value = atomic_add(ptr, 0)".
It doesn't change anything. So I assume it isn't a coherence issue.
#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable
#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable

/*
 * Reinterpret the bits of `a` as a float, add 1.0f, and return the bits of
 * the sum reinterpreted as an int. as_float/as_int reinterpret the bit
 * pattern; they do not perform a numeric conversion.
 */
int f(int a)
{
    return as_int(as_float(a)+1.0f);
}

/*
 * Replace *ptr with f(*ptr) using a compare-and-swap retry loop.
 *
 * ptr: global-memory word that is read and updated with atomics only.
 *
 * Each iteration reads the current contents, computes the replacement, and
 * tries to install it; the loop repeats while atom_cmpxchg reports that the
 * contents were no longer the value that was read.
 */
__kernel void test(__global int * ptr)
{
    int value;
    int new_value;
    do {
        mem_fence(CLK_GLOBAL_MEM_FENCE); /* Not needed actually */
        /* Atomic read: adding 0 leaves *ptr unchanged and returns its contents. */
        value = atom_add(ptr, 0);
        new_value=f(value);
        /* atom_cmpxchg returns the old contents; equal to `value` means the swap happened. */
    } while(value != atom_cmpxchg(ptr, value, new_value));
}
shader main
  asic(SI_ASIC)
  type(CS)

  // Compiler output for the `test` kernel. Trailing comments are the
  // instruction's byte offset and encoding, as emitted by the disassembler.
  s_buffer_load_dword  s0, s[8:11], 0x00                  // 00000000: C2000900
label_0001:
  s_waitcnt     expcnt(0)                                 // 00000004: BF8C1F0F
  s_waitcnt     lgkmcnt(0)                                // 00000008: BF8C007F
  v_mov_b32     v0, s0                                    // 0000000C: 7E000200
  // v1 = 0 is the addend, matching atom_add(ptr, 0) in the kernel source.
  v_mov_b32     v1, 0                                     // 00000010: 7E020280
  buffer_atomic_add  v1, v0, s[4:7], 0 offen glc          // 00000014: E0C85000 80010100
  s_waitcnt     vmcnt(0)                                  // 0000001C: BF8C1F70
  // Inlined f(): float add of 1.0 on the value left in v1.
  v_add_f32     v2, 1.0, v1                               // 00000020: 060402F2
  v_mov_b32     v3, v1                                    // 00000024: 7E060301
  // NOTE(review): presumably v2 = new value, v3 = compare value, and with glc
  // the pre-operation memory contents come back in v2 - confirm against the ISA manual.
  buffer_atomic_cmpswap  v[2:3], v0, s[4:7], 0 offen glc  // 00000028: E0C45000 80010200
  s_waitcnt     vmcnt(0)                                  // 00000030: BF8C1F70
  v_cmp_eq_i32  vcc, v1, v2                               // 00000034: 7D040501
  // NOTE(review): the loop back-edge is taken on "vcc zero"; how this behaves when
  // only some lanes fail the compare should be checked against the ISA manual.
  s_cbranch_vccz  label_0001                              // 00000038: BF86FFF2
  s_endpgm                                                // 0000003C: BF810000
end