The following code seems to be miscompiled:
/* Each directive must sit on its own line: a #pragma extends to the end of
 * the physical line, so anything following it on the same line is consumed
 * by the directive instead of being compiled. */
#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable
#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable

/*
 * Reinterpret the bits of `a` as a float, add 1.0f, and return the bits of
 * the sum reinterpreted as an int. No numeric conversion takes place; only
 * the bit pattern is carried between the two types.
 */
int f(int a)
{
    return as_int(as_float(a) + 1.0f);
}

/*
 * Compare-and-swap retry loop on the int that `ptr` points to.
 *
 * Every pass reads the current value, derives the replacement with f(), and
 * attempts to install it with atom_cmpxchg(). The loop repeats for as long as
 * the value handed back by atom_cmpxchg() differs from the value that was
 * read at the top of the pass.
 *
 * ptr: location in __global memory that is read and conditionally replaced.
 */
__kernel void test(__global int *ptr)
{
    int value;
    int new_value;

    do {
        /* Global-memory fence issued before each read of *ptr. */
        mem_fence(CLK_GLOBAL_MEM_FENCE);
        value = *ptr;
        new_value = f(value);
    } while (value != atom_cmpxchg(ptr, value, new_value));
}
shader main
  asic(SI_ASIC)
  type(CS)

  s_buffer_load_dword  s0, s[8:11], 0x00                    // 00000000: C2000900
label_0001:
  s_waitcnt     expcnt(0)                                   // 00000004: BF8C1F0F
  s_waitcnt     lgkmcnt(0)                                  // 00000008: BF8C007F
  v_mov_b32     v0, s0                                      // 0000000C: 7E000200
  tbuffer_load_format_x  v5, v0, s[4:7], 0 offen format:[BUF_DATA_FORMAT_32,BUF_NUM_FORMAT_FLOAT] // 00000010: EBA01000 80010500
  s_waitcnt     vmcnt(0)                                    // 00000018: BF8C1F70
  v_add_f32     v4, 1.0, v5                                 // 0000001C: 06080AF2
  v_mov_b32     v2, v4                                      // 00000020: 7E040304
  v_mov_b32     v3, v5                                      // 00000024: 7E060305
  buffer_atomic_cmpswap  v[2:3], v0, s[4:7], 0 offen glc    // 00000028: E0C45000 80010200
  s_waitcnt     vmcnt(0)                                    // 00000030: BF8C1F70
  v_cmp_eq_i32  vcc, v5, v2                                 // 00000034: 7D040505
  s_cbranch_vccz  label_0001                                // 00000038: BF86FFF2
  s_endpgm                                                  // 0000003C: BF810000
end
The compiler doesn't update the execution mask, so atom_cmpxchg is executed more times than it should be.