Skip to content

Commit 8ba0a49

Browse files
authored
Synchronize the threads so that n_shared is correct before calculating the relative number of shared measurements (#1014)
1 parent b28b2ee commit 8ba0a49

File tree

2 files changed

+11
-2
lines changed

2 files changed

+11
-2
lines changed

device/cuda/src/ambiguity_resolution/kernels/remove_tracks.cu

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -140,8 +140,6 @@ __global__ void remove_tracks(device::remove_tracks_payload payload) {
140140

141141
shared_tids[threadIndex] = static_cast<unsigned int>(tracks[alive_idx]);
142142

143-
__syncthreads();
144-
145143
auto tid = shared_tids[threadIndex];
146144

147145
const auto m_count = static_cast<unsigned int>(thrust::count(
@@ -151,6 +149,8 @@ __global__ void remove_tracks(device::remove_tracks_payload payload) {
151149
vecmem::device_atomic_ref<unsigned int>(n_shared.at(tid))
152150
.fetch_sub(m_count);
153151

152+
__syncthreads();
153+
154154
bool already_pushed = false;
155155
for (unsigned int i = 0; i < threadIndex; ++i) {
156156
if (shared_tids[i] == tid) {

tests/cuda/test_ambiguity_resolution.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -942,6 +942,15 @@ INSTANTIATE_TEST_SUITE_P(
942942
std::array<std::size_t, 2u>{3u, 10u},
943943
100u, false)));
944944

945+
INSTANTIATE_TEST_SUITE_P(
946+
Long, GreedyResolutionCompareToCPU,
947+
::testing::Values(std::make_tuple(3u, 10000u,
948+
std::array<std::size_t, 2u>{3u, 500u},
949+
10000u, true),
950+
std::make_tuple(3u, 10000u,
951+
std::array<std::size_t, 2u>{3u, 500u},
952+
10000u, false)));
953+
945954
INSTANTIATE_TEST_SUITE_P(
946955
Simple, GreedyResolutionCompareToCPU,
947956
::testing::Values(

0 commit comments

Comments
 (0)