Skip to content

Commit 2e4b5ee

Browse files
committed
Teams: Correct mapping of team members.
1 parent 49ef8ca commit 2e4b5ee

File tree

1 file changed

+39
-36
lines changed

1 file changed

+39
-36
lines changed

src/runtime-libraries/mpi/mpi_caf.c

Lines changed: 39 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,7 @@ error_stop_str(const char *string, size_t len, bool quiet)
172172

173173
/* Global variables. */
174174
static int caf_this_image;
175+
static int mpi_this_image;
175176
static int caf_num_images = 0;
176177
static int caf_is_finalized = 0;
177178
static MPI_Win global_dynamic_win;
@@ -968,10 +969,10 @@ PREFIX(init)(int *argc, char ***argv)
968969

969970
ierr = MPI_Comm_size(CAF_COMM_WORLD, &caf_num_images);
970971
chk_err(ierr);
971-
ierr = MPI_Comm_rank(CAF_COMM_WORLD, &caf_this_image);
972+
ierr = MPI_Comm_rank(CAF_COMM_WORLD, &mpi_this_image);
972973
chk_err(ierr);
973974

974-
++caf_this_image;
975+
caf_this_image = mpi_this_image + 1;
975976
caf_is_finalized = 0;
976977

977978
/* BEGIN SYNC IMAGE preparation
@@ -1082,22 +1083,22 @@ finalize_internal(int status_code)
10821083
chk_err(ierr);
10831084
#endif
10841085
/* For future security enclose setting img_status in a lock. */
1085-
CAF_Win_lock(MPI_LOCK_EXCLUSIVE, caf_this_image - 1, *stat_tok);
1086+
CAF_Win_lock(MPI_LOCK_EXCLUSIVE, mpi_this_image, *stat_tok);
10861087
if (status_code == 0)
10871088
{
10881089
img_status = STAT_STOPPED_IMAGE;
10891090
#ifdef WITH_FAILED_IMAGES
1090-
image_stati[caf_this_image - 1] = STAT_STOPPED_IMAGE;
1091+
image_stati[mpi_this_image] = STAT_STOPPED_IMAGE;
10911092
#endif
10921093
}
10931094
else
10941095
{
10951096
img_status = status_code;
10961097
#ifdef WITH_FAILED_IMAGES
1097-
image_stati[caf_this_image - 1] = status_code;
1098+
image_stati[mpi_this_image] = status_code;
10981099
#endif
10991100
}
1100-
CAF_Win_unlock(caf_this_image - 1, *stat_tok);
1101+
CAF_Win_unlock(mpi_this_image, *stat_tok);
11011102

11021103
/* Announce to all other images, that this one has changed its execution
11031104
* status. */
@@ -1464,11 +1465,11 @@ void PREFIX(register)(size_t size, caf_register_t type, caf_token_t *token,
14641465
if (l_var)
14651466
{
14661467
init_array = (int *)calloc(size, sizeof(int));
1467-
CAF_Win_lock(MPI_LOCK_EXCLUSIVE, caf_this_image - 1, *p);
1468-
ierr = MPI_Put(init_array, size, MPI_INT, caf_this_image - 1, 0, size,
1468+
CAF_Win_lock(MPI_LOCK_EXCLUSIVE, mpi_this_image, *p);
1469+
ierr = MPI_Put(init_array, size, MPI_INT, mpi_this_image, 0, size,
14691470
MPI_INT, *p);
14701471
chk_err(ierr);
1471-
CAF_Win_unlock(caf_this_image - 1, *p);
1472+
CAF_Win_unlock(mpi_this_image, *p);
14721473
free(init_array);
14731474
}
14741475

@@ -1565,11 +1566,11 @@ void *PREFIX(register)(size_t size, caf_register_t type, caf_token_t *token,
15651566
if (l_var)
15661567
{
15671568
init_array = (int *)calloc(size, sizeof(int));
1568-
CAF_Win_lock(MPI_LOCK_EXCLUSIVE, caf_this_image - 1, *p);
1569-
ierr = MPI_Put(init_array, size, MPI_INT, caf_this_image - 1, 0, size,
1570-
MPI_INT, *p);
1569+
CAF_Win_lock(MPI_LOCK_EXCLUSIVE, mpi_this_image, *p);
1570+
ierr = MPI_Put(init_array, size, MPI_INT, mpi_this_image, 0, size, MPI_INT,
1571+
*p);
15711572
chk_err(ierr);
1572-
CAF_Win_unlock(caf_this_image - 1, *p);
1573+
CAF_Win_unlock(mpi_this_image, *p);
15731574
free(init_array);
15741575
}
15751576

@@ -3672,16 +3673,23 @@ PREFIX(get)(caf_token_t token, size_t offset, int image_index,
36723673
bool free_pad_str = false, free_t_buff = false;
36733674
const bool dest_char_array_is_longer
36743675
= dst_type == BT_CHARACTER && dst_size > src_size && !same_image;
3675-
int remote_image = image_index - 1;
3676+
int remote_image = image_index - 1, this_image = mpi_this_image;
3677+
36763678
if (!same_image)
36773679
{
36783680
MPI_Group current_team_group, win_group;
3681+
int trans_ranks[2];
36793682
ierr = MPI_Comm_group(CAF_COMM_WORLD, &current_team_group);
36803683
chk_err(ierr);
36813684
ierr = MPI_Win_get_group(*p, &win_group);
36823685
chk_err(ierr);
3683-
ierr = MPI_Group_translate_ranks(
3684-
current_team_group, 1, (int[]){remote_image}, win_group, &remote_image);
3686+
ierr = MPI_Group_translate_ranks(current_team_group, 2,
3687+
(int[]){remote_image, this_image},
3688+
win_group, trans_ranks);
3689+
dprint("rank translation: remote: %d -> %d, this: %d -> %d.\n",
3690+
remote_image, trans_ranks[0], this_image, trans_ranks[1]);
3691+
remote_image = trans_ranks[0];
3692+
this_image = trans_ranks[1];
36853693
chk_err(ierr);
36863694
ierr = MPI_Group_free(&current_team_group);
36873695
chk_err(ierr);
@@ -3711,8 +3719,8 @@ PREFIX(get)(caf_token_t token, size_t offset, int image_index,
37113719
if (size == 0)
37123720
return;
37133721

3714-
dprint("src_vector = %p, image_index = %d, offset = %zd.\n", src_vector,
3715-
image_index, offset);
3722+
dprint("src_vector = %p, image_index = %d (remote = %d), offset = %zd.\n",
3723+
src_vector, image_index, remote_image, offset);
37163724
check_image_health(image_index, stat);
37173725

37183726
/* For char arrays: create the padding array, when dst is longer than src. */
@@ -8100,8 +8108,7 @@ PREFIX(atomic_define)(caf_token_t token, size_t offset, int image_index,
81008108
{
81018109
MPI_Win *p = TOKEN(token);
81028110
MPI_Datatype dt;
8103-
int ierr = 0,
8104-
image = (image_index != 0) ? image_index - 1 : caf_this_image - 1;
8111+
int ierr = 0, image = (image_index != 0) ? image_index - 1 : mpi_this_image;
81058112

81068113
selectType(kind, &dt);
81078114

@@ -8132,8 +8139,7 @@ PREFIX(atomic_ref)(caf_token_t token, size_t offset, int image_index,
81328139
{
81338140
MPI_Win *p = TOKEN(token);
81348141
MPI_Datatype dt;
8135-
int ierr = 0,
8136-
image = (image_index != 0) ? image_index - 1 : caf_this_image - 1;
8142+
int ierr = 0, image = (image_index != 0) ? image_index - 1 : mpi_this_image;
81378143

81388144
selectType(kind, &dt);
81398145

@@ -8164,8 +8170,7 @@ PREFIX(atomic_cas)(caf_token_t token, size_t offset, int image_index, void *old,
81648170
{
81658171
MPI_Win *p = TOKEN(token);
81668172
MPI_Datatype dt;
8167-
int ierr = 0,
8168-
image = (image_index != 0) ? image_index - 1 : caf_this_image - 1;
8173+
int ierr = 0, image = (image_index != 0) ? image_index - 1 : mpi_this_image;
81698174

81708175
selectType(kind, &dt);
81718176

@@ -8196,7 +8201,7 @@ PREFIX(atomic_op)(int op, caf_token_t token, size_t offset, int image_index,
81968201
int ierr = 0;
81978202
MPI_Datatype dt;
81988203
MPI_Win *p = TOKEN(token);
8199-
int image = (image_index != 0) ? image_index - 1 : caf_this_image - 1;
8204+
int image = (image_index != 0) ? image_index - 1 : mpi_this_image;
82008205

82018206
#if MPI_VERSION >= 3
82028207
old = malloc(kind);
@@ -8251,7 +8256,7 @@ PREFIX(event_post)(caf_token_t token, size_t index, int image_index, int *stat,
82518256
int value = 1, ierr = 0, flag;
82528257
MPI_Win *p = TOKEN(token);
82538258
const char msg[] = "Error on event post";
8254-
int image = (image_index == 0) ? caf_this_image - 1 : image_index - 1;
8259+
int image = (image_index == 0) ? mpi_this_image : image_index - 1;
82558260

82568261
if (stat != NULL)
82578262
*stat = 0;
@@ -8289,7 +8294,7 @@ void
82898294
PREFIX(event_wait)(caf_token_t token, size_t index, int until_count, int *stat,
82908295
char *errmsg, charlen_t errmsg_len)
82918296
{
8292-
int ierr = 0, count = 0, i, image = caf_this_image - 1;
8297+
int ierr = 0, count = 0, i, image = mpi_this_image;
82938298
int *var = NULL, flag, old = 0, newval = 0;
82948299
const int spin_loop_max = 20000;
82958300
MPI_Win *p = TOKEN(token);
@@ -8355,8 +8360,7 @@ PREFIX(event_query)(caf_token_t token, size_t index, int image_index,
83558360
int *count, int *stat)
83568361
{
83578362
MPI_Win *p = TOKEN(token);
8358-
int ierr = 0,
8359-
image = (image_index == 0) ? caf_this_image - 1 : image_index - 1;
8363+
int ierr = 0, image = (image_index == 0) ? mpi_this_image : image_index - 1;
83608364

83618365
if (stat != NULL)
83628366
*stat = 0;
@@ -8695,13 +8699,12 @@ PREFIX(form_team)(int team_id, caf_team_t *team,
86958699
int index __attribute__((unused)))
86968700
{
86978701
struct caf_teams_list *tmp;
8698-
void *tmp_team;
86998702
MPI_Comm *newcomm;
8700-
MPI_Comm *current_comm = &CAF_COMM_WORLD;
8703+
MPI_Comm current_comm = CAF_COMM_WORLD;
87018704
int ierr;
87028705

87038706
newcomm = (MPI_Comm *)calloc(1, sizeof(MPI_Comm));
8704-
ierr = MPI_Comm_split(*current_comm, team_id, caf_this_image, newcomm);
8707+
ierr = MPI_Comm_split(current_comm, team_id, mpi_this_image, newcomm);
87058708
chk_err(ierr);
87068709

87078710
tmp = calloc(1, sizeof(struct caf_teams_list));
@@ -8751,9 +8754,9 @@ PREFIX(change_team)(caf_team_t *team, int coselector __attribute__((unused)))
87518754
tmp_team = tmp_used->team_list_elem->team;
87528755
tmp_comm = (MPI_Comm *)tmp_team;
87538756
CAF_COMM_WORLD = *tmp_comm;
8754-
int ierr = MPI_Comm_rank(*tmp_comm, &caf_this_image);
8757+
int ierr = MPI_Comm_rank(*tmp_comm, &mpi_this_image);
87558758
chk_err(ierr);
8756-
caf_this_image++;
8759+
caf_this_image = mpi_this_image + 1;
87578760
ierr = MPI_Comm_size(*tmp_comm, &caf_num_images);
87588761
chk_err(ierr);
87598762
ierr = MPI_Barrier(*tmp_comm);
@@ -8804,9 +8807,9 @@ PREFIX(end_team)(caf_team_t *team __attribute__((unused)))
88048807
tmp_comm = (MPI_Comm *)tmp_team;
88058808
CAF_COMM_WORLD = *tmp_comm;
88068809
/* CAF_COMM_WORLD = (MPI_Comm)*tmp_used->team_list_elem->team; */
8807-
ierr = MPI_Comm_rank(CAF_COMM_WORLD, &caf_this_image);
8810+
ierr = MPI_Comm_rank(CAF_COMM_WORLD, &mpi_this_image);
88088811
chk_err(ierr);
8809-
caf_this_image++;
8812+
caf_this_image = mpi_this_image + 1;
88108813
ierr = MPI_Comm_size(CAF_COMM_WORLD, &caf_num_images);
88118814
chk_err(ierr);
88128815
}

0 commit comments

Comments
 (0)