Skip to content

Commit a660c7c

Browse files
committed
Teams: Correct mapping of team members.
1 parent f9a68b9 commit a660c7c

File tree

1 file changed

+39
-36
lines changed

1 file changed

+39
-36
lines changed

src/runtime-libraries/mpi/mpi_caf.c

Lines changed: 39 additions & 36 deletions
Original file line number | Diff line number | Diff line change
@@ -172,6 +172,7 @@ error_stop_str(const char *string, size_t len, bool quiet)
172172

173173
/* Global variables. */
174174
static int caf_this_image;
175+
static int mpi_this_image;
175176
static int caf_num_images = 0;
176177
static int caf_is_finalized = 0;
177178
static MPI_Win global_dynamic_win;
@@ -898,10 +899,10 @@ PREFIX(init)(int *argc, char ***argv)
898899

899900
ierr = MPI_Comm_size(CAF_COMM_WORLD, &caf_num_images);
900901
chk_err(ierr);
901-
ierr = MPI_Comm_rank(CAF_COMM_WORLD, &caf_this_image);
902+
ierr = MPI_Comm_rank(CAF_COMM_WORLD, &mpi_this_image);
902903
chk_err(ierr);
903904

904-
++caf_this_image;
905+
caf_this_image = mpi_this_image + 1;
905906
caf_is_finalized = 0;
906907

907908
/* BEGIN SYNC IMAGE preparation
@@ -1007,22 +1008,22 @@ finalize_internal(int status_code)
10071008
chk_err(ierr);
10081009
#endif
10091010
/* For future security enclose setting img_status in a lock. */
1010-
CAF_Win_lock(MPI_LOCK_EXCLUSIVE, caf_this_image - 1, *stat_tok);
1011+
CAF_Win_lock(MPI_LOCK_EXCLUSIVE, mpi_this_image, *stat_tok);
10111012
if (status_code == 0)
10121013
{
10131014
img_status = STAT_STOPPED_IMAGE;
10141015
#ifdef WITH_FAILED_IMAGES
1015-
image_stati[caf_this_image - 1] = STAT_STOPPED_IMAGE;
1016+
image_stati[mpi_this_image] = STAT_STOPPED_IMAGE;
10161017
#endif
10171018
}
10181019
else
10191020
{
10201021
img_status = status_code;
10211022
#ifdef WITH_FAILED_IMAGES
1022-
image_stati[caf_this_image - 1] = status_code;
1023+
image_stati[mpi_this_image] = status_code;
10231024
#endif
10241025
}
1025-
CAF_Win_unlock(caf_this_image - 1, *stat_tok);
1026+
CAF_Win_unlock(mpi_this_image, *stat_tok);
10261027

10271028
/* Announce to all other images, that this one has changed its execution
10281029
* status. */
@@ -1368,11 +1369,11 @@ void PREFIX(register)(size_t size, caf_register_t type, caf_token_t *token,
13681369
if (l_var)
13691370
{
13701371
init_array = (int *)calloc(size, sizeof(int));
1371-
CAF_Win_lock(MPI_LOCK_EXCLUSIVE, caf_this_image - 1, *p);
1372-
ierr = MPI_Put(init_array, size, MPI_INT, caf_this_image - 1, 0, size,
1372+
CAF_Win_lock(MPI_LOCK_EXCLUSIVE, mpi_this_image, *p);
1373+
ierr = MPI_Put(init_array, size, MPI_INT, mpi_this_image, 0, size,
13731374
MPI_INT, *p);
13741375
chk_err(ierr);
1375-
CAF_Win_unlock(caf_this_image - 1, *p);
1376+
CAF_Win_unlock(mpi_this_image, *p);
13761377
free(init_array);
13771378
}
13781379

@@ -1469,11 +1470,11 @@ void *PREFIX(register)(size_t size, caf_register_t type, caf_token_t *token,
14691470
if (l_var)
14701471
{
14711472
init_array = (int *)calloc(size, sizeof(int));
1472-
CAF_Win_lock(MPI_LOCK_EXCLUSIVE, caf_this_image - 1, *p);
1473-
ierr = MPI_Put(init_array, size, MPI_INT, caf_this_image - 1, 0, size,
1474-
MPI_INT, *p);
1473+
CAF_Win_lock(MPI_LOCK_EXCLUSIVE, mpi_this_image, *p);
1474+
ierr = MPI_Put(init_array, size, MPI_INT, mpi_this_image, 0, size, MPI_INT,
1475+
*p);
14751476
chk_err(ierr);
1476-
CAF_Win_unlock(caf_this_image - 1, *p);
1477+
CAF_Win_unlock(mpi_this_image, *p);
14771478
free(init_array);
14781479
}
14791480

@@ -3576,16 +3577,23 @@ PREFIX(get)(caf_token_t token, size_t offset, int image_index,
35763577
bool free_pad_str = false, free_t_buff = false;
35773578
const bool dest_char_array_is_longer
35783579
= dst_type == BT_CHARACTER && dst_size > src_size && !same_image;
3579-
int remote_image = image_index - 1;
3580+
int remote_image = image_index - 1, this_image = mpi_this_image;
3581+
35803582
if (!same_image)
35813583
{
35823584
MPI_Group current_team_group, win_group;
3585+
int trans_ranks[2];
35833586
ierr = MPI_Comm_group(CAF_COMM_WORLD, &current_team_group);
35843587
chk_err(ierr);
35853588
ierr = MPI_Win_get_group(*p, &win_group);
35863589
chk_err(ierr);
3587-
ierr = MPI_Group_translate_ranks(
3588-
current_team_group, 1, (int[]){remote_image}, win_group, &remote_image);
3590+
ierr = MPI_Group_translate_ranks(current_team_group, 2,
3591+
(int[]){remote_image, this_image},
3592+
win_group, trans_ranks);
3593+
dprint("rank translation: remote: %d -> %d, this: %d -> %d.\n",
3594+
remote_image, trans_ranks[0], this_image, trans_ranks[1]);
3595+
remote_image = trans_ranks[0];
3596+
this_image = trans_ranks[1];
35893597
chk_err(ierr);
35903598
ierr = MPI_Group_free(&current_team_group);
35913599
chk_err(ierr);
@@ -3615,8 +3623,8 @@ PREFIX(get)(caf_token_t token, size_t offset, int image_index,
36153623
if (size == 0)
36163624
return;
36173625

3618-
dprint("src_vector = %p, image_index = %d, offset = %zd.\n", src_vector,
3619-
image_index, offset);
3626+
dprint("src_vector = %p, image_index = %d (remote = %d), offset = %zd.\n",
3627+
src_vector, image_index, remote_image, offset);
36203628
check_image_health(image_index, stat);
36213629

36223630
/* For char arrays: create the padding array, when dst is longer than src. */
@@ -7992,8 +8000,7 @@ PREFIX(atomic_define)(caf_token_t token, size_t offset, int image_index,
79928000
{
79938001
MPI_Win *p = TOKEN(token);
79948002
MPI_Datatype dt;
7995-
int ierr = 0,
7996-
image = (image_index != 0) ? image_index - 1 : caf_this_image - 1;
8003+
int ierr = 0, image = (image_index != 0) ? image_index - 1 : mpi_this_image;
79978004

79988005
selectType(kind, &dt);
79998006

@@ -8024,8 +8031,7 @@ PREFIX(atomic_ref)(caf_token_t token, size_t offset, int image_index,
80248031
{
80258032
MPI_Win *p = TOKEN(token);
80268033
MPI_Datatype dt;
8027-
int ierr = 0,
8028-
image = (image_index != 0) ? image_index - 1 : caf_this_image - 1;
8034+
int ierr = 0, image = (image_index != 0) ? image_index - 1 : mpi_this_image;
80298035

80308036
selectType(kind, &dt);
80318037

@@ -8056,8 +8062,7 @@ PREFIX(atomic_cas)(caf_token_t token, size_t offset, int image_index, void *old,
80568062
{
80578063
MPI_Win *p = TOKEN(token);
80588064
MPI_Datatype dt;
8059-
int ierr = 0,
8060-
image = (image_index != 0) ? image_index - 1 : caf_this_image - 1;
8065+
int ierr = 0, image = (image_index != 0) ? image_index - 1 : mpi_this_image;
80618066

80628067
selectType(kind, &dt);
80638068

@@ -8088,7 +8093,7 @@ PREFIX(atomic_op)(int op, caf_token_t token, size_t offset, int image_index,
80888093
int ierr = 0;
80898094
MPI_Datatype dt;
80908095
MPI_Win *p = TOKEN(token);
8091-
int image = (image_index != 0) ? image_index - 1 : caf_this_image - 1;
8096+
int image = (image_index != 0) ? image_index - 1 : mpi_this_image;
80928097

80938098
#if MPI_VERSION >= 3
80948099
old = malloc(kind);
@@ -8143,7 +8148,7 @@ PREFIX(event_post)(caf_token_t token, size_t index, int image_index, int *stat,
81438148
int value = 1, ierr = 0, flag;
81448149
MPI_Win *p = TOKEN(token);
81458150
const char msg[] = "Error on event post";
8146-
int image = (image_index == 0) ? caf_this_image - 1 : image_index - 1;
8151+
int image = (image_index == 0) ? mpi_this_image : image_index - 1;
81478152

81488153
if (stat != NULL)
81498154
*stat = 0;
@@ -8181,7 +8186,7 @@ void
81818186
PREFIX(event_wait)(caf_token_t token, size_t index, int until_count, int *stat,
81828187
char *errmsg, charlen_t errmsg_len)
81838188
{
8184-
int ierr = 0, count = 0, i, image = caf_this_image - 1;
8189+
int ierr = 0, count = 0, i, image = mpi_this_image;
81858190
int *var = NULL, flag, old = 0, newval = 0;
81868191
const int spin_loop_max = 20000;
81878192
MPI_Win *p = TOKEN(token);
@@ -8247,8 +8252,7 @@ PREFIX(event_query)(caf_token_t token, size_t index, int image_index,
82478252
int *count, int *stat)
82488253
{
82498254
MPI_Win *p = TOKEN(token);
8250-
int ierr = 0,
8251-
image = (image_index == 0) ? caf_this_image - 1 : image_index - 1;
8255+
int ierr = 0, image = (image_index == 0) ? mpi_this_image : image_index - 1;
82528256

82538257
if (stat != NULL)
82548258
*stat = 0;
@@ -8587,13 +8591,12 @@ PREFIX(form_team)(int team_id, caf_team_t *team,
85878591
int index __attribute__((unused)))
85888592
{
85898593
struct caf_teams_list *tmp;
8590-
void *tmp_team;
85918594
MPI_Comm *newcomm;
8592-
MPI_Comm *current_comm = &CAF_COMM_WORLD;
8595+
MPI_Comm current_comm = CAF_COMM_WORLD;
85938596
int ierr;
85948597

85958598
newcomm = (MPI_Comm *)calloc(1, sizeof(MPI_Comm));
8596-
ierr = MPI_Comm_split(*current_comm, team_id, caf_this_image, newcomm);
8599+
ierr = MPI_Comm_split(current_comm, team_id, mpi_this_image, newcomm);
85978600
chk_err(ierr);
85988601

85998602
tmp = calloc(1, sizeof(struct caf_teams_list));
@@ -8643,9 +8646,9 @@ PREFIX(change_team)(caf_team_t *team, int coselector __attribute__((unused)))
86438646
tmp_team = tmp_used->team_list_elem->team;
86448647
tmp_comm = (MPI_Comm *)tmp_team;
86458648
CAF_COMM_WORLD = *tmp_comm;
8646-
int ierr = MPI_Comm_rank(*tmp_comm, &caf_this_image);
8649+
int ierr = MPI_Comm_rank(*tmp_comm, &mpi_this_image);
86478650
chk_err(ierr);
8648-
caf_this_image++;
8651+
caf_this_image = mpi_this_image + 1;
86498652
ierr = MPI_Comm_size(*tmp_comm, &caf_num_images);
86508653
chk_err(ierr);
86518654
ierr = MPI_Barrier(*tmp_comm);
@@ -8696,9 +8699,9 @@ PREFIX(end_team)(caf_team_t *team __attribute__((unused)))
86968699
tmp_comm = (MPI_Comm *)tmp_team;
86978700
CAF_COMM_WORLD = *tmp_comm;
86988701
/* CAF_COMM_WORLD = (MPI_Comm)*tmp_used->team_list_elem->team; */
8699-
ierr = MPI_Comm_rank(CAF_COMM_WORLD, &caf_this_image);
8702+
ierr = MPI_Comm_rank(CAF_COMM_WORLD, &mpi_this_image);
87008703
chk_err(ierr);
8701-
caf_this_image++;
8704+
caf_this_image = mpi_this_image + 1;
87028705
ierr = MPI_Comm_size(CAF_COMM_WORLD, &caf_num_images);
87038706
chk_err(ierr);
87048707
}

0 commit comments

Comments
 (0)