Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
6f3bf4c
test/bench: adapt p2p tests to multi-pair tests
hzhou Aug 7, 2025
005451f
test/bench: add p2p_self test
hzhou Aug 8, 2025
2e75378
misc: add MPIR_async_test
hzhou Jan 31, 2024
74630f4
ch4/ofi: create general pipeline chunk pools
hzhou Aug 5, 2025
3f25663
ch4/ofi: wrap initialization of per-vci struct
hzhou Aug 6, 2025
9213ff5
ch4/ofi: make MPIDI_OFI_request_t a union
hzhou Aug 10, 2025
0a30064
ch4/ofi: add MPIR_CVAR_CH4_OFI_RNDV_PROTOCOL
hzhou Aug 5, 2025
255ad49
ch4/ofi: add MPIDI_OFI_RNDV_{send,recv}_hdr
hzhou Aug 7, 2025
70e6d15
ch4/ofi: avoid posting large buffer in recv
hzhou Aug 5, 2025
7bdf40b
ch4/ofi: add rndv pipeline protocol
hzhou Aug 1, 2025
d12c93b
ch4/ofi: add rndv read protocol
hzhou Aug 6, 2025
228e722
ch4/ofi: add rndv write protocol
hzhou Aug 8, 2025
b7d35ce
ch4/ofi: remove the old gpu pipeline code
hzhou Aug 7, 2025
a80ced9
ch4/ofi: remove the huge protocol
hzhou Aug 7, 2025
b5fec23
ch4/ofi: removing leftover constants
hzhou Aug 8, 2025
6acf08b
ch4/ofi: remove MPIR_CVAR_CH4_OFI_EAGER_MAX_MSG_SIZE
hzhou Aug 7, 2025
d649898
ch4/ofi: fix warnings in MPIDI_NM_progress
hzhou Aug 7, 2025
561710f
ch4/ofi: refactor and implement rndv auto selection
hzhou Aug 8, 2025
9630d42
ch4/ofi: synchronize remote_data_sz in rndv protocols
hzhou Aug 9, 2025
551c238
ch4/ofi: avoid overwriting rndv fields
hzhou Aug 10, 2025
f634d90
ch4/ofi: fix thread critical sections in rndv algorithms
hzhou Aug 11, 2025
e1a40a8
test: add tests to cover ofi rndv protocols
hzhou Aug 11, 2025
0335526
ch4/ofi: fix MPIDI_OFI_rndv_need_pack for reg_host
hzhou Aug 18, 2025
f4b1e3e
ch4/ofi: fix noinline build
hzhou Aug 20, 2025
5e8794d
ch4: pass rreq to MPIDI_NM_am_can_do_tag
hzhou Aug 20, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 11 additions & 16 deletions doc/mpich/tuning_parameters.md
Original file line number Diff line number Diff line change
Expand Up @@ -1096,27 +1096,22 @@ GPU pipeline uses host buffer and pipelining technique to send internode
messages instead of GPU RDMA. To enable this mode, use the following two
CVARs:

* `MPIR_CVAR_CH4_OFI_ENABLE_GPU_PIPELINE`: This CVAR enables GPU pipeline
for inter-node pt2pt messages
* `MPIR_CVAR_CH4_OFI_GPU_PIPELINE_THRESHOLD`: The threshold to start using
GPU pipelining. Default is 1MB.
* `MPIR_CVAR_CH4_OFI_EAGER_THRESHOLD`: This CVAR enables the RNDV
(rendezvous) path for messages larger than the threshold. The recommended
value is 1MB.

* `MPIR_CVAR_CH4_OFI_GPU_PIPELINE_BUFFER_SZ`: Specifies the chunk size
(in bytes) for GPU pipeline data transfer.
* `MPIR_CVAR_CH4_OFI_RNDV_PROTOCOL=pipeline`: Forces the RNDV algorithm to
use pipelining. The default is "auto", which selects the best algorithm
based on message attributes. Other protocols include "read" (RDMA read)
and "direct", which relies on the underlying network library implementation.

* `MPIR_CVAR_CH4_OFI_GPU_PIPELINE_NUM_BUFFERS_PER_CHUNK`: Specifies the
* `MPIR_CVAR_CH4_OFI_PIPELINE_CHUNK_SZ`: Specifies the chunk size
(in bytes) for pipeline data transfer.

* `MPIR_CVAR_CH4_OFI_PIPELINE_NUM_CHUNKS`: Specifies the
number of buffers for GPU pipeline data transfer in each block/chunk of
the pool.

* `MPIR_CVAR_CH4_OFI_GPU_PIPELINE_MAX_NUM_BUFFERS`: Specifies the maximum
total number of buffers MPICH buffer pool can allocate.

* `MPIR_CVAR_CH4_OFI_GPU_PIPELINE_D2H_ENGINE_TYPE`: Specify engine type
for copying from device to host (sender side), default 0

* `MPIR_CVAR_CH4_OFI_GPU_PIPELINE_H2D_ENGINE_TYPE`: Specify engine type
for copying from host to device (receiver side), default 0

To enable GPU Direct RDMA support for pt2pt communication, use the
following CVARs:
* `MPIR_CVAR_CH4_OFI_ENABLE_HMEM`: This CVAR with a value of `1` enables
Expand Down
2 changes: 1 addition & 1 deletion src/include/mpiimpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,6 @@ typedef struct MPIR_Stream MPIR_Stream;
/******************* PART 3: DEVICE INDEPENDENT HEADERS **********************/
/*****************************************************************************/

#include "mpir_misc.h"
#include "mpir_dbg.h"
#include "mpir_objects.h"
#include "mpir_strerror.h"
Expand All @@ -166,6 +165,7 @@ typedef struct MPIR_Stream MPIR_Stream;
#include "mpir_mem.h"
#include "mpir_info.h"
#include "mpir_errcodes.h"
#include "mpir_misc.h"
#include "mpir_errhandler.h"
#include "mpir_attr_generic.h"
#include "mpir_contextid.h"
Expand Down
20 changes: 20 additions & 0 deletions src/include/mpir_misc.h
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,26 @@ typedef struct {
MPIR_request_type_t type;
} MPIR_gpu_req;

/* Poll an asynchronous request once, dispatching on areq->type.
 *
 *   areq    - request to poll; its `type` field selects which member of
 *             `areq->u` is examined.
 *   is_done - output flag; set to 1 directly for MPIR_NULL_REQUEST, otherwise
 *             filled in by the underlying test routine.
 *
 * The return value of MPIR_Typerep_test is not examined here; the result of
 * MPL_gpu_test is asserted to be MPL_SUCCESS. An unrecognized type trips
 * MPIR_Assert(0) and leaves *is_done untouched.
 */
MPL_STATIC_INLINE_PREFIX void MPIR_async_test(MPIR_gpu_req * areq, int *is_done)
{
    if (areq->type == MPIR_NULL_REQUEST) {
        /* placeholder request: nothing is pending, report completion right away */
        *is_done = 1;
    } else if (areq->type == MPIR_TYPEREP_REQUEST) {
        MPIR_Typerep_test(areq->u.y_req, is_done);
    } else if (areq->type == MPIR_GPU_REQUEST) {
        int gpu_rc = MPL_gpu_test(&areq->u.gpu_req, is_done);
        MPIR_Assertp(gpu_rc == MPL_SUCCESS);
    } else {
        /* no other request type is expected here */
        MPIR_Assert(0);
    }
}

int MPIR_Localcopy(const void *sendbuf, MPI_Aint sendcount, MPI_Datatype sendtype,
void *recvbuf, MPI_Aint recvcount, MPI_Datatype recvtype);
int MPIR_Ilocalcopy(const void *sendbuf, MPI_Aint sendcount, MPI_Datatype sendtype,
Expand Down
2 changes: 0 additions & 2 deletions src/include/mpir_typerep.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,8 +78,6 @@ int MPIR_Typerep_ipack(const void *inbuf, MPI_Aint incount, MPI_Datatype datatyp
int MPIR_Typerep_iunpack(const void *inbuf, MPI_Aint insize, void *outbuf, MPI_Aint outcount,
MPI_Datatype datatype, MPI_Aint outoffset, MPI_Aint * actual_unpack_bytes,
MPIR_Typerep_req * typerep_req, uint32_t flags);
int MPIR_Typerep_wait(MPIR_Typerep_req typerep_req);
int MPIR_Typerep_test(MPIR_Typerep_req typerep_req, int *completed);

int MPIR_Typerep_size_external32(MPI_Datatype type);
int MPIR_Typerep_pack_external(const void *inbuf, MPI_Aint incount, MPI_Datatype datatype,
Expand Down
3 changes: 3 additions & 0 deletions src/mpi/datatype/typerep/src/typerep_pre.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,4 +28,7 @@ typedef struct {
#define MPIR_TYPEREP_HANDLE_NULL NULL
#endif

/* Prototypes for the wait/test pair operating on a MPIR_Typerep_req.
 * MPIR_Typerep_test takes an int output parameter, `completed`.
 * NOTE(review): presumably declared in this header so that inline code
 * included before mpir_typerep.h can call them -- confirm against the
 * include order in mpiimpl.h. */
int MPIR_Typerep_wait(MPIR_Typerep_req typerep_req);
int MPIR_Typerep_test(MPIR_Typerep_req typerep_req, int *completed);

#endif /* TYPEREP_PRE_H_INCLUDED */
7 changes: 6 additions & 1 deletion src/mpi/misc/utils.c
Original file line number Diff line number Diff line change
Expand Up @@ -500,7 +500,12 @@ int MPIR_Ilocalcopy_gpu(const void *sendbuf, MPI_Aint sendcount, MPI_Datatype se
do_localcopy(sendbuf, sendcount, sendtype, sendoffset, recvbuf, recvcount, recvtype,
recvoffset, LOCALCOPY_NONBLOCKING, &req->u.y_req);
MPIR_ERR_CHECK(mpi_errno);
req->type = MPIR_TYPEREP_REQUEST;

if (req->u.y_req.req == MPIR_TYPEREP_REQ_NULL) {
req->type = MPIR_NULL_REQUEST;
} else {
req->type = MPIR_TYPEREP_REQUEST;
}
#endif

fn_exit:
Expand Down
4 changes: 2 additions & 2 deletions src/mpid/ch4/ch4_api.txt
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,8 @@ Non Native API:
NM*: am_hdr_sz, data_sz, data, count, datatype, sreq
SHM*: am_hdr_sz, data_sz, data, count, datatype, sreq
am_can_do_tag: bool
NM*: void
SHM*: void
NM*: rreq
SHM*: rreq
am_tag_send : int
NM*: rank, comm, handler_id, tag, buf, count, datatype, src_vci, dst_vci, sreq
SHM*: rank, comm, handler_id, tag, buf, count, datatype, src_vci, dst_vci, sreq
Expand Down
4 changes: 3 additions & 1 deletion src/mpid/ch4/netmod/ofi/Makefile.mk
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,9 @@ mpi_core_sources += src/mpid/ch4/netmod/ofi/func_table.c \
src/mpid/ch4/netmod/ofi/ofi_part.c \
src/mpid/ch4/netmod/ofi/ofi_events.c \
src/mpid/ch4/netmod/ofi/ofi_rndv.c \
src/mpid/ch4/netmod/ofi/ofi_huge.c \
src/mpid/ch4/netmod/ofi/ofi_rndv_read.c \
src/mpid/ch4/netmod/ofi/ofi_rndv_write.c \
src/mpid/ch4/netmod/ofi/ofi_pipeline.c \
src/mpid/ch4/netmod/ofi/ofi_progress.c \
src/mpid/ch4/netmod/ofi/ofi_am_events.c \
src/mpid/ch4/netmod/ofi/ofi_nic.c \
Expand Down
12 changes: 10 additions & 2 deletions src/mpid/ch4/netmod/ofi/ofi_am.h
Original file line number Diff line number Diff line change
Expand Up @@ -215,9 +215,17 @@ MPL_STATIC_INLINE_PREFIX bool MPIDI_NM_am_check_eager(MPI_Aint am_hdr_sz, MPI_Ai
}
}

/* MPIDI_NM_am_can_do_tag
 *
 * In the form taking `rreq`: returns true only when MPIDI_OFI_ENABLE_TAGGED
 * is set and the request's datatype size multiplied by its count does not
 * exceed MPIDI_OFI_global.max_msg_size; returns false otherwise.
 *
 * NOTE(review): this span appears to be a rendered diff without +/- markers.
 * The `(void)` signature and the unconditional
 * `return MPIDI_OFI_ENABLE_TAGGED;` look like the removed pre-change lines,
 * while the `rreq` signature and the size check look like the added lines --
 * confirm against the actual ofi_am.h before relying on this text. */
MPL_STATIC_INLINE_PREFIX bool MPIDI_NM_am_can_do_tag(void)
MPL_STATIC_INLINE_PREFIX bool MPIDI_NM_am_can_do_tag(MPIR_Request * rreq)
{
return MPIDI_OFI_ENABLE_TAGGED;
if (MPIDI_OFI_ENABLE_TAGGED) {
MPI_Aint data_sz;
/* data_sz = size obtained for the request's datatype, then scaled by the
 * request's count (presumably the total payload in bytes -- confirm) */
MPIR_Datatype_get_size_macro(MPIDIG_REQUEST(rreq, datatype), data_sz);
data_sz *= MPIDIG_REQUEST(rreq, count);
/* only requests that fit within max_msg_size yield true */
if (data_sz <= MPIDI_OFI_global.max_msg_size) {
return true;
}
}
return false;
}

MPL_STATIC_INLINE_PREFIX MPIDIG_recv_data_copy_cb MPIDI_NM_am_get_data_copy_cb(uint32_t attr)
Expand Down
Loading