Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
ace35a5
added ucp backend to compilation setup
TobiasZehetmair Jan 7, 2025
d3bfe48
added ucp backend source code that i created before forking from offi…
TobiasZehetmair Jan 7, 2025
1410cbd
added testing script
TobiasZehetmair Jan 7, 2025
f1b3818
created tcp module and moved tcp connection setup into it
TobiasZehetmair Jan 8, 2025
b44e1da
moved tcp resize communication into seperate library
TobiasZehetmair Jan 13, 2025
7726b88
set newcomer like ids to -1 during resize to indicate that they do no…
TobiasZehetmair Jan 13, 2025
40690e1
added simple command parser for resize
TobiasZehetmair Jan 14, 2025
6ecb8b8
added script that runs all common tests
TobiasZehetmair Jan 14, 2025
a48bf52
housekeeping: removed redundant code
TobiasZehetmair Jan 14, 2025
2049725
housekeeping: fixed naming
TobiasZehetmair Jan 14, 2025
94820a7
implemented simple remove peer functionality (processes are only noti…
TobiasZehetmair Jan 16, 2025
9c94b16
added peer removal and added status to peer information
TobiasZehetmair Jan 22, 2025
33e6ec1
housekeeping
TobiasZehetmair Jan 27, 2025
593e6f1
naive rdma implementation
TobiasZehetmair Jan 28, 2025
550ec2e
fixed rkey unpacking
TobiasZehetmair Jan 28, 2025
a6e53c8
added environment setup for the pkg manager
Jan 30, 2025
2b0b2b8
improved rdma send/ receive
Feb 4, 2025
a7466e2
fixed faulty rkey memory allocation
Feb 5, 2025
fc02f9d
extended backend api to enable backend specific allocator
Feb 20, 2025
23e8d6f
backend allocates memory for laik data and maps them into rdma. backe…
Feb 20, 2025
2baf4d6
change logging level for rdma memory handler
Feb 25, 2025
9fa5781
overlapping memory usage example
Feb 25, 2025
9fe2834
branch setup
Feb 25, 2025
8a572c8
removed comments
Feb 25, 2025
9d6d3c1
added TCP safe read and safe write
Feb 25, 2025
c8b3806
Merge branch 'master' of github.com:envelope-project/laik into ucp-ba…
Feb 26, 2025
eb98c5f
housekeeping
Feb 27, 2025
06e2cda
Added support for Common Tests and implemented KVS synchronization
Feb 27, 2025
77a1a7e
fixed potential overflow bug
Mar 12, 2025
5946501
fixed overflow bug
Mar 12, 2025
d1fa80a
quick fix for remote key mishandling
TobiasZehetmair Mar 12, 2025
d03418c
fixed communication overhead
TobiasZehetmair Mar 21, 2025
2367da2
remove barrier
TobiasZehetmair Mar 22, 2025
41f4c08
housekeeping
TobiasZehetmair Apr 7, 2025
55aa079
housekeeping and added all test cases
TobiasZehetmair Apr 7, 2025
c53a1a4
delete example that was added by accident
TobiasZehetmair Apr 7, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,12 @@ SRCS += $(wildcard $(SDIR)src/backends/tcp/*.c)
IFLAGS += $(TCP_INC)
LDLIBS += $(TCP_LIBS)
endif
# UCP backend: its sources, include flags and link libraries are only added
# when USE_UCP is defined.
# NOTE(review): USE_UCP, UCP_INC and UCP_LIBS are presumably emitted by
# 'configure' when the ucx pkg-config module is found -- confirm.
ifdef USE_UCP
SRCS += $(wildcard $(SDIR)src/backends/ucp/*.c)
IFLAGS += $(UCP_INC)
LDLIBS += $(UCP_LIBS)
endif

HEADERS = $(wildcard $(SDIR)include/*.h $(SDIR)include/laik/*.h)
OBJS = $(SRCS:$(SDIR)%.c=%.o)

Expand Down
74 changes: 51 additions & 23 deletions configure
Original file line number Diff line number Diff line change
Expand Up @@ -31,14 +31,45 @@ if "CC" in os.environ:
# Command line parsing

parser = argparse.ArgumentParser()
# One "--no-<component>" switch per optional component. The tuple order is
# the registration order and therefore the order shown by --help.
for _switch, _help in (
    ("--no-ucp", "disable UCP backend"),
    ("--no-fabric", "disable Libfabric backend"),
    ("--no-tcp", "disable TCP backend"),
    ("--no-mpi", "disable MPI backend"),
    ("--no-mqtt", "disable MQTT support"),
):
    parser.add_argument(_switch, help=_help, action="store_true")
args = parser.parse_args()

# A component stays enabled unless its "--no-..." switch was given.
use_fabric = not args.no_fabric
use_mpi = not args.no_mpi
use_ucp = not args.no_ucp
use_tcp = not args.no_tcp
use_mqtt = not args.no_mqtt

# Probe once whether pkg-config is on PATH; the backend checks reuse this.
pkgc_found = bool(shutil.which("pkg-config"))
# Enable a backend if pkg-config finds its dependency
# This behaviour is exactly the same for TCP and Libfabric backends,
# thus a common function makes sense
def enable_if_found(name, pkgname, ubuntu, pkgopts=None):
    """Enable backend *name* when pkg-config reports *pkgname* as available.

    On success the module-level build settings are extended: ``defs`` gets
    ``-DUSE_<NAME>``, ``miscvars`` gets ``USE_<NAME>=1`` plus the
    ``<NAME>_INC`` / ``<NAME>_LIBS`` lines taken from pkg-config, and
    ``test_subdirs`` gets the lower-cased backend name. Otherwise a message
    explaining why the backend is disabled is printed and nothing is changed.

    Args:
        name: backend name used in messages and (upper-cased) in variable names.
        pkgname: pkg-config module to look up.
        ubuntu: Ubuntu package name printed as an installation hint.
        pkgopts: optional list of extra pkg-config options for the
            availability check (e.g. a minimum-version option).
    """
    global pkgc_found, defs, miscvars, test_subdirs

    if not pkgc_found:
        print("%s backend disabled: pkg-config not found." % (name))
        return

    # A None default avoids sharing one mutable list between calls.
    extra_opts = list(pkgopts) if pkgopts else []

    # Exit status 0 of pkg-config is treated as "dependency found".
    if subprocess.call(['pkg-config'] + extra_opts + [pkgname]) != 0:
        print("%s backend disabled: %s not found." % (name, pkgname))
        print("  On Ubuntu, install '%s'" % (ubuntu))
        return

    def query(flag):
        # Argument list instead of a shell command line: pkgname is never
        # interpreted by a shell. The exit status is deliberately ignored so
        # that whatever was written to stdout is used.
        proc = subprocess.Popen(['pkg-config', flag, pkgname],
                                stdout=subprocess.PIPE,
                                universal_newlines=True)
        return proc.communicate()[0].strip()

    print("%s backend enabled (%s found)." % (name, pkgname))
    pkg_inc = query('--cflags')
    pkg_libs = query('--libs')
    up = name.upper()
    defs += " -DUSE_%s" % (up)
    miscvars += "USE_%s=1\n" % (up)
    miscvars += "%s_INC=%s\n" % (up, pkg_inc)
    miscvars += "%s_LIBS=%s\n" % (up, pkg_libs)
    test_subdirs += " %s" % (name.lower())

#------------------------------------
# Mosquitto/MQTT

Expand Down Expand Up @@ -116,34 +147,31 @@ else:
# TCP backend support

if use_tcp:
pkgc_found = bool(shutil.which("pkg-config"))
if pkgc_found:
# check gio version
glib_versionok = not subprocess.call(['pkg-config', '--atleast-version=2.44', 'gio-2.0'])
if use_tcp and pkgc_found and glib_versionok:
print("TCP backend enabled (glib-2.0/gio-2.0 found).")
glib_inc = os.popen('pkg-config --cflags gio-2.0').read().strip()
glib_libs = os.popen('pkg-config --libs gio-2.0').read().strip()
defs += " -DUSE_TCP"
miscvars += "USE_TCP=1\n"
miscvars += "TCP_INC=" + glib_inc + "\n"
miscvars += "TCP_LIBS=" + glib_libs + "\n"
test_subdirs += " tcp"
enable_if_found("TCP", "gio-2.0", "libglib2.0-dev", ["--atleast-version=2.44"])
else:
print("TCP backend disabled.")

#------------------------------------
# Libfabric backend support

if use_fabric:
enable_if_found("Fabric", "libfabric", "libfabric-dev")
else:
if not use_tcp:
print("TCP backend disabled.")
elif not pkgc_found:
print("TCP backend disabled: pkg-config required to detect glib-2.0 dependency.")
else:
print("TCP backend disabled: glib-2.0/gio-2.0 not found.")
print(" On Ubuntu, install 'libglib2.0-dev'")
print("Fabric backend disabled.")

#------------------------------------
# TCP2 backend support: always enable
print("TCP2 backend enabled.")
defs += " -DUSE_TCP2"
test_subdirs += " tcp2"

#------------------------------------
# UCP backend support
# Skipped when --no-ucp was given; otherwise enabled only if pkg-config
# finds the 'ucx' module.
if not use_ucp:
    print("UCP backend disabled.")
else:
    enable_if_found("UCP", "ucx", "libucx-dev")

#------------------------------------
# C++ support
# LAIK does not use C++ itself, but there is a C++ example
Expand Down Expand Up @@ -236,11 +264,11 @@ sdir = os.path.dirname(os.path.realpath(__file__))
if bdir != sdir:
print("Detected src != build directory")
# generate a mirror directory hierarchy for generated files
for dir in ["src", "src/backends", "src/backends/tcp",
for dir in ["src", "src/backends", "src/backends/tcp", "src/backends/ucp"
"examples","examples/c++","external",
"external/MQTT","external/simple",
"tests","tests/src","tests/mpi",
"tests/tcp","tests/tcp2"]:
"tests/tcp","tests/tcp2","tests/ucp"]:
if not os.path.exists(dir):
os.makedirs(dir)
print(" created directory '" + dir + "'")
Expand All @@ -249,7 +277,7 @@ if bdir != sdir:
for dir in ["","examples/","examples/c++/",
"external/MQTT/", "external/simple/",
"tests/", "tests/src/", "tests/mpi/",
"tests/tcp/", "tests/tcp2/"]:
"tests/tcp/", "tests/tcp2/", "tests/ucp"]:
mfile = open(dir + "Makefile", 'w')
mfile.write("# Generated by 'configure'.\n")
mfile.write("SDIR=" + sdir + "/" + dir + "\n")
Expand Down
8 changes: 8 additions & 0 deletions include/laik-backend-ucp.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#ifndef LAIK_BACKEND_UCP_H
#define LAIK_BACKEND_UCP_H

#include "laik.h"

// Initialization entry point of the UCP backend; returns a Laik_Instance.
// NOTE(review): presumably receives pointers to main()'s argc/argv so the
// backend can inspect or adjust the command line -- confirm against the
// implementation.
Laik_Instance* laik_init_ucp(int *argc, char ***argv);

#endif /* LAIK_BACKEND_UCP_H */
2 changes: 2 additions & 0 deletions include/laik/backend.h
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,8 @@ struct _Laik_Backend {
// for elasticity: removal of processes which got started in a previous
// resize is finished. They can be marked as dead and resources freed
void (*finish_resize)();

Laik_Allocator* (*allocator)();
};


Expand Down
43 changes: 43 additions & 0 deletions src/backends/ucp/backend-ucp-types.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
//*********************************************************************************
#pragma once

//*********************************************************************************
#include <ucp/api/ucp.h>

//*********************************************************************************
// State of a process with respect to the old and the new process group.
// Enumerators are numbered starting at 1, so a zero-initialized State
// matches none of them.
// Initialized as NEW
typedef enum _State
{
NEW = 1, // process is only in new group
INHERITED, // process is in old and new group
INREMOVE1, // process is marked to be removed
INREMOVE2, // process will be removed from laik group (is only in old group)
DEAD // process is no longer used
} State;

//*********************************************************************************
// Information kept about one peer process: its state and its UCP address.
typedef struct _Peer
{
State state; // current state of this peer (see State)
size_t addrlen; // length of 'address' (presumably in bytes -- TODO confirm)
ucp_address_t *address; // ucx address of the peer; NOTE(review): ownership not visible here
} Peer;

//*********************************************************************************
// Global struct used to describe each process's current state
typedef struct _InstData
{
State state; // state of this process (see State)
int number_dead; // presumably the number of peers in state DEAD -- TODO confirm
char host[64]; // my hostname
char location[128]; // my location
int mylid; // location id
int world_size; // total number of location ids/ peers (can only grow)
int phase; // current phase
int epoch; // current epoch
size_t addrlen; // local ucx address length
ucp_address_t *address; // local ucx address, memory is handled by the ucp worker
Peer *peer; // peer records; presumably indexed by location id with world_size entries -- TODO confirm
} InstData;

//*********************************************************************************
Loading