Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions slurm/defaults/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,11 @@
# Build RPM Packages - when true, this will build RPM packages for slurm
# and optionally PMIX, then exit the role and complete.

# CUDA Details
#======================
# CUDA release to target, written with NVIDIA's dash-separated package suffix
# (e.g. "13-0"). The same value is converted to dotted form elsewhere in these
# defaults to locate the toolkit under /usr/local/cuda-<major>.<minor>.
slurm_cuda_toolkit_version: "13-0"
# Package that provides NVML. You can use cuda-toolkit or cuda-nvml-devel.
# NVIDIA's repository names versioned packages with the dashed suffix
# (cuda-toolkit-13-0), so the name is derived from the version above to keep
# the two settings in sync.
slurm_cuda_toolkit: "cuda-toolkit-{{ slurm_cuda_toolkit_version }}"

# RPM Build Details
#======================
slurm_build_rpms: false
Expand All @@ -10,6 +15,10 @@ slurm_rpmbuild_user_home: "/home/{{ slurm_rpmbuild_user }}"
slurm_rpm_final_path: "/home/{{ slurm_rpmbuild_user }}/rpmbuild/RPMS/{{ ansible_architecture }}"
slurm_local_repo_def_path: /etc/yum.repos.d/slurm.repo

# Optional NVML support for the RPM build.
# With slurm_enable_nvml set to true, this path is handed to rpmbuild as
# --with-nvml=<path> so that Slurm is compiled against NVML. The default
# follows slurm_cuda_toolkit_version with its dashes turned into dots
# (for example "13-0" -> /usr/local/cuda-13.0).
slurm_rpmbuild_nvml_path: "/usr/local/cuda-{{ slurm_cuda_toolkit_version | replace('-', '.') }}"

# Local Slurm Repo Details
#==========================
# This assumes an HTTP based RPM Repo
Expand Down Expand Up @@ -122,6 +131,7 @@ slurm_rpm_repo: ""
slurm_build_jobs: 8
slurm_enable_cgroup_conf: true
slurm_enable_restd: true
# Enables NVML support: gates the NVIDIA CUDA repo task (RHEL 9 only) and
# adds the --with-nvml define to the rpmbuild command line.
slurm_enable_nvml: false
slurm_restd_port: 8911
slurm_restd_host: "0.0.0.0"
slurm_restd_user: srestd
Expand Down
28 changes: 28 additions & 0 deletions slurm/tasks/pre_install.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,34 @@
- slurm_install_controller
- slurm_install_slurmd

# Installs the CUDA/NVML packages listed in slurm_cuda_packages.
# NOTE(review): deliberately disabled with `when: false`; the intended
# condition is kept as a comment below. Before re-enabling, confirm the CUDA
# repository is configured first - the "Add NVIDIA CUDA repo for RHEL 9" task
# currently runs after this one and only applies to RHEL 9.
- name: Install nvml packages
  ansible.builtin.dnf:
    # Hand dnf the whole list so it resolves everything in one transaction
    # instead of invoking the module once per package.
    name: "{{ slurm_cuda_packages }}"
    state: present
    enablerepo: "{{ slurm_el_repos }}"
  when: false
  # when: slurm_enable_nvml and ansible_distribution_major_version | int >= 8
  tags:
    - slurm
    - slurm_install_controller
    - slurm_install_slurmd

# Registers NVIDIA's CUDA package repository (GPG-checked) on the host.
# Runs only when NVML support is requested and the host is major version 9.
# NOTE(review): the repository is hard-coded to x86_64 - confirm no other
# architecture needs NVML-enabled builds.
- name: Add NVIDIA CUDA repo for RHEL 9
  ansible.builtin.yum_repository:
    name: cuda-rhel9-x86_64
    description: NVIDIA CUDA repository for RHEL 9 x86_64
    baseurl: https://developer.download.nvidia.com/compute/cuda/repos/rhel9/x86_64
    # Canonical booleans (true/false) rather than YAML 1.1 yes/no.
    enabled: true
    gpgcheck: true
    gpgkey: https://developer.download.nvidia.com/compute/cuda/repos/rhel9/x86_64/D42D0685.pub
    state: present
  when: slurm_enable_nvml and ansible_distribution_major_version | int == 9
  tags:
    - slurm
    - slurm_install_controller
    - slurm_install_slurmd

- name: Add slurm group
ansible.builtin.group:
name: slurm
Expand Down
17 changes: 15 additions & 2 deletions slurm/tasks/rpmbuild_slurm.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -114,9 +114,22 @@
ansible.builtin.debug:
msg: "rpmbuild command: {{ rpmbuild_cmd }} {{ rpmbuild_pmix_option }} {{ rpmbuild_restd_option }} -ta slurm-{{ slurm_version }}.tar.bz2"

# Start from an empty NVML argument so the rpmbuild command line stays valid
# when NVML support is switched off.
- name: rpmbuild nvml option default to ""
  ansible.builtin.set_fact:
    rpmbuild_nvml_option: ""

# With NVML enabled, replace the empty default with an rpmbuild define that
# sets _with_nvml to --with-nvml=<slurm_rpmbuild_nvml_path>.
- name: rpmbuild nvml option
  when: slurm_enable_nvml
  ansible.builtin.set_fact:
    rpmbuild_nvml_option: "-D '_with_nvml --with-nvml={{ slurm_rpmbuild_nvml_path }}'"

# Print the assembled command, NVML option included, for troubleshooting.
- name: show build command with nvml
  ansible.builtin.debug:
    msg: "rpmbuild command: {{ rpmbuild_cmd }} {{ rpmbuild_pmix_option }} {{ rpmbuild_restd_option }} {{ rpmbuild_nvml_option }} -ta slurm-{{ slurm_version }}.tar.bz2"

# Runs rpmbuild against the Slurm source tarball from slurm_tmpdir, as
# slurm_rpmbuild_user. rpmbuild_nvml_option is an empty string unless
# slurm_enable_nvml is true, so it can always be included in the command.
# Each key appears exactly once: duplicate `command` / `become_user` keys are
# invalid YAML and parsers silently keep only the last value.
- name: rpmbuild from source tarball
  ansible.builtin.command: "{{ rpmbuild_cmd }} {{ rpmbuild_pmix_option }} {{ rpmbuild_restd_option }} {{ rpmbuild_nvml_option }} -ta slurm-{{ slurm_version }}.tar.bz2"
  args:
    chdir: "{{ slurm_tmpdir }}"
  become: true
  become_user: "{{ slurm_rpmbuild_user }}"
3 changes: 3 additions & 0 deletions slurm/vars/RedHat-7.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@ slurm_required_packages:
- pmix
- pmix-devel

# CUDA/NVML packages looped over by the "Install nvml packages" task.
# NOTE(review): that task's commented-out condition targets major version
# >= 8 - confirm whether this list is actually needed for EL7.
slurm_cuda_packages:
- "{{ slurm_cuda_toolkit }}"

slurm_dbd_required_packages:
- mariadb-server
- mariadb-devel
Expand Down
3 changes: 3 additions & 0 deletions slurm/vars/RedHat-8.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@ slurm_required_packages:
- lua-devel
- pam-devel

# CUDA/NVML packages looped over by the "Install nvml packages" task.
# The single entry comes from the slurm_cuda_toolkit default.
slurm_cuda_packages:
- "{{ slurm_cuda_toolkit }}"

slurm_dbd_required_packages:
- mariadb-server
- mariadb-devel
Expand Down
3 changes: 3 additions & 0 deletions slurm/vars/RedHat-9.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@ slurm_required_packages:
- s-nail
- lua

# CUDA/NVML packages looped over by the "Install nvml packages" task.
# The single entry comes from the slurm_cuda_toolkit default.
slurm_cuda_packages:
- "{{ slurm_cuda_toolkit }}"

slurm_required_devel_packages:
- zlib-devel
- bzip2-devel
Expand Down
Loading