diff --git a/slurm/CHANGELOG.md b/slurm/CHANGELOG.md new file mode 100644 index 0000000..206fbad --- /dev/null +++ b/slurm/CHANGELOG.md @@ -0,0 +1,44 @@ + + +https://github.com/dstdev/ansible-roles/tree/cwr_dev + +New vars in slurm/defaults/main.yaml + - slurm_build_rpms | bool + - slurm_rpm_install + - slurm_source_install + - slurm_rpmbuild_user | non-privileged user id + - slurm_rpmbuild_user_home | home path + rpmbuild base + - slurm_download_url | github or schedmd + - slurm_local_repo_name: + - slurm_local_repo_host: + - slurm_rpm_repo_scp_path: scp command to put rpms on repo + - slurm_rpm_final_path: + + +New run path when slurm_build_rpms is true + + slurm/tasks/main.yaml + slurm/tasks/rpmbuild.yaml + - creates new slurm build user + + slurm/tasks/install_dev_reqs.yaml + slurm/tasks/rpmbuild_pmix.yaml + - does rpm build - rpms in "{{ slurm_rpmbuild_user_home }}/rpmbuild" + + slurm/tasks/rpmbuild_slurm.yaml + - does rpm build - rpms in "{{ slurm_rpmbuild_user_home }}/rpmbuild" + + slurm/tasks/cleanup.yaml + + end_play + +New install method vars: +slurm_rpm_install and slurm_source_install +slurm_rpm_install and slurm_source_install are mutually exclusive +slurm_rpm_install will install rpms from local RPM repo +slurm_source_install follows the existing source install path + +New failure check +the latest version of slurm on the rpm repo must match the stated +install path on the + diff --git a/slurm/README.md b/slurm/README.md index 192bcf2..ced90ef 100644 --- a/slurm/README.md +++ b/slurm/README.md @@ -1,25 +1,61 @@ -Role Name +Slurm ========= Install and configures slurm controller, daemons, and database. +RPMBuild Execution Path +----------------------- +When `slurm_build_rpms` is true, Slurm and optionally PMix rpms are built and copied out to the local RPM repo hosting Slurm and PMix packages. The role exits after the packages are copied over to the repo. +Running `rpmbuild` using the code bundle from github is often problematic. 
For this reason, the rpmbuild path downloads the source bundle from SchedMD, not GitHub. + Requirements ------------ Running Mariadb/MySQL Role or instance and munge development libraries must be installed. +Example Build Command +--------------------- +``` +ansible-playbook -i inventory.ini -c local -e @slurm_opts.json slurm.yml +``` + +Example Vars File +----------------- +slurm_opts.json +``` +{ + "slurm_build_rpms": false, + "slurm_source_install": false, + "slurm_rpm_install": true, + "slurm_version": "23.11.11", + "slurm_enable_pmix": true, + "slurm_pmix_version": "5.0.8", + "slurm_enable_restd": true, + "slurm_jwt_version": "v2.1.2" + +} +``` + Role Variables --------------- +============== +rpmbuild +-------- | Name | Default Value | Description | |---------------------------------------------|-----------------------|-----------------------------------------------------------------------------------| -| mounts | [] | List of dictionaries defining the mount | +| slurm_rpmbuild_user | slurmbuild | Non-privileged user for rpmbuilds. This user will be created if necessary. 
+| slurm_rpmbuild_user_home |/home/{{ slurm_rpmbuild_user }}| rpmbuild root +| slurm_local_repo_name | "" | Name of Slurm RPM Repo +| slurm_local_repo_host | "" | Hostname for Slurm RPM Repo +| slurm_rpm_repo_scp_path |root@{{ slurm_local_repo_host }}:{{ slurm_rpm_server_path_base }}/{{ slurm_local_repo_name }} | scp command to put packages +| slurm_rpm_final_path | | Path to local final rpm packages | slurm_accounting_storage_enforce | 0 | Accounting enforcement | | slurm_cgroup_automount | yes | Automount cgroups | | slurm_cgroup_constrain_cores | yes | Constrain cores available | | slurm_cgroup_constrain_ram_space | yes | Constrain ram space | -| slurm_cluster_name | cluster | Name for this cluster install | +| slurm_cluster_name | cluster | Name for this cluster install +| slurm_conf_max_job_count | 10000 | slurm.conf MaxJobCount | slurm_conf_accounting_storage_external_host | "" | External Accounting DB host ip and port | | slurm_conf_accouting_storage_tres | [] | AccoutingStorageTres Parameter | | slurm_conf_cli_filter_plugins | [] | List of filter/modification plugins | diff --git a/slurm/defaults/main.yml b/slurm/defaults/main.yml index 27de651..5a6e239 100644 --- a/slurm/defaults/main.yml +++ b/slurm/defaults/main.yml @@ -1,10 +1,47 @@ --- +# Build RPM Packages - when true, this will build RPM packages for slurm +# and optionally PMIX then exit the role and complete. 
+ +# RPM Build Details +#====================== +slurm_build_rpms: false +slurm_rpmbuild_user: slurmbuild +slurm_rpmbuild_user_home: "/home/{{ slurm_rpmbuild_user }}" +slurm_rpm_final_path: "/home/{{ slurm_rpmbuild_user }}/rpmbuild/RPMS/{{ ansible_architecture }}" +slurm_local_repo_def_path: /etc/yum.repos.d/slurm.repo + +# Local Slurm Repo Details +#========================== +# This assumes an HTTP based RPM Repo +slurm_local_repo_description: "Slurm RPM Repository" +# Path to HTTP server document root +slurm_rpm_server_path_base: "/var/www/html" +slurm_local_repo_name: slurm +slurm_local_repo_host: yuma-s1 +slurm_rpm_local_repo_url: "http://{{ slurm_local_repo_host }}/{{ slurm_local_repo_name }}" +slurm_rpm_repo_path: "{{ slurm_rpm_server_path_base }}/{{ slurm_local_repo_name }}" +slurm_rpm_repo_scp_path: "root@{{ slurm_local_repo_host }}:{{ slurm_rpm_repo_path }}" + + # defaults file for slurm-controller slurm_database_password: "default" -#slurm_version: "21.08.8-2" -slurm_version: "23.11.5-1" + +# Code hosted at Github and SchedMD have different filename formats +# Github slurm versions are nn.nn.nn-1 +# SchedMD release versions are nn.nn.nn +# slurm_version: "21.08.8-1" # GitHub +# slurm_version: "23.11.11" # SchedMD +slurm_version: "24.11.6" + +# For Slurm < 24.05.8-0 the slurmd and slurmctld use "-D --systemd" +# For Slurm >= 24.05.8-0, slurmd and slurmctld use only "--systemd" +# https://github.com/SchedMD/slurm/blob/slurm-24.05/NEWS +slurm_dasd_d_ver: "24.05.8" +slurm_dash_d: "-D" + slurm_enable_systemd_daemon_flag: false -slurm_jwt_version: "v1.12.0" +slurm_build_jwt_source: false +slurm_jwt_version: "v2.1.2" slurm_slurmd: false slurm_controller: false slurm_uid: 450 @@ -21,7 +58,7 @@ slurm_daemon_debug: 3 slurm_proctrack_type: "cgroup" slurm_nodes: [] slurm_partitions: [] -slurm_daemon_spool_dir: /var/spool/slurmd/ +slurm_daemon_spool_dir: /var/spool/slurmd slurm_state_save_location: /var/spool/slurmctld slurm_scheduler_type: backfill 
slurm_accounting_storage_enforce: 0 @@ -79,15 +116,16 @@ slurm_install_root: "/opt/slurm" slurm_update_symlink: yes slurm_disable_restart: false slurm_rpm_install: false +slurm_source_install: false slurm_rpm_repo: "" -slurm_build_jobs: 4 +slurm_build_jobs: 8 slurm_enable_cgroup_conf: true -slurm_enable_restd: false +slurm_enable_restd: true slurm_restd_port: 8911 slurm_restd_host: "0.0.0.0" -slurm_restd_user: "{{undef(hint='You must specify the slurm rest api user')}}" -slurm_restd_jwt_key: "{{undef(hint='You must specify the jwt key value')}}" +slurm_restd_user: srestd +slurm_restd_jwt_key: /var/spool/slurmctld/jwt_hs256.key # Slurm profile variables @@ -110,7 +148,7 @@ slurm_task_epilog_append: "" # Firewall rules slurm_firewalld_enabled: false -slurm_firewalld_subnet: "10.141.0.0/16" +slurm_firewalld_subnet: "192.168.114.0/24" slurm_firewalld_python_interpreter: "/usr/bin/python" # Gres information @@ -127,9 +165,13 @@ slurm_conf_accounting_storage_external_host: "" slurm_enable_pam_adopt: false slurm_groups_allowed: [] slurm_enable_pmix: true -slurm_pmix_version: 5.0.1 +slurm_pmix_version: 5.0.8 slurm_force_install: false slurm_conf_extra: {} slurmdbd_conf_extra: {} + + +slurm_rpm_check_path: "{{ slurm_rpmbuild_user_home }}/rpmbuild/RPMS/{{ ansible_architecture }}/slurm-{{ slurm_version }}-1.el{{ ansible_distribution_major_version }}.{{ ansible_architecture }}.rpm" +pmix_rpm_check_path: "{{ slurm_rpmbuild_user_home }}/rpmbuild/RPMS/{{ ansible_architecture }}/pmix-{{ slurm_pmix_version }}-1.el{{ ansible_distribution_major_version }}.{{ ansible_architecture }}.rpm" \ No newline at end of file diff --git a/slurm/tasks/install.yaml b/slurm/tasks/install.yaml index 31645c2..382e196 100644 --- a/slurm/tasks/install.yaml +++ b/slurm/tasks/install.yaml @@ -11,8 +11,26 @@ - slurm_install_slurmd - slurm_install_restd -- ansible.builtin.include_tasks: "jwt.yaml" - when: slurm_enable_restd and install_slurm +# Check if JWT RPM is installed +- name: Check if JWT RPM is 
installed + ansible.builtin.set_fact: + jwt_packages: "{{ ansible_facts.packages['libjwt'] | default([]) }}" + tags: + - slurm + - slurm_install_restd + when: slurm_enable_restd and slurm_build_jwt_source + +# Fail if slurm_build_jwt_source is true and jwt is installed +- ansible.builtin.fail: + msg: + - "JWT is already installed via RPM - installing from source is not supported." + - "remove jwt rpm packages before building from source." + when: slurm_build_jwt_source and jwt_packages | length > 0 + +# Build and install JWT from source +- name: Build and install JWT from source + ansible.builtin.include_tasks: "jwt.yaml" + when: slurm_enable_restd and slurm_build_jwt_source tags: always @@ -21,6 +39,35 @@ tags: always + +- name: Set slurm download version + ansible.builtin.set_fact: + slurm_download_version: "{{ slurm_version | replace('.', '-')}}" + tags: + - slurm + - slurm_install_controller + - slurm_install_slurmd + - slurm_configure + +- name: Get slurm download version length + ansible.builtin.set_fact: + slurm_download_version_length: "{{ slurm_download_version.split('-') | length }}" + tags: + - slurm + - slurm_install_controller + - slurm_install_slurmd + - slurm_configure + +- name: Update slurm download version + ansible.builtin.set_fact: + slurm_download_version: "{{ slurm_download_version }}-1" + when: "slurm_download_version_length | int < 4" + tags: + - slurm + - slurm_install_controller + - slurm_install_slurmd + - slurm_configure + - name: Download slurm get_url: url: "https://github.com/SchedMD/slurm/archive/refs/\ @@ -48,7 +95,7 @@ - name: Configure slurm command: "./configure --prefix=/{{slurm_install_directory}} --sysconfdir=/etc/slurm \ - --localstatedir=/var --runstatedir=/run {% if slurm_enable_restd %}--with-jwt={{slurm_install_directory}}{% endif %}\ + --localstatedir=/var --runstatedir=/run {% if slurm_enable_restd and slurm_build_jwt_source %}--with-jwt={{slurm_install_directory}}{% endif %}\ {% if slurm_enable_pmix %} 
--with-pmix={{slurm_install_directory}}{% endif %}" args: chdir: "{{slurm_tmpdir}}/slurm-slurm-{{slurm_download_version}}" @@ -88,6 +135,17 @@ - slurm - slurm_install_slurmd +- name: Create jwt key + ansible.builtin.copy: + dest: "{{slurm_state_save_location}}/jwt_hs256.key" + content: "{{slurm_restd_jwt_key}}" + owner: "{{slurm_uid}}" + group: "{{slurm_gid}}" + mode: "0600" + tags: + - slurm_install_restd + when: slurm_enable_restd + # TODO: Cleanup tmp spaces #- name: Clean up tmp space #ansible.builtin.file: diff --git a/slurm/tasks/install_dev_reqs.yaml b/slurm/tasks/install_dev_reqs.yaml new file mode 100644 index 0000000..12da95c --- /dev/null +++ b/slurm/tasks/install_dev_reqs.yaml @@ -0,0 +1,11 @@ +--- +- name: Install required development packages + ansible.builtin.dnf: + name: "{{item}}" + state: present + enablerepo: "{{ slurm_el_repos }}" + + loop: "{{slurm_required_devel_packages}}" + when: ansible_distribution_major_version | int >= 8 + tags: + - slurm_build_rpms \ No newline at end of file diff --git a/slurm/tasks/install_reqs.yaml b/slurm/tasks/install_reqs.yaml new file mode 100644 index 0000000..36e9113 --- /dev/null +++ b/slurm/tasks/install_reqs.yaml @@ -0,0 +1,66 @@ +--- +- name: Install required packages EL 8+ + ansible.builtin.dnf: + name: "{{item}}" + state: present + enablerepo: "{{ slurm_el_repos }}" + + loop: "{{slurm_required_packages}}" + when: ansible_distribution_major_version | int >= 8 + tags: + - slurm + - slurm_install_controller + - slurm_install_slurmd + +- name: Install required packages EL 7 + ansible.builtin.package: + name: "{{item}}" + state: present + loop: "{{slurm_required_packages}}" + when: ansible_distribution_major_version | int == 7 + tags: + - slurm + - slurm_install_controller + - slurm_install_slurmd + +- name: Install slurmdbd requirements el8+ + ansible.builtin.dnf: + name: "{{item}}" + enablerepo: "{{ slurm_el_repos }}" + loop: "{{slurm_dbd_required_packages}}" + when: slurm_controller and slurm_enable_restd 
and ansible_distribution_major_version | int >= 8 + tags: + - slurm + - slurm_install_controller + - slurm_install_slurmd + +- name: Install slurmdbd requirements + ansible.builtin.package: + name: "{{item}}" + when: slurm_controller + loop: "{{slurm_dbd_required_packages}}" + tags: + - slurm + - slurm_install_controller + - slurm_install_slurmd + +- name: Install slurm rest api requirements EL8+ + ansible.builtin.dnf: + name: "{{item}}" + enablerepo: "{{ slurm_el_repos }}" + loop: "{{slurm_restapi_required_packages}}" + when: slurm_enable_restd and ansible_distribution_major_version | int >= 8 + tags: + - slurm + - slurm_install_restapi + - slurm_install_restd + +- name: Install slurm rest api requirements EL7 + ansible.builtin.package: + name: "{{item}}" + loop: "{{slurm_restapi_required_packages}}" + when: slurm_enable_restd and ansible_facts['distribution_major_version'] == "7" + tags: + - slurm + - slurm_install_restapi + - slurm_install_restd diff --git a/slurm/tasks/main.yml b/slurm/tasks/main.yml index 6ba0835..cefcfc5 100644 --- a/slurm/tasks/main.yml +++ b/slurm/tasks/main.yml @@ -1,5 +1,4 @@ --- - - name: Gather os specific variables ansible.builtin.include_vars: "{{ item }}" with_first_found: @@ -12,50 +11,51 @@ tags: - always -- name: Set slurm download version - ansible.builtin.set_fact: - slurm_download_version: "{{ slurm_version | replace('.', '-')}}" - tags: - - slurm - - slurm_install_controller - - slurm_install_slurmd - - slurm_configure +- name: Gather package facts + ansible.builtin.package_facts: + manager: "auto" + +################################################## +# Exits role after the rpmbuild(s) are complete. 
+- name: Build RPM packages if needed + ansible.builtin.include_tasks: "rpmbuild.yaml" + when: slurm_build_rpms +# role exits early if slurm_build_rpms == true +################################################## -- name: Get slurm download version length - ansible.builtin.set_fact: - slurm_download_version_length: "{{ slurm_download_version.split('-') | length }}" - tags: - - slurm - - slurm_install_controller - - slurm_install_slurmd - - slurm_configure -- name: Update slurm download version - ansible.builtin.set_fact: - slurm_download_version: "{{ slurm_download_version }}-1" - when: "slurm_download_version_length | int < 4" - tags: - - slurm - - slurm_install_controller - - slurm_install_slurmd - - slurm_configure +- name: Check install method + fail: + msg: + - "slurm_rpm_install and slurm_source_install are mutually exclusive." + when: slurm_rpm_install and slurm_source_install - name: Include pre-install tasks ansible.builtin.include_tasks: "pre_install.yaml" tags: - always +- name: Check if Slurm Version is above 24.05.8-0 + ansible.builtin.include_tasks: "slurm_version_check.yaml" + tags: + - always + +- name: Fail if both slurm_rpm_install and slurm_source_install + ansible.builtin.fail: + msg: "slurm_rpm_install and slurm_source_install are mutually exclusive" + when: slurm_rpm_install and slurm_source_install + - name: Include install tasks ansible.builtin.include_tasks: "install.yaml" - when: not slurm_rpm_install tags: - always + when: slurm_source_install - name: Include rpm install tasks ansible.builtin.include_tasks: "rpm_install.yaml" - when: slurm_rpm_install tags: - always + when: slurm_rpm_install - name: Include post-install tasks ansible.builtin.include_tasks: "post_install.yaml" diff --git a/slurm/tasks/pmix.yaml b/slurm/tasks/pmix.yaml index f93dc9d..f3f3f2b 100644 --- a/slurm/tasks/pmix.yaml +++ b/slurm/tasks/pmix.yaml @@ -5,24 +5,13 @@ state: present enablerepo: "{{ slurm_el_repos }}" - loop: "{{slurm_pmix_required_packages}}" + loop: 
"{{slurm_required_devel_packages}}" when: ansible_distribution_major_version | int >= 8 tags: - slurm - slurm_install_slurmd - slurm_install_controller -- name: Install required packages EL 7 - ansible.builtin.package: - name: "{{item}}" - state: present - loop: "{{slurm_pmix_required_packages}}" - when: ansible_distribution_major_version | int == 7 - tags: - - slurm - - slurm_install_slurmd - - slurm_install_controller - - name: Download pmix ansible.builtin.unarchive: src: "https://github.com/openpmix/openpmix/releases/download/v{{ slurm_pmix_version }}/pmix-{{ slurm_pmix_version }}.tar.gz" diff --git a/slurm/tasks/pre_install.yaml b/slurm/tasks/pre_install.yaml index 9ec3128..e13b9e2 100644 --- a/slurm/tasks/pre_install.yaml +++ b/slurm/tasks/pre_install.yaml @@ -66,6 +66,23 @@ - slurm_install_restapi - slurm_install_restd +- name: Check if jwtlib-devel is required + set_fact: + slurm_required_devel_packages: "{{ slurm_required_devel_packages | difference(['libjwt-devel']) }}" + when: slurm_enable_restd and slurm_build_jwt_source + +- name: Install required development packages + ansible.builtin.dnf: + name: "{{item}}" + state: present + enablerepo: "{{ slurm_el_repos }}" + loop: "{{ slurm_required_devel_packages }}" + when: slurm_source_install + tags: + - slurm + - slurm_install_controller + - slurm_install_slurmd + - name: Add slurm group ansible.builtin.group: name: slurm diff --git a/slurm/tasks/rpm_install.yaml b/slurm/tasks/rpm_install.yaml index 17ac28b..b46177a 100644 --- a/slurm/tasks/rpm_install.yaml +++ b/slurm/tasks/rpm_install.yaml @@ -1,17 +1,27 @@ -- name: Create local repo file - get_url: - url: "{{ slurm_rpm_repo }}" - dest: /etc/yum.repos.d/local.repo - mode: 0644 - owner: root - group: root - force: yes - when: not ansible_check_mode - tags: - - slurm +- name: Add local custom repo + yum_repository: + name: "{{ slurm_local_repo_name }}" + description: "{{ slurm_local_repo_description }}" + baseurl: "{{ slurm_rpm_local_repo_url }}" + gpgcheck: 
no + +- name: Check Command + ansible.builtin.shell: + cmd: "dnf repoquery --repo {{ slurm_local_repo_name }} slurm | awk -F: '{print $2}' | awk -F- '{print $1}'" + register: slurm_repo_version + +- name: Verify repo vs expected version + debug: + msg: + - "{{ slurm_repo_version.stdout }}" + +- name: Fail on Version mismatch + ansible.builtin.fail: + msg: "Required Slurm Version {{ slurm_version }} does not match available RPM on repo {{ slurm_repo_version.stdout }}" + when: slurm_repo_version.stdout != slurm_version - name: Install slurm rpm packages - package: + yum: state: present name: - slurm @@ -23,7 +33,7 @@ - slurm_install_slurmd - name: Install slurm controller rpm packages - package: + yum: state: present name: - slurm-slurmctld @@ -34,7 +44,7 @@ - slurm_install_controller - name: Install slurmd rpm packages - package: + yum: state: present name: - slurm-slurmd @@ -45,7 +55,7 @@ - slurm_install_slurmd - name: Install slurmrestd rpm packages - package: + yum: state: present name: - slurm-slurmrestd @@ -56,3 +66,18 @@ - slurm_install_restd +- name: Add jwt key + command: "dd if=/dev/random of=/var/spool/slurmctld/jwt_hs256.key bs=32 count=1" + when: slurm_controller and slurm_enable_restd + tags: + - slurm + - slurm_install_controller + - slurm_install_restd + +- name: Change file ownership, group and permissions + ansible.builtin.file: + path: /var/spool/slurmctld/jwt_hs256.key + owner: "{{ slurm_user }}" + group: "{{ slurm_user }}" + mode: '0600' + when: slurm_controller and slurm_enable_restd \ No newline at end of file diff --git a/slurm/tasks/rpmbuild.yaml b/slurm/tasks/rpmbuild.yaml new file mode 100644 index 0000000..4d45c04 --- /dev/null +++ b/slurm/tasks/rpmbuild.yaml @@ -0,0 +1,100 @@ +--- +- name: Add slurmbuild user + ansible.builtin.user: + name: "{{ slurm_rpmbuild_user }}" + comment: "Slurm Build User" + shell: /bin/bash + create_home: true + home: "{{ slurm_rpmbuild_user_home }}" + state: present + + +- name: Create temporary source dir + 
ansible.builtin.file: + state: directory + path: "{{ slurm_tmpdir }}" + mode: "0777" + owner: "{{ slurm_rpmbuild_user }}" + group: "{{ slurm_rpmbuild_user }}" + + +- name: Set Slurm RPM build variables + ansible.builtin.set_fact: + slurm_rpm_base_path: "{{slurm_rpmbuild_user_home}}/rpmbuild/RPMS/{{ ansible_architecture }}" + +- name: Set PMix RPM path + ansible.builtin.set_fact: + slurm_pmix_rpm_path: "{{ slurm_rpm_base_path }}/pmix-{{ slurm_pmix_version }}-1.el{{ ansible_distribution_major_version }}.{{ ansible_architecture }}.rpm" + +- name: Check if PMix rpm exists + ansible.builtin.stat: + path: "{{ slurm_pmix_rpm_path }}" + register: slurm_pmix_rpm_path_stat + +- name: Set PMix root path + ansible.builtin.set_fact: + slurm_pmix_expected_root_path: "{{ slurm_install_directory }}/bin/pmix_info" + +- name: Check if PMix expected root path exists + ansible.builtin.stat: + path: "{{ slurm_pmix_expected_root_path }}" + register: slurm_pmix_expected_root_path_stat + +- name: Check if PMix is installed + ansible.builtin.set_fact: + slurm_pmix_installed: "{{ 'pmix' in ansible_facts.packages }}" + +- name: Check PMix Installed Version + ansible.builtin.set_fact: + slurm_pmix_installed_version: "{{ ansible_facts.packages['pmix'][0].version if slurm_pmix_installed else 'not installed' }}" + + +# Show some debug information +- name: Debug PMix Build vars + ansible.builtin.debug: + msg: + - "slurm_install_directory: {{ slurm_install_directory }} " + - "slurm_version {{ slurm_version }}" + - "slurm_pmix_version {{ slurm_pmix_version }}" + - " ----------------------------------------------------------------------" + - "slurm_pmix_expected_root_path: {{ slurm_pmix_expected_root_path}}" + - "slurm_pmix_expected_root_path_stat.stat.exists: {{ slurm_pmix_expected_root_path_stat.stat.exists }}" + - " ----------------------------------------------------------------------" + - "slurm_pmix_rpm_path: {{ slurm_pmix_rpm_path }}" + - "slurm_pmix_rpm_path_stat.stat.exists: {{ 
slurm_pmix_rpm_path_stat.stat.exists }}" + - " ----------------------------------------------------------------------" + - "slurm_pmix_installed: {{ slurm_pmix_installed }}" + - "slurm_pmix_installed_version: {{ slurm_pmix_installed_version }}" + - " ----------------------------------------------------------------------" + - "slurm_enable_pmix: {{ slurm_enable_pmix }}" + - "slurm_pmix_rpm_path_stat.stat.exists: {{ slurm_pmix_rpm_path_stat.stat.exists }}" + - " ----------------------------------------------------------------------" + + +- name: Build PMix if required + ansible.builtin.include_tasks: rpmbuild_pmix.yaml + when: slurm_enable_pmix + + +- name: Build Slurm if required + ansible.builtin.include_tasks: rpmbuild_slurm.yaml + +- name: Make sure remote slurm repo path exists + ansible.builtin.shell: "ssh root@{{ slurm_local_repo_host }} mkdir -p {{ slurm_rpm_server_path_base }}/{{ slurm_local_repo_name }}" + +- name: scp completed RPMs to repo + ansible.builtin.shell: "scp {{ slurm_rpm_final_path }}/*.rpm {{ slurm_rpm_repo_scp_path }}" + +- name: Run createrepo + ansible.builtin.command: + argv: + - ssh + - "{{ slurm_local_repo_host}}" + - createrepo + - "{{ slurm_rpm_server_path_base }}/{{ slurm_local_repo_name }}" + +- name: set permissions on remote repo path + ansible.builtin.shell: "ssh root@{{ slurm_local_repo_host }} chown -R apache:apache {{ slurm_rpm_repo_path }}" + +- name: rpmbuild end play + meta: end_play diff --git a/slurm/tasks/rpmbuild_pmix.yaml b/slurm/tasks/rpmbuild_pmix.yaml new file mode 100644 index 0000000..a757380 --- /dev/null +++ b/slurm/tasks/rpmbuild_pmix.yaml @@ -0,0 +1,29 @@ +--- +- name: Install required development packages + ansible.builtin.dnf: + name: "{{item}}" + state: present + enablerepo: "{{ slurm_el_repos }}" + + loop: "{{slurm_pmix_required_devel_packages}}" + when: ansible_distribution_major_version | int >= 8 + +- name: Download source bundle + ansible.builtin.get_url: + url: 
"https://github.com/openpmix/openpmix/releases/download/v{{ slurm_pmix_version }}/pmix-{{ slurm_pmix_version }}.tar.bz2" + dest: "{{ slurm_tmpdir }}/pmix-{{ slurm_pmix_version }}.tar.bz2" + owner: "{{ slurm_rpmbuild_user }}" + group: "{{ slurm_rpmbuild_user }}" + mode: '0644' + +- name: rpmbuild pmix from source tarball + command: "rpmbuild -D \"_prefix {{ slurm_install_directory }}\" -ta pmix-{{ slurm_pmix_version }}.tar.bz2" + args: + chdir: "{{slurm_tmpdir}}" + become: true + become_user: "{{ slurm_rpmbuild_user }}" + +- name: Check RPM file + ansible.builtin.stat: + path: "{{ slurm_pmix_rpm_path }}" + register: slurm_pmix_rpm_path_stat diff --git a/slurm/tasks/rpmbuild_slurm.yaml b/slurm/tasks/rpmbuild_slurm.yaml new file mode 100644 index 0000000..36fa381 --- /dev/null +++ b/slurm/tasks/rpmbuild_slurm.yaml @@ -0,0 +1,122 @@ +--- +- name: Install required development packages + ansible.builtin.dnf: + name: "{{item}}" + state: present + enablerepo: "{{ slurm_el_repos }}" + loop: "{{ slurm_required_devel_packages }}" + when: ansible_distribution_major_version | int >= 8 + +- name: Download source bundle + ansible.builtin.get_url: + url: "https://download.schedmd.com/slurm/slurm-{{ slurm_version }}.tar.bz2" + dest: "{{ slurm_tmpdir }}/slurm-{{ slurm_version }}.tar.bz2" + become: true + become_user: "{{ slurm_rpmbuild_user }}" + +# If PMix is installed it need to be both +# the correct version and installed in the expected path +# Error Case 1 - slurm_enable_pmix is true and wrong pmix version is installed +- name: Check if PMix version is compatible + ansible.builtin.fail: + msg: "PMIx version {{ slurm_pmix_installed_version }} is not the target version {{ slurm_pmix_version }}" + when: slurm_enable_pmix and slurm_pmix_installed and (slurm_pmix_installed_version != slurm_pmix_version) + +# Error Case 2 - slurm_enable_pmix is true and PMix expected root path does not exist +- name: Fail if PMix expected root path does not exist + ansible.builtin.fail: + msg: "PMIx 
expected root path {{ slurm_pmix_expected_root_path }} does not exist. Please ensure PMIx is installed correctly." + when: slurm_enable_pmix and slurm_pmix_installed and not slurm_pmix_expected_root_path_stat.stat.exists + +# Error Case 3 - slurm_enable_pmix is true and PMix RPM path does not exist +- name: Fail if PMix RPM path does not exist + ansible.builtin.fail: + msg: "PMIx RPM path {{ slurm_pmix_rpm_path }} does not exist. Please ensure the RPM is available for installation." + when: slurm_enable_pmix and slurm_pmix_installed and not slurm_pmix_rpm_path_stat.stat.exists + +- name: debug out + debug: + msg: + - "slurm_enable_pmix: {{ slurm_enable_pmix }}" + - "slurm_pmix_installed_version: {{ slurm_pmix_installed_version }}" + +# Yum Install PMix RPM +- name: Install PMix RPM + ansible.builtin.yum: + name: "{{ slurm_pmix_rpm_path }}" + state: present + disable_gpg_check: true + when: slurm_enable_pmix + +- name: Check if PMix rpm exists + ansible.builtin.stat: + path: "{{ slurm_pmix_rpm_path }}" + register: slurm_pmix_rpm_path_stat + +- name: Set PMix root path + ansible.builtin.set_fact: + slurm_pmix_expected_root_path: "{{ slurm_install_directory }}/bin/pmix_info" + +- name: Check if PMix expected root path exists + ansible.builtin.stat: + path: "{{ slurm_pmix_expected_root_path }}" + register: slurm_pmix_expected_root_path_stat + +- name: Check if PMix is installed + ansible.builtin.set_fact: + slurm_pmix_installed: "{{ 'pmix' in ansible_facts.packages }}" + +- name: Check PMix Installed Version + ansible.builtin.set_fact: + slurm_pmix_installed_version: "{{ ansible_facts.packages['pmix'][0].version if slurm_pmix_installed else 'not installed' }}" + + +# Show some debug information +- name: Debug PMix Build vars + ansible.builtin.debug: + msg: + - "slurm_install_directory: {{ slurm_install_directory }} " + - "slurm_version {{ slurm_version }}" + - "slurm_pmix_version {{ slurm_pmix_version }}" + - " 
----------------------------------------------------------------------" + - "slurm_pmix_expected_root_path: {{ slurm_pmix_expected_root_path}}" + - "slurm_pmix_expected_root_path_stat.stat.exists: {{ slurm_pmix_expected_root_path_stat.stat.exists }}" + - " ----------------------------------------------------------------------" + - "slurm_pmix_rpm_path: {{ slurm_pmix_rpm_path }}" + - "slurm_pmix_rpm_path_stat.stat.exists: {{ slurm_pmix_rpm_path_stat.stat.exists }}" + - " ----------------------------------------------------------------------" + - "slurm_pmix_installed: {{ slurm_pmix_installed }}" + - "slurm_pmix_installed_version: {{ slurm_pmix_installed_version }}" + +- name: Base rpmbuild command + ansible.builtin.set_fact: + rpmbuild_cmd: "rpmbuild -D '_prefix {{ slurm_install_directory }}'" + +- name: rpmbuild pmix option default to "" + ansible.builtin.set_fact: + rpmbuild_pmix_option: "" + +- name: rpmbuild pmix option + ansible.builtin.set_fact: + rpmbuild_pmix_option: "-D '_with_pmix --with-pmix={{ slurm_install_directory }}'" + when: slurm_enable_pmix + +- name: rpmbuild slurmrestd option default to "" + ansible.builtin.set_fact: + rpmbuild_restd_option: "" + +- name: rpmbuild slurmrestd option + ansible.builtin.set_fact: + rpmbuild_restd_option: "--with slurmrestd --with jwt" + when: slurm_enable_restd + +- name: show build command + ansible.builtin.debug: + msg: "rpmbuild command: {{ rpmbuild_cmd }} {{ rpmbuild_pmix_option }} {{ rpmbuild_restd_option }} -ta slurm-{{ slurm_version }}.tar.bz2" + +- name: rpmbuild from source tarball + command: "{{ rpmbuild_cmd }} {{ rpmbuild_pmix_option }} {{ rpmbuild_restd_option }} -ta slurm-{{ slurm_version }}.tar.bz2" + args: + chdir: "{{slurm_tmpdir}}" + become: true + become_user: "{{ slurm_rpmbuild_user }}" \ No newline at end of file diff --git a/slurm/tasks/slurm_version_check.yaml b/slurm/tasks/slurm_version_check.yaml new file mode 100644 index 0000000..0a39c4b --- /dev/null +++ 
b/slurm/tasks/slurm_version_check.yaml @@ -0,0 +1,40 @@ +--- +- name: Set slurm download version + ansible.builtin.set_fact: + slurm_download_version: "{{ slurm_version | replace('.', '-')}}" + tags: + - slurm + - slurm_install_controller + - slurm_install_slurmd + - slurm_configure + +- name: Get slurm download version length + ansible.builtin.set_fact: + slurm_download_version_length: "{{ slurm_download_version.split('-') | length }}" + tags: + - slurm + - slurm_install_controller + - slurm_install_slurmd + - slurm_configure + +- name: Update slurm download version + ansible.builtin.set_fact: + slurm_download_version: "{{ slurm_download_version }}-1" + when: "slurm_download_version_length | int < 4" + tags: + - slurm + - slurm_install_controller + - slurm_install_slurmd + - slurm_configure + + +- name: Check for version greater then '24.05.8' + # We'll use this to build the slurmd and slurmctld command line opts + ansible.builtin.set_fact: + slurm_dash_d: "" + when: + "slurm_version is version(slurm_dasd_d_ver, '>=')" + tags: + - slurm + - slurm_install_controller + - slurm_install_slurmd \ No newline at end of file diff --git a/slurm/tasks/slurmctld.yaml b/slurm/tasks/slurmctld.yaml index 70a0791..bdcd9d7 100755 --- a/slurm/tasks/slurmctld.yaml +++ b/slurm/tasks/slurmctld.yaml @@ -122,6 +122,7 @@ tags: - slurm - slurm_install_controller + when: slurm_source_install - name: Start slurmctld service ansible.builtin.service: diff --git a/slurm/tasks/slurmd.yaml b/slurm/tasks/slurmd.yaml index c656b98..f875c3c 100644 --- a/slurm/tasks/slurmd.yaml +++ b/slurm/tasks/slurmd.yaml @@ -9,6 +9,25 @@ - slurm - slurm_install_slurmd - slurm_configure + when: slurm_source_install + + +- name: create slurmd sysconfig file + ansible.builtin.template: + src: templates/slurmd.sysconfig.j2 + dest: /etc/sysconfig/slurmd + owner: root + group: root + mode: "0644" + when: slurm_rpm_install + tags: + - slurm + - slurm_install_slurmd + +# In case another instance of slurmd.service is 
runnig +- name: Restart systemceld + systemd: + daemon_reexec: yes - name: Create slurm configuration directory ansible.builtin.file: diff --git a/slurm/tasks/slurmdbd.yaml b/slurm/tasks/slurmdbd.yaml index 1af6556..5d99e26 100644 --- a/slurm/tasks/slurmdbd.yaml +++ b/slurm/tasks/slurmdbd.yaml @@ -91,6 +91,7 @@ tags: - slurm - slurm_install_controller + when: slurm_source_install - name: Start slurmdbd service ansible.builtin.service: diff --git a/slurm/tasks/slurmrestd.yaml b/slurm/tasks/slurmrestd.yaml index 789d0f4..6b98cdc 100644 --- a/slurm/tasks/slurmrestd.yaml +++ b/slurm/tasks/slurmrestd.yaml @@ -9,6 +9,19 @@ tags: - slurm - slurm_install_slurmd + when: slurm_source_install + +- name: create slurmrestd service file + ansible.builtin.template: + src: templates/slurmrestd.sysconfig.j2 + dest: /etc/sysconfig/slurmrestd + owner: root + group: root + mode: "0644" + tags: + - slurm + - slurm_install_slurmd + when: slurm_rpm_install - name: Start slurmrestd service ansible.builtin.service: diff --git a/slurm/templates/slurm.repo.j2 b/slurm/templates/slurm.repo.j2 new file mode 100644 index 0000000..99737b2 --- /dev/null +++ b/slurm/templates/slurm.repo.j2 @@ -0,0 +1,5 @@ +[slurm_repo] +baseurl = {{ slurm_rpm_local_repo_url }} +enabled = 1 +gpgcheck = 0 +name = Local Slurm Repo \ No newline at end of file diff --git a/slurm/templates/slurmctld.service.j2 b/slurm/templates/slurmctld.service.j2 index 4c6a193..5237fef 100644 --- a/slurm/templates/slurmctld.service.j2 +++ b/slurm/templates/slurmctld.service.j2 @@ -12,7 +12,7 @@ User=slurm Group=slurm RuntimeDirectory=slurmctld RuntimeDirectoryMode=0755 -ExecStart={{slurm_install_symlink}}/sbin/slurmctld -D -s --systemd $SLURMCTLD_OPTIONS +ExecStart={{slurm_install_symlink}}/sbin/slurmctld {{ slurm_dash_d }} -s --systemd $SLURMCTLD_OPTIONS ExecReload=/bin/kill -HUP $MAINPID LimitNOFILE=65536 diff --git a/slurm/templates/slurmd.service.j2 b/slurm/templates/slurmd.service.j2 index c143cf4..82ddf72 100644 --- 
a/slurm/templates/slurmd.service.j2 +++ b/slurm/templates/slurmd.service.j2 @@ -10,7 +10,7 @@ EnvironmentFile=-/etc/sysconfig/slurmd EnvironmentFile=-/etc/default/slurmd RuntimeDirectory=slurm RuntimeDirectoryMode=0755 -ExecStart={{slurm_install_symlink}}/sbin/slurmd -D -s --conf-server {{slurm_controller_ip}}:{{slurm_controller_port}} --systemd $SLURMD_OPTIONS +ExecStart={{slurm_install_symlink}}/sbin/slurmd {{ slurm_dash_d | default('-D') }} -s --conf-server {{slurm_controller_ip}}:{{slurm_controller_port}} --systemd $SLURMD_OPTIONS ExecReload=/bin/kill -HUP $MAINPID KillMode=process LimitNOFILE=131072 diff --git a/slurm/templates/slurmd.sysconfig.j2 b/slurm/templates/slurmd.sysconfig.j2 new file mode 100644 index 0000000..0175279 --- /dev/null +++ b/slurm/templates/slurmd.sysconfig.j2 @@ -0,0 +1 @@ +SLURMD_OPTIONS="-s --conf-server {{slurm_controller_ip}}:{{slurm_controller_port}}" \ No newline at end of file diff --git a/slurm/templates/slurmrestd.sysconfig.j2 b/slurm/templates/slurmrestd.sysconfig.j2 new file mode 100644 index 0000000..b64c9e8 --- /dev/null +++ b/slurm/templates/slurmrestd.sysconfig.j2 @@ -0,0 +1 @@ +SLURMRESTD_OPTIONS="-u {{ slurm_restd_user }} -g {{ slurm_restd_user }} {{ slurm_restd_host }}:{{ slurm_restd_port }}" \ No newline at end of file diff --git a/slurm/vars/RedHat-9.yaml b/slurm/vars/RedHat-9.yaml index d946c05..01dd223 100644 --- a/slurm/vars/RedHat-9.yaml +++ b/slurm/vars/RedHat-9.yaml @@ -2,48 +2,58 @@ slurm_el_repos: crb slurm_required_packages: - - zlib-devel - zlib - bzip2 - - bzip2-devel - openssl - openssh-server - - git - - openssl-devel - pkgconfig - wget - python3 - epel-release - - "@Development tools" - - dbus-devel - kernel-headers - s-nail - lua + +slurm_required_devel_packages: + - zlib-devel + - bzip2-devel + - openssl-devel + - git + - epel-release + - "@Development tools" + - dbus-devel + - kernel-headers - lua-devel - pam-devel + - mariadb-devel + - jansson-devel + - libyaml-devel + - json-c-devel + - http-parser + - 
http-parser-devel + - libjwt-devel + - munge-devel + - munge-libs + - perl-ExtUtils-MakeMaker + - readline-devel + slurm_dbd_required_packages: - mariadb-server - - mariadb-devel - python3-PyMySQL slurm_jwt_required_packages: - jansson - - jansson-devel - - + slurm_restapi_required_packages: - libyaml - - libyaml-devel - json-c - - json-c-devel - - http-parser - - http-parser-devel + slurm_firewalld_packages: - python3-firewall -slurm_pmix_required_packages: +slurm_pmix_required_devel_packages: - "@Development tools" - libevent-devel - hwloc-devel