diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 0593d1a40..bd32d16e1 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -161,9 +161,6 @@ jobs: --add-feature remote-execution \ --add-feature bmc \ ${{ matrix.iop == 'enabled' && '--add-feature iop' || '' }} - - name: Run health check - run: | - ./foremanctl health - name: Run tests run: | ./forge test --pytest-args="--certificate-source=${{ matrix.certificate_source }} --database-mode=${{ matrix.database }}" diff --git a/AGENTS.md b/AGENTS.md index 408db95d4..c4eea2734 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -1,8 +1,4 @@ -# AI Development Guide - -This file provides guidance to AI tools and agents when working with code in this repository. It is intentionally tool-agnostic -- it works with Claude Code, Cursor, GitHub Copilot, and any other AI assistant. - -## Project Overview +## Foremanctl foremanctl is a deployment tool for Foreman and Katello using Podman quadlets and Ansible. It provides a command-line interface built on top of `obsah` (an Ansible wrapper) for managing containerized Foreman deployments. @@ -126,8 +122,8 @@ Roles in `src/roles/` correspond to services and deployment stages: - Service roles: `foreman`, `pulp`, `candlepin`, `postgresql`, `redis`, `httpd` - Feature roles: `hammer`, `foreman_proxy` - Infrastructure: `certificates`, `systemd_target` -- Checks: `check_hostname`, `check_database_connection`, `check_system_requirements`, `check_subuid_subgid` - Lifecycle: `pre_install`, `post_install` +- Checks: `check_*` roles; see [documentation](docs/developer/checks.md) for more info ### Configuration System @@ -170,15 +166,23 @@ Tests use pytest with testinfra for infrastructure testing. Key fixtures in `tes Test files follow the pattern `tests/_test.py` and use testinfra's server fixture to execute commands and check system state. -## Developer Documentation - -- [How to Add a Feature](docs/developer/how-to-add-a-feature.md) - end-to-end feature development -- [Playbooks and Roles](docs/developer/playbooks-and-roles.md) - playbook structure, naming, metadata -- [Testing](docs/developer/testing.md) - test infrastructure, fixtures, patterns -- [Deployment Design](docs/developer/deployment.md) -- deployment architecture -- [Container Image Builds](docs/developer/container-image-builds.md) - image naming, registries -- [Development Environment](docs/developer/development-environment.md) - dev setup with git Foreman - -## Git Workflow - -Main branch: `master` +## Additional Documentation + +Developer docs: +- [Check roles](docs/developer/checks.md) - How to integrate check roles; update as checks are created/modified +- [Container Image Builds](docs/developer/container-image-builds.md) - Info on image naming, registries +- [Deployment Architecture](docs/developer/deployment.md) +- [Development Environment](docs/developer/development-environment.md) - Dev environment setup with Foreman from source +- [How to Add a Feature](docs/developer/how-to-add-a-feature.md) - End-to-end feature development +- [Playbooks and Roles](docs/developer/playbooks-and-roles.md) - Playbook structure, naming, metadata +- [Testing](docs/developer/testing.md) - Additional info on test infrastructure, fixtures, patterns + +User docs: +- [Certificates](docs/user/certificates.md) - Overview of certificate sources +- [Parameters](docs/user/parameters.md) - Map of Foreman installation parameters; update as parameters are created/modified + +- [CONTRIBUTING](CONTRIBUTING.md) - How to contribute +- [Development](DEVELOPMENT.md) - Foremanctl development overview +- [IOP](docs/iop.md) - Overview of insights on premise +- [Migration Guide](docs/migration-guide.md) - Migrating from foreman-installer to foremanctl +- [Release](RELEASE.md) - Info on Foremanctl releases \ No newline at end of file diff --git a/docs/developer/checks.md b/docs/developer/checks.md new file mode 100644 index 000000000..a6e9a14d3 --- /dev/null +++ b/docs/developer/checks.md @@ -0,0 +1,74 @@ +# Checks +Foremanctl contains several "check_*" roles which are used to assist playbooks in confirming the Foreman server is configured properly. Each check is implemented as an independent Ansible role, allowing playbooks to run specific checks or groups of checks as needed. + +Please update this file as check usage evolves. + +## Check Descriptions +### check_database_connection +- **Description**: Validates PostgreSQL connectivity for external Foreman and Candlepin databases using configured credentials and SSL settings. +- **Fail state**: Fails if database connection cannot be established. +- **Rationale**: Database access is required for many foremanctl operations; external databases must be reachable using the provided credentials. + +### check_duplicate_permissions +- **Description**: Queries the Foreman database for duplicate entries in the permissions table. +- **Fail state**: Fails if duplicate permissions are detected. +- **Rationale**: A validation was incorrectly removed which prevented users from creating duplicate Foreman permissions, causing upgrade failure. This check will need to be included until https://projects.theforeman.org/issues/38465 is addressed. + +### check_features +- **Description**: Ensures that all foremanctl features requested (via the `--feature` flag) are valid. +- **Fail state**: Fails when requested features are not recognized by foremanctl. +- **Rationale**: This check handles input sanitization for `--feature`. + +### check_foreman_api +- **Description**: Pings Foreman API (/api/v2/ping) to verify it responds. Verifies foreman_tasks service status is 'ok' when Katello/content feature is enabled. +- **Fail state**: Fails if ping fails or service status is not 'ok'. +- **Rationale**: An API connection is necessary for most Foreman operations. This check ensures there are no firewall or address issues. + +### check_foreman_tasks +- **Description**: Queries Foreman tasks for paused, errored tasks. +- **Fail state**: Fails if any errored tasks are found. +- **Rationale**: Errored Foreman tasks indicate issues which need to be addressed by the user. This can be anything from a typo to systemic issues. Consulting https://community.theforeman.org may provide insight. +- **Skipping**: This role can be skipped in `foremanctl health` by passing the `--skip-check-foreman-tasks` flag. Use only for operations where a failed Foreman task is expected or unavoidable. + +### check_host_facts_count +- **Description**: Ensures all hosts' facts counts are below a maximum threshold. +- **Fail state**: Fails if facts count exceeds threshold for any host. +- **Rationale**: Very high host facts count causes slow facts processing. See: https://access.redhat.com/solutions/4163891 for more information. + +### check_hostname +- **Description**: Validates FQDN is not 'localhost' or 'localhost.localdomain', contains at least one dot, and has no underscores. +- **Fail state**: Fails if FQDN conditions are not met. +- **Rationale**: Foreman requires a properly-configured server FQDN. This check ensures server hostname was modified from the default and approximates a valid FQDN. + +### check_services +- **Description**: Reports the status of all non-recurring services in the foreman.target systemd dependency tree. +- **Fail state**: Fails if any services are inactive. +- **Rationale**: Failed or stalled foreman services indicate an issue with the server. The user should check Foreman logs to find and address the problem. Inactive foreman services may indicate a systemctl configuration problem. + +### check_subuid_subgid +- **Description**: Verifies `/etc/subuid` and `/etc/subgid` have entries for the current user, which is required for rootless Podman on the Foreman server. +- **Fail state**: Fails if these conditions are not met. +- **Rationale**: Any Podman container operation run from a non-privileged user (pull, run, secret management) requires configuring subordinate user and group IDs. This check is unused at the moment but supports future work setting up Podman under a non-root user. + +### check_system_requirements +- **Description**: This check validates the Foreman system meets minimum hardware requirements determined by the user's tuning profile. +- **Fail state**: Fails if these conditions are not met or if the tuning profile cannot be loaded. +- **Rationale**: A system which does not meet minimum hardware specs may stall or fail due to Foreman resource usage. + +## Skipping checks + +Users may wish to skip certain checks on some playbooks, usually to work around a known issue on particularly sensitive checks. For example `health.yaml` (`foremanctl health`) includes a pattern for skipping `check_foreman_tasks`, as errored Foreman tasks which have not yet been cleared will fail the playbook. + +When allowing the user to skip a check, ensure it provides tangible value greater than the consequences of user misuse. For example, allowing the user to skip `check_system_requirements` might seem harmless, but could likely lead to situations in which usage of the parameter becomes a silent default; changes in Foreman minimum system requirements would not be enforced. Be sure to document why a skip is required in the check descriptions section above. + +Add skip parameters following this template: +``` +_skip__param: + help: | + Identify the purpose of the skipped check. + Inform the user how to repair Foreman to pass this check (or point them in the right direction). + Indicate why the user would need this check and warn against overuse. + parameter: --skip- + action: store_true + persist: false +``` diff --git a/src/filter_plugins/foremanctl.py b/src/filter_plugins/foremanctl.py index 25c9ebc1c..8d0d27e58 100644 --- a/src/filter_plugins/foremanctl.py +++ b/src/filter_plugins/foremanctl.py @@ -111,8 +111,10 @@ def available_foreman_proxy_plugins(_value): def has_feature(features, feature): - """Check if a feature is enabled - exact match or prefix (feature/).""" - return feature in features or any(f.startswith(feature + '/') for f in features) + """Check if a feature is enabled - exact match, prefix (feature/), or as a transitive dependency.""" + return (feature in features + or any(f.startswith(feature + '/') for f in features) + or feature in get_dependencies(list(features))) class FilterModule(object): diff --git a/src/playbooks/health/health.yaml b/src/playbooks/health/health.yaml index 2724d4e91..9d26e4800 100644 --- a/src/playbooks/health/health.yaml +++ b/src/playbooks/health/health.yaml @@ -7,14 +7,25 @@ - "../../vars/defaults.yml" - "../../vars/flavors/{{ flavor }}.yml" - "../../vars/base.yaml" + - "../../vars/database.yml" + - "../../vars/health.yml" tasks: - name: Execute health checks ansible.builtin.include_role: name: checks tasks_from: execute_check loop: + - check_hostname - check_services - check_foreman_api + - check_database_connection + - check_foreman_tasks + - check_host_facts_count + - check_duplicate_permissions + when: >- + (item != 'check_foreman_tasks' or not (health_skip_check_foreman_tasks_param | default(false) | bool)) + # Add additional skips here + - name: Report status of health checks ansible.builtin.fail: msg: "{{ checks_results }}" diff --git a/src/playbooks/health/metadata.obsah.yaml b/src/playbooks/health/metadata.obsah.yaml index f01ca3bbe..5733dc385 100644 --- a/src/playbooks/health/metadata.obsah.yaml +++ b/src/playbooks/health/metadata.obsah.yaml @@ -1,5 +1,14 @@ --- help: | - Run health checks on Foreman services + Check the health of your running Foreman server +variables: + health_skip_check_foreman_tasks_param: + help: | + Skip checking Foreman tasks for paused/errored tasks. + Errored tasks can be manually resumed or cancelled using the Web UI or hammer. + Use only for operations where a failed Foreman task is expected or unavoidable. + parameter: --skip-check-foreman-tasks + action: store_true + persist: false include: - _database_mode diff --git a/src/roles/check_duplicate_permissions/tasks/main.yaml b/src/roles/check_duplicate_permissions/tasks/main.yaml new file mode 100644 index 000000000..a824b3eb3 --- /dev/null +++ b/src/roles/check_duplicate_permissions/tasks/main.yaml @@ -0,0 +1,27 @@ +--- +# TODO: Remove this role one release after https://projects.theforeman.org/issues/38465 is addressed. +- name: Query for duplicate permissions + community.postgresql.postgresql_query: + db: "{{ foreman_database_name }}" + login_user: "{{ foreman_database_user }}" + login_password: "{{ foreman_database_password }}" + login_host: "{{ foreman_database_host }}" + query: | + SELECT id, name + FROM permissions p + WHERE (SELECT count(name) FROM permissions pr WHERE p.name = pr.name) > 1 + ORDER BY name, id + register: check_duplicate_permissions_result + changed_when: false + +- name: Check for duplicate permissions + when: not (health_fix | default(false) | bool) + ansible.builtin.assert: + that: + - check_duplicate_permissions_result.rowcount == 0 + fail_msg: >- + Found {{ check_duplicate_permissions_result.rowcount }} duplicate permission(s) in database: + {{ check_duplicate_permissions_result.query_result | map(attribute='name') | unique | join(', ') }} + + Duplicate permissions can cause issues with role-based access control and Foreman upgrades. + Please resolve the above duplicate permissions. diff --git a/src/roles/check_foreman_api/tasks/main.yaml b/src/roles/check_foreman_api/tasks/main.yaml index 9da24b007..1d2eede6e 100644 --- a/src/roles/check_foreman_api/tasks/main.yaml +++ b/src/roles/check_foreman_api/tasks/main.yaml @@ -14,5 +14,6 @@ fail_msg: "Foreman tasks status: {{ check_foreman_api_ping.json.results.katello.services.foreman_tasks.status | default('unknown') }}" success_msg: "Foreman tasks status: ok" when: - - enabled_features | has_feature('content') + - enabled_features | has_feature('tasks') + - enabled_features | has_feature('katello') - check_foreman_api_ping.json.results.katello is defined diff --git a/src/roles/check_foreman_tasks/tasks/main.yaml b/src/roles/check_foreman_tasks/tasks/main.yaml new file mode 100644 index 000000000..386d432c8 --- /dev/null +++ b/src/roles/check_foreman_tasks/tasks/main.yaml @@ -0,0 +1,22 @@ +--- +- name: Query foreman tasks for errors + community.postgresql.postgresql_query: + db: "{{ foreman_database_name }}" + login_user: "{{ foreman_database_user }}" + login_password: "{{ foreman_database_password }}" + login_host: "{{ foreman_database_host }}" + query: | + SELECT count(*) AS count + FROM foreman_tasks_tasks + WHERE state IN ('paused') AND result IN ('error') + register: check_foreman_tasks_error_query_result + changed_when: false + when: enabled_features | has_feature('tasks') + +- name: Check for errored foreman tasks + ansible.builtin.assert: + that: + - check_foreman_tasks_error_query_result.query_result[0].count | int == 0 + fail_msg: "{{ check_foreman_tasks_error_query_result.query_result[0].count }} foreman tasks with errors" + success_msg: "No foreman tasks with errors found" + when: enabled_features | has_feature('tasks') diff --git a/src/roles/check_host_facts_count/defaults/main.yml b/src/roles/check_host_facts_count/defaults/main.yml new file mode 100644 index 000000000..7be33b7d9 --- /dev/null +++ b/src/roles/check_host_facts_count/defaults/main.yml @@ -0,0 +1,2 @@ +--- +check_host_facts_count_max_per_host: 10000 diff --git a/src/roles/check_host_facts_count/tasks/main.yaml b/src/roles/check_host_facts_count/tasks/main.yaml new file mode 100644 index 000000000..fda296329 --- /dev/null +++ b/src/roles/check_host_facts_count/tasks/main.yaml @@ -0,0 +1,27 @@ +--- +- name: Query hosts for facts count + community.postgresql.postgresql_query: + db: "{{ foreman_database_name }}" + login_user: "{{ foreman_database_user }}" + login_password: "{{ foreman_database_password }}" + login_host: "{{ foreman_database_host }}" + query: | + SELECT fact_values.host_id, count(fact_values.id) as count + FROM fact_values + GROUP BY fact_values.host_id + ORDER BY count DESC + register: check_host_facts_count_result + changed_when: false + +- name: Check facts count is below threshold + when: check_host_facts_count_result.rowcount > 0 + vars: + hosts_over_limit: "{{ check_host_facts_count_result.query_result | selectattr('count', 'ge', check_host_facts_count_max_per_host) | list }}" + ansible.builtin.assert: + that: + - hosts_over_limit | length == 0 + fail_msg: >- + {{ hosts_over_limit | length }} host(s) exceed the facts count limit of {{ check_host_facts_count_max_per_host }}: + {{ hosts_over_limit | map(attribute='host_id') | join(', ') }} + + This can cause slow fact processing. See: https://access.redhat.com/solutions/4163891 diff --git a/src/roles/check_services/tasks/main.yaml b/src/roles/check_services/tasks/main.yaml index 2d016a967..3b3d995bf 100644 --- a/src/roles/check_services/tasks/main.yaml +++ b/src/roles/check_services/tasks/main.yaml @@ -1,66 +1,44 @@ --- -- name: Gather service facts - ansible.builtin.service_facts: +- name: Get all Foreman services as JSON + ansible.builtin.shell: + cmd: systemctl list-units --output=json --all $(systemctl list-dependencies --plain foreman.target) + register: check_services_all_json + changed_when: false + tags: + - skip_ansible_lint # Skipping linting; systemctl JSON output not available via systemd module -- name: Check core services are running - ansible.builtin.assert: - that: - - "ansible_facts.services[item + '.service'] is defined" - - "ansible_facts.services[item + '.service']['state'] == 'running'" - fail_msg: "Service {{ item }} is not running" - success_msg: "Service {{ item }} is running" - loop: - - foreman - - httpd - - redis +- name: Filter out foreman-recurring services + ansible.builtin.set_fact: + check_services_all_list: >- + {{ + (check_services_all_json.stdout | from_json) | + selectattr('unit', 'search', '\.service$') | + rejectattr('unit', 'search', 'foreman-recurring@') | + list + }} -- name: Check PostgreSQL service is running - ansible.builtin.assert: - that: - - "ansible_facts.services['postgresql.service'] is defined" - - "ansible_facts.services['postgresql.service']['state'] == 'running'" - fail_msg: "Service postgresql is not running" - success_msg: "Service postgresql is running" - when: database_mode | default('internal') == 'internal' +- name: Determine Foreman services which are inactive + ansible.builtin.set_fact: + check_services_not_running_list: >- + {{ + check_services_all_list | + rejectattr('active', 'equalto', 'active') | + list + }} -- name: Check dynflow services are running +- name: Verify all Foreman services are active ansible.builtin.assert: that: - - "ansible_facts.services[item + '.service'] is defined" - - "ansible_facts.services[item + '.service']['state'] == 'running'" - fail_msg: "Service {{ item }} is not running" - success_msg: "Service {{ item }} is running" - loop: - - dynflow-sidekiq@orchestrator - - dynflow-sidekiq@worker - - dynflow-sidekiq@worker-hosts-queue + - check_services_not_running_list | length == 0 + fail_msg: |- + Some services are not running: -- name: Check Pulp services are running - ansible.builtin.assert: - that: - - "ansible_facts.services[item + '.service'] is defined" - - "ansible_facts.services[item + '.service']['state'] == 'running'" - fail_msg: "Service {{ item }} is not running" - success_msg: "Service {{ item }} is running" - loop: - - pulp-api - - pulp-content - when: enabled_features | has_feature('content') + {% for service in check_services_not_running_list %} + - {{ service.unit.split('.')[0] }}: {{ service.active }} ({{ service.sub }}) + {% endfor %} + success_msg: |- + All services are running: -- name: Check Candlepin service is running - ansible.builtin.assert: - that: - - "ansible_facts.services['candlepin.service'] is defined" - - "ansible_facts.services['candlepin.service']['state'] == 'running'" - fail_msg: "Service candlepin is not running" - success_msg: "Service candlepin is running" - when: enabled_features | has_feature('content') - -- name: Check Foreman Proxy service is running - ansible.builtin.assert: - that: - - "ansible_facts.services['foreman-proxy.service'] is defined" - - "ansible_facts.services['foreman-proxy.service']['state'] == 'running'" - fail_msg: "Service foreman-proxy is not running" - success_msg: "Service foreman-proxy is running" - when: enabled_features | has_feature('foreman-proxy') + {% for service in check_services_all_list %} + - {{ service.unit.split('.')[0] }}: {{ service.active }} + {% endfor %} diff --git a/tests/health_test.py b/tests/health_test.py new file mode 100644 index 000000000..0087b2fcc --- /dev/null +++ b/tests/health_test.py @@ -0,0 +1,55 @@ +import subprocess + +import pytest + + +@pytest.fixture +def errored_foreman_task(database): + """Create an errored foreman task for testing, cleanup after test""" + + database.run( + "podman exec postgresql psql -U foreman -d foreman -c " + "\"DELETE FROM foreman_tasks_tasks WHERE label = 'test_errored_task'\"" + ) + database.run( + "podman exec postgresql psql -U foreman -d foreman -c " + "\"INSERT INTO foreman_tasks_tasks (id, type, label, state, result, started_at, ended_at, state_updated_at) " + "VALUES (gen_random_uuid(), 'Actions::Test::Task', 'test_errored_task', 'paused', 'error', NOW(), NOW(), NOW())\"" + ) + + yield + + # cleanup + database.run( + "podman exec postgresql psql -U foreman -d foreman -c " + "\"DELETE FROM foreman_tasks_tasks WHERE label = 'test_errored_task'\"" + ) + + +def test_health_checks_pass(): + """Verify health checks pass on a working deployment""" + + subprocess.check_call( + ['./foremanctl', 'health', '--skip-check-foreman-tasks'] + ) + + +def test_foreman_tasks_check_detects_errors(errored_foreman_task): + """Verify foreman tasks check detects errored tasks and fails""" + + result = subprocess.run( + ['./foremanctl', 'health'], + capture_output=True, + text=True + ) + output = result.stdout + result.stderr + assert result.returncode != 0 + assert 'foreman task' in output.lower() and 'error' in output.lower() + + +def test_foreman_tasks_skipped(errored_foreman_task): + """Verify foreman tasks check is skipped when passed the `--skip-check-foreman-tasks` param""" + + subprocess.check_call( + ['./foremanctl', 'health', '--skip-check-foreman-tasks'] + ) diff --git a/tests/unit/check_role_test.py b/tests/unit/check_role_test.py new file mode 100644 index 000000000..c7ba35990 --- /dev/null +++ b/tests/unit/check_role_test.py @@ -0,0 +1,31 @@ +import os + +import yaml + +TEST_DIR = os.path.dirname(os.path.abspath(__file__)) +SRC_DIR = os.path.abspath(os.path.join(TEST_DIR, '..', '..', 'src')) +ROLES_DIR = os.path.join(SRC_DIR, 'roles') + + +def ensure_role_has_feature_guards(check_role, features, tasks=None): + """Ensure selected top-level tasks in role run only when all required features are present""" + role_path = os.path.join(ROLES_DIR, check_role) + main_yaml = os.path.join(role_path, 'tasks', 'main.yaml') + with open(main_yaml, 'r') as f: + all_tasks = yaml.safe_load(f) + filtered_tasks = [t for t in all_tasks if tasks is None or t.get('name') in tasks] + for task in filtered_tasks: + assert 'when' in task, f"Task '{task.get('name')}' missing when condition" + when_condition = task['when'] + when_conditions = when_condition if isinstance(when_condition, list) else [when_condition] + expected_conditions = [f"enabled_features | has_feature('{feature}')" for feature in features] + assert set(expected_conditions) <= set(when_conditions), \ + f"Task '{task.get('name')}' missing required feature guard(s)." + + +def test_check_foreman_api_has_feature_guards(): + ensure_role_has_feature_guards('check_foreman_api', ['tasks', 'katello'], ["Check Foreman tasks status"]) + + +def test_check_foreman_tasks_has_feature_guards(): + ensure_role_has_feature_guards('check_foreman_tasks', ['tasks'])