Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 38 additions & 0 deletions unit_tests/charm_tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,7 @@ def test_enable_hugepages_vfio_on_hvs_in_vms(self):
self.patch_object(test_utils.zaza.utilities.machine_os,
'enable_vfio_unsafe_noiommu_mode')
self.patch_object(test_utils.model, 'wait_for_application_states')
self.patch_target('_wait_for_juju_symlinks_after_reboot')

nr_hugepages = 4
unit = mock.MagicMock()
Expand All @@ -239,6 +240,8 @@ def test_enable_hugepages_vfio_on_hvs_in_vms(self):
unit,
nr_hugepages,
model_name=self.target.model_name)
self._wait_for_juju_symlinks_after_reboot.assert_called_once_with(
unit.name)
self.enable_vfio_unsafe_noiommu_mode.assert_called_once_with(
unit,
model_name=self.target.model_name)
Expand All @@ -256,6 +259,7 @@ def test_enable_hugepages_vfio_on_hvs_in_vms_kvm_kernel(self):
self.patch_object(test_utils.zaza.utilities.machine_os,
'enable_vfio_unsafe_noiommu_mode')
self.patch_object(test_utils.model, 'wait_for_application_states')
self.patch_target('_wait_for_juju_symlinks_after_reboot')

nr_hugepages = 4
unit = mock.MagicMock()
Expand Down Expand Up @@ -286,6 +290,8 @@ def test_enable_hugepages_vfio_on_hvs_in_vms_kvm_kernel(self):
unit,
nr_hugepages,
model_name=self.target.model_name)
self._wait_for_juju_symlinks_after_reboot.assert_called_once_with(
unit.name)
self.enable_vfio_unsafe_noiommu_mode.assert_called_once_with(
unit,
model_name=self.target.model_name)
Expand All @@ -310,6 +316,7 @@ def test_enable_hugepages_vfio_on_hvs_in_vms_recover_unit_error(self):
'enable_vfio_unsafe_noiommu_mode')
self.patch_object(test_utils.model, 'wait_for_application_states')
self.patch_object(test_utils.model, 'resolve_units')
self.patch_target('_wait_for_juju_symlinks_after_reboot')

nr_hugepages = 4
unit = mock.MagicMock()
Expand Down Expand Up @@ -350,6 +357,37 @@ def test_enable_hugepages_vfio_on_hvs_in_vms_recover_unit_error(self):
]
)

def test_wait_for_juju_symlinks_after_reboot(self):
    """Verify the grep command issued while waiting for Juju symlinks.

    The helper is expected to run exactly one remote command on the unit
    when the first attempt succeeds, passing the model name through and
    requesting fatal behaviour on a non-zero exit.
    """
    unit_name = 'ovn-chassis/0'
    model_name = 'zaza-123'
    self.patch_object(test_utils.zaza.utilities.juju, 'remote_run')
    self.target.model_name = model_name

    self.target._wait_for_juju_symlinks_after_reboot(unit_name)

    # The unit name appears in the pattern with '/' replaced by '-'.
    expected_cmd = (
        "grep -qPz '(?s)Reboot.*?\\n.*?symlinks.*{}' "
        "/var/log/juju/machine-*.log".format('ovn-chassis-0'))
    self.remote_run.assert_called_once_with(
        unit_name,
        expected_cmd,
        model_name=model_name,
        fatal=True)

def test_wait_for_juju_symlinks_after_reboot_retries(self):
    """Verify the wait helper tries again after a failed remote command.

    The mocked remote command fails once and then succeeds, so exactly
    two invocations are expected.
    """
    # NOTE(review): the retry wait is not patched here, so this test
    # presumably sleeps for real between the two attempts -- confirm.
    self.patch_object(test_utils.zaza.utilities.juju, 'remote_run')
    self.target.model_name = 'zaza-123'

    first_failure = test_utils.zaza.model.CommandRunFailed(
        'grep', {'Code': '1'})
    self.remote_run.side_effect = [first_failure, None]

    self.target._wait_for_juju_symlinks_after_reboot('ovn-chassis/0')

    self.assertEqual(self.remote_run.call_count, 2)


class TestOpenStackBaseTest(ut_utils.BaseTestCase):

Expand Down
35 changes: 35 additions & 0 deletions zaza/openstack/charm_tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -719,6 +719,39 @@ def assert_unit_cpu_topology(self, unit, nr_1g_hugepages):
mbtotal,
self.assert_unit_cpu_topology.__doc__))

def _wait_for_juju_symlinks_after_reboot(self, unit_name):
    """Block until Juju has logged a reboot and symlink recreation.

    A reboot triggered by Juju may complete slightly before the symlinks
    for the unit tools are recreated. Touching the unit in that window
    may push it into an error state
    (ref: https://launchpad.net/bugs/2077936).

    The Juju machine log on the unit is searched with ``grep`` for a
    reboot entry followed by a symlinks entry that mentions the unit. The
    search is retried for up to 30 attempts with an exponentially growing
    wait between attempts, until the remote command succeeds.

    :param unit_name: Name of the unit to wait for (e.g. 'ovn-chassis/0')
    :type unit_name: str
    """
    # The pattern is formatted with '-' in place of the '/' in the unit
    # name.
    log_unit_tag = unit_name.replace('/', '-')
    cmd_template = (
        'grep -qPz \'(?s)Reboot.*?\\n.*?symlinks.*{}\' '
        '/var/log/juju/machine-*.log')
    grep_cmd = cmd_template.format(log_unit_tag)

    logging.info(
        'Waiting for Juju reboot and symlink recreation to be '
        'logged for {}'.format(unit_name))

    retryer = tenacity.Retrying(
        stop=tenacity.stop_after_attempt(30),
        wait=tenacity.wait_exponential(multiplier=1, min=2, max=30))
    for attempt in retryer:
        with attempt:
            # fatal=True presumably turns a non-zero grep exit into an
            # exception, which is what triggers the next attempt.
            zaza.utilities.juju.remote_run(
                unit_name, grep_cmd,
                model_name=self.model_name, fatal=True)

    logging.info(
        'Juju reboot and symlink recreation confirmed '
        'for {}'.format(unit_name))

def enable_hugepages_vfio_on_hvs_in_vms(self, nr_1g_hugepages):
"""Enable hugepages and unsafe VFIO NOIOMMU on virtual hypervisors."""
for unit in model.get_units(
Expand Down Expand Up @@ -755,12 +788,14 @@ def enable_hugepages_vfio_on_hvs_in_vms(self, nr_1g_hugepages):
try:
zaza.utilities.machine_os.enable_hugepages(
unit, nr_1g_hugepages, model_name=self.model_name)
self._wait_for_juju_symlinks_after_reboot(unit.name)
except zaza.model.UnitError:
logging.warn(f'Unit {unit.name} went into error state during'
' huge pages enablement. Attempting to recover.'
' Possible cause:'
' https://bugs.launchpad.net/juju/+bug/2077936')
zaza.model.resolve_units()
self._wait_for_juju_symlinks_after_reboot(unit.name)

try:
logging.info('Enabling unsafe VFIO NOIOMMU mode on {}'
Expand Down
Loading