diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 07e679352..864027221 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -19,5 +19,5 @@ jobs: - name: Test run: | make precheck - PYTHONPATH="./mocks:./libs:./drivers/:./misc/fairlock" coverage3 run --branch --source='./drivers,./tests,./misc/fairlock' -m unittest discover -s tests -p "*.py" -v + PYTHONPATH="./mocks:./libs:./drivers/:./misc/fairlock" coverage3 run --branch --source='./drivers,./tests,./libs/,./misc/fairlock' -m unittest discover -s tests -p "*.py" -v coverage3 report --include='./*' diff --git a/libs/sm/cleanup.py b/libs/sm/cleanup.py index a7681f174..e75f0999f 100644 --- a/libs/sm/cleanup.py +++ b/libs/sm/cleanup.py @@ -3247,6 +3247,19 @@ def abort(srUuid, soft=False): return False +def run_gc(session, srUuid, dryRun, immediate=False): + try: + _gc(session, srUuid, dryRun, immediate=immediate) + return 0 + except AbortException: + Util.log("Aborted") + return 2 + except Exception: + Util.logException("gc") + Util.log("* * * * * SR %s: ERROR\n" % srUuid) + return 1 + + def gc(session, srUuid, inBackground, dryRun=False): """Garbage collect all deleted VDIs in SR "srUuid". Fork & return immediately if inBackground=True. @@ -3272,16 +3285,10 @@ def gc(session, srUuid, inBackground, dryRun=False): # because there is no other way to propagate them back at this # point - try: - _gc(None, srUuid, dryRun) - except AbortException: - Util.log("Aborted") - except Exception: - Util.logException("gc") - Util.log("* * * * * SR %s: ERROR\n" % srUuid) + run_gc(None, srUuid, dryRun) os._exit(0) else: - _gc(session, srUuid, dryRun, immediate=True) + os._exit(run_gc(session, srUuid, dryRun, immediate=True)) def start_gc(session, sr_uuid): @@ -3349,6 +3356,11 @@ def stop_gc_service(sr_uuid): util.SMlog(f"Failed to stop gc service `SMGC@{sr_uuid}`: `{stderr}`") +def wait_for_completion(sr_uuid): + while get_state(sr_uuid): + time.sleep(5) + + def gc_force(session, srUuid, force=False, dryRun=False, lockSR=False): """Garbage collect all deleted VDIs in SR "srUuid". The caller must ensure the SR lock is held. diff --git a/libs/sm/core/scsiutil.py b/libs/sm/core/scsiutil.py index 1750c2547..e65dba7d6 100644 --- a/libs/sm/core/scsiutil.py +++ b/libs/sm/core/scsiutil.py @@ -629,6 +629,9 @@ def refresh_devices_if_needed(primarydevice, SCSIid, currentcapacity): "current capacity." % devicesthatneedrefresh) + if any([getsize(x) != sg_readcap(x) for x in devices]): + raise util.SMException(f"Not all devices in {devices} agree on size and capacity") + def refresh_mapper_if_needed(primarydevice, SCSIid, currentcapacity): if "/dev/mapper/" in primarydevice \ and get_outdated_size_devices(currentcapacity, [primarydevice]): diff --git a/scripts/check-device-sharing b/scripts/check-device-sharing index c402ad100..eab8167f3 100755 --- a/scripts/check-device-sharing +++ b/scripts/check-device-sharing @@ -24,9 +24,9 @@ import os, sys if __name__ == "__main__": if len(sys.argv) != 2: - print "Usage:" - print " %s " % (sys.argv[0]) - print " -- return zero if the device (eg 'sda' 'sdb') is not in-use; non-zero otherwise" + print( "Usage:") + print( " %s " % (sys.argv[0])) + print( " -- return zero if the device (eg 'sda' 'sdb') is not in-use; non-zero otherwise") sys.exit(2) device = sys.argv[1] diff --git a/scripts/plugins/coalesce-leaf b/scripts/plugins/coalesce-leaf index 7c21505cd..ab8dba18d 100755 --- a/scripts/plugins/coalesce-leaf +++ b/scripts/plugins/coalesce-leaf @@ -111,7 +111,7 @@ def leaf_coalesce(session, coalesceable_vdis): cleanup.Util.logException("leaf_coalesce") return RET_ERROR_XAPI log_msg("Coalescing all in SR %s..." % sr_uuid) - cleanup.gc(session, sr_uuid, False) + cleanup.run_gc(session, sr_uuid, False, immediate=True) return RET_SUCCESS @@ -181,7 +181,11 @@ def vm_leaf_coalesce(session, vm_uuid): for sr_uuid in coalesceable_vdis: # do regular GC now to minimize downtime - cleanup.gc(session, sr_uuid, False) + cleanup.run_gc(session, sr_uuid, False, immediate=True) + + for sr_uuid in coalesceable_vdis: + # wait for the GC process for this SR to complete + cleanup.wait_for_completion(sr_uuid) suspended = False if vm_rec["power_state"] == "Running": @@ -200,7 +204,7 @@ def vm_leaf_coalesce(session, vm_uuid): session.xenapi.VM.resume(vm_ref, False, False) for sr_uuid in list(coalesceable_vdis.keys()): # cleans up any potential failures - cleanup.gc(session, sr_uuid, True) + cleanup.run_gc(session, sr_uuid, True, immediate=True) return ret, messages diff --git a/tests/test_cleanup.py b/tests/test_cleanup.py index ce8a7a41e..941e365e3 100644 --- a/tests/test_cleanup.py +++ b/tests/test_cleanup.py @@ -1,5 +1,6 @@ import errno import signal +import subprocess import unittest import unittest.mock as mock import uuid @@ -187,9 +188,10 @@ def test_term_handler(self): self.assertTrue(cleanup.SIGTERM) + @mock.patch('sm.cleanup.os._exit', autospec=True) @mock.patch('sm.cleanup._create_init_file', autospec=True) @mock.patch('sm.cleanup.SR', autospec=True) - def test_loop_exits_on_term(self, mock_init, mock_sr): + def test_loop_exits_on_term(self, mock_init, mock_sr, mock_exit): # Set the term signel cleanup.receiveSignal(signal.SIGTERM, None) mock_session = mock.MagicMock(name='MockSession') @@ -1814,9 +1816,10 @@ def test_gcloop_one_of_each(self, mock_init_file): mock_sr.coalesce.assert_called_with(vdis['vdi'], False) mock_sr.coalesceLeaf.assert_called_with(vdis['vdi'], False) + @mock.patch('sm.cleanup.os._exit', autospec=True) @mock.patch('sm.cleanup.Util') @mock.patch('sm.cleanup._gc', autospec=True) - def test_gc_foreground_is_immediate(self, mock_gc, mock_util): + def test_gc_foreground_is_immediate(self, mock_gc, mock_util, mock_exit): """ GC called in foreground will run immediate """ @@ -1831,6 +1834,42 @@ def test_gc_foreground_is_immediate(self, mock_gc, mock_util): mock_gc.assert_called_with(mock_session, sr_uuid, False, immediate=True) + @mock.patch('sm.cleanup.os._exit', autospec=True) + @mock.patch('sm.cleanup.Util') + @mock.patch('sm.cleanup._gc', autospec=True) + def test_gc_foreground_abort_exit_code(self, mock_gc, mock_util, mock_exit): + """ + GC called in foreground will run immediate + """ + ## Arrange + mock_session = mock.MagicMock(name='MockSession') + sr_uuid = str(uuid4()) + mock_gc.side_effect = cleanup.AbortException + + ## Act + cleanup.gc(mock_session, sr_uuid, inBackground=False) + + ## Assert + mock_exit.assert_called_once_with(2) + + @mock.patch('sm.cleanup.os._exit', autospec=True) + @mock.patch('sm.cleanup.Util') + @mock.patch('sm.cleanup._gc', autospec=True) + def test_gc_foreground_exception_exit_code(self, mock_gc, mock_util, mock_exit): + """ + GC called in foreground will run immediate + """ + ## Arrange + mock_session = mock.MagicMock(name='MockSession') + sr_uuid = str(uuid4()) + mock_gc.side_effect = Exception + + ## Act + cleanup.gc(mock_session, sr_uuid, inBackground=False) + + ## Assert + mock_exit.assert_called_once_with(1) + @mock.patch('sm.cleanup.os._exit', autospec=True) @mock.patch('sm.cleanup.daemonize', autospec=True) @mock.patch('sm.cleanup.Util') @@ -1849,7 +1888,7 @@ def test_gc_background_is_not_immediate( cleanup.gc(mock_session, sr_uuid, inBackground=True) ## Assert - mock_gc.assert_called_with(None, sr_uuid, False) + mock_gc.assert_called_with(None, sr_uuid, False, immediate=False) mock_daemonize.assert_called_with() def test_not_plugged(self): @@ -2064,3 +2103,52 @@ def forgetVdi(self, uuid): self.xapi_mock.forgetVDI.side_effect = forgetVdi self.mock_sr._finishInterruptedCoalesceLeaf(child_vdi_uuid, parent_vdi_uuid) + + +class TestService(unittest.TestCase): + + def setUp(self): + self.addCleanup(mock.patch.stopall) + run_patcher = mock.patch("sm.cleanup.subprocess.run", autospec=True) + self.mock_run = run_patcher.start() + + sleep_patcher = mock.patch("sm.cleanup.time.sleep", autospec=True) + self.mock_sleep = sleep_patcher.start() + + def test_wait_for_completion_noop(self): + # Arrange + sr_uuid = str(uuid4()) + sr_uuid_esc = sr_uuid.replace("-", "\\x2d") + self.mock_run.return_value = subprocess.CompletedProcess("", 0, b"unknown") + + # Act + cleanup.wait_for_completion(sr_uuid) + + # Assert + self.mock_run.assert_called_once_with( + ["/usr/bin/systemctl", "is-active", f"SMGC@{sr_uuid_esc}"], + stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True) + + def test_wait_for_completion_wait_2(self): + # Arrange + sr_uuid = str(uuid4()) + sr_uuid_esc = sr_uuid.replace("-", "\\x2d") + + activating_process = subprocess.CompletedProcess("", 0, b"activating") + finished_process = subprocess.CompletedProcess("", 0, b"unknown") + self.mock_run.side_effect = [activating_process, activating_process, finished_process] + + # Act + cleanup.wait_for_completion(sr_uuid) + + # Assert + self.mock_run.assert_has_calls([ + mock.call( + ["/usr/bin/systemctl", "is-active", f"SMGC@{sr_uuid_esc}"], + stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True), + mock.call( + ["/usr/bin/systemctl", "is-active", f"SMGC@{sr_uuid_esc}"], + stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True), + mock.call( + ["/usr/bin/systemctl", "is-active", f"SMGC@{sr_uuid_esc}"], + stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True)])