From ef5dc64f2bd8be64b1e812eb7c723755c4b284dd Mon Sep 17 00:00:00 2001 From: mprahl Date: Wed, 15 Nov 2017 17:20:24 -0500 Subject: [PATCH] Transition stale failed builds to the "garbage" state and untag their components --- .../builder/KojiModuleBuilder.py | 20 ++++++++++ module_build_service/builder/base.py | 10 +++-- module_build_service/config.py | 12 +++++- module_build_service/models.py | 3 ++ module_build_service/scheduler/consumer.py | 1 + module_build_service/scheduler/producer.py | 36 +++++++++++++++++ tests/test_scheduler/test_poller.py | 39 +++++++++++++++++++ 7 files changed, 117 insertions(+), 4 deletions(-) diff --git a/module_build_service/builder/KojiModuleBuilder.py b/module_build_service/builder/KojiModuleBuilder.py index 918ad998..81f6ae25 100644 --- a/module_build_service/builder/KojiModuleBuilder.py +++ b/module_build_service/builder/KojiModuleBuilder.py @@ -382,6 +382,26 @@ chmod 644 %buildroot/%_sysconfdir/rpm/macros.zz-modules self.koji_session.tagBuild(dest_tag, nvr) self.koji_session.multiCall(strict=True) + def untag_artifacts(self, artifacts): + """ + """ + dest_tag = self._get_tag(self.module_tag)['id'] + build_tag = self._get_tag(self.module_build_tag)['id'] + # Get the NVRs in the tags to make sure the builds exist and they're tagged before + # untagging them + dest_tagged_nvrs = self._get_tagged_nvrs(self.module_tag['name']) + build_tagged_nvrs = self._get_tagged_nvrs(self.module_build_tag['name']) + + self.koji_session.multicall = True + for nvr in artifacts: + if nvr in dest_tagged_nvrs: + log.info("%r untagging %r from %r" % (self, nvr, dest_tag)) + self.koji_session.untagBuild(dest_tag, nvr) + if nvr in build_tagged_nvrs: + log.info("%r untagging %r from %r" % (self, nvr, build_tag)) + self.koji_session.untagBuild(build_tag, nvr) + self.koji_session.multiCall(strict=True) + def wait_task(self, task_id): """ :param task_id diff --git a/module_build_service/builder/base.py b/module_build_service/builder/base.py index dd15e9e9..ed04967f 100644 --- a/module_build_service/builder/base.py +++ b/module_build_service/builder/base.py @@ -124,7 +124,7 @@ class GenericBuilder(six.with_metaclass(ABCMeta)): raise ValueError("Builder backend='%s' not recognized" % backend) @classmethod - def create_from_module(cls, session, module, config): + def create_from_module(cls, session, module, config, proxy_user=True): """ Creates new GenericBuilder instance based on the data from module and config and connects it to buildroot. @@ -132,11 +132,15 @@ class GenericBuilder(six.with_metaclass(ABCMeta)): :param session: SQLAlchemy databa session. :param module: module_build_service.models.ModuleBuild instance. :param config: module_build_service.config.Config instance. + :param proxy_user: a boolean that determines if the Koji session should use the module + owner as a proxy user. """ + owner = None + if proxy_user is True: + owner = module.owner components = [c.package for c in module.component_builds] builder = GenericBuilder.create( - module.owner, module, config.system, config, - tag_name=module.koji_tag, components=components) + owner, module, config.system, config, tag_name=module.koji_tag, components=components) groups = GenericBuilder.default_buildroot_groups(session, module) builder.buildroot_connect(groups) return builder diff --git a/module_build_service/config.py b/module_build_service/config.py index 70da0dba..8c664d0f 100644 --- a/module_build_service/config.py +++ b/module_build_service/config.py @@ -420,7 +420,12 @@ class Config(object): 'type': list, 'default': SUPPORTED_STRATEGIES, 'desc': ('The allowed module rebuild strategies. This is only used when ' - 'REBUILD_STRATEGY_ALLOW_OVERRIDE is True.')} + 'REBUILD_STRATEGY_ALLOW_OVERRIDE is True.')}, + 'cleanup_failed_builds_time': { + 'type': int, + 'default': 180, + 'desc': ('Time in days when to cleanup failed module builds and transition them to ' + 'the "garbage" state.')} } def __init__(self, conf_section_obj): @@ -607,3 +612,8 @@ class Config(object): .format(', '.join(SUPPORTED_STRATEGIES))) self._rebuild_strategies_allowed = strategies + + def _setifok_cleanup_failed_builds_time(self, num_days): + if num_days < 1: + raise ValueError('CLEANUP_FAILED_BUILDS_TIME must be set to 1 or more days') + self._cleanup_failed_builds_time = num_days diff --git a/module_build_service/models.py b/module_build_service/models.py index 01075e76..f753f42b 100644 --- a/module_build_service/models.py +++ b/module_build_service/models.py @@ -74,6 +74,9 @@ BUILD_STATES = { # larger compose. perhaps it is set by an external service that knows # about the Grand Plan. "ready": 5, + + # If the module has failed and was garbage collected by MBS + "garbage": 6 } INVERSE_BUILD_STATES = {v: k for k, v in BUILD_STATES.items()} diff --git a/module_build_service/scheduler/consumer.py b/module_build_service/scheduler/consumer.py index f59c0971..b5a80326 100644 --- a/module_build_service/scheduler/consumer.py +++ b/module_build_service/scheduler/consumer.py @@ -121,6 +121,7 @@ class MBSConsumer(fedmsg.consumers.FedmsgConsumer): "done"]: module_build_service.scheduler.handlers.modules.done, # XXX: DIRECT TRANSITION TO READY models.BUILD_STATES["ready"]: NO_OP, + models.BUILD_STATES["garbage"]: NO_OP } # Only one kind of repo change event, though... self.on_repo_change = module_build_service.scheduler.handlers.repos.done diff --git a/module_build_service/scheduler/producer.py b/module_build_service/scheduler/producer.py index 09e0e741..a2c865c0 100644 --- a/module_build_service/scheduler/producer.py +++ b/module_build_service/scheduler/producer.py @@ -50,6 +50,7 @@ class MBSProducer(PollingProducer): self.process_paused_module_builds(conf, session) self.trigger_new_repo_when_stalled(conf, session) self.delete_old_koji_targets(conf, session) + self.cleanup_stale_failed_builds(conf, session) except Exception: msg = 'Error in poller execution:' log.exception(msg) @@ -138,6 +139,41 @@ class MBSProducer(PollingProducer): elif conf.system == 'mock': pass + def cleanup_stale_failed_builds(self, conf, session): + """ Does various clean up tasks on stale failed module builds + :param conf: the MBS configuration object + :param session: a SQLAlchemy database session + """ + if conf.system == 'koji': + stale_date = datetime.utcnow() - timedelta( + days=conf.cleanup_failed_builds_time) + stale_module_builds = session.query(models.ModuleBuild).filter( + models.ModuleBuild.state == models.BUILD_STATES['failed'], + models.ModuleBuild.time_modified <= stale_date).all() + if stale_module_builds: + log.info('{0} stale failed module build(s) will be cleaned up'.format( + len(stale_module_builds))) + for module in stale_module_builds: + log.info('{0!r} is stale and is being cleaned up'.format(module)) + # Find completed artifacts in the stale build + artifacts = [c for c in module.component_builds + if c.state == koji.BUILD_STATES['COMPLETE']] + # Set proxy_user=False to not authenticate as the module owner for these tasks + builder = GenericBuilder.create_from_module( + session, module, conf, proxy_user=False) + builder.untag_artifacts([c.nvr for c in artifacts]) + # Mark the artifacts as untagged in the database + for c in artifacts: + c.tagged = False + c.tagged_in_final = False + session.add(c) + state_reason = ('The module was garbage collected since it has failed over {0}' + ' day(s) ago'.format(conf.cleanup_failed_builds_time)) + module.transition( + conf, models.BUILD_STATES['garbage'], state_reason=state_reason) + session.add(module) + session.commit() + def log_summary(self, session): log.info('Current status:') consumer = module_build_service.scheduler.consumer.get_global_consumer() diff --git a/tests/test_scheduler/test_poller.py b/tests/test_scheduler/test_poller.py index c03e81da..b9576b9a 100644 --- a/tests/test_scheduler/test_poller.py +++ b/tests/test_scheduler/test_poller.py @@ -337,3 +337,42 @@ class TestPoller(unittest.TestCase): # Ensure we did *not* process any of the non-waiting builds. self.assertEquals(consumer.incoming.qsize(), 0) + + def test_cleanup_stale_failed_builds(self, create_builder, koji_get_session, + global_consumer, dbg): + """ Test that one of the two module builds gets to the garbage state when running + cleanup_stale_failed_builds. + """ + module_build_one = models.ModuleBuild.query.get(1) + module_build_two = models.ModuleBuild.query.get(2) + module_build_one.state = models.BUILD_STATES['failed'] + module_build_one.time_modified = datetime.utcnow() + module_build_two.state = models.BUILD_STATES['failed'] + module_build_two.time_modified = datetime.utcnow() - timedelta( + days=conf.cleanup_failed_builds_time + 1) + db.session.add(module_build_one) + db.session.add(module_build_two) + db.session.commit() + db.session.expire(module_build_two) + + consumer = mock.MagicMock() + consumer.incoming = queue.Queue() + global_consumer.return_value = consumer + hub = mock.MagicMock() + poller = MBSProducer(hub) + + # Ensure the queue is empty before we start + self.assertEquals(consumer.incoming.qsize(), 0) + poller.cleanup_stale_failed_builds(conf, db.session) + db.session.refresh(module_build_two) + # Make sure module_build_two was transitioned to garbage + self.assertEqual(module_build_two.state, models.BUILD_STATES['garbage']) + state_reason = ('The module was garbage collected since it has failed over {0} day(s) ago' + .format(conf.cleanup_failed_builds_time)) + self.assertEqual(module_build_two.state_reason, state_reason) + # Make sure all the components are marked as untagged in the database + for component in module_build_two.component_builds: + self.assertFalse(component.tagged) + self.assertFalse(component.tagged_in_final) + # Make sure module_build_one stayed the same + self.assertEqual(module_build_one.state, models.BUILD_STATES['failed'])