Transition stale failed builds to the "garbage" state and untag their components

This commit is contained in:
mprahl
2017-11-15 17:20:24 -05:00
parent 9890277620
commit ef5dc64f2b
7 changed files with 117 additions and 4 deletions

View File

@@ -382,6 +382,26 @@ chmod 644 %buildroot/%_sysconfdir/rpm/macros.zz-modules
self.koji_session.tagBuild(dest_tag, nvr)
self.koji_session.multiCall(strict=True)
def untag_artifacts(self, artifacts):
"""
"""
dest_tag = self._get_tag(self.module_tag)['id']
build_tag = self._get_tag(self.module_build_tag)['id']
# Get the NVRs in the tags to make sure the builds exist and they're tagged before
# untagging them
dest_tagged_nvrs = self._get_tagged_nvrs(self.module_tag['name'])
build_tagged_nvrs = self._get_tagged_nvrs(self.module_build_tag['name'])
self.koji_session.multicall = True
for nvr in artifacts:
if nvr in dest_tagged_nvrs:
log.info("%r untagging %r from %r" % (self, nvr, dest_tag))
self.koji_session.untagBuild(dest_tag, nvr)
if nvr in build_tagged_nvrs:
log.info("%r untagging %r from %r" % (self, nvr, build_tag))
self.koji_session.untagBuild(build_tag, nvr)
self.koji_session.multiCall(strict=True)
def wait_task(self, task_id):
"""
:param task_id

View File

@@ -124,7 +124,7 @@ class GenericBuilder(six.with_metaclass(ABCMeta)):
raise ValueError("Builder backend='%s' not recognized" % backend)
@classmethod
def create_from_module(cls, session, module, config):
def create_from_module(cls, session, module, config, proxy_user=True):
"""
Creates new GenericBuilder instance based on the data from module
and config and connects it to buildroot.
@@ -132,11 +132,15 @@ class GenericBuilder(six.with_metaclass(ABCMeta)):
:param session: SQLAlchemy databa session.
:param module: module_build_service.models.ModuleBuild instance.
:param config: module_build_service.config.Config instance.
:param proxy_user: a boolean that determines if the Koji session should use the module
owner as a proxy user.
"""
owner = None
if proxy_user is True:
owner = module.owner
components = [c.package for c in module.component_builds]
builder = GenericBuilder.create(
module.owner, module, config.system, config,
tag_name=module.koji_tag, components=components)
owner, module, config.system, config, tag_name=module.koji_tag, components=components)
groups = GenericBuilder.default_buildroot_groups(session, module)
builder.buildroot_connect(groups)
return builder

View File

@@ -420,7 +420,12 @@ class Config(object):
'type': list,
'default': SUPPORTED_STRATEGIES,
'desc': ('The allowed module rebuild strategies. This is only used when '
'REBUILD_STRATEGY_ALLOW_OVERRIDE is True.')}
'REBUILD_STRATEGY_ALLOW_OVERRIDE is True.')},
'cleanup_failed_builds_time': {
'type': int,
'default': 180,
'desc': ('Time in days when to cleanup failed module builds and transition them to '
'the "garbage" state.')}
}
def __init__(self, conf_section_obj):
@@ -607,3 +612,8 @@ class Config(object):
.format(', '.join(SUPPORTED_STRATEGIES)))
self._rebuild_strategies_allowed = strategies
def _setifok_cleanup_failed_builds_time(self, num_days):
if num_days < 1:
raise ValueError('CLEANUP_FAILED_BUILDS_TIME must be set to 1 or more days')
self._cleanup_failed_builds_time = num_days

View File

@@ -74,6 +74,9 @@ BUILD_STATES = {
# larger compose. perhaps it is set by an external service that knows
# about the Grand Plan.
"ready": 5,
# If the module has failed and was garbage collected by MBS
"garbage": 6
}
INVERSE_BUILD_STATES = {v: k for k, v in BUILD_STATES.items()}

View File

@@ -121,6 +121,7 @@ class MBSConsumer(fedmsg.consumers.FedmsgConsumer):
"done"]: module_build_service.scheduler.handlers.modules.done,
# XXX: DIRECT TRANSITION TO READY
models.BUILD_STATES["ready"]: NO_OP,
models.BUILD_STATES["garbage"]: NO_OP
}
# Only one kind of repo change event, though...
self.on_repo_change = module_build_service.scheduler.handlers.repos.done

View File

@@ -50,6 +50,7 @@ class MBSProducer(PollingProducer):
self.process_paused_module_builds(conf, session)
self.trigger_new_repo_when_stalled(conf, session)
self.delete_old_koji_targets(conf, session)
self.cleanup_stale_failed_builds(conf, session)
except Exception:
msg = 'Error in poller execution:'
log.exception(msg)
@@ -138,6 +139,41 @@ class MBSProducer(PollingProducer):
elif conf.system == 'mock':
pass
def cleanup_stale_failed_builds(self, conf, session):
""" Does various clean up tasks on stale failed module builds
:param conf: the MBS configuration object
:param session: a SQLAlchemy database session
"""
if conf.system == 'koji':
stale_date = datetime.utcnow() - timedelta(
days=conf.cleanup_failed_builds_time)
stale_module_builds = session.query(models.ModuleBuild).filter(
models.ModuleBuild.state == models.BUILD_STATES['failed'],
models.ModuleBuild.time_modified <= stale_date).all()
if stale_module_builds:
log.info('{0} stale failed module build(s) will be cleaned up'.format(
len(stale_module_builds)))
for module in stale_module_builds:
log.info('{0!r} is stale and is being cleaned up'.format(module))
# Find completed artifacts in the stale build
artifacts = [c for c in module.component_builds
if c.state == koji.BUILD_STATES['COMPLETE']]
# Set proxy_user=False to not authenticate as the module owner for these tasks
builder = GenericBuilder.create_from_module(
session, module, conf, proxy_user=False)
builder.untag_artifacts([c.nvr for c in artifacts])
# Mark the artifacts as untagged in the database
for c in artifacts:
c.tagged = False
c.tagged_in_final = False
session.add(c)
state_reason = ('The module was garbage collected since it has failed over {0}'
' day(s) ago'.format(conf.cleanup_failed_builds_time))
module.transition(
conf, models.BUILD_STATES['garbage'], state_reason=state_reason)
session.add(module)
session.commit()
def log_summary(self, session):
log.info('Current status:')
consumer = module_build_service.scheduler.consumer.get_global_consumer()

View File

@@ -337,3 +337,42 @@ class TestPoller(unittest.TestCase):
# Ensure we did *not* process any of the non-waiting builds.
self.assertEquals(consumer.incoming.qsize(), 0)
def test_cleanup_stale_failed_builds(self, create_builder, koji_get_session,
global_consumer, dbg):
""" Test that one of the two module builds gets to the garbage state when running
cleanup_stale_failed_builds.
"""
module_build_one = models.ModuleBuild.query.get(1)
module_build_two = models.ModuleBuild.query.get(2)
module_build_one.state = models.BUILD_STATES['failed']
module_build_one.time_modified = datetime.utcnow()
module_build_two.state = models.BUILD_STATES['failed']
module_build_two.time_modified = datetime.utcnow() - timedelta(
days=conf.cleanup_failed_builds_time + 1)
db.session.add(module_build_one)
db.session.add(module_build_two)
db.session.commit()
db.session.expire(module_build_two)
consumer = mock.MagicMock()
consumer.incoming = queue.Queue()
global_consumer.return_value = consumer
hub = mock.MagicMock()
poller = MBSProducer(hub)
# Ensure the queue is empty before we start
self.assertEquals(consumer.incoming.qsize(), 0)
poller.cleanup_stale_failed_builds(conf, db.session)
db.session.refresh(module_build_two)
# Make sure module_build_two was transitioned to garbage
self.assertEqual(module_build_two.state, models.BUILD_STATES['garbage'])
state_reason = ('The module was garbage collected since it has failed over {0} day(s) ago'
.format(conf.cleanup_failed_builds_time))
self.assertEqual(module_build_two.state_reason, state_reason)
# Make sure all the components are marked as untagged in the database
for component in module_build_two.component_builds:
self.assertFalse(component.tagged)
self.assertFalse(component.tagged_in_final)
# Make sure module_build_one stayed the same
self.assertEqual(module_build_one.state, models.BUILD_STATES['failed'])