Files
fm-orchestrator/module_build_service/utils.py

548 lines
20 KiB
Python

# Copyright (c) 2016 Red Hat, Inc.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
#
# Written by Ralph Bean <rbean@redhat.com>
# Matt Prahl <mprahl@redhat.com>
""" Utility functions for module_build_service. """
import re
import functools
import time
import shutil
import tempfile
import os
import logging
import modulemd
from flask import request, url_for
from datetime import datetime
from module_build_service import log, models
from module_build_service.errors import ValidationError, UnprocessableEntity
from module_build_service import conf, db
from module_build_service.errors import (Unauthorized, Conflict)
import module_build_service.messaging
from multiprocessing.dummy import Pool as ThreadPool
def retry(timeout=conf.net_timeout, interval=conf.net_retry_interval, wait_on=Exception):
""" A decorator that allows to retry a section of code...
...until success or timeout.
"""
def wrapper(function):
@functools.wraps(function)
def inner(*args, **kwargs):
start = time.time()
while True:
if (time.time() - start) >= timeout:
raise # This re-raises the last exception.
try:
return function(*args, **kwargs)
except wait_on as e:
log.warn("Exception %r raised from %r. Retry in %rs" % (
e, function, interval))
time.sleep(interval)
return inner
return wrapper
def at_concurrent_component_threshold(config, session):
"""
Determines if the number of concurrent component builds has reached
the configured threshold
:param config: Module Build Service configuration object
:param session: SQLAlchemy database session
:return: boolean representing if there are too many concurrent builds at
this time
"""
import koji # Placed here to avoid py2/py3 conflicts...
if config.num_consecutive_builds and config.num_consecutive_builds <= \
session.query(models.ComponentBuild).filter_by(
state=koji.BUILD_STATES['BUILDING']).count():
return True
return False
def start_build_batch(config, module, session, builder, components=None):
"""
Starts a round of the build cycle for a module.
Returns list of BaseMessage instances which should be scheduled by the
scheduler.
"""
import koji # Placed here to avoid py2/py3 conflicts...
if any([c.state == koji.BUILD_STATES['BUILDING']
for c in module.component_builds]):
raise ValueError("Cannot start a batch when another is in flight.")
# The user can either pass in a list of components to 'seed' the batch, or
# if none are provided then we just select everything that hasn't
# successfully built yet or isn't currently being built.
unbuilt_components = components or [
c for c in module.component_builds
if (c.state != koji.BUILD_STATES['COMPLETE']
and c.state != koji.BUILD_STATES['BUILDING']
and c.state != koji.BUILD_STATES['FAILED']
and c.batch == module.batch)
]
log.info("Starting build of next batch %d, %s" % (module.batch,
unbuilt_components))
for c in unbuilt_components:
if at_concurrent_component_threshold(config, session):
log.info('Concurrent build threshold met')
break
try:
c.task_id, c.state, c.state_reason, c.nvr = builder.build(
artifact_name=c.package, source=c.scmurl)
except Exception as e:
c.state = koji.BUILD_STATES['FAILED']
c.state_reason = "Failed to submit artifact %s to Koji: %s" % (c.package, str(e))
continue
if not c.task_id and c.state == koji.BUILD_STATES['BUILDING']:
c.state = koji.BUILD_STATES['FAILED']
c.state_reason = "Failed to submit artifact %s to Koji" % (c.package)
continue
further_work = []
# If all components in this batch are already done, it can mean that they
# have been built in the past and have been skipped in this module build.
# We therefore have to generate fake KojiRepoChange message, because the
# repo has been also done in the past and build system will not send us
# any message now.
if (all(c.state == koji.BUILD_STATES['COMPLETE'] for c in unbuilt_components)
and builder.module_build_tag):
further_work += [module_build_service.messaging.KojiRepoChange(
'start_build_batch: fake msg', builder.module_build_tag['name'])]
session.commit()
return further_work
def pagination_metadata(p_query):
"""
Returns a dictionary containing metadata about the paginated query. This must be run as part of a Flask request.
:param p_query: flask_sqlalchemy.Pagination object
:return: a dictionary containing metadata about the paginated query
"""
pagination_data = {
'page': p_query.page,
'per_page': p_query.per_page,
'total': p_query.total,
'pages': p_query.pages,
'first': url_for(request.endpoint, page=1, per_page=p_query.per_page, _external=True),
'last': url_for(request.endpoint, page=p_query.pages, per_page=p_query.per_page, _external=True)
}
if p_query.has_prev:
pagination_data['prev'] = url_for(request.endpoint, page=p_query.prev_num,
per_page=p_query.per_page, _external=True)
if p_query.has_next:
pagination_data['next'] = url_for(request.endpoint, page=p_query.next_num,
per_page=p_query.per_page, _external=True)
return pagination_data
def filter_module_builds(flask_request):
"""
Returns a flask_sqlalchemy.Pagination object based on the request parameters
:param request: Flask request object
:return: flask_sqlalchemy.Pagination
"""
search_query = dict()
state = flask_request.args.get('state', None)
if state:
if state.isdigit():
search_query['state'] = state
else:
if state in models.BUILD_STATES:
search_query['state'] = models.BUILD_STATES[state]
else:
raise ValidationError('An invalid state was supplied')
for key in ['name', 'owner']:
if flask_request.args.get(key, None):
search_query[key] = flask_request.args[key]
query = models.ModuleBuild.query
if search_query:
query = query.filter_by(**search_query)
# This is used when filtering the date request parameters, but it is here to avoid recompiling
utc_iso_datetime_regex = re.compile(r'^(?P<datetime>\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2})(?:\.\d+)?'
r'(?:Z|[-+]00(?::00)?)?$')
# Filter the query based on date request parameters
for item in ('submitted', 'modified', 'completed'):
for context in ('before', 'after'):
request_arg = '%s_%s' % (item, context) # i.e. submitted_before
iso_datetime_arg = request.args.get(request_arg, None)
if iso_datetime_arg:
iso_datetime_matches = re.match(utc_iso_datetime_regex, iso_datetime_arg)
if not iso_datetime_matches or not iso_datetime_matches.group('datetime'):
raise ValidationError('An invalid Zulu ISO 8601 timestamp was provided for the "%s" parameter'
% request_arg)
# Converts the ISO 8601 string to a datetime object for SQLAlchemy to use to filter
item_datetime = datetime.strptime(iso_datetime_matches.group('datetime'), '%Y-%m-%dT%H:%M:%S')
# Get the database column to filter against
column = getattr(models.ModuleBuild, 'time_' + item)
if context == 'after':
query = query.filter(column >= item_datetime)
elif context == 'before':
query = query.filter(column <= item_datetime)
page = flask_request.args.get('page', 1, type=int)
per_page = flask_request.args.get('per_page', 10, type=int)
return query.paginate(page, per_page, False)
def _fetch_mmd(url, allow_local_url = False):
# Import it here, because SCM uses utils methods
# and fails to import them because of dep-chain.
import module_build_service.scm
yaml = ""
td = None
scm = None
try:
log.debug('Verifying modulemd')
td = tempfile.mkdtemp()
scm = module_build_service.scm.SCM(url, conf.scmurls, allow_local_url)
cod = scm.checkout(td)
cofn = os.path.join(cod, (scm.name + ".yaml"))
with open(cofn, "r") as mmdfile:
yaml = mmdfile.read()
finally:
try:
if td is not None:
shutil.rmtree(td)
except Exception as e:
log.warning(
"Failed to remove temporary directory {!r}: {}".format(
td, str(e)))
mmd = modulemd.ModuleMetadata()
try:
mmd.loads(yaml)
except Exception as e:
log.error('Invalid modulemd: %s' % str(e))
raise UnprocessableEntity('Invalid modulemd: %s' % str(e))
return mmd, scm, yaml
def record_component_builds(mmd, module, initial_batch = 1):
# Import it here, because SCM uses utils methods
# and fails to import them because of dep-chain.
import module_build_service.scm
# List of (pkg_name, git_url) tuples to be used to check
# the availability of git URLs paralelly later.
full_urls = []
# If the modulemd yaml specifies components, then submit them for build
if mmd.components:
for pkgname, pkg in mmd.components.rpms.items():
try:
if pkg.repository and not conf.rpms_allow_repository:
raise Unauthorized(
"Custom component repositories aren't allowed")
if pkg.cache and not conf.rpms_allow_cache:
raise Unauthorized("Custom component caches aren't allowed")
if not pkg.repository:
pkg.repository = conf.rpms_default_repository + pkgname
if not pkg.cache:
pkg.cache = conf.rpms_default_cache + pkgname
if not pkg.ref:
pkg.ref = 'master'
try:
# If the modulemd specifies that the 'f25' branch is what
# we want to pull from, we need to resolve that f25 branch
# to the specific commit available at the time of
# submission (now).
pkg.ref = module_build_service.scm.SCM(
pkg.repository).get_latest(branch=pkg.ref)
except Exception as e:
raise UnprocessableEntity(
"Failed to get the latest commit for %s#%s" % (
pkgname, pkg.ref))
except Exception:
module.transition(conf, models.BUILD_STATES["failed"])
db.session.add(module)
db.session.commit()
raise
full_url = "%s?#%s" % (pkg.repository, pkg.ref)
full_urls.append((pkgname, full_url))
log.debug("Checking scm urls")
# Checks the availability of SCM urls.
pool = ThreadPool(10)
err_msgs = pool.map(lambda data: "Cannot checkout {}".format(data[0])
if not module_build_service.scm.SCM(data[1]).is_available()
else None, full_urls)
# TODO: only the first error message is raised, perhaps concatenate
# the messages together?
for err_msg in err_msgs:
if err_msg:
raise UnprocessableEntity(err_msg)
components = mmd.components.all
components.sort(key=lambda x: x.buildorder)
previous_buildorder = None
# We do not start with batch = 0 here, because the first batch is
# reserved for module-build-macros. First real components must be
# planned for batch 2 and following.
batch = initial_batch
for pkg in components:
# If the pkg is another module, we fetch its modulemd file
# and record its components recursively with the initial_batch
# set to our current batch, so the components of this module
# are built in the right global order.
if isinstance(pkg, modulemd.ModuleComponentModule):
full_url = pkg.repository + "?#" + pkg.ref
mmd = _fetch_mmd(full_url)[0]
batch = record_component_builds(mmd, module, batch)
continue
if previous_buildorder != pkg.buildorder:
previous_buildorder = pkg.buildorder
batch += 1
full_url = pkg.repository + "?#" + pkg.ref
existing_build = models.ComponentBuild.query.filter_by(
module_id=module.id, package=pkg.name).first()
if (existing_build and existing_build.state != models.BUILD_STATES['done']):
existing_build.state = models.BUILD_STATES['init']
db.session.add(existing_build)
else:
# XXX: what about components that were present in previous
# builds but are gone now (component reduction)?
build = models.ComponentBuild(
module_id=module.id,
package=pkg.name,
format="rpms",
scmurl=full_url,
batch=batch
)
db.session.add(build)
return batch
def submit_module_build(username, url, allow_local_url = False):
# Import it here, because SCM uses utils methods
# and fails to import them because of dep-chain.
import module_build_service.scm
mmd, scm, yaml = _fetch_mmd(url, allow_local_url)
# If undefined, set the name field to VCS repo name.
if not mmd.name and scm:
mmd.name = scm.name
# If undefined, set the stream field to the VCS branch name.
if not mmd.stream and scm:
mmd.stream = scm.branch
# If undefined, set the version field to int represenation of VCS commit.
if not mmd.version and scm:
mmd.version = int(scm.version)
module = models.ModuleBuild.query.filter_by(name=mmd.name,
stream=mmd.stream,
version=mmd.version).first()
if module:
log.debug('Checking whether module build already exist.')
# TODO: make this configurable, we might want to allow
# resubmitting any stuck build on DEV no matter the state
if module.state not in (models.BUILD_STATES['failed'],):
log.error('Module (state=%s) already exists. '
'Only new or failed builds are allowed.'
% module.state)
raise Conflict('Module (state=%s) already exists. '
'Only new or failed builds are allowed.'
% module.state)
log.debug('Resuming existing module build %r' % module)
module.username = username
module.transition(conf, models.BUILD_STATES["init"])
log.info("Resumed existing module build in previous state %s"
% module.state)
else:
log.debug('Creating new module build')
module = models.ModuleBuild.create(
db.session,
conf,
name=mmd.name,
stream=mmd.stream,
version=mmd.version,
modulemd=yaml,
scmurl=url,
username=username
)
record_component_builds(mmd, module)
module.modulemd = mmd.dumps()
module.transition(conf, models.BUILD_STATES["wait"])
db.session.add(module)
db.session.commit()
log.info("%s submitted build of %s, stream=%s, version=%s", username,
mmd.name, mmd.stream, mmd.version)
return module
def insert_fake_baseruntime():
""" Insert a fake base-runtime module into our db.
This is done so that we can reference the build profiles in its modulemd.
See:
- https://pagure.io/fm-orchestrator/pull-request/228
- https://pagure.io/fm-orchestrator/pull-request/225
"""
import sqlalchemy as sa
import modulemd
yaml = """
document: modulemd
version: 1
data:
name: base-runtime
stream: master
version: 3
summary: A fake base-runtime module, used to bootstrap the infrastructure.
description: ...
profiles:
buildroot:
rpms:
- bash
- bzip2
- coreutils
- cpio
- diffutils
- fedora-release
- findutils
- gawk
- gcc
- gcc-c++
- grep
- gzip
- info
- make
- patch
- redhat-rpm-config
- rpm-build
- sed
- shadow-utils
- tar
- unzip
- util-linux
- which
- xz
srpm-buildroot:
rpms:
- bash
- fedora-release
- fedpkg-minimal
- gnupg2
- redhat-rpm-config
- rpm-build
- shadow-utils
"""
mmd = modulemd.ModuleMetadata()
mmd.loads(yaml)
# Check to see if this thing already exists...
query = models.ModuleBuild.query\
.filter_by(name=mmd.name)\
.filter_by(stream=mmd.stream)\
.filter_by(version=mmd.version)
if query.count():
logging.info('%r exists. Skipping creation.' % query.first())
return
# Otherwise, it does not exist. So, create it.
now = datetime.utcnow()
module = models.ModuleBuild()
module.name = mmd.name
module.stream = mmd.stream
module.version = mmd.version
module.modulemd = yaml
module.scmurl = '...'
module.owner = 'modularity'
module.state = models.BUILD_STATES['done']
module.state_reason = 'Artificially created.'
module.time_submitted = now
module.time_modified = now
module.time_completed = now
db.session.add(module)
db.session.commit()
def scm_url_schemes(terse=False):
"""
Definition of URL schemes supported by both frontend and scheduler.
NOTE: only git URLs in the following formats are supported atm:
git://
git+http://
git+https://
git+rsync://
http://
https://
file://
:param terse=False: Whether to return terse list of unique URL schemes
even without the "://".
"""
scm_types = {
"git": ("git://", "git+http://", "git+https://",
"git+rsync://", "http://", "https://", "file://")
}
if not terse:
return scm_types
else:
scheme_list = []
for scm_type, scm_schemes in scm_types.items():
scheme_list.extend([scheme[:-3] for scheme in scm_schemes])
return list(set(scheme_list))