Get the weight of Koji tasks and use it to sort the components in a batch instead of builds time.

This commit is contained in:
Jan Kaluza
2017-11-08 17:22:01 +01:00
committed by mprahl
parent d0adebf8d3
commit 7de120c17b
9 changed files with 435 additions and 12 deletions

View File

@@ -40,16 +40,105 @@ import threading
import munch
from OpenSSL.SSL import SysCallError
from module_build_service import log
from module_build_service import log, conf
import module_build_service.scm
import module_build_service.utils
from module_build_service.builder.utils import execute_cmd
from module_build_service.errors import ProgrammingError
from base import GenericBuilder
logging.basicConfig(level=logging.DEBUG)
def koji_multicall_map(koji_session, koji_session_fnc, list_of_args=None, list_of_kwargs=None):
"""
Calls the `koji_session_fnc` using Koji multicall feature N times based on the list of
arguments passed in `list_of_args` and `list_of_kwargs`.
Returns list of responses sorted the same way as input args/kwargs. In case of error,
the error message is logged and None is returned.
For example to get the package ids of "httpd" and "apr" packages:
ids = koji_multicall_map(session, session.getPackageID, ["httpd", "apr"])
# ids is now [280, 632]
:param KojiSessions koji_session: KojiSession to use for multicall.
:param object koji_session_fnc: Python object representing the KojiSession method to call.
:param list list_of_args: List of args which are passed to each call of koji_session_fnc.
:param list list_of_kwargs: List of kwargs which are passed to each call of koji_session_fnc.
"""
if list_of_args is None and list_of_kwargs is None:
raise ProgrammingError("One of list_of_args or list_of_kwargs must be set.")
if (type(list_of_args) not in [type(None), list] or
type(list_of_kwargs) not in [type(None), list]):
raise ProgrammingError("list_of_args and list_of_kwargs must be list or None.")
if list_of_kwargs is None:
list_of_kwargs = [{}] * len(list_of_args)
if list_of_args is None:
list_of_args = [[]] * len(list_of_kwargs)
if len(list_of_args) != len(list_of_kwargs):
raise ProgrammingError("Length of list_of_args and list_of_kwargs must be the same.")
koji_session.multicall = True
for args, kwargs in zip(list_of_args, list_of_kwargs):
if type(args) != list:
args = [args]
if type(kwargs) != dict:
raise ProgrammingError("Every item in list_of_kwargs must be a dict")
koji_session_fnc(*args, **kwargs)
try:
responses = koji_session.multiCall(strict=True)
except Exception:
log.exception("Exception raised for multicall of method %r with args %r, %r:",
koji_session_fnc, args, kwargs)
return None
if not responses:
log.error("Koji did not return response for multicall of %r", koji_session_fnc)
return None
if type(responses) != list:
log.error("Fault element was returned for multicall of method %r: %r",
koji_session_fnc, responses)
return None
results = []
# For the response specification, see
# https://web.archive.org/web/20060624230303/http://www.xmlrpc.com/discuss/msgReader$1208?mode=topic
# Relevant part of this:
# Multicall returns an array of responses. There will be one response for each call in
# the original array. The result will either be a one-item array containing the result value,
# or a struct of the form found inside the standard <fault> element.
for response, args, kwargs in zip(responses, list_of_args, list_of_kwargs):
if type(response) == list:
if not response:
log.error("Empty list returned for multicall of method %r with args %r, %r",
koji_session_fnc, args, kwargs)
return None
results.append(response[0])
else:
log.error("Unexpected data returned for multicall of method %r with args %r, %r: %r",
koji_session_fnc, args, kwargs, response)
return None
return results
@module_build_service.utils.retry(wait_on=(xmlrpclib.ProtocolError, koji.GenericError))
def koji_retrying_multicall_map(*args, **kwargs):
"""
Retrying version of koji_multicall_map. This tries to retry the Koji call
in case of koji.GenericError or xmlrpclib.ProtocolError.
Please refer to koji_multicall_map for further specification of arguments.
"""
return koji_multicall_map(*args, **kwargs)
class KojiModuleBuilder(GenericBuilder):
""" Koji specific builder class """
@@ -809,7 +898,8 @@ chmod 644 %buildroot/%_sysconfdir/rpm/macros.zz-modules
return tasks
def get_average_build_time(self, component):
@classmethod
def get_average_build_time(cls, component):
"""
Get the average build time of the component from Koji
:param component: a ComponentBuild object
@@ -817,4 +907,99 @@ chmod 644 %buildroot/%_sysconfdir/rpm/macros.zz-modules
"""
# If the component has not been built before, then None is returned. Instead, let's
# return 0.0 so the type is consistent
return self.koji_session.getAverageBuildDuration(component.package) or 0.0
koji_session = KojiModuleBuilder.get_session(conf, None)
return koji_session.getAverageBuildDuration(component) or 0.0
@classmethod
def get_build_weights(cls, components):
"""
Returns a dict with component name as a key and float number
representing the overall Koji weight of a component build.
The weight is sum of weights of all tasks in a previously done modular
build of a component.
:param list components: List of component names.
:rtype: dict
:return: {component_name: weight_as_float, ...}
"""
koji_session = KojiModuleBuilder.get_session(conf, None)
# Get our own userID, so we can limit the builds to only modular builds
user_info = koji_session.getLoggedInUser()
if not user_info or "id" not in user_info:
log.warn("Koji.getLoggedInUser() failed while getting build weight.")
return cls.compute_weights_from_build_time(components)
mbs_user_id = user_info["id"]
# Get the Koji PackageID for every component in single Koji call.
# If some package does not exist in Koji, component_ids will be None.
component_ids = koji_retrying_multicall_map(
koji_session, koji_session.getPackageID, list_of_args=components)
if not component_ids:
return cls.compute_weights_from_build_time(components)
# Prepare list of queries to call koji_session.listBuilds
build_queries = []
for component_id in component_ids:
build_queries.append({
"packageID": component_id,
"userID": mbs_user_id,
"state": koji.BUILD_STATES["COMPLETE"],
"queryOpts": {"order": "-build_id", "limit": 1}})
# Get the latest Koji build created by MBS for every component in single Koji call.
builds_per_component = koji_retrying_multicall_map(
koji_session, koji_session.listBuilds, list_of_kwargs=build_queries)
if not builds_per_component:
return cls.compute_weights_from_build_time(components)
# Get list of task_ids associated with the latest build in builds.
# For some packages, there may not be a build done by MBS yet.
# We store such packages in `components_without_build` and later
# compute the weight by compute_weights_from_build_time().
# For others, we will continue by examining weights of all tasks
# belonging to that build later.
task_ids = []
components_with_build = []
components_without_build = []
for builds, component_name in zip(builds_per_component, components):
if not builds:
# No build for this component.
components_without_build.append(component_name)
continue
latest_build = builds[0]
task_id = latest_build["task_id"]
if not task_id:
# No task_id for this component, this can happen for imported
# component builds.
components_without_build.append(component_name)
continue
components_with_build.append(component_name)
task_ids.append(task_id)
weights = {}
# For components without any build, fallback to weights computation based on
# the average time to build.
weights.update(cls.compute_weights_from_build_time(components_without_build))
# For components with a build, get the list of tasks associated with this build
# and compute the weight for each component build as sum of weights of all tasks.
tasks_per_latest_build = koji_retrying_multicall_map(
koji_session, koji_session.getTaskDescendents, list_of_args=task_ids)
if not tasks_per_latest_build:
return cls.compute_weights_from_build_time(components_with_build)
for tasks, component_name in zip(tasks_per_latest_build, components_with_build):
# Compute overall weight of this build. This is sum of weights
# of all tasks in a build.
weight = 0
for task in tasks.values():
weight += sum([t["weight"] for t in task])
weights[component_name] = weight
return weights

View File

@@ -356,3 +356,72 @@ class GenericBuilder(six.with_metaclass(ABCMeta)):
:return: a float of 0.0
"""
return 0.0
@classmethod
def get_build_weights(cls, components):
"""
Returns a dict with component name as a key and float number
representing the overall Koji weight of a component build.
:param list components: List of component names.
:rtype: dict
:return: {component_name: weight_as_float, ...}
"""
return cls.compute_weights_from_build_time(components)
@classmethod
def compute_weights_from_build_time(cls, components, arches=None):
"""
Computes the weights of ComponentBuilds based on average time to build
and list of arches for which the component is going to be built.
This method should be used as a fallback only when KojiModuleBuilder
cannot be used, because the weight this method produces is not 100% accurate.
:param components: List of comopnent names to compute the weight for.
:param arches: List of arches to build for or None. If the value is None,
conf.koji_arches will be used instead.
:rtype: dict
:return: {component_name: weight_as_float, ...}
"""
if not arches:
arches = conf.koji_arches
weights = {}
for component in components:
average_time_to_build = cls.get_average_build_time(component)
# The way how `weight` is computed is based on hardcoded weight values
# in kojid.py.
# The weight computed here is not 100% accurate, because there are
# multiple smaller tasks in koji like waitrepo or createrepo and we
# cannot say if they will be executed as part of this component build.
# The weight computed here is used only to limit the number of builds
# and we generally do not care about waitrepo/createrepo weights in MBS.
# 1.5 is what Koji hardcodes as a default weight for BuildArchTask.
weight = 1.5
if not average_time_to_build:
weights[component] = weight
continue
if average_time_to_build < 0:
log.warn("Negative average build duration for component %s: %s",
component, str(average_time_to_build))
weights[component] = weight
continue
# Increase the task weight by 0.75 for every hour of build duration.
adj = (average_time_to_build / ((60 * 60) / 0.75))
# cap the adjustment at +4.5
weight += min(4.5, adj)
# We build for all arches, so multiply the weight by number of arches.
weight = weight * len(arches)
# 1.5 here is hardcoded Koji weight of single BuildSRPMFromSCMTask
weight += 1.5
weights[component] = weight
return weights

View File

@@ -0,0 +1,22 @@
"""Add component_builds.weight
Revision ID: f5b1c5203cce
Revises: 524c3b1c683c
Create Date: 2017-11-08 17:18:27.401546
"""
# revision identifiers, used by Alembic.
revision = 'f5b1c5203cce'
down_revision = '524c3b1c683c'
from alembic import op
import sqlalchemy as sa
def upgrade():
op.add_column('component_builds', sa.Column('weight', sa.Float(), nullable=True))
def downgrade():
op.drop_column('component_builds', 'weight')

View File

@@ -527,6 +527,10 @@ class ComponentBuild(MBSBase):
reused_component_id = db.Column(
db.Integer, db.ForeignKey('component_builds.id'))
# Weight defines the complexity of the component build as calculated by the builder's
# get_build_weights function
weight = db.Column(db.Float, default=0)
@classmethod
def from_component_event(cls, session, event):
if isinstance(event, module_build_service.messaging.KojiBuildChange):

View File

@@ -165,9 +165,7 @@ def continue_batch_build(config, module, session, builder, components=None):
components_to_build = []
# Sort the unbuilt_components so that the components that take the longest to build are
# first
log.info('Sorting the unbuilt components by their average build time')
unbuilt_components.sort(key=lambda c: builder.get_average_build_time(c), reverse=True)
log.info('Done sorting the unbuilt components by their average build time')
unbuilt_components.sort(key=lambda c: c.weight, reverse=True)
# Check for builds that exist in the build system but MBS doesn't know about
for component in unbuilt_components:
@@ -860,6 +858,9 @@ def merge_included_mmd(mmd, included_mmd):
def record_component_builds(mmd, module, initial_batch=1,
previous_buildorder=None, main_mmd=None, session=None):
# Imported here to allow import of utils in GenericBuilder.
import module_build_service.builder
if not session:
session = db.session
@@ -890,6 +891,9 @@ def record_component_builds(mmd, module, initial_batch=1,
components = mmd.components.all
components.sort(key=lambda x: x.buildorder)
weights = module_build_service.builder.GenericBuilder.get_build_weights(
[c.name for c in components])
# We do not start with batch = 0 here, because the first batch is
# reserved for module-build-macros. First real components must be
# planned for batch 2 and following.
@@ -922,7 +926,8 @@ def record_component_builds(mmd, module, initial_batch=1,
format="rpms",
scmurl=full_url,
batch=batch,
ref=pkgref
ref=pkgref,
weight=weights[pkg.name]
)
session.add(build)

View File

@@ -68,3 +68,7 @@ class TestGenericBuilder(unittest.TestCase):
ret = GenericBuilder.default_buildroot_groups(db.session, self.module)
self.assertEqual(ret, expected_groups)
resolve_profiles.assert_called_once()
def test_get_build_weights(self):
weights = GenericBuilder.get_build_weights(["httpd", "apr"])
self.assertEqual(weights, {"httpd": 1.5, "apr": 1.5})

View File

@@ -243,6 +243,101 @@ class TestKojiBuilder(unittest.TestCase):
builder.koji_session.tagBuild.assert_called_once_with(
builder.module_tag["id"], "new-1.0-1.module_e0095747")
@patch('module_build_service.builder.KojiModuleBuilder.KojiModuleBuilder.get_session')
def test_get_build_weights(self, get_session):
session = MagicMock()
session.getLoggedInUser.return_value = {"id": 123}
session.multiCall.side_effect = [
# getPackageID response
[[1], [2]],
# listBuilds response
[[[{"task_id": 456}]], [[{"task_id": 789}]]],
# getTaskDescendents response
[[{'1': [], '2': [], '3': [{'weight': 1.0}, {'weight': 1.0}]}],
[{'1': [], '2': [], '3': [{'weight': 1.0}, {'weight': 1.0}]}]]
]
get_session.return_value = session
weights = KojiModuleBuilder.get_build_weights(["httpd", "apr"])
self.assertEqual(weights, {"httpd": 2, "apr": 2})
expected_calls = [mock.call(456), mock.call(789)]
self.assertEqual(session.getTaskDescendents.mock_calls, expected_calls)
@patch('module_build_service.builder.KojiModuleBuilder.KojiModuleBuilder.get_session')
def test_get_build_weights_no_task_id(self, get_session):
session = MagicMock()
session.getLoggedInUser.return_value = {"id": 123}
session.multiCall.side_effect = [
# getPackageID response
[[1], [2]],
# listBuilds response
[[[{"task_id": 456}]], [[{"task_id": None}]]],
# getTaskDescendents response
[[{'1': [], '2': [], '3': [{'weight': 1.0}, {'weight': 1.0}]}]]
]
get_session.return_value = session
weights = KojiModuleBuilder.get_build_weights(["httpd", "apr"])
self.assertEqual(weights, {"httpd": 2, "apr": 1.5})
expected_calls = [mock.call(456)]
self.assertEqual(session.getTaskDescendents.mock_calls, expected_calls)
@patch('module_build_service.builder.KojiModuleBuilder.KojiModuleBuilder.get_session')
def test_get_build_weights_no_build(self, get_session):
session = MagicMock()
session.getLoggedInUser.return_value = {"id": 123}
session.multiCall.side_effect = [
# getPackageID response
[[1], [2]],
# listBuilds response
[[[{"task_id": 456}]], [[]]],
# getTaskDescendents response
[[{'1': [], '2': [], '3': [{'weight': 1.0}, {'weight': 1.0}]}]]
]
get_session.return_value = session
weights = KojiModuleBuilder.get_build_weights(["httpd", "apr"])
self.assertEqual(weights, {"httpd": 2, "apr": 1.5})
expected_calls = [mock.call(456)]
self.assertEqual(session.getTaskDescendents.mock_calls, expected_calls)
@patch('module_build_service.builder.KojiModuleBuilder.KojiModuleBuilder.get_session')
def test_get_build_weights_listBuilds_failed(self, get_session):
session = MagicMock()
session.getLoggedInUser.return_value = {"id": 123}
session.multiCall.side_effect = [[[1], [2]], []]
get_session.return_value = session
weights = KojiModuleBuilder.get_build_weights(["httpd", "apr"])
self.assertEqual(weights, {"httpd": 1.5, "apr": 1.5})
expected_calls = [mock.call(packageID=1, userID=123, state=1,
queryOpts={'limit': 1, 'order': '-build_id'}),
mock.call(packageID=2, userID=123, state=1,
queryOpts={'limit': 1, 'order': '-build_id'})]
self.assertEqual(session.listBuilds.mock_calls, expected_calls)
@patch('module_build_service.builder.KojiModuleBuilder.KojiModuleBuilder.get_session')
def test_get_build_weights_getPackageID_failed(self, get_session):
session = MagicMock()
session.getLoggedInUser.return_value = {"id": 123}
session.multiCall.side_effect = [[], []]
get_session.return_value = session
weights = KojiModuleBuilder.get_build_weights(["httpd", "apr"])
self.assertEqual(weights, {"httpd": 1.5, "apr": 1.5})
expected_calls = [mock.call("httpd"), mock.call("apr")]
self.assertEqual(session.getPackageID.mock_calls, expected_calls)
@patch('module_build_service.builder.KojiModuleBuilder.KojiModuleBuilder.get_session')
def test_get_build_weights_getLoggedInUser_failed(self, get_session):
weights = KojiModuleBuilder.get_build_weights(["httpd", "apr"])
self.assertEqual(weights, {"httpd": 1.5, "apr": 1.5})
class TestGetKojiClientSession(unittest.TestCase):

View File

@@ -32,7 +32,8 @@ import module_build_service.scm
from module_build_service import models, conf
from module_build_service.errors import ProgrammingError, ValidationError, UnprocessableEntity
from tests import (test_reuse_component_init_data, init_data, db,
test_reuse_shared_userspace_init_data)
test_reuse_shared_userspace_init_data,
clean_database)
import mock
import koji
import module_build_service.scheduler.handlers.components
@@ -704,6 +705,43 @@ class TestUtils(unittest.TestCase):
assert username_arg == username
rmtree(module_dir)
@vcr.use_cassette(
path.join(CASSETTES_DIR, ('tests.test_utils.TestUtils.'
'test_record_component_builds_set_weight')))
@patch('module_build_service.scm.SCM')
def test_record_component_builds_set_weight(self, mocked_scm):
with app.app_context():
clean_database()
mocked_scm.return_value.commit = \
'620ec77321b2ea7b0d67d82992dda3e1d67055b4'
# For all the RPMs in testmodule, get_latest is called
hashes_returned = {
'f25': '4ceea43add2366d8b8c5a622a2fb563b625b9abf',
'f24': 'fbed359411a1baa08d4a88e0d12d426fbf8f602c'}
def mocked_get_latest(branch="master"):
return hashes_returned[branch]
mocked_scm.return_value.get_latest = mocked_get_latest
testmodule_variant_mmd_path = path.join(
BASE_DIR, '..', 'staged_data', 'testmodule-variant.yaml')
testmodule_variant_mmd = modulemd.ModuleMetadata()
with open(testmodule_variant_mmd_path) as mmd_file:
testmodule_variant_mmd.loads(mmd_file)
mmd = testmodule_variant_mmd
module_build = models.ModuleBuild.create(
db.session, conf, "test", "stream", "1", mmd.dumps(), "scmurl", "owner")
module_build_service.utils.record_component_builds(
mmd, module_build)
self.assertEqual(module_build.state, models.BUILD_STATES['init'])
db.session.refresh(module_build)
for c in module_build.component_builds:
self.assertEqual(c.weight, 1.5)
class DummyModuleBuilder(GenericBuilder):
"""
@@ -971,12 +1009,13 @@ class TestBatches(unittest.TestCase):
module_id=2, package='perl-List-Compare').one()
plc_component.ref = '5ceea46add2366d8b8c5a623a2fb563b625b9abd'
# Components are by default built by component id. To find out that weight is respected,
# we have to set bigger weight to component with lower id.
pt_component.weight = 3 if pt_component.id < plc_component.id else 4
plc_component.weight = 4 if pt_component.id < plc_component.id else 3
builder = mock.MagicMock()
builder.recover_orphaned_artifact.return_value = []
# The call order of get_average_build_time should be by the component's ID. Having this
# side_effect tells continue_batch_build to build the second component in the build batch
# first and the first component in the build batch second.
builder.get_average_build_time.side_effect = [1234.56, 2345.67]
further_work = module_build_service.utils.start_next_batch_build(
conf, module_build, db.session, builder)

Binary file not shown.