From bfd9a13205dd1bdcadc2cceb922f44e5928fb817 Mon Sep 17 00:00:00 2001 From: Jan Kaluza Date: Mon, 26 Jul 2021 08:26:47 +0200 Subject: [PATCH] Allow overriding RPM components refs while submitting the module build. There is a need to rebuild the module builds done in CentOS 9 Stream internally in MBS to include them in RHEL. This is currenly a hard task, because the RPM components included in a module are usually taken from HEAD of the branch defined by their `ref` value. For the rebuild task, it means we would have to ensure that the HEAD of all RPM components points to right commit hash right before we start rebuilding CentOS 9 Stream module in internal MBS. This is very hard and fragile thing to do, especially if there are two different modules using the RPM component from the same branch. This is prone to race condition and makes the rebuilds quite complex and in some cases not possible to do without force pushes to RPM component repositories which is not acceptable by internal dist-git policy. This commit fixes it by allowing overriding the commit hash while submitting the module build. This helps in the mentioned situation, because we can keep internal RPM components branches in 1:1 sync with CentOS 9 Stream branches and HEAD can always point to the same commit in both internal and CentOS 9 Stream repositories. When the module rebuild is submitted in internal MBS, we can use this new feature to override the `ref` for each RPM component so it points to particular commit and the requirement for HEAD to point to this commit is no longer there. The `ref` is overriden only internally in MBS (but it is recorded in logs and in XMD section), so the input modulemd file is not altered. This is the same logic as used for other overrides (`buildrequire_overrides` or `side_tag`). This does not bring any security problem, because it is already possible to use commit hash in `ref`, so the package maintainer can already change the commit hash to any particular commit by using this `ref` value. Signed-off-by: Jan Kaluza --- README.rst | 2 + module_build_service/scheduler/submit.py | 48 ++++++++++++++++++++---- module_build_service/web/submit.py | 18 +++++++++ module_build_service/web/views.py | 21 +++++++++++ tests/test_scheduler/test_submit.py | 21 ++++++++++- tests/test_web/test_views.py | 33 ++++++++++++++++ 6 files changed, 133 insertions(+), 10 deletions(-) diff --git a/README.rst b/README.rst index 094179a2..75249dcc 100644 --- a/README.rst +++ b/README.rst @@ -151,6 +151,8 @@ Options: ``multipart/form-data`` request. Only allowed if ``scratch`` is ``True`` or if the MBS setting ``YAML_SUBMIT_ALLOWED`` is ``True``. The basename of the file will be used as the module name. +- ``rpm_component_ref_overrides`` - the commit refs to override the RPM component refs with. + The expected format is ``{'rpm_component_name': "new_commit_ref"}``. Module build state query diff --git a/module_build_service/scheduler/submit.py b/module_build_service/scheduler/submit.py index 4aa4c4d8..5334e125 100644 --- a/module_build_service/scheduler/submit.py +++ b/module_build_service/scheduler/submit.py @@ -5,6 +5,7 @@ from datetime import datetime import json from multiprocessing.dummy import Pool as ThreadPool import os +from collections import namedtuple import kobo.rpmlib @@ -17,6 +18,14 @@ from module_build_service.common.utils import to_text_type from module_build_service.scheduler.db_session import db_session +# The namedtuple to store data passed to _scm_get_latest with following keys: +# - rpm_component - The Modulemd.ComponentRPM instance representing +# the component. +# - ref_override - The str instance used to override the component's ref +# or None. +SCMGetLatestData = namedtuple("SCMGetLatestData", "rpm_component ref_override") + + def get_build_arches(mmd, config): """ Returns the list of architectures for which the module `mmd` should be built. @@ -205,24 +214,35 @@ def record_filtered_rpms(mmd): return mmd -def _scm_get_latest(pkg): +def _scm_get_latest(data): + """ + Resolves the git ref for the package defined in the `data`. Applies git + ref override if set in the `data`. Returns a dict with resolved git ref + or possible resolving error. + + :param SCMGetLatestData data: Information about the packages to resolve. + :return: Dict with following keys: + - pkg_name - Name of the resolved package. + - pkg_ref - Resolve git ref. + - error - Contains the error if any or it is set to `None`. + """ try: # If the modulemd specifies that the 'f25' branch is what # we want to pull from, we need to resolve that f25 branch # to the specific commit available at the time of # submission (now). - repo = pkg.get_repository() - ref = pkg.get_ref() + repo = data.rpm_component.get_repository() + ref = data.ref_override or data.rpm_component.get_ref() log.debug("Getting the commit hash for the ref %s on the repo %s", ref, repo) pkgref = module_build_service.common.scm.SCM(repo).get_latest(ref) except Exception as e: log.exception(e) return { "error": "Failed to get the latest commit for %s#%s" - % (pkg.get_repository(), pkg.get_ref()) + % (data.rpm_component.get_repository(), data.rpm_component.get_ref()) } - return {"pkg_name": pkg.get_name(), "pkg_ref": pkgref, "error": None} + return {"pkg_name": data.rpm_component.get_name(), "pkg_ref": pkgref, "error": None} def format_mmd(mmd, scmurl, module=None, db_session=None, srpm_overrides=None): @@ -305,6 +325,10 @@ def format_mmd(mmd, scmurl, module=None, db_session=None, srpm_overrides=None): if not mod.get_ref(): mod.set_ref("master") + # It is possible to override the ref of RPM component using + # the rpm_component_ref_overrides. + ref_overrides = xmd["mbs"].get("rpm_component_ref_overrides", {}) + # Check that SCM URL is valid and replace potential branches in pkg refs # by real SCM hash and store the result to our private xmd place in modulemd. pool = ThreadPool(20) @@ -312,7 +336,7 @@ def format_mmd(mmd, scmurl, module=None, db_session=None, srpm_overrides=None): # Filter out the packages which we have already resolved in possible # previous runs of this method (can be caused by module build resubmition) # or which have custom SRPMs and shouldn't be resolved. - pkgs_to_resolve = [] + scm_get_latest_data_list = [] for name in mmd.get_rpm_component_names(): if name not in xmd["mbs"]["rpms"]: if name in srpm_overrides: @@ -320,9 +344,17 @@ def format_mmd(mmd, scmurl, module=None, db_session=None, srpm_overrides=None): # ref entry so no further verification takes place. xmd["mbs"]["rpms"][name] = {"ref": None} else: - pkgs_to_resolve.append(mmd.get_rpm_component(name)) + # Apply possible ref override. + if name in ref_overrides: + ref_override = ref_overrides[name] + log.info("Applying rpm_component_ref_overrides - " + "%s, new ref is %s." % (name, ref_override)) + else: + ref_override = None + scm_get_latest_data_list.append( + SCMGetLatestData(mmd.get_rpm_component(name), ref_override)) - async_result = pool.map_async(_scm_get_latest, pkgs_to_resolve) + async_result = pool.map_async(_scm_get_latest, scm_get_latest_data_list) # For modules with lot of components, the _scm_get_latest can take a lot of time. # We need to bump time_modified from time to time, otherwise poller could think diff --git a/module_build_service/web/submit.py b/module_build_service/web/submit.py index 57fcb3d8..c6b816fc 100644 --- a/module_build_service/web/submit.py +++ b/module_build_service/web/submit.py @@ -270,6 +270,23 @@ def _apply_dep_overrides(mmd, params): ) +def _apply_rpm_component_ref_overrides(mmd, params): + """ + If `rpm_component_ref_overrides` is given, note it in the xmd. + + :param Modulemd.ModuleStream mmd: the modulemd to apply the overrides on + :param dict params: the API parameters passed in by the user + """ + ref_overrides = params.get("rpm_component_ref_overrides", {}) + if not ref_overrides: + # No changes needed. + return + + xmd = mmd.get_xmd() + xmd.setdefault("mbs", {})["rpm_component_ref_overrides"] = ref_overrides + mmd.set_xmd(xmd) + + def _apply_side_tag(mmd, params): """ If a side tag identifier is given, note it in the xmd @@ -601,6 +618,7 @@ def submit_module_build(db_session, username, stream_or_packager, params, module validate_mmd(mmd) _apply_dep_overrides(mmd, params) _apply_side_tag(mmd, params) + _apply_rpm_component_ref_overrides(mmd, params) _modify_buildtime_streams(db_session, mmd, resolve_base_module_virtual_streams) _process_support_streams(db_session, mmd, params) mmds += generate_expanded_mmds(db_session, mmd, raise_if_stream_ambigous, diff --git a/module_build_service/web/views.py b/module_build_service/web/views.py index 0af2d2c5..a9c37077 100644 --- a/module_build_service/web/views.py +++ b/module_build_service/web/views.py @@ -369,6 +369,7 @@ class BaseHandler(object): valid_params = { "branch", "buildrequire_overrides", + "rpm_component_ref_overrides", "modulemd", "module_name", "module_stream", @@ -425,6 +426,25 @@ class BaseHandler(object): if not isinstance(stream, string_types): raise ValidationError(invalid_override_msg) + def _validate_ref_overrides_format(self, key): + """ + Validate any dependency overrides provided to the API. + + :param str key: the override key to validate + :raises ValidationError: when the overrides are an invalid format + """ + if not self.data.get(key): + return + invalid_override_msg = ( + 'The "{}" parameter must be an object with the keys as component ' + "names and the values as strings of refs".format(key) + ) + if not isinstance(self.data[key], dict): + raise ValidationError(invalid_override_msg) + for ref in self.data[key].values(): + if not isinstance(ref, string_types): + raise ValidationError(invalid_override_msg) + def validate_optional_params(self): forbidden_params = [k for k in self.data if k not in self.valid_params] if forbidden_params: @@ -452,6 +472,7 @@ class BaseHandler(object): self._validate_dep_overrides_format("buildrequire_overrides") self._validate_dep_overrides_format("require_overrides") + self._validate_ref_overrides_format("rpm_component_ref_overrides") if "reuse_components_from" in self.data: if "rebuild_strategy" in self.data and self.data["rebuild_strategy"] == "all": diff --git a/tests/test_scheduler/test_submit.py b/tests/test_scheduler/test_submit.py index 760c6131..74628fd5 100644 --- a/tests/test_scheduler/test_submit.py +++ b/tests/test_scheduler/test_submit.py @@ -123,14 +123,22 @@ class TestSubmit: None, ], ) + @pytest.mark.parametrize( + "rpm_component_ref_overrides", + [ + {"tangerine": "ref_override"}, + None, + ], + ) @mock.patch("module_build_service.common.scm.SCM") - def test_format_mmd(self, mocked_scm, srpm_overrides, scmurl): + def test_format_mmd(self, mocked_scm, srpm_overrides, rpm_component_ref_overrides, scmurl): mocked_scm.return_value.commit = "620ec77321b2ea7b0d67d82992dda3e1d67055b4" # For all the RPMs in testmodule, get_latest is called hashes_returned = { "master": "fbed359411a1baa08d4a88e0d12d426fbf8f602c", "f28": "4ceea43add2366d8b8c5a622a2fb563b625b9abf", "f27": "5deef23acd2367d8b8d5a621a2fc568b695bc3bd", + "ref_override": "ref_override", } def mocked_get_latest(ref="master"): @@ -146,6 +154,13 @@ class TestSubmit: if srpm_overrides: # Set a bogus ref that will raise an exception if not properly ignored. mmd.get_rpm_component("perl-List-Compare").set_ref("bogus") + if rpm_component_ref_overrides: + xmd = mmd.get_xmd() + xmd["mbs"] = {"rpm_component_ref_overrides": rpm_component_ref_overrides} + mmd.set_xmd(xmd) + tangerine_ref = rpm_component_ref_overrides["tangerine"] + else: + tangerine_ref = hashes_returned["f27"] format_mmd(mmd, scmurl, srpm_overrides=srpm_overrides) # Make sure that original refs are not changed. @@ -167,7 +182,7 @@ class TestSubmit: "rpms": { "perl-List-Compare": {"ref": "fbed359411a1baa08d4a88e0d12d426fbf8f602c"}, "perl-Tangerine": {"ref": "4ceea43add2366d8b8c5a622a2fb563b625b9abf"}, - "tangerine": {"ref": "5deef23acd2367d8b8d5a621a2fc568b695bc3bd"}, + "tangerine": {"ref": tangerine_ref}, }, "scmurl": "", } @@ -177,6 +192,8 @@ class TestSubmit: xmd["mbs"]["scmurl"] = scmurl if srpm_overrides: xmd["mbs"]["rpms"]["perl-List-Compare"]["ref"] = match_anything + if rpm_component_ref_overrides: + xmd["mbs"]["rpm_component_ref_overrides"] = match_anything mmd_xmd = mmd.get_xmd() assert mmd_xmd == xmd diff --git a/tests/test_web/test_views.py b/tests/test_web/test_views.py index 3d0ef1a8..68909780 100644 --- a/tests/test_web/test_views.py +++ b/tests/test_web/test_views.py @@ -1583,6 +1583,9 @@ class TestSubmitBuild: {"require_overrides": {"platform": "f28"}}, {"require_overrides": {"platform": 28}}, {"require_overrides": "platform:f28"}, + {"rpm_component_ref_overrides": {"pkg_name": ["hash"]}}, + {"rpm_component_ref_overrides": {"pkg_name": 28}}, + {"rpm_component_ref_overrides": "pkg_name:hash"}, ), ) @patch("module_build_service.web.auth.get_user", return_value=user) @@ -1614,8 +1617,38 @@ class TestSubmitBuild: msg = msg.format("buildrequire_overrides") elif "require_overrides" in optional_params: msg = msg.format("require_overrides") + elif "rpm_component_ref_overrides" in optional_params: + msg = ( + "The \"rpm_component_ref_overrides\" parameter must be an object with the keys " + "as component names and the values as strings of refs" + ) assert data == {"error": "Bad Request", "message": msg, "status": 400} + @patch("module_build_service.web.auth.get_user", return_value=user) + @patch("module_build_service.common.scm.SCM") + def test_submit_rpm_component_ref_overrides(self, mocked_scm, mocked_get_user): + optional_params = {"rpm_component_ref_overrides": {"pkg_name": "ref_override"}} + + FakeSCM( + mocked_scm, "fakemodule", "fakemodule.yaml", + "3da541559918a808c2402bba5012f6c60b27661c") + + post_url = "/module-build-service/2/module-builds/" + scm_url = ( + "https://src.stg.fedoraproject.org/modules/testmodule.git?#" + "68931c90de214d9d13feefbd35246a81b6cb8d49" + ) + json_input = {"branch": "master", "scmurl": scm_url} + json_input.update(optional_params) + + rv = self.client.post(post_url, data=json.dumps(json_input)) + data = json.loads(rv.data) + assert rv.status_code == 201 + data = json.loads(rv.data)[0] + mmd = load_mmd(data["modulemd"]) + xmd_override = mmd.get_xmd()["mbs"]["rpm_component_ref_overrides"] + assert xmd_override == optional_params["rpm_component_ref_overrides"] + @pytest.mark.parametrize("api_version", [1, 2]) @patch("module_build_service.web.auth.get_user", return_value=user) @patch("module_build_service.common.scm.SCM")