From 008f2c9d29f5e0a2c1bf8eb3c6e41f29f815bb74 Mon Sep 17 00:00:00 2001 From: Jan Kaluza Date: Wed, 11 Jan 2017 14:34:00 +0100 Subject: [PATCH] Make SCM check faster by consolidating two SCM checks to single check executed in ThreadPool. Use 'git checkout' instead of HTTP HEAD request, because it is much faster in Fedora cgit case. --- module_build_service/scm.py | 60 ++++++++++++++-------------------- module_build_service/utils.py | 41 +++++++++++------------ tests/test_views/test_views.py | 14 ++++---- 3 files changed, 53 insertions(+), 62 deletions(-) diff --git a/module_build_service/scm.py b/module_build_service/scm.py index 81df8da5..58f0136d 100644 --- a/module_build_service/scm.py +++ b/module_build_service/scm.py @@ -100,10 +100,10 @@ class SCM(object): @staticmethod @module_build_service.utils.retry(wait_on=RuntimeError) - def _run(cmd, chdir=None): + def _run(cmd, chdir=None, log_stdout = False): proc = sp.Popen(cmd, stdout=sp.PIPE, stderr=sp.PIPE, cwd=chdir) stdout, stderr = proc.communicate() - if stdout: + if log_stdout and stdout: log.debug(stdout) if stderr: log.warning(stderr) @@ -149,53 +149,43 @@ class SCM(object): :raises: RuntimeError """ if self.scheme == "git": - cmd = ["git", "ls-remote", self.repository] - proc = sp.Popen(cmd, stdout=sp.PIPE, stderr=sp.PIPE) - output, stderr = proc.communicate() - if proc.returncode != 0: - raise RuntimeError("Cannot get git hash of %s HEAD in %s" - % (branch, self.repository)) + log.debug("Getting/verifying commit hash for %s" % self.repository) + output = SCM._run(["git", "ls-remote", self.repository])[1] for line in output.split(os.linesep): if line.endswith("\trefs/heads/%s" % branch): return line.split("\t")[0] # Hopefully `branch` is really a commit hash. Code later needs to verify this. - log.warn("Couldn't determine the git %s HEAD hash in %s." - % (branch, self.repository)) + if self.is_available(True): + return branch return branch else: raise RuntimeError("get_latest: Unhandled SCM scheme.") - def is_available(self): + def is_available(self, strict=False): """Check whether the scmurl is available for checkout. + :param bool strict: When True, raise expection on error instead of + returning False. :returns: bool -- the scmurl is available for checkout """ - # XXX: If implementing special hacks for pagure.io or github.com, don't - # forget about possible forks -- start with self.repository. - if self.repository.startswith("-git://pkgs.fedoraproject.org/"): - hc = http_client.HTTPConnection("pkgs.fedoraproject.org") - hc.request("HEAD", - "/cgit/rpms/" + self.name + ".git/commit/?id=" + self.commit) - rc = hc.getresponse().code - hc.close() - return True if rc == 200 else False - else: - td = None + td = None + try: + td = tempfile.mkdtemp() + self.checkout(td) + return True + except: + if strict: + raise + return False + finally: try: - td = tempfile.mkdtemp() - self.checkout(td) - return True - except: - return False - finally: - try: - if td is not None: - shutil.rmtree(td) - except Exception as e: - log.warning( - "Failed to remove temporary directory {!r}: {}".format( - td, str(e))) + if td is not None: + shutil.rmtree(td) + except Exception as e: + log.warning( + "Failed to remove temporary directory {!r}: {}".format( + td, str(e))) @property def url(self): diff --git a/module_build_service/utils.py b/module_build_service/utils.py index df298518..8a1aaddc 100644 --- a/module_build_service/utils.py +++ b/module_build_service/utils.py @@ -275,6 +275,18 @@ def _fetch_mmd(url, allow_local_url = False): return mmd, scm, yaml +def _scm_get_latest(pkg): + try: + # If the modulemd specifies that the 'f25' branch is what + # we want to pull from, we need to resolve that f25 branch + # to the specific commit available at the time of + # submission (now). + pkg.ref = module_build_service.scm.SCM( + pkg.repository).get_latest(branch=pkg.ref) + except Exception as e: + return "Failed to get the latest commit for %s#%s" % (pkg.repository, pkg.ref) + return None + def record_component_builds(scm, mmd, module, initial_batch = 1): # Import it here, because SCM uses utils methods # and fails to import them because of dep-chain. @@ -286,6 +298,7 @@ def record_component_builds(scm, mmd, module, initial_batch = 1): # If the modulemd yaml specifies components, then submit them for build if mmd.components: + # Add missing data in components for pkgname, pkg in mmd.components.rpms.items(): try: if pkg.repository and not conf.rpms_allow_repository: @@ -299,38 +312,26 @@ def record_component_builds(scm, mmd, module, initial_batch = 1): pkg.cache = conf.rpms_default_cache + pkgname if not pkg.ref: pkg.ref = 'master' - try: - # If the modulemd specifies that the 'f25' branch is what - # we want to pull from, we need to resolve that f25 branch - # to the specific commit available at the time of - # submission (now). - pkg.ref = module_build_service.scm.SCM( - pkg.repository).get_latest(branch=pkg.ref) - except Exception as e: - raise UnprocessableEntity( - "Failed to get the latest commit for %s#%s" % ( - pkgname, pkg.ref)) except Exception: module.transition(conf, models.BUILD_STATES["failed"]) db.session.add(module) db.session.commit() raise - full_url = "%s?#%s" % (pkg.repository, pkg.ref) - full_urls.append((pkgname, full_url)) - - log.debug("Checking scm urls") - # Checks the availability of SCM urls. - pool = ThreadPool(10) - err_msgs = pool.map(lambda data: "Cannot checkout {}".format(data[0]) - if not module_build_service.scm.SCM(data[1]).is_available() - else None, full_urls) + # Check that SCM URL is valid and replace potential branches in + # pkg.ref by real SCM hash. + pool = ThreadPool(20) + err_msgs = pool.map(_scm_get_latest, mmd.components.rpms.values()) # TODO: only the first error message is raised, perhaps concatenate # the messages together? for err_msg in err_msgs: if err_msg: raise UnprocessableEntity(err_msg) + for pkgname, pkg in mmd.components.rpms.items(): + full_url = "%s?#%s" % (pkg.repository, pkg.ref) + full_urls.append((pkgname, full_url)) + components = mmd.components.all components.sort(key=lambda x: x.buildorder) previous_buildorder = None diff --git a/tests/test_views/test_views.py b/tests/test_views/test_views.py index c29708c2..11464497 100644 --- a/tests/test_views/test_views.py +++ b/tests/test_views/test_views.py @@ -320,12 +320,12 @@ class TestViews(unittest.TestCase): @patch('module_build_service.scm.SCM') def test_submit_build_scm_parallalization(self, mocked_scm, mocked_assert_is_packager, mocked_get_username): - def mocked_scm_is_available(): + def mocked_scm_get_latest(branch = "master"): time.sleep(1) - return True + return branch - mocked_scm.return_value.is_available = mocked_scm_is_available mocked_scm_obj = MockedSCM(mocked_scm, "base-runtime", "base-runtime.yaml") + mocked_scm.return_value.is_available = mocked_scm_get_latest start = time.time() rv = self.client.post('/module-build-service/1/module-builds/', data=json.dumps( @@ -356,11 +356,11 @@ class TestViews(unittest.TestCase): @patch('module_build_service.scm.SCM') def test_submit_build_scm_non_available(self, mocked_scm, mocked_assert_is_packager, mocked_get_username): - def mocked_scm_is_available(): - return False + def mocked_scm_get_latest(): + raise RuntimeError("Failed in mocked_scm_get_latest") - mocked_scm.return_value.is_available = mocked_scm_is_available mocked_scm_obj = MockedSCM(mocked_scm, "base-runtime", "base-runtime.yaml") + mocked_scm.return_value.get_latest = mocked_scm_get_latest rv = self.client.post('/module-build-service/1/module-builds/', data=json.dumps( {'scmurl': 'git://pkgs.stg.fedoraproject.org/modules/' @@ -368,7 +368,7 @@ class TestViews(unittest.TestCase): data = json.loads(rv.data) self.assertEquals(data['status'], 422) - self.assertEquals(data['message'][:15], "Cannot checkout") + self.assertEquals(data['message'][:31], "Failed to get the latest commit") self.assertEquals(data['error'], "Unprocessable Entity") @patch('module_build_service.auth.get_username', return_value='Homer J. Simpson')