Merge #280 Make SCM check faster by consolidating two SCM checks to single check executed in ThreadPool. Use 'git checkout' instead of HTTP HEAD request, because it is much faster in Fedora cgit case.

This commit is contained in:
Jan Kaluža
2017-01-12 09:12:05 +00:00
3 changed files with 53 additions and 62 deletions

View File

@@ -100,10 +100,10 @@ class SCM(object):
@staticmethod
@module_build_service.utils.retry(wait_on=RuntimeError)
def _run(cmd, chdir=None):
def _run(cmd, chdir=None, log_stdout = False):
proc = sp.Popen(cmd, stdout=sp.PIPE, stderr=sp.PIPE, cwd=chdir)
stdout, stderr = proc.communicate()
if stdout:
if log_stdout and stdout:
log.debug(stdout)
if stderr:
log.warning(stderr)
@@ -149,53 +149,43 @@ class SCM(object):
:raises: RuntimeError
"""
if self.scheme == "git":
cmd = ["git", "ls-remote", self.repository]
proc = sp.Popen(cmd, stdout=sp.PIPE, stderr=sp.PIPE)
output, stderr = proc.communicate()
if proc.returncode != 0:
raise RuntimeError("Cannot get git hash of %s HEAD in %s"
% (branch, self.repository))
log.debug("Getting/verifying commit hash for %s" % self.repository)
output = SCM._run(["git", "ls-remote", self.repository])[1]
for line in output.split(os.linesep):
if line.endswith("\trefs/heads/%s" % branch):
return line.split("\t")[0]
# Hopefully `branch` is really a commit hash. Code later needs to verify this.
log.warn("Couldn't determine the git %s HEAD hash in %s."
% (branch, self.repository))
if self.is_available(True):
return branch
return branch
else:
raise RuntimeError("get_latest: Unhandled SCM scheme.")
def is_available(self):
def is_available(self, strict=False):
"""Check whether the scmurl is available for checkout.
:param bool strict: When True, raise expection on error instead of
returning False.
:returns: bool -- the scmurl is available for checkout
"""
# XXX: If implementing special hacks for pagure.io or github.com, don't
# forget about possible forks -- start with self.repository.
if self.repository.startswith("-git://pkgs.fedoraproject.org/"):
hc = http_client.HTTPConnection("pkgs.fedoraproject.org")
hc.request("HEAD",
"/cgit/rpms/" + self.name + ".git/commit/?id=" + self.commit)
rc = hc.getresponse().code
hc.close()
return True if rc == 200 else False
else:
td = None
td = None
try:
td = tempfile.mkdtemp()
self.checkout(td)
return True
except:
if strict:
raise
return False
finally:
try:
td = tempfile.mkdtemp()
self.checkout(td)
return True
except:
return False
finally:
try:
if td is not None:
shutil.rmtree(td)
except Exception as e:
log.warning(
"Failed to remove temporary directory {!r}: {}".format(
td, str(e)))
if td is not None:
shutil.rmtree(td)
except Exception as e:
log.warning(
"Failed to remove temporary directory {!r}: {}".format(
td, str(e)))
@property
def url(self):

View File

@@ -275,6 +275,18 @@ def _fetch_mmd(url, allow_local_url = False):
return mmd, scm, yaml
def _scm_get_latest(pkg):
try:
# If the modulemd specifies that the 'f25' branch is what
# we want to pull from, we need to resolve that f25 branch
# to the specific commit available at the time of
# submission (now).
pkg.ref = module_build_service.scm.SCM(
pkg.repository).get_latest(branch=pkg.ref)
except Exception as e:
return "Failed to get the latest commit for %s#%s" % (pkg.repository, pkg.ref)
return None
def record_component_builds(scm, mmd, module, initial_batch = 1):
# Import it here, because SCM uses utils methods
# and fails to import them because of dep-chain.
@@ -286,6 +298,7 @@ def record_component_builds(scm, mmd, module, initial_batch = 1):
# If the modulemd yaml specifies components, then submit them for build
if mmd.components:
# Add missing data in components
for pkgname, pkg in mmd.components.rpms.items():
try:
if pkg.repository and not conf.rpms_allow_repository:
@@ -299,38 +312,26 @@ def record_component_builds(scm, mmd, module, initial_batch = 1):
pkg.cache = conf.rpms_default_cache + pkgname
if not pkg.ref:
pkg.ref = 'master'
try:
# If the modulemd specifies that the 'f25' branch is what
# we want to pull from, we need to resolve that f25 branch
# to the specific commit available at the time of
# submission (now).
pkg.ref = module_build_service.scm.SCM(
pkg.repository).get_latest(branch=pkg.ref)
except Exception as e:
raise UnprocessableEntity(
"Failed to get the latest commit for %s#%s" % (
pkgname, pkg.ref))
except Exception:
module.transition(conf, models.BUILD_STATES["failed"])
db.session.add(module)
db.session.commit()
raise
full_url = "%s?#%s" % (pkg.repository, pkg.ref)
full_urls.append((pkgname, full_url))
log.debug("Checking scm urls")
# Checks the availability of SCM urls.
pool = ThreadPool(10)
err_msgs = pool.map(lambda data: "Cannot checkout {}".format(data[0])
if not module_build_service.scm.SCM(data[1]).is_available()
else None, full_urls)
# Check that SCM URL is valid and replace potential branches in
# pkg.ref by real SCM hash.
pool = ThreadPool(20)
err_msgs = pool.map(_scm_get_latest, mmd.components.rpms.values())
# TODO: only the first error message is raised, perhaps concatenate
# the messages together?
for err_msg in err_msgs:
if err_msg:
raise UnprocessableEntity(err_msg)
for pkgname, pkg in mmd.components.rpms.items():
full_url = "%s?#%s" % (pkg.repository, pkg.ref)
full_urls.append((pkgname, full_url))
components = mmd.components.all
components.sort(key=lambda x: x.buildorder)
previous_buildorder = None

View File

@@ -320,12 +320,12 @@ class TestViews(unittest.TestCase):
@patch('module_build_service.scm.SCM')
def test_submit_build_scm_parallalization(self, mocked_scm,
mocked_assert_is_packager, mocked_get_username):
def mocked_scm_is_available():
def mocked_scm_get_latest(branch = "master"):
time.sleep(1)
return True
return branch
mocked_scm.return_value.is_available = mocked_scm_is_available
mocked_scm_obj = MockedSCM(mocked_scm, "base-runtime", "base-runtime.yaml")
mocked_scm.return_value.is_available = mocked_scm_get_latest
start = time.time()
rv = self.client.post('/module-build-service/1/module-builds/', data=json.dumps(
@@ -356,11 +356,11 @@ class TestViews(unittest.TestCase):
@patch('module_build_service.scm.SCM')
def test_submit_build_scm_non_available(self, mocked_scm,
mocked_assert_is_packager, mocked_get_username):
def mocked_scm_is_available():
return False
def mocked_scm_get_latest():
raise RuntimeError("Failed in mocked_scm_get_latest")
mocked_scm.return_value.is_available = mocked_scm_is_available
mocked_scm_obj = MockedSCM(mocked_scm, "base-runtime", "base-runtime.yaml")
mocked_scm.return_value.get_latest = mocked_scm_get_latest
rv = self.client.post('/module-build-service/1/module-builds/', data=json.dumps(
{'scmurl': 'git://pkgs.stg.fedoraproject.org/modules/'
@@ -368,7 +368,7 @@ class TestViews(unittest.TestCase):
data = json.loads(rv.data)
self.assertEquals(data['status'], 422)
self.assertEquals(data['message'][:15], "Cannot checkout")
self.assertEquals(data['message'][:31], "Failed to get the latest commit")
self.assertEquals(data['error'], "Unprocessable Entity")
@patch('module_build_service.auth.get_username', return_value='Homer J. Simpson')