Try to refactor a bunch of our scattered retry logic.

This commit is contained in:
Ralph Bean
2016-07-30 11:02:57 -04:00
parent cdc7e366f9
commit 283c48dbbc
4 changed files with 109 additions and 78 deletions

View File

@@ -45,6 +45,8 @@ import kobo.rpmlib
import munch
from OpenSSL.SSL import SysCallError
import rida.utils
logging.basicConfig(level=logging.DEBUG)
log = logging.getLogger(__name__)
@@ -161,16 +163,6 @@ class Builder:
else:
raise ValueError("Builder backend='%s' not recognized" % backend)
def retry(callback, **kwargs):
attempt = 0
log.debug("retry() calling %r(kwargs=%r)" % (callback, kwargs))
while True:
try:
callback(**kwargs)
break
except SysCallError:
attempt += 1
log.warn("retry(attempt=%d) calling %r(kwargs=%r)" % (attempt, callback, kwargs))
class KojiModuleBuilder(GenericBuilder):
""" Koji specific builder class """
@@ -362,7 +354,13 @@ chmod 644 %buildroot/%_rpmconfigdir/macros.d/macros.modules
groups = KOJI_DEFAULT_GROUPS # TODO: read from config
if groups:
retry(self._koji_add_groups_to_tag, dest_tag=self.module_build_tag, groups=groups)
@rida.utils.retry(wait_on=SysCallError, interval=5)
def add_groups():
return self._koji_add_groups_to_tag(
dest_tag=self.module_build_tag,
groups=groups,
)
add_groups()
self.module_target = self._koji_add_target(self.tag_name, self.module_build_tag, self.module_tag)
self.__prep = True
@@ -395,21 +393,19 @@ chmod 644 %buildroot/%_rpmconfigdir/macros.d/macros.modules
:param task_id
:return - task result object
"""
start = time.time()
timeout = 60 # minutes
log.info("Waiting for task_id=%s to finish" % task_id)
while True:
if (time.time() - start) >= (timeout * 60.0):
break
try:
log.debug("Waiting for task_id=%s to finish" % task_id)
return self.koji_session.getTaskResult(task_id)
except koji.GenericError:
time.sleep(30)
log.info("Done waiting for task_id=%s to finish" % task_id)
return 1
timeout = 60 * 60 # 60 minutes
@rida.utils.retry(timeout=timeout, wait_on=koji.GenericError)
def get_result():
log.debug("Waiting for task_id=%s to finish" % task_id)
task = self.koji_session.getTaskResult(task_id)
log.info("Done waiting for task_id=%s to finish" % task_id)
return task
return get_result()
def build(self, artifact_name, source):
"""

View File

@@ -26,12 +26,12 @@
import rida.builder
import rida.database
import rida.pdc
import rida.utils
import koji
import logging
import os
import time
logging.basicConfig(level=logging.DEBUG)
log = logging.getLogger(__name__)
@@ -66,25 +66,30 @@ def wait(config, session, msg):
tag = None
dependencies = None
pdc_session = rida.pdc.get_pdc_client_session(config)
retry_interval = 60 # seconds
max_attempts = 5
attempts = 0
while True:
if attempts >= max_attempts: # XXX: hack for pdc not sending messages about unreleased variants
log.error("Failed to get module info from PDC. Max retries reached.")
build.transition(config, state="build") # Wait for the buildroot to be ready.a
break
try:
qi = {'name': module_info['name'], 'version': module_info['version'], 'release': module_info['release']}
attempts += 1
log.info("Getting %s deps from pdc" % module_info['name'])
dependencies = rida.pdc.get_module_build_dependencies(pdc_session, qi, strict=True)
log.info("Getting %s tag from pdc" % module_info['name'])
tag = rida.pdc.get_module_tag(pdc_session, qi, strict=True)
except ValueError as e:
log.debug(e)
log.warn("Waiting additional %d seconds for PDC/%s" % (retry_interval, qi))
time.sleep(retry_interval)
pdc_query = {
'name': module_info['name'],
'version': module_info['version'],
'release': module_info['release'],
}
@rida.utils.retry(interval=60, timeout=60*6, wait_on=ValueError)
def _get_deps_and_tag():
log.info("Getting %s deps from pdc" % module_info['name'])
dependencies = rida.pdc.get_module_build_dependencies(
pdc_session, pdc_query, strict=True)
log.info("Getting %s tag from pdc" % module_info['name'])
tag = rida.pdc.get_module_tag(
pdc_session, pdc_query, strict=True)
return dependencies, tag
try:
dependencies, tag = _get_deps_and_tag()
except ValueError:
log.exception("Failed to get module info from PDC. Max retries reached.")
build.transition(config, state="build") # Wait for the buildroot to be ready.a
session.commit()
raise
log.debug("Found tag=%s for module %r" % (tag, build))
# Hang on to this information for later. We need to know which build is

View File

@@ -30,13 +30,16 @@
from six.moves import http_client
import os
import sys
import time
import traceback
import subprocess
import subprocess as sp
import re
import tempfile
import logging
log = logging.getLogger(__name__)
import rida.utils
class SCM(object):
"SCM abstraction class"
@@ -89,33 +92,19 @@ class SCM(object):
else:
raise RuntimeError("Unhandled SCM scheme: %s" % self.scheme)
@staticmethod
@rida.utils.retry(wait_on=RuntimeError)
def _run(cmd, chdir=None):
    """Run ``cmd`` as a subprocess and retry while it exits non-zero.

    ``staticmethod`` must be the outermost decorator: the retry wrapper
    has to receive the plain function.  Wrapping the staticmethod object
    instead fails on interpreters where that object is not callable.

    :param cmd: the command and its arguments, as a list handed to Popen.
    :param chdir: directory to run the command in (Popen ``cwd``), or None.
    :return: the process return code; always 0, since non-zero raises.
    :raises RuntimeError: when the command exits non-zero.  The retry
        decorator waits on this exception, so it only reaches the caller
        once the decorator gives up.
    """
    proc = sp.Popen(cmd, stdout=sp.PIPE, stderr=sp.PIPE, cwd=chdir)
    stdout, stderr = proc.communicate()

    # Surface whatever the command printed before deciding on success.
    if stdout:
        log.debug(stdout)
    if stderr:
        log.warning(stderr)

    if proc.returncode != 0:
        raise RuntimeError("Failed on %r, retcode %r, out %r, err %r" % (
            cmd, proc.returncode, stdout, stderr))
    return proc.returncode
def checkout(self, scmdir):
"""Checkout the module from SCM.
@@ -136,12 +125,9 @@ class SCM(object):
module_clone_cmd.extend([self.repository, sourcedir])
# perform checkouts
if not SCM._run(module_clone_cmd, chdir=scmdir) == 0:
raise RuntimeError("Git clone failed: %s" % self.repository)
SCM._run(module_clone_cmd, chdir=scmdir)
if self.commit:
if not SCM._run(module_checkout_cmd, chdir=sourcedir) == 0:
raise RuntimeError("Git checkout failed: %s?#%s" %
(self.repository, self.commit))
SCM._run(module_checkout_cmd, chdir=sourcedir)
else:
raise RuntimeError("checkout: Unhandled SCM scheme.")
return sourcedir
@@ -153,7 +139,7 @@ class SCM(object):
:raises: RuntimeError
"""
if self.scheme == "git":
(status , output) = subprocess.getstatusoutput("git ls-remote %s"
(status , output) = sp.getstatusoutput("git ls-remote %s"
% self.repository)
if status != 0:
raise RuntimeError("Cannot get git hash of master HEAD in %s"

44
rida/utils.py Normal file
View File

@@ -0,0 +1,44 @@
# Copyright (c) 2016 Red Hat, Inc.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
#
# Written by Ralph Bean <rbean@redhat.com>
""" Utility functions for rida. """
import functools
import time
def retry(timeout=120, interval=30, wait_on=Exception):
    """ A decorator that retries the wrapped callable...

    ...until it succeeds or ``timeout`` has elapsed.

    :param timeout: maximum total time, in seconds, to keep retrying.
    :param interval: seconds to sleep between two attempts.
    :param wait_on: exception class (or tuple of classes) that triggers a
        retry; any other exception propagates immediately.
    :return: a decorator.  The decorated callable returns whatever the
        wrapped callable returns on its first successful attempt.
    :raises: the last ``wait_on`` exception once ``timeout`` has elapsed.
    """

    def wrapper(function):
        @functools.wraps(function)
        def inner(*args, **kwargs):
            start = time.time()
            while True:
                # Always attempt the call at least once, whatever the timeout.
                try:
                    return function(*args, **kwargs)
                except wait_on:
                    # ``timeout`` is in seconds (callers pass e.g. 60 * 60
                    # for an hour), so compare it to elapsed time directly.
                    if (time.time() - start) >= timeout:
                        # Re-raise from inside the handler: a bare ``raise``
                        # with no active exception is itself an error on
                        # Python 3.
                        raise
                    time.sleep(interval)
        return inner
    return wrapper