updates-uptimes: Use machine data to find reboot/etc. utf8 diff. uptime-max.

Signed-off-by: James Antill <james@and.org>
This commit is contained in:
James Antill
2025-08-19 19:10:58 -04:00
parent e8cea23281
commit 57bdad10fb
2 changed files with 188 additions and 52 deletions

View File

@@ -20,8 +20,9 @@
# $0 list '*.stg.*' ... see what staging looks like.
# $0 list '*copr*' ... see what copr looks like.
# $0 history-keep 4 ... keep four days of history (including today)
# $0 uptime 1d ... see what hasn't been rebooted in the last 24 hours.
# $0 uptime 25w ... see what hasn't been rebooted in too damn long.
# $0 uptime-min 1d ... see what hasn't been rebooted in the last 24 hours.
# $0 uptime-max 1d ... see what has been rebooted in the last 24 hours.
# $0 uptime-min 25w ... see what hasn't been rebooted in too damn long.
# $0 update-daily-refresh ... daily update, including a new history, and
# refresh the main file (so any old hosts aren't there anymore).
@@ -29,15 +30,31 @@ import os
import sys
import fnmatch
import glob
import locale
import shutil
import time
# Use utf8 prefixes in diff
conf_utf8_diff = True
_conf_utf8_boot_ed = '' # Rebooted
_conf_utf8_boot_up = '' # Rebooted and updated
_conf_utf8_more_up = ''
_conf_utf8_less_up = ''
_conf_utf8_diff_os = '' # '♺' OSinfo is different, but the machine is the same
_conf_utf8_diff_hw = '' # '모' machine_id is different
# If we try to update this seconds since the file changed, flush the
# ansible FACT cache.
conf_dur_flush_cache = (60*60*8)
# This is kind of a hack, if you run from a cron job then it should run at
# the same time each day, and this should be 1 hour or less. But life isn't
# perfect, so we give it some more time.
# The two competing problems are 1) reboot machine with low uptime.
# 2) get data at 23:59 yesterday and 0:01 today.
# ...and we can't fix both.
conf_tmdiff_fudge = (60*60*8)
# How many hosts to show in tier 4 updates/uptimes...
conf_stat_4_hosts = 4
@@ -81,11 +98,31 @@ except locale.Error:
locale.setlocale(locale.LC_ALL, 'C')
fname = conf_path + "ansible-list-updates-uptime.txt"
_fname = "ansible-list-updates-uptime.txt"
fname = conf_path + _fname
backup_today = time.strftime("%Y-%m-%d", time.gmtime())
fname_today = fname + '.' + backup_today
backups = sorted(x.removeprefix(fname + '.') for x in glob.glob(fname + '.*'))
# History files are named <fname>.YYYY-MM-DD
def _glob_hist_suffix():
    """Yield the YYYY-MM-DD suffix of each history file beside the main file.

    History files are named <_fname>.YYYY-MM-DD and live in the same
    directory as fname. Anything else in that directory (editor backups,
    temp files, other suffixes) is ignored. Suffixes are yielded in
    os.listdir() order, so callers sort them.
    """
    prefix = _fname + '.'
    digits = "0123456789"
    # dirname() is '' when fname has no directory part, and listdir('')
    # raises FileNotFoundError; fall back to the current directory.
    for fn in os.listdir(os.path.dirname(fname) or '.'):
        if not fn.startswith(prefix):
            continue
        suffix = fn.removeprefix(prefix)
        if len(suffix) != len("YYYY-MM-DD"):
            continue
        if suffix[4] != '-' or suffix[7] != '-':
            continue
        year, month, day = suffix[:4], suffix[5:7], suffix[8:]
        if year[0] != '2':
            continue
        if not all(c in digits for c in year + month + day):
            continue
        # Both fields are exactly two ASCII digits here, so comparing them
        # as strings is the same as comparing them as numbers.
        if not "01" <= month <= "12":
            continue
        if not "01" <= day <= "31":
            continue
        yield suffix
backups = sorted(_glob_hist_suffix())
tm_yesterday = int(time.time()) - (60*60*24)
backup_yesterday = time.strftime("%Y-%m-%d", time.gmtime(tm_yesterday))
@@ -113,7 +150,7 @@ class Host():
""" Class for holding the Host data from a line in the files. """
__slots__ = ['name', 'rpms', 'uptime', 'date', 'osname', 'osvers',
'osname_small']
'osname_small', 'machine_id', 'boot_id']
def __init__ (self, data):
global _max_len_osnm
@@ -123,9 +160,13 @@ class Host():
self.rpms = data['rpms']
self.uptime = data['uptime']
self.date = data['date']
self.osname = data['osname']
self.osvers = data['osvers']
self.machine_id = data['machine_id']
self.boot_id = data['boot_id']
if False: pass
elif self.osname == 'CentOS':
osname_small = 'EL'
@@ -156,6 +197,8 @@ class Host():
return False
if self.osvers != other.osvers:
return False
if self.machine_id != other.machine_id:
return False
return True
def __gt__(self, other):
@@ -213,7 +256,7 @@ if len(sys.argv) >= 2:
"stats",
"update", "update-fast", "update-flush",
"update-daily", "update-daily-refresh",
"uptime",):
"uptime", "uptime-min", "uptime-max",):
cmd = sys.argv.pop(1)
_tm_d = {'d' : 60*60*24, 'h' : 60*60, 'm' : 60, 's' : 1,
@@ -267,6 +310,8 @@ def format_duration(seconds, short=False, static=False):
if short:
if dur == 0 and not static:
return '<1h'
if dur == 0:
return '<01h'
ret = []
dur = _add_dur(dur, ret, 24, "h", static=static)
dur = _add_dur(dur, ret, 7, "d", static=static)
@@ -304,11 +349,12 @@ cmp = None
# This does arguments for a bunch of commands, like stats/list/etc.
# by using fname1() after, which looks at cmp_arg.
# But also does diff arguments.
def _cmp_arg():
def _cmp_arg(usage=True):
global cmp
global cmp_arg
if len(sys.argv) < 2 or sys.argv[1] == "main":
cmp_arg = False
if len(sys.argv) >= 2:
sys.argv.pop(1)
cmp = backups[-1] # Most recent
@@ -323,8 +369,9 @@ def _cmp_arg():
cmp = backup_yesterday
cmp_arg = True
elif sys.argv[1] not in backups:
_usage()
print("Backups:", ", ".join(backups))
if usage:
_usage()
print("History:", ", ".join(backups))
sys.exit(1)
else:
cmp = sys.argv[1]
@@ -339,8 +386,12 @@ def line2data(line):
name, rpms, uptime, date = line.split(' ', 3)
osname = "Unknown"
osvers = "?"
machine_id = "?"
boot_id = "?"
if ' ' in date:
date, osname, osvers = date.split(' ', 2)
if ' ' in osvers:
osvers, machine_id, boot_id = osvers.split(' ', 2)
rpms = int(rpms)
uptime = int(uptime)
@@ -358,12 +409,19 @@ def filter_name_datas(datas, name):
yield data
# Filter datas using uptime as a minimum.
def filter_uptime_datas(datas, uptime):
def filter_uptime_min_datas(datas, uptime):
    """Lazily yield the entries of datas whose uptime is not below uptime.

    datas is any iterable of objects with an ``uptime`` attribute; the
    surviving entries come out in their original order.
    """
    yield from (host for host in datas if not host.uptime < uptime)
# Filter datas using uptime as a maximum.
def filter_uptime_max_datas(datas, uptime):
    """Lazily yield the entries of datas whose uptime is not above uptime.

    datas is any iterable of objects with an ``uptime`` attribute; the
    surviving entries come out in their original order.
    """
    yield from (host for host in datas if not host.uptime > uptime)
# Sub. suffix of DNS names for UI
def _ui_name(name):
for suffix in conf_suffix_dns_replace:
@@ -420,6 +478,27 @@ def fname1():
return lines2datas(bfname2lines(cmp))
return fname_datas()
# Has the host been rebooted between these two points.
def host_rebooted(d1, d2):
    """Return True if the host looks to have rebooted between d1 and d2.

    d1 is the older record and d2 the newer one; both need ``boot_id``,
    ``date`` (a "YYYY-MM-DD" string) and ``uptime`` attributes.

    When both records carry a real boot_id, that alone decides the answer.
    A boot_id of '?' is the placeholder used for lines written before boot
    ids were recorded. If either side is '?', the ids cannot be compared, so
    the answer is estimated from the uptimes. (Comparing '?' against a real
    id would flag every host as rebooted when old history meets new.)

    Raises ValueError, from time.strptime, if the dates differ and either
    is not in "YYYY-MM-DD" form.
    """
    if '?' not in (d1.boot_id, d2.boot_id):
        return d1.boot_id != d2.boot_id

    # Now we try to work it out from uptime...
    if d1.date == d2.date:
        # Same day: no elapsed time to allow for, so uptime going
        # backwards is the only evidence of a reboot.
        return d1.uptime > d2.uptime

    # However, we can be looking at old history
    tm1 = time.mktime(time.strptime(d1.date, "%Y-%m-%d"))
    tm2 = time.mktime(time.strptime(d2.date, "%Y-%m-%d"))
    if tm1 > tm2: # Looking backwards in time...
        return False

    # Without a reboot, d2's uptime should be about d1's uptime plus the
    # time between the two records. The records are not taken at exactly
    # the same time of day, so allow conf_tmdiff_fudge seconds of slack.
    expected_up = d1.uptime
    tmdiff = tm2 - tm1
    if tmdiff > conf_tmdiff_fudge:
        expected_up += tmdiff - conf_tmdiff_fudge
    return expected_up > d2.uptime
_max_len_name = 0
_max_len_rpms = 0 # Number of rpm updates via. _ui_int().
_max_len_upts = 0 # Uptime duration with short=True
@@ -461,7 +540,7 @@ def _max_update_correct(prefix):
# Return stats for updates added/deleted between two data sets.
def _diffstats(data1, data2):
uadd, udel = 0, 0
uadd, udel, boot = 0, 0, 0
data1 = list(sorted(data1))
data2 = list(sorted(data2))
@@ -469,6 +548,7 @@ def _diffstats(data1, data2):
if len(data1) <= 0:
d2 = data2.pop(0)
uadd += d2.rpms
boot += 1
continue
if len(data2) <= 0:
d1 = data1.pop(0)
@@ -486,14 +566,18 @@ def _diffstats(data1, data2):
if d1.name > d2.name:
uadd += d2.rpms
data2.pop(0)
boot += 1
continue
if d1 == d2:
if host_rebooted(d1, d2):
boot += 1
data1.pop(0)
data2.pop(0)
continue
if d1.osinfo != d2.osinfo:
if d1.machine_id != d2.machine_id or d1.osinfo != d2.osinfo:
boot += 1
udel -= d1.rpms
uadd += d2.rpms
data1.pop(0)
@@ -501,6 +585,8 @@ def _diffstats(data1, data2):
continue
# Now name is eq and osinfo is eq
if host_rebooted(d1, d2):
boot += 1
# So either new updates arrived, or we installed some and they went
# down ... alas. we can't tell if both happened.
if d1.rpms > d2.rpms:
@@ -511,11 +597,11 @@ def _diffstats(data1, data2):
data2.pop(0)
# diffstat returns...
return uadd, udel
return uadd, udel, boot
def _ui_diffstats(data1, data2):
cmpds = _diffstats(data1, data2)
return _ui_int(cmpds[0]), _ui_int(cmpds[1])
return _ui_int(cmpds[0]), _ui_int(cmpds[1]), _ui_int(cmpds[2])
@@ -573,7 +659,7 @@ def _backup_suffix(backup):
suffix = ''
if backup == backup_today:
if ident:
suffix = ' (today, is identical)'
suffix = ' (today, is eq)'
else:
suffix = ' (today)'
if backup == backup_yesterday:
@@ -590,7 +676,8 @@ if cmd in ("backups", "hist", "history"):
# We _could_ open+read+etc each file, just to find out the max updates for
# all hist ... but len("Updates")+2=9 which means 9,999,999 updates)
hl = len("Hosts")
ul = len("Updates") + 2
ul = len("Updates")
rl = len("Boots")
if conf_fast_width_history:
ul += 2
else:
@@ -602,17 +689,17 @@ if cmd in ("backups", "hist", "history"):
hl = max(hl, len(_ui_int(len(data))))
ul = max(ul, len(updates))
print(" %10s %*s %*s %*s %*s" % ("Day", hl, "Hosts",
ul, "Updates", ul, "Avail", ul, "Inst."))
print(" %10s %*s %*s %*s %*s %*s" % ("Day", hl, "Hosts",
ul, "Updates", ul, "Avail", ul+1, "Inst.", rl, "Boots"))
for backup in reversed(backups):
data = list(sorted(lines2datas(bfname2lines(backup))))
updates = _ui_int(sum(d.rpms for d in last_data))
ul = max(ul, len(updates))
cmpds = _ui_diffstats(data.copy(), last_data.copy())
print(' %10s %*s %*s, %*s %*s, %s' % (last_name,
print(' %10s %*s %*s, %*s %*s, %*s %s' % (last_name,
hl, _ui_int(len(last_data)),
ul, updates, ul, cmpds[0], ul+1, cmpds[1], last_suff))
ul, updates, ul, cmpds[0], ul+1, cmpds[1], rl, cmpds[2], last_suff))
last_name = backup
last_data = data
last_suff = _backup_suffix(backup)
@@ -666,9 +753,9 @@ if not os.path.exists(fname):
if not _main_file_recent():
print(" Warning: Main file is old. Run update sub-command", file=sys.stderr)
if fname_today is None:
print(" Warning: Backup for today does not exist!", file=sys.stderr)
print(" Warning: History for today does not exist!", file=sys.stderr)
if fname_yesterday is None:
print(" Warning: Backup for yesterday does not exist!", file=sys.stderr)
print(" Warning: History for yesterday does not exist!", file=sys.stderr)
def _cli_match_host(data):
if len(sys.argv) >= 2:
@@ -808,6 +895,8 @@ def _print_info(host, data):
print(" Updates:", _ui_int(host.rpms))
print(" Uptime:", format_duration(host.uptime)) # !ui_dur
print(" Checked:", host.date)
print(" Machine:", host.machine_id)
print(" Boot:", host.boot_id)
if cmd in ("host", "info"):
if cmd == "host":
@@ -818,12 +907,18 @@ if cmd in ("host", "info"):
host = sys.argv.pop(1)
if len(sys.argv) >= 2 and sys.argv[1] == "all":
for b in backups:
print("Backup:", b)
print("History:", b)
_print_info(host, lines2datas(bfname2lines(b)))
sys.argv = [sys.argv[0]]
while True:
_cmp_arg()
if cmp_arg: # One or more historical files...
print("History:", sys.argv.pop(1))
else:
print("Main:")
_cmp_arg()
_print_info(host, fname1())
_print_info(host, fname1())
if len(sys.argv) < 2:
break
def _print_line(prefix, data):
print("%s%-*s %*s %*s %*s %s" % (prefix,
@@ -847,14 +942,17 @@ if cmd == "list":
_print_line('', d1)
_explain_ui_name()
if cmd == "uptime":
if cmd in ("uptime", "uptime-max", "uptime-min"):
age = 0
if len(sys.argv) >= 2:
age = parse_duration(sys.argv.pop(1))
_cmp_arg()
data = fname1()
data = list(filter_uptime_datas(data, age))
if cmd == "uptime-max":
data = list(filter_uptime_max_datas(data, age))
else:
data = list(filter_uptime_min_datas(data, age))
_max_update(data)
_max_update_correct('')
for d1 in data:
@@ -910,25 +1008,44 @@ if cmd in ("diff", "diff-u"):
data2.pop(0)
continue
if d1 == d2:
_print_line(' ', d2)
data1.pop(0)
data2.pop(0)
continue
if cmd == "diff-u":
_print_line('-', d1)
data1.pop(0)
_print_line('+', d2)
data2.pop(0)
continue
# diff
# d1.name == d2.name; so both are going now
data1.pop(0)
_print_line('!', d2)
data2.pop(0)
# Name, rpms, and OSname/OSvers are the same
if d1 == d2:
if cmd == "diff" and conf_utf8_diff and host_rebooted(d1, d2):
_print_line(_conf_utf8_boot_ed, d2)
continue
_print_line(' ', d2)
continue
# Something about host changed, show old/new...
if cmd == "diff-u":
_print_line('-', d1)
_print_line('+', d2)
continue
# Something changed, but we only show the new data...
if conf_utf8_diff:
if False: pass
elif d1.machine_id != d2.machine_id:
_print_line(_conf_utf8_diff_hw, d2)
elif d1.osinfo != d2.osinfo:
_print_line(_conf_utf8_diff_os, d2)
elif host_rebooted(d1, d2) and d1.rpms > d2.rpms:
_print_line(_conf_utf8_boot_up, d2)
elif host_rebooted(d1, d2):
_print_line(_conf_utf8_boot_ed, d2)
elif d1.rpms > d2.rpms:
_print_line(_conf_utf8_less_up, d2)
else: # d1.rpms < d2.rpms:
_print_line(_conf_utf8_more_up, d2)
continue
_print_line('!', d2)
continue
print('hosts=%s updates=%s (a=%s i=%s)' % (hosts, updates, cmpds[0],cmpds[1]))
print('hosts=%s updates=%s (a=%s i=%s) boots=%s' % (hosts, updates,
cmpds[0], cmpds[1], cmpds[2]))
_explain_ui_name()