From 08375ad9e98c492bfbe2b86fd186275c000cd9b1 Mon Sep 17 00:00:00 2001 From: James Antill Date: Wed, 20 Aug 2025 13:51:18 -0400 Subject: [PATCH] updates-uptimes: Fixed facts to be fresh. Fixed unknown val cmp. New host cmd. Signed-off-by: James Antill --- files/scripts/updates-uptime-cmd.py | 383 +++++++++++------- ...generate-updates-uptimes-per-host-file.yml | 16 +- 2 files changed, 248 insertions(+), 151 deletions(-) diff --git a/files/scripts/updates-uptime-cmd.py b/files/scripts/updates-uptime-cmd.py index 840d8b7844..a0de2a6ab4 100755 --- a/files/scripts/updates-uptime-cmd.py +++ b/files/scripts/updates-uptime-cmd.py @@ -43,17 +43,13 @@ _conf_utf8_less_up = '➘' _conf_utf8_diff_os = '➜' # '♺' OSinfo is different, but the machine is the same _conf_utf8_diff_hw = '⇉' # '모' machine_id is different -# If we try to update this seconds since the file changed, flush the -# ansible FACT cache. -conf_dur_flush_cache = (60*60*8) - # This is kind of a hack, if you run from a cron job then it should run at # the same time each day, and this should be 1 hour or less. But life isn't # perfect, so we give it some more time. # The two competing problems are 1) reboot machine with low uptime. # 2) get data at 23:59 yesterday and 0:01 today. -# ...and we can't fix both. -conf_tmdiff_fudge = (60*60*8) +# ...and we can't fix both without boot_id's ... so just have those. +conf_tmdiff_fudge = (60*60*4) # How many hosts to show in tier 4 updates/uptimes... conf_stat_4_hosts = 4 @@ -64,6 +60,9 @@ conf_short_duration = True # Do we want a small osinfo in diff/list/etc. conf_small_osinfo = True +# Show machine/boot id's in info command, by default (use -v). +conf_info_machine_ids = False + # Try to print OS/ver even nicer (when small) ... but includes spaces. conf_align_osinfo_small = True @@ -74,6 +73,9 @@ conf_fast_width_history = True # at their uptime. Only for the main file. Assume they are still up etc. conf_dynamic_main_uptime = True +# Hosts that we'll show info. for, by default. info/host cmds. +conf_important_hosts = ["batcave*", "bastion01*", "noc*"] + # Remove suffix noise in names. conf_suffix_dns_replace = { '.fedoraproject.org' : '..org', @@ -141,9 +143,17 @@ if len(sys.argv) >= 2: sys.argv.remove('-v') # In theory sys.argv[0] but meh conf_small_osinfo = False conf_short_duration = False + conf_info_machine_ids = True conf_stat_4_hosts *= 4 conf_suffix_dns_replace = {} + +def _wild_eq(s1, s2): + """ Compare two strings, but allow '?' to mean anything. """ + if s1 == '?' or s2 == '?': + return True + return s1 == s2 + _max_len_osnm = 0 # osname_small _max_len_osvr = 0 # osvers ... upto the first '.' class Host(): @@ -193,11 +203,11 @@ class Host(): return False if self.rpms != other.rpms: return False - if self.osname != other.osname: + if not _wild_eq(self.osname, other.osname): return False - if self.osvers != other.osvers: + if not _wild_eq(self.osvers, other.osvers): return False - if self.machine_id != other.machine_id: + if not _wild_eq(self.machine_id, other.machine_id): return False return True @@ -214,7 +224,7 @@ class Host(): if self.osname > other.osname: return True - if self.osname != other.osname: + if not _wild_eq(self.osname, other.osname): return False if self.osvers > other.osvers: @@ -251,8 +261,9 @@ if len(sys.argv) >= 2: "hist", "history", "history-keep", "diff", "diff-u", "help", - "host", "info", - "list", + "host", "hosts", + "info", "information", + "list", "list-n", "stats", "update", "update-fast", "update-flush", "update-daily", "update-daily-refresh", @@ -384,7 +395,7 @@ def line2data(line): global _max_len_osvr name, rpms, uptime, date = line.split(' ', 3) - osname = "Unknown" + osname = "?" osvers = "?" machine_id = "?" boot_id = "?" @@ -402,9 +413,17 @@ def lines2datas(lines): return (line2data(line) for line in lines) # Filter datas using name as a filename wildcard match. -def filter_name_datas(datas, name): +def filter_name_datas(datas, names): + if not names: # Allow everything... + for data in datas: + yield data + return + for data in datas: - if not fnmatch.fnmatch(data.name, name): + for name in names: + if fnmatch.fnmatch(data.name, name): + break + else: continue yield data @@ -480,10 +499,9 @@ def fname1(): # Has the host been rebooted between these two points. def host_rebooted(d1, d2): - if d1.boot_id != d2.boot_id: - return True - if d2.boot_id != '?': - return False + # This is easy mode, just compare boot ids + if d1.boot_id != '?' and d2.boot_id != '?': + return d1.boot_id != d2.boot_id # Now we try to work it out from uptime... if d1.date == d2.date and d1.uptime > d2.uptime: @@ -538,6 +556,13 @@ def _max_update_correct(prefix): while _max_len_name + _max_len_rpms + _max_len_upts + _max_len_date >= mw: _max_len_name -= 1 +def _wild_info_eq(d1, d2): + if not _wild_eq(d1.osname, d2.osname): + return False + if not _wild_eq(d1.osvers, d2.osvers): + return False + return True + # Return stats for updates added/deleted between two data sets. def _diffstats(data1, data2): uadd, udel, boot = 0, 0, 0 @@ -576,7 +601,8 @@ def _diffstats(data1, data2): data2.pop(0) continue - if d1.machine_id != d2.machine_id or d1.osinfo != d2.osinfo: + if (not _wild_eq(d1.machine_id, d2.machine_id) or + not _wild_info_eq(d1, d2)): boot += 1 udel -= d1.rpms uadd += d2.rpms @@ -627,8 +653,8 @@ def _usage(): history-keep [days] = Cleanup old history. - host [host*] [backup] - = See the current state of a host(s), can be filtered by name. + hosts [host*] [host*]... + = See the history of a host(s). info [host*] [backup] = See the current state, in long form, can be filtered by name. list [host*] [backup] @@ -640,9 +666,8 @@ def _usage(): update = Create the file and/or do backups. update-fast - = Create the file. update-flush - = Create the file, after flushing ansible caches. + = Create the file. update-daily = update-flush and do backups. update-daily-refresh @@ -655,58 +680,6 @@ def _usage(): if cmd == "help": _usage() -def _backup_suffix(backup): - suffix = '' - if backup == backup_today: - if ident: - suffix = ' (today, is eq)' - else: - suffix = ' (today)' - if backup == backup_yesterday: - suffix = ' (yesterday)' - return suffix - -if cmd in ("backups", "hist", "history"): - ident = _backup_today_identical() - print("History:") - last_name = "main" - last_data = list(sorted(fname_datas())) - last_suff = "" - - # We _could_ open+read+etc each file, just to find out the max updates for - # all hist ... but len("Updates")+2=9 which means 9,999,999 updates) - hl = len("Hosts") - ul = len("Updates") - rl = len("Boots") - if conf_fast_width_history: - ul += 2 - else: - # Whatever, it's less memory than holding all history at once if you want - # to enable it.. - for backup in reversed(backups): - data = list(sorted(lines2datas(bfname2lines(backup)))) - updates = _ui_int(sum(d.rpms for d in data)) - hl = max(hl, len(_ui_int(len(data)))) - ul = max(ul, len(updates)) - - print(" %10s %*s %*s %*s %*s %*s" % ("Day", hl, "Hosts", - ul, "Updates", ul, "Avail", ul+1, "Inst.", rl, "Boots")) - - for backup in reversed(backups): - data = list(sorted(lines2datas(bfname2lines(backup)))) - updates = _ui_int(sum(d.rpms for d in last_data)) - ul = max(ul, len(updates)) - cmpds = _ui_diffstats(data.copy(), last_data.copy()) - print(' %10s %*s %*s, %*s %*s, %*s %s' % (last_name, - hl, _ui_int(len(last_data)), - ul, updates, ul, cmpds[0], ul+1, cmpds[1], rl, cmpds[2], last_suff)) - last_name = backup - last_data = data - last_suff = _backup_suffix(backup) - updates = _ui_int(sum(d.rpms for d in last_data)) - print(' %10s %*s %*s %s' % (last_name, hl, _ui_int(len(last_data)), - ul, updates, last_suff)) - if cmd in ("backups-keep", "history-keep"): keep = 8 if len(sys.argv) >= 2: @@ -733,9 +706,10 @@ if cmd == "update": cmd = "update-fast" if cmd == "update-flush": # Get the latest uptime. + # No need to flush caches now, the new playbook should DTRT. os.chdir("/srv/web/infra/ansible/playbooks") - os.system("ansible-playbook generate-updates-uptimes-per-host-file.yml -t updates --flush-cache") -if cmd == "update-fast": # Use ansible FACT cache for uptime. + os.system("ansible-playbook generate-updates-uptimes-per-host-file.yml -t updates") # --flush-cache") +if cmd == "update-fast": # Same as -flush now. os.chdir("/srv/web/infra/ansible/playbooks") os.system("ansible-playbook generate-updates-uptimes-per-host-file.yml -t updates") if cmd == "update-daily-refresh": # Also recreate the main file. @@ -744,7 +718,7 @@ if cmd == "update-daily-refresh": # Also recreate the main file. cmd = "update-daily" if cmd == "update-daily": # Also create backup file. os.chdir("/srv/web/infra/ansible/playbooks") - os.system("ansible-playbook generate-updates-uptimes-per-host-file.yml --flush-cache") + os.system("ansible-playbook generate-updates-uptimes-per-host-file.yml") # Below here are the query commands, stuff needs to exist at this point. if not os.path.exists(fname): @@ -757,11 +731,68 @@ if fname_today is None: if fname_yesterday is None: print(" Warning: History for yesterday does not exist!", file=sys.stderr) +def _backup_suffix(backup): + suffix = '' + if backup == backup_today: + if ident: + suffix = ' (today, is eq)' + else: + suffix = ' (today)' + if backup == backup_yesterday: + suffix = ' (yesterday)' + return suffix + +def _hist_lengths(hosts=None): + # We _could_ open+read+etc each file, just to find out the max updates for + # all hist ... but len("Updates")+2=9 which means 9,999,999 updates) + hl = len("Hosts") + ul = len("Updates") + rl = len("Boots") + if conf_fast_width_history: + ul += 2 + else: + # Whatever, it's less memory than holding all history at once if you want + # to enable it.. + for backup in reversed(backups): + data = list(sorted(lines2datas(bfname2lines(backup)))) + data = filter_name_datas(data, hosts) + updates = _ui_int(sum(d.rpms for d in data)) + hl = max(hl, len(_ui_int(len(data)))) + ul = max(ul, len(updates)) + return hl, ul, rl + +if cmd in ("backups", "hist", "history"): + ident = _backup_today_identical() + print("History:") + last_name = "main" + last_data = list(sorted(fname_datas())) + last_suff = "" + + hl, ul, rl = _hist_lengths() + + print(" %10s %*s %*s %*s %*s %*s" % ("Day", hl, "Hosts", + ul, "Updates", ul, "Avail", ul+1, "Inst.", rl, "Boots")) + + for backup in reversed(backups): + data = list(sorted(lines2datas(bfname2lines(backup)))) + updates = _ui_int(sum(d.rpms for d in last_data)) + ul = max(ul, len(updates)) + cmpds = _ui_diffstats(data.copy(), last_data.copy()) + print(' %10s %*s %*s, %*s %*s, %*s %s' % (last_name, + hl, _ui_int(len(last_data)), + ul, updates, ul, cmpds[0], ul+1, cmpds[1], rl, cmpds[2], last_suff)) + last_name = backup + last_data = data + last_suff = _backup_suffix(backup) + updates = _ui_int(sum(d.rpms for d in last_data)) + print(' %10s %*s %*s %s' % (last_name, hl, _ui_int(len(last_data)), + ul, updates, last_suff)) + def _cli_match_host(data): if len(sys.argv) >= 2: host = sys.argv.pop(1) print("Matching:", host) - data = filter_name_datas(data, host) + data = filter_name_datas(data, [host]) data = list(data) if not data: print("Not host(s) matched:", host) @@ -833,15 +864,19 @@ if cmd == "stats": # _max_update(data) # Do this by hand... _max_len_name = max((len(d.name) for d in data)) - _max_len_rpms = max(len("Updates"), len(_ui_int(updates))) - _max_len_upts = len(_ui_dur(awake)) + _max_len_rpms = max(len("Updates/h"), len(_ui_int(updates))) + _max_len_upts = max(len("Uptime/h"), len(_ui_dur(awake))) _max_len_date = 0 _max_update_correct(' ') - print("%-16s %6s %*s %*s" % ("OS", "Hosts", - _max_len_rpms, "Updates", _max_len_upts, "Uptime")) - print("-" * (16+2+6+1+_max_len_rpms+1+_max_len_upts)) - print("%-16s: %6s %*s %*s" % ("All", _ui_int(len(data)), - _max_len_rpms, _ui_int(updates), _max_len_upts, _ui_dur(awake))) + print("%-16s %6s %*s %*s %*s %*s" % ("OS", "Hosts", + _max_len_rpms, "Updates", _max_len_upts, "Uptime", + _max_len_rpms, "Updates/h", _max_len_upts, "Uptime/h")) + print("-" * (16+2+6+2*(1+_max_len_rpms+1+_max_len_upts))) + nhosts = len(data) + print("%-16s: %6s %*s %*s %*s %*s" % ("All", _ui_int(len(data)), + _max_len_rpms, _ui_int(updates), _max_len_upts, _ui_dur(awake), + _max_len_rpms, _ui_int(updates / nhosts), + _max_len_upts, _ui_dur(awake / nhosts))) subprefix = '' subplen = 12 for osi in sorted(osdata['hosts']): @@ -852,15 +887,21 @@ if cmd == "stats": continue subprefix = ' ' subplen = 12 - print(" %-14s: %6s %*s %*s" % (osi, _ui_int(osdata['hosts'][osi]), + nhosts = osdata['hosts'][osi] + print(" %-14s: %6s %*s %*s %*s %*s" % (osi, _ui_int(nhosts), _max_len_rpms, _ui_int(osdata['updates'][osi]), - _max_len_upts, _ui_dur(osdata['uptimes'][osi]))) + _max_len_upts, _ui_dur(osdata['uptimes'][osi]), + _max_len_rpms, _ui_int(osdata['updates'][osi] / nhosts), + _max_len_upts, _ui_dur(osdata['uptimes'][osi] / nhosts))) if '/' in osi: - print(" %s%-*s: %6s %*s %*s" % (subprefix, subplen, osi, + nhosts = osdata['hosts'][osi] + print(" %s%-*s: %6s %*s %*s %*s %*s" % (subprefix, subplen, osi, _ui_int(osdata['hosts'][osi]), _max_len_rpms, _ui_int(osdata['updates'][osi]), - _max_len_upts, _ui_dur(osdata['uptimes'][osi]))) - print("-" * (16+2+6+1+_max_len_rpms+1+_max_len_upts)) + _max_len_upts, _ui_dur(osdata['uptimes'][osi]), + _max_len_rpms, _ui_int(osdata['updates'][osi] / nhosts), + _max_len_upts, _ui_dur(osdata['uptimes'][osi] / nhosts))) + print("-" * (16+2+6+2*(1+_max_len_rpms+1+_max_len_upts))) # Redo the lengths, because it's real hostname data now... _max_update(data) _max_len_date = 0 @@ -881,34 +922,35 @@ if cmd == "stats": _ui_osinfo(a[2]))) _explain_ui_name() -def _print_info(host, data): - hosts = [] +def _print_info(hosts, data): + fhosts = [] for x in data: - if fnmatch.fnmatch(x.name, host): - hosts.append(x) - if not hosts: + for host in hosts: + if not fnmatch.fnmatch(x.name, host): + continue + fhosts.append(x) + break + if not fhosts: print("Not host(s) matched:", host) sys.exit(2) - for host in hosts: + for host in fhosts: print("Host:", host.name) print(" OS:", host.osinfo) print(" Updates:", _ui_int(host.rpms)) print(" Uptime:", format_duration(host.uptime)) # !ui_dur print(" Checked:", host.date) - print(" Machine:", host.machine_id) - print(" Boot:", host.boot_id) + if conf_info_machine_ids: + print(" Machine:", host.machine_id) + print(" Boot:", host.boot_id) -if cmd in ("host", "info"): - if cmd == "host": - host = "batcave*" - else: - host = "*" +if cmd in ("information", "info"): + hosts = conf_important_hosts.copy() if len(sys.argv) >= 2: - host = sys.argv.pop(1) + hosts = [sys.argv.pop(1)] if len(sys.argv) >= 2 and sys.argv[1] == "all": for b in backups: print("History:", b) - _print_info(host, lines2datas(bfname2lines(b))) + _print_info(hosts, lines2datas(bfname2lines(b))) sys.argv = [sys.argv[0]] while True: _cmp_arg() @@ -916,7 +958,7 @@ if cmd in ("host", "info"): print("History:", sys.argv.pop(1)) else: print("Main:") - _print_info(host, fname1()) + _print_info(hosts, fname1()) if len(sys.argv) < 2: break @@ -928,14 +970,18 @@ def _print_line(prefix, data): _max_len_date, data.date, _ui_osinfo(data))) -if cmd == "list": - host = "*" +if cmd in ("list", "list-n"): + hosts = [] if len(sys.argv) >= 2: - host = sys.argv.pop(1) + if cmd == "list": + hosts = [sys.argv.pop(1)] + else: + hosts = sys.argv[1:] + sys.argv = sys.argv[:1] _cmp_arg() data = fname1() - data = list(filter_name_datas(data, host)) + data = list(filter_name_datas(data, hosts)) _max_update(data) _max_update_correct('') for d1 in data: @@ -959,32 +1005,7 @@ if cmd in ("uptime", "uptime-max", "uptime-min"): _print_line('', d1) _explain_ui_name() -if cmd in ("diff", "diff-u"): - _cmp_arg() - fn1 = fname + '.' + cmp - fn2 = fname - data1 = fname2lines(fn1) - if len(sys.argv) >= 3: - # Doing a diff. between two backups... - if sys.argv[2] == 'today' and fname_today is not None: - fn2 = fname_today - if sys.argv[2] == 'yesterday' and fname_yesterday is not None: - fn2 = fname_yesterday - if sys.argv[2] in backups: - fn2 = fname + '.' + sys.argv[2] - data2 = fname2lines(fn2) - print("diff %s %s" % (fn1, fn2), file=sys.stderr) - data1 = list(sorted(lines2datas(data1))) - data2 = list(sorted(lines2datas(data2))) - if fn2 == fname: - data2 = _maybe_dynamic_uptime(data2) - hosts = _ui_int(len(data2)) - updates = _ui_int(sum(d.rpms for d in data2)) - ul = len(updates) - cmpds = _ui_diffstats(data1.copy(), data2.copy()) - _max_update(data1) - _max_update(data2) - _max_update_correct(' ') +def _diff_hosts(data1, data2, show_both=False, show_utf8=True): while len(data1) > 0 or len(data2) > 0: if len(data1) <= 0: _print_line('+', data2[0]) @@ -1014,14 +1035,14 @@ if cmd in ("diff", "diff-u"): # Name, rpms, and OSname/OSvers are the same if d1 == d2: - if cmd == "diff" and conf_utf8_diff and host_rebooted(d1, d2): + if show_utf8 and conf_utf8_diff and host_rebooted(d1, d2): _print_line(_conf_utf8_boot_ed, d2) continue _print_line(' ', d2) continue # Something about host changed, show old/new... - if cmd == "diff-u": + if show_both: _print_line('-', d1) _print_line('+', d2) continue @@ -1029,9 +1050,9 @@ if cmd in ("diff", "diff-u"): # Something changed, but we only show the new data... if conf_utf8_diff: if False: pass - elif d1.machine_id != d2.machine_id: + elif not _wild_eq(d1.machine_id, d2.machine_id): _print_line(_conf_utf8_diff_hw, d2) - elif d1.osinfo != d2.osinfo: + elif not _wild_info_eq(d1, d2): _print_line(_conf_utf8_diff_os, d2) elif host_rebooted(d1, d2) and d1.rpms > d2.rpms: _print_line(_conf_utf8_boot_up, d2) @@ -1045,7 +1066,85 @@ if cmd in ("diff", "diff-u"): _print_line('!', d2) continue + +if cmd in ("diff", "diff-u"): + _cmp_arg() + fn1 = fname + '.' + cmp + fn2 = fname + data1 = fname2lines(fn1) + if len(sys.argv) >= 3: + # Doing a diff. between two backups... + if sys.argv[2] == 'today' and fname_today is not None: + fn2 = fname_today + if sys.argv[2] == 'yesterday' and fname_yesterday is not None: + fn2 = fname_yesterday + if sys.argv[2] in backups: + fn2 = fname + '.' + sys.argv[2] + data2 = fname2lines(fn2) + print("diff %s %s" % (fn1, fn2), file=sys.stderr) + data1 = list(sorted(lines2datas(data1))) + data2 = list(sorted(lines2datas(data2))) + if fn2 == fname: + data2 = _maybe_dynamic_uptime(data2) + hosts = _ui_int(len(data2)) + updates = _ui_int(sum(d.rpms for d in data2)) + ul = len(updates) + cmpds = _ui_diffstats(data1.copy(), data2.copy()) + _max_update(data1) + _max_update(data2) + _max_update_correct(' ') + + _diff_hosts(data1, data2, show_both=cmd == "diff-u", show_utf8=cmd == "diff") + print('hosts=%s updates=%s (a=%s i=%s) boots=%s' % (hosts, updates, cmpds[0], cmpds[1], cmpds[2])) _explain_ui_name() +# Like diff/history mixed, but for specific hosts... +if cmd in ("hosts", "host", "hosts-u", "host-u"): + hosts = conf_important_hosts.copy() + if len(sys.argv) >= 2: + hosts = sys.argv[1:] + + ident = _backup_today_identical() + print("Hosts history:") + last_name = "main" + last_data = list(sorted(filter_name_datas(fname_datas(), hosts))) + + _max_update(last_data) + if not conf_fast_width_history: + for backup in reversed(backups): + data = filter_name_datas(lines2datas(bfname2lines(backup)), hosts) + data = list(sorted(data)) + _max_update(data) + _max_update_correct(' ') + + done = False + for backup in reversed(backups): + data = filter_name_datas(lines2datas(bfname2lines(backup)), hosts) + data = list(sorted(data)) + + if done: + print("") + done = True + print("Host diff: %s %s" % (backup, last_name), file=sys.stderr) + if not conf_fast_width_history: + _max_update(data) + _max_update_correct(' ') + _diff_hosts(data.copy(), last_data.copy(), show_both=cmd.endswith("-u")) + if False: + cmpds = _ui_diffstats(data.copy(), last_data.copy()) + hostnum = len(last_data) + updates = _ui_int(sum(d.rpms for d in last_data)) + print('hosts=%s updates=%s (a=%s i=%s) boots=%s' % (hostnum, updates, + cmpds[0], cmpds[1], cmpds[2])) + + last_name = backup + last_data = data + if done: + print("") + print("Host data: %s" % (last_name,), file=sys.stderr) + for d1 in last_data: + _print_line(' ', d1) + _explain_ui_name() + diff --git a/playbooks/generate-updates-uptimes-per-host-file.yml b/playbooks/generate-updates-uptimes-per-host-file.yml index 3116b78b9b..fd66544b41 100644 --- a/playbooks/generate-updates-uptimes-per-host-file.yml +++ b/playbooks/generate-updates-uptimes-per-host-file.yml @@ -11,14 +11,12 @@ tasks: -## We do this explicitly because ansible will cache facts, but we don't -## want to uncache all facts just make sure we have the latest uptime -## ** Doesn't seem to make any difference... -# - name: Gather the latest uptime and OS -# ansible.builtin.setup: -# - gather_subset: ["!all", "!min", "hardware"] -# - filter: ["uptime_seconds", "distribution", "distribution_version", "machine_id"] -# tags: updates +# We do this explicitly because ansible will cache facts, but we want +# to make sure we have the latest uptime etc. + - name: Gather the latest uptime and OS + ansible.builtin.setup: + gather_subset: "!all,min,hardware" + tags: updates # This should be in our facts, but I don't see it. Newer ansible? - name: Gather boot-id, if we can @@ -27,7 +25,7 @@ ignore_errors: yes tags: updates - - name: Decode the real boot-id + - name: Decode the real boot-id, or use a default ansible.builtin.set_fact: boot_id: "{{ (boot_id_data.content | b64decode).strip() | default('?') }}" tags: updates