def check_live_url(url, timeout_seconds=2):
    """Return True when fetching *url* succeeds with status code 200.

    This is a best-effort liveness probe: any failure while opening the URL
    (malformed URL, HTTP error status, connection problem, timeout, ...) is
    reported on stderr together with the offending URL, and the function
    returns False instead of raising.

    Args:
        url: The URL to probe; passed unchanged to ``urlopen``.
        timeout_seconds: Timeout, in seconds, handed to ``urlopen``.
            Defaults to 2.

    Returns:
        True if the response's status code is exactly 200, False otherwise
        (including whenever an exception was raised).
    """
    try:
        # The context manager closes the response as soon as the status has
        # been read, so probing many URLs does not accumulate open handles.
        with urlopen(url, timeout=timeout_seconds) as response:
            return response.code == 200
    except Exception as err:
        # Deliberately broad: HTTPError, URLError and socket timeouts are all
        # Exception subclasses and are handled the same way, so one handler
        # covers them along with errors such as ValueError for a bad URL.
        print(f"{url}: {err}", file=sys.stderr)
        return False