mirror of
https://github.com/openmlsys/openmlsys-zh.git
synced 2026-04-01 09:50:23 +08:00
327 lines
10 KiB
Python
327 lines
10 KiB
Python
from __future__ import annotations
|
|
|
|
import argparse
|
|
import re
|
|
from pathlib import Path
|
|
|
|
|
|
# Info strings of the fenced blocks that this module recognises and rewrites.
TOC_FENCE = "toc"
EVAL_RST_FENCE = "eval_rst"
# A whole line consisting of a :width:`...` or :label:`...` option.
OPTION_LINE_RE = re.compile(r"^:(width|label):`[^`]+`\s*$", re.MULTILINE)
# Inline roles; group 1 captures the text between the backticks.
NUMREF_RE = re.compile(r":numref:`([^`]+)`")
EQREF_RE = re.compile(r":eqref:`([^`]+)`")
CITE_RE = re.compile(r":cite:`([^`]+)`")
# The ":file: <name>" option line of a raw-HTML directive; group 1 is the file name.
RAW_HTML_FILE_RE = re.compile(r"^\s*:file:\s*([^\s]+)\s*$")
# Opening or closing <head> tags, matched case-insensitively.
HEAD_TAG_RE = re.compile(r"</?head>", re.IGNORECASE)
# An attribute-less <style>...</style> element; group 1 is the CSS between the tags.
STYLE_BLOCK_RE = re.compile(r"<style>(.*?)</style>", re.IGNORECASE | re.DOTALL)
|
|
FRONTPAGE_LAYOUT_CSS = """
|
|
<style>
|
|
.openmlsys-frontpage {
|
|
width: 100%;
|
|
margin: 0 auto 3rem;
|
|
}
|
|
.openmlsys-frontpage .mdl-grid {
|
|
display: flex;
|
|
flex-wrap: wrap;
|
|
gap: 24px;
|
|
width: 100%;
|
|
box-sizing: border-box;
|
|
}
|
|
.openmlsys-frontpage .mdl-cell {
|
|
box-sizing: border-box;
|
|
flex: 1 1 220px;
|
|
min-width: 0;
|
|
}
|
|
.openmlsys-frontpage .mdl-cell--1-col {
|
|
flex: 0 0 48px;
|
|
}
|
|
.openmlsys-frontpage .mdl-cell--3-col {
|
|
flex: 1 1 calc(25% - 24px);
|
|
max-width: calc(25% - 18px);
|
|
}
|
|
.openmlsys-frontpage .mdl-cell--5-col {
|
|
flex: 1 1 calc(41.666% - 24px);
|
|
max-width: calc(41.666% - 18px);
|
|
}
|
|
.openmlsys-frontpage .mdl-cell--12-col {
|
|
flex: 1 1 100%;
|
|
max-width: 100%;
|
|
}
|
|
.openmlsys-frontpage .mdl-cell--middle {
|
|
align-self: center;
|
|
}
|
|
.openmlsys-frontpage .mdl-color-text--primary {
|
|
color: var(--links, #0b6bcb);
|
|
}
|
|
.openmlsys-frontpage img {
|
|
max-width: 100%;
|
|
height: auto;
|
|
}
|
|
#content,
|
|
.content {
|
|
max-width: min(1440px, calc(100vw - 48px));
|
|
}
|
|
.openmlsys-frontpage + ul,
|
|
.openmlsys-frontpage + ul ul {
|
|
max-width: 960px;
|
|
}
|
|
@media (max-width: 1000px) {
|
|
.openmlsys-frontpage .mdl-cell,
|
|
.openmlsys-frontpage .mdl-cell--1-col,
|
|
.openmlsys-frontpage .mdl-cell--3-col,
|
|
.openmlsys-frontpage .mdl-cell--5-col {
|
|
flex: 1 1 100%;
|
|
max-width: 100%;
|
|
}
|
|
}
|
|
</style>
|
|
""".strip()
|
|
|
|
|
|
def extract_title(markdown: str, fallback: str = "Untitled") -> str:
    """Return the text of the first heading found in ``markdown``.

    Two heading forms are recognised, scanning from the top:

    * a line starting with ``#`` (the leading ``#`` run is removed), and
    * any non-blank line whose next line is made up solely of ``=`` and/or
      ``-`` characters.

    Args:
        markdown: Markdown source to scan.
        fallback: Value returned when no heading is found.

    Returns:
        The heading text, or ``fallback``.
    """
    rows = markdown.splitlines()
    underline_chars = {"=", "-"}

    for position, raw in enumerate(rows):
        text = raw.strip()
        if text == "":
            continue

        if text[0] == "#":
            title = text.lstrip("#").strip()
            if title:
                return title
            # A bare run of "#" carries no title; it may still be underlined below.

        following = rows[position + 1].strip() if position + 1 < len(rows) else ""
        if following and underline_chars.issuperset(following):
            return text

    return fallback
|
|
|
|
|
|
def parse_toc_blocks(markdown: str) -> list[list[str]]:
    """Collect the entries of every ``toc`` fenced block in ``markdown``.

    Inside a block, blank lines and lines starting with ``:`` are skipped;
    every other line is kept with surrounding whitespace removed. A block
    that is never closed runs to the end of the text.

    Returns:
        One list of entries per block, in document order. A block without
        entries contributes an empty list.
    """
    opening = f"```{TOC_FENCE}"
    collected: list[list[str]] = []
    current: list[str] | None = None  # None while outside a toc block

    for raw in markdown.splitlines():
        text = raw.strip()
        if current is None:
            if text == opening:
                current = []
            continue

        if text == "```":
            collected.append(current)
            current = None
        elif text and not text.startswith(":"):
            current.append(text)

    # An unterminated block still counts.
    if current is not None:
        collected.append(current)

    return collected
|
|
|
|
|
|
def resolve_toc_target(current_file: Path, entry: str) -> Path:
    """Map a TOC entry to the Markdown file it names.

    The entry is interpreted relative to the directory of ``current_file``
    with ``.md`` appended.

    Returns:
        The resolved path of the target file.

    Raises:
        FileNotFoundError: If that file does not exist.
    """
    candidate = current_file.parent.joinpath(entry + ".md").resolve()
    if candidate.exists():
        return candidate
    raise FileNotFoundError(f"TOC entry '{entry}' from '{current_file}' does not exist")
|
|
|
|
|
|
def relative_link(from_file: Path, target_file: Path) -> str:
    """Return the POSIX-style link from ``from_file``'s directory to ``target_file``.

    ``Path.relative_to`` only works when the target lies below the starting
    directory and both paths are of the same kind (both relative or both
    absolute). ``os.path.relpath`` handles the remaining cases as well: it
    emits ``..`` segments for targets outside the directory and anchors
    relative paths at the current working directory.

    Args:
        from_file: File that will contain the link.
        target_file: File the link points to.

    Returns:
        The relative path, using ``/`` as separator.

    Raises:
        ValueError: On Windows, when the two paths are on different drives.
    """
    import os  # local import: the module does not import os at file level

    return Path(os.path.relpath(target_file, from_file.parent)).as_posix()
|
|
|
|
|
|
def normalize_directives(markdown: str) -> str:
    """Rewrite role/option syntax to plain Markdown and tidy blank lines.

    * ``:width:``/``:label:`` option lines are removed.
    * ``:numref:`x``` and ``:eqref:`x``` become inline code `` `x` ``.
    * ``:cite:`x``` becomes ``[x]``.
    * Trailing whitespace is removed from every line, runs of blank lines
      are reduced to a single blank line, and the text ends with exactly
      one newline.
    """
    text = OPTION_LINE_RE.sub("", markdown)
    for pattern, template in (
        (NUMREF_RE, r"`\1`"),
        (EQREF_RE, r"`\1`"),
        (CITE_RE, r"[\1]"),
    ):
        text = pattern.sub(template, text)

    kept: list[str] = []
    for raw in text.splitlines():
        trimmed = raw.rstrip()
        # Skip a blank line that directly follows another blank line.
        if trimmed == "" and kept and kept[-1] == "":
            continue
        kept.append(trimmed)

    # After collapsing, at most one blank line can remain at the end.
    if kept and kept[-1] == "":
        kept.pop()

    return "\n".join(kept) + "\n"
|
|
|
|
|
|
def resolve_raw_html_file(current_file: Path, filename: str) -> Path:
    """Locate an HTML file referenced from ``current_file``.

    Locations are tried in this order and the first existing one wins:

    1. ``filename`` next to ``current_file``;
    2. ``static/filename`` next to ``current_file``;
    3. ``static/filename`` one directory above the directory of this module.

    Raises:
        FileNotFoundError: If none of the locations exists.
    """
    including_dir = current_file.parent
    for candidate in (including_dir / filename, including_dir / "static" / filename):
        resolved = candidate.resolve()
        if resolved.exists():
            return resolved

    shared = Path(__file__).resolve().parent.parent / "static" / filename
    if not shared.exists():
        raise FileNotFoundError(f"Raw HTML include '{filename}' from '{current_file}' does not exist")
    return shared
|
|
|
|
|
|
def rewrite_frontpage_assets(html: str) -> str:
    """Adapt included HTML: fix image paths, drop head tags, flatten styles.

    ``./_images/`` and ``_images/`` prefixes are replaced by
    ``static/image/``, ``<head>``/``</head>`` tags are removed, and each
    ``<style>`` element is collapsed onto one line.
    """
    # The "./" form goes first so that its leading "./" is consumed as well.
    for old_prefix in ("./_images/", "_images/"):
        html = html.replace(old_prefix, "static/image/")

    without_head = HEAD_TAG_RE.sub("", html)
    return STYLE_BLOCK_RE.sub(_minify_style_block, without_head)
|
|
|
|
|
|
def _minify_style_block(match: re.Match[str]) -> str:
    """Return a ``<style>`` element whose CSS is joined onto a single line.

    Group 1 of ``match`` is taken as the CSS text. Each of its lines is
    stripped, empty lines are dropped, and the rest is joined with single
    spaces.
    """
    css = match.group(1)
    stripped_rows = (row.strip() for row in css.splitlines())
    flattened = " ".join(filter(None, stripped_rows))
    return "<style>" + flattened + "</style>"
|
|
|
|
|
|
def wrap_frontpage_html(html: str) -> str:
    """Prefix ``html`` with the layout CSS and enclose it in the front-page ``<div>``.

    The markup is stripped of surrounding whitespace before wrapping.
    """
    body = html.strip()
    return f'{FRONTPAGE_LAYOUT_CSS}\n<div class="openmlsys-frontpage">\n{body}\n</div>'
|
|
|
|
|
|
def inline_raw_html(block_lines: list[str], current_file: Path) -> str | None:
    """Return the HTML referenced by a ``.. raw:: html`` directive, if any.

    Args:
        block_lines: Lines found inside an ``eval_rst`` fence.
        current_file: Markdown file containing the fence; used to locate
            the referenced HTML file.

    Returns:
        The processed HTML, or ``None`` when the first non-blank line is not
        ``.. raw:: html`` or no ``:file:`` option follows it. Content of a
        file named ``frontpage.html`` is additionally wrapped with the
        front-page layout.

    Raises:
        FileNotFoundError: If the referenced file cannot be located.
    """
    directive = [text for text in (raw.strip() for raw in block_lines) if text]
    if not directive or directive[0] != ".. raw:: html":
        return None

    # The first ":file:" option wins.
    hits = (RAW_HTML_FILE_RE.match(option) for option in directive[1:])
    found = next((hit for hit in hits if hit), None)
    if found is None:
        return None

    filename = found.group(1)
    source = resolve_raw_html_file(current_file, filename)
    markup = rewrite_frontpage_assets(source.read_text(encoding="utf-8")).strip()
    return wrap_frontpage_html(markup) if Path(filename).name == "frontpage.html" else markup
|
|
|
|
|
|
def render_toc_list(entries: list[str], current_file: Path, title_cache: dict[Path, str]) -> list[str]:
    """Render TOC entries as Markdown bullet links.

    Each entry becomes ``- [title](link)``: the title is looked up in
    ``title_cache`` under the resolved target path, and the link is
    relative to ``current_file``.

    Raises:
        FileNotFoundError: If an entry's target file does not exist.
        KeyError: If a target is missing from ``title_cache``.
    """
    # Lazy generator: each entry is resolved immediately before it is rendered.
    targets = (resolve_toc_target(current_file, entry) for entry in entries)
    return [
        f"- [{title_cache[target]}]({relative_link(current_file, target)})"
        for target in targets
    ]
|
|
|
|
|
|
def rewrite_markdown(markdown: str, current_file: Path, title_cache: dict[Path, str]) -> str:
    """Rewrite ``toc`` and ``eval_rst`` fenced blocks and normalise the result.

    * A ``toc`` fence is replaced by a bullet list of links produced by
      ``render_toc_list``; a fence without entries is removed.
    * An ``eval_rst`` fence is replaced by the HTML returned by
      ``inline_raw_html``; when that returns nothing the fence is removed.
    * All other lines are copied unchanged.

    Inserted content is set off by blank lines, and the assembled text is
    finally passed through ``normalize_directives``.

    Args:
        markdown: Source text of the page.
        current_file: Path of the page, used to resolve TOC targets and
            HTML includes.
        title_cache: Mapping from resolved Markdown path to page title.

    Returns:
        The rewritten Markdown.
    """
    output: list[str] = []
    lines = markdown.splitlines()
    index = 0

    while index < len(lines):
        stripped = lines[index].strip()
        if stripped in (f"```{TOC_FENCE}", f"```{EVAL_RST_FENCE}"):
            # Text after the three backticks identifies the kind of fence.
            fence = stripped[3:]
            index += 1
            block_lines: list[str] = []
            # Gather the body up to the closing fence (or the end of the text).
            while index < len(lines) and lines[index].strip() != "```":
                block_lines.append(lines[index])
                index += 1

            if fence == TOC_FENCE:
                # Same filtering as parse_toc_blocks: skip blank and ":"-prefixed lines.
                entries = [line.strip() for line in block_lines if line.strip() and not line.strip().startswith(":")]
                if entries:
                    # Keep a blank line before and after the generated list.
                    if output and output[-1] != "":
                        output.append("")
                    output.extend(render_toc_list(entries, current_file, title_cache))
                    if output and output[-1] != "":
                        output.append("")
            elif fence == EVAL_RST_FENCE:
                raw_html = inline_raw_html(block_lines, current_file)
                if raw_html:
                    # Keep a blank line before and after the inlined HTML.
                    if output and output[-1] != "":
                        output.append("")
                    output.extend(raw_html.splitlines())
                    if output and output[-1] != "":
                        output.append("")
            # Step past the closing fence; the fence lines themselves are never emitted.
            index += 1
            continue

        output.append(lines[index])
        index += 1

    # Drop trailing blank lines before the final normalisation pass.
    while output and output[-1] == "":
        output.pop()

    return normalize_directives("\n".join(output) + "\n")
|
|
|
|
|
|
def build_title_cache(source_dir: Path) -> dict[Path, str]:
    """Map every Markdown file below ``source_dir`` to its title.

    Files with a ``_build`` component in their path and files named
    ``SUMMARY.md`` are left out. Keys are resolved paths; a file without a
    heading is given its stem as title.
    """

    def is_page(path: Path) -> bool:
        return "_build" not in path.parts and path.name != "SUMMARY.md"

    return {
        page.resolve(): extract_title(page.read_text(encoding="utf-8"), fallback=page.stem)
        for page in sorted(source_dir.rglob("*.md"))
        if is_page(page)
    }
|
|
|
|
|
|
def build_summary(source_dir: Path, title_cache: dict[Path, str]) -> str:
    """Build the text of an mdBook ``SUMMARY.md`` from the ``toc`` tree.

    The walk starts at ``source_dir/index.md`` and follows the ``toc``
    blocks of every page depth-first. Each page is listed once, at the
    position where it is first reached.

    Args:
        source_dir: Root directory of the book sources.
        title_cache: Mapping from resolved Markdown path to page title.

    Returns:
        The summary text, ending with a newline.

    Raises:
        FileNotFoundError: If a TOC entry names a file that does not exist.
        KeyError: If a page is missing from ``title_cache``.
        ValueError: If a TOC target lies outside ``source_dir``.
    """
    source_root = source_dir.resolve()  # loop-invariant, so resolved once
    root_index = (source_dir / "index.md").resolve()
    root_markdown = root_index.read_text(encoding="utf-8")

    lines = ["# Summary", "", f"- [{title_cache[root_index]}](index.md)"]
    # Doubles as a cycle guard: a page that was already listed is skipped.
    seen: set[Path] = {root_index}

    def append_entry(target: Path, indent: int) -> None:
        target = target.resolve()
        if target in seen:
            return
        seen.add(target)
        rel = target.relative_to(source_root).as_posix()
        # Four spaces per level: a one-space indent is not enough for a
        # Markdown parser to nest a "- " item under its parent, which would
        # flatten the chapter hierarchy.
        lines.append(f"{'    ' * indent}- [{title_cache[target]}]({rel})")

        child_markdown = target.read_text(encoding="utf-8")
        for block in parse_toc_blocks(child_markdown):
            for entry in block:
                append_entry(resolve_toc_target(target, entry), indent + 1)

    for block in parse_toc_blocks(root_markdown):
        for entry in block:
            append_entry(resolve_toc_target(root_index, entry), 0)

    return "\n".join(lines) + "\n"
|
|
|
|
|
|
def write_summary(source_dir: Path, summary_path: Path | None = None) -> Path:
    """Generate the summary for ``source_dir`` and write it to disk.

    Args:
        source_dir: Root directory of the book sources.
        summary_path: Destination file; defaults to ``SUMMARY.md`` inside
            ``source_dir``.

    Returns:
        The path the summary was written to.
    """
    root = source_dir.resolve()
    if summary_path:
        destination = summary_path.resolve()
    else:
        destination = root / "SUMMARY.md"

    contents = build_summary(root, build_title_cache(root))
    destination.write_text(contents, encoding="utf-8")
    return destination
|
|
|
|
|
|
def parse_args(argv: list[str] | None = None) -> argparse.Namespace:
    """Parse the command-line options.

    Args:
        argv: Argument list to parse; ``None`` (the default) makes argparse
            read ``sys.argv[1:]``.

    Returns:
        A namespace with ``source`` and ``summary_output``, both ``Path``
        objects. When ``--summary-output`` is not given it is set to
        ``SUMMARY.md`` inside ``source``.
    """
    parser = argparse.ArgumentParser(description="Generate mdBook SUMMARY.md for zh_chapters.")
    parser.add_argument("--source", type=Path, default=Path("zh_chapters"), help="Source chapter directory")
    parser.add_argument(
        "--summary-output",
        type=Path,
        default=None,
        help="Where to write the generated SUMMARY.md (default: SUMMARY.md inside --source)",
    )
    args = parser.parse_args(argv)
    # Derive the default from --source so that a custom source directory does
    # not have its summary written into zh_chapters/.
    if args.summary_output is None:
        args.summary_output = args.source / "SUMMARY.md"
    return args
|
|
|
|
|
|
def main() -> int:
    """Write the summary according to the command-line options.

    Returns:
        ``0`` once the summary has been written.
    """
    options = parse_args()
    written_to = write_summary(options.source, options.summary_output)
    print(f"Wrote mdBook summary to {written_to}")
    return 0
|
|
|
|
|
|
# Script entry point: main()'s return value is used as the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|