Merge pull request #312705 from r-ryantm/auto-update/xpipe
[NixPkgs.git] / doc / tests / manpage-urls.py
bloba1ea6d27969e759666da2803bb110edfa839db1f
1 #! /usr/bin/env nix-shell
2 #! nix-shell -i "python3 -I" -p "python3.withPackages(p: with p; [ aiohttp rich structlog ])"
from argparse import ArgumentParser, Namespace
from collections import defaultdict
from collections.abc import Mapping, Sequence
from enum import IntEnum
from http import HTTPStatus
from pathlib import Path
from typing import Optional
import asyncio, json, logging
import os

import aiohttp, structlog
from structlog.contextvars import bound_contextvars as log_context
# Mirror the stdlib logging levels as an IntEnum so argparse can both
# offer them as --log-level choices and compare them numerically.
LogLevel = IntEnum('LogLevel', {
    lvl: getattr(logging, lvl)
    for lvl in ('DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL')
})
# Show the bare level name (e.g. "INFO", not "LogLevel.INFO") when
# argparse renders the choices list in --help.
LogLevel.__str__ = lambda self: self.name
# Status codes the checker knows how to classify; anything outside this
# set is tallied as "unexpected" in the final summary.
EXPECTED_STATUS = frozenset((
    HTTPStatus.OK, HTTPStatus.FOUND,
    HTTPStatus.NOT_FOUND,
))
29 async def check(session: aiohttp.ClientSession, manpage: str, url: str) -> HTTPStatus:
30 with log_context(manpage=manpage, url=url):
31 logger.debug("Checking")
32 async with session.head(url) as resp:
33 st = HTTPStatus(resp.status)
34 match st:
35 case HTTPStatus.OK | HTTPStatus.FOUND:
36 logger.debug("OK!")
37 case HTTPStatus.NOT_FOUND:
38 logger.error("Broken link!")
39 case _ if st < 400:
40 logger.info("Unexpected code", status=st)
41 case _ if 400 <= st < 600:
42 logger.warn("Unexpected error", status=st)
44 return st
async def main(urls_path: Path) -> Mapping[HTTPStatus, int]:
    """Check every manpage URL listed in the JSON file at `urls_path`.

    The file maps manpage names to URLs.  All URLs are checked
    concurrently; returns a mapping from HTTP status code to the number
    of URLs that answered with it.
    """
    logger.info(f"Parsing {urls_path}")
    with urls_path.open() as urls_file:
        urls = json.load(urls_file)

    # int() yields 0, so statuses never seen count as zero.
    count: defaultdict[HTTPStatus, int] = defaultdict(int)

    logger.info(f"Checking URLs from {urls_path}")
    async with aiohttp.ClientSession() as session:
        # as_completed lets us tally results as soon as each HEAD
        # request finishes, rather than waiting for all of them.
        for status in asyncio.as_completed([
            check(session, manpage, url)
            for manpage, url in urls.items()
        ]):
            count[await status] += 1

    ok = count[HTTPStatus.OK] + count[HTTPStatus.FOUND]
    broken = count[HTTPStatus.NOT_FOUND]
    unknown = sum(c for st, c in count.items() if st not in EXPECTED_STATUS)
    logger.info(f"Done: {broken} broken links, "
                f"{ok} correct links, and {unknown} unexpected status")

    return count
def parse_args(args: Optional[Sequence[str]] = None) -> Namespace:
    """Parse command-line arguments from `args` (sys.argv when None)."""
    parser = ArgumentParser(
        prog = 'check-manpage-urls',
        description = 'Check the validity of the manpage URLs linked in the nixpkgs manual',
    )
    parser.add_argument(
        '-l', '--log-level',
        # LOG_LEVEL from the environment provides the default verbosity.
        default = os.getenv('LOG_LEVEL', 'INFO'),
        type = lambda s: LogLevel[s],
        choices = list(LogLevel),
    )
    parser.add_argument(
        'file',
        type = Path,
        nargs = '?',
        help = 'JSON file mapping manpage names to URLs; '
               'defaults to doc/manpage-urls.json of the enclosing nixpkgs checkout',
    )

    return parser.parse_args(args)
if __name__ == "__main__":
    import os, sys

    args = parse_args()

    # Configure structlog before creating the logger so the chosen
    # verbosity filters every subsequent log call.
    structlog.configure(
        wrapper_class=structlog.make_filtering_bound_logger(args.log_level),
    )
    logger = structlog.getLogger("check-manpage-urls.py")

    urls_path = args.file
    if urls_path is None:
        # NOTE(review): four `.parent` hops from doc/tests/manpage-urls.py
        # land one level ABOVE the repository root (three would give the
        # root) -- confirm against the script's actual location.
        REPO_ROOT = Path(__file__).parent.parent.parent.parent
        logger.info(f"Assuming we are in a nixpkgs repo rooted at {REPO_ROOT}")

        urls_path = REPO_ROOT / 'doc' / 'manpage-urls.json'

    count = asyncio.run(main(urls_path))

    # Exit non-zero iff at least one link is definitively broken (404).
    sys.exit(0 if count[HTTPStatus.NOT_FOUND] == 0 else 1)