#! /usr/bin/env nix-shell
#! nix-shell -i "python3 -I" -p "python3.withPackages(p: with p; [ aiohttp rich structlog ])"

from argparse import ArgumentParser, Namespace
from collections import defaultdict
from collections.abc import Mapping, Sequence
from enum import IntEnum
from http import HTTPStatus
from pathlib import Path
from typing import Optional
import asyncio
import json
import logging
import os

import aiohttp
import structlog
from structlog.contextvars import bound_contextvars as log_context


class LogLevel(IntEnum):
    """Standard ``logging`` severities as an integer enum.

    Members compare and sort like the numeric ``logging`` levels they wrap,
    and can be looked up by name (``LogLevel['INFO']``).
    """

    DEBUG = logging.DEBUG
    INFO = logging.INFO
    WARNING = logging.WARNING
    ERROR = logging.ERROR
    CRITICAL = logging.CRITICAL

    def __str__(self) -> str:
        # Render as the bare level name (e.g. "INFO") instead of the enum's
        # default string form.
        return self.name


# Status codes that are reported under their own heading in the final
# summary; anything outside this set is tallied as "unexpected".
# NOTE(review): the visible text of this literal breaks off after FOUND with a
# trailing comma. NOT_FOUND is restored here because the summary counts it
# separately as "broken" -- confirm against the pristine file.
EXPECTED_STATUS = frozenset((
    HTTPStatus.OK,
    HTTPStatus.FOUND,
    HTTPStatus.NOT_FOUND,
))

29 async def check(session
: aiohttp
.ClientSession
, manpage
: str, url
: str) -> HTTPStatus
:
30 with
log_context(manpage
=manpage
, url
=url
):
31 logger
.debug("Checking")
32 async with session
.head(url
) as resp
:
33 st
= HTTPStatus(resp
.status
)
35 case HTTPStatus
.OK | HTTPStatus
.FOUND
:
37 case HTTPStatus
.NOT_FOUND
:
38 logger
.error("Broken link!")
40 logger
.info("Unexpected code", status
=st
)
41 case _
if 400 <= st
< 600:
42 logger
.warn("Unexpected error", status
=st
)
async def main(urls_path: Path) -> Mapping[HTTPStatus, int]:
    """Check every URL listed in the JSON file at *urls_path*.

    The file is loaded as a JSON object whose items are iterated as
    ``(manpage, url)`` pairs; each pair is passed to ``check`` and the
    resulting statuses are tallied.

    Args:
        urls_path: Location of the JSON file to read.

    Returns:
        A mapping from status to the number of URLs that produced it. It is
        a ``defaultdict``, so statuses that never occurred read as ``0``.
    """
    logger.info(f"Parsing {urls_path}")
    # JSON is UTF-8 by specification; do not depend on the locale's encoding.
    with urls_path.open(encoding="utf-8") as urls_file:
        urls = json.load(urls_file)

    count: defaultdict[HTTPStatus, int] = defaultdict(int)

    logger.info(f"Checking URLs from {urls_path}")
    async with aiohttp.ClientSession() as session:
        # All checks are scheduled at once and tallied as they finish.
        for status in asyncio.as_completed([
            check(session, manpage, url)
            for manpage, url in urls.items()
        ]):
            count[await status] += 1

    ok = count[HTTPStatus.OK] + count[HTTPStatus.FOUND]
    broken = count[HTTPStatus.NOT_FOUND]
    unknown = sum(c for st, c in count.items() if st not in EXPECTED_STATUS)
    logger.info(f"Done: {broken} broken links, "
                f"{ok} correct links, and {unknown} unexpected status")

    # NOTE(review): the return statement is not visible in the reviewed text;
    # restored because the script entry point indexes the result of main().
    return count


def parse_args(args: Optional[Sequence[str]] = None) -> Namespace:
    """Parse the command line of the URL checker.

    Args:
        args: Argument strings to parse; ``None`` lets argparse read
            ``sys.argv``.

    Returns:
        The parsed namespace. ``log_level`` is a ``LogLevel`` member whose
        default comes from the ``LOG_LEVEL`` environment variable (``INFO``
        when unset).
    """
    def log_level(name: str) -> LogLevel:
        # argparse turns only TypeError / ValueError / ArgumentTypeError from
        # a `type` callable into a usage error. The KeyError from a bare
        # `LogLevel[name]` lookup would surface as a traceback, so translate it.
        try:
            return LogLevel[name]
        except KeyError:
            raise ValueError(name) from None

    parser = ArgumentParser(
        prog='check-manpage-urls',
        description='Check the validity of the manpage URLs linked in the nixpkgs manual',
    )
    # NOTE(review): the add_argument() openers, the option strings and the
    # whole `file` positional are not visible in the reviewed text. They were
    # reconstructed from how the entry point reads `args.log_level` and
    # `args.file` (which may be None) -- confirm against the pristine file.
    parser.add_argument(
        '-l', '--log-level',
        default=os.getenv('LOG_LEVEL', 'INFO'),
        type=log_level,
        choices=list(LogLevel),
    )
    parser.add_argument(
        'file',
        type=Path,
        nargs='?',
    )

    return parser.parse_args(args)


if __name__ == "__main__":
    # Script entry point: configure logging, resolve the URL list, run the
    # checks, and exit non-zero when at least one link answered 404.
    import sys

    # NOTE(review): the `import sys`, `args = parse_args()` and
    # `structlog.configure(` lines are not visible in the reviewed text and
    # were reconstructed from the names used below -- confirm.
    args = parse_args()

    structlog.configure(
        # Drop log events below the requested level.
        wrapper_class=structlog.make_filtering_bound_logger(args.log_level),
    )
    # Module-level logger; check() and main() look it up as a global.
    logger = structlog.getLogger("check-manpage-urls.py")

    urls_path = args.file
    if urls_path is None:
        # No file given: fall back to the list four directory levels above
        # this script.
        REPO_ROOT = Path(__file__).parent.parent.parent.parent
        logger.info(f"Assuming we are in a nixpkgs repo rooted at {REPO_ROOT}")

        urls_path = REPO_ROOT / 'doc' / 'manpage-urls.json'

    count = asyncio.run(main(urls_path))

    sys.exit(0 if count[HTTPStatus.NOT_FOUND] == 0 else 1)