17 buildPythonPackage rec {
18 pname = "trafilatura";
22 disabled = pythonOlder "3.9";
25 inherit pname version;
26 hash = "sha256-iYkdtkbdhNmPs0ovrte6hMIuVJAAe1h9BZkDbTUWR2A=";
29 # Remove the GUI entry point from setup.py, since the GUI is not supported in
30 # this packaging, and point the CLI tests at the trafilatura binary in $out/bin
32 substituteInPlace setup.py \
33 --replace-fail '"trafilatura_gui=trafilatura.gui:main",' ""
34 substituteInPlace tests/cli_tests.py \
35 --replace-fail 'trafilatura_bin = "trafilatura"' \
36 'trafilatura_bin = "${placeholder "out"}/bin/trafilatura"'
39 build-system = [ setuptools ];
51 nativeCheckInputs = [ pytestCheckHook ];
54 # Disable tests that require an internet connection
61 "test_meta_redirections"
68 pythonImportsCheck = [ "trafilatura" ];
71 description = "Python package and command-line tool designed to gather text on the Web";
72 homepage = "https://trafilatura.readthedocs.io";
73 changelog = "https://github.com/adbar/trafilatura/blob/v${version}/HISTORY.md";
74 license = lib.licenses.asl20;
75 maintainers = with lib.maintainers; [ jokatzke ];
76 mainProgram = "trafilatura";