# pkgs/development/python-modules/torch/default.nix (from NixOS/nixpkgs)
# Provenance: merge of PR #268619 (tweag/lib-descriptions),
# blob f89fed489f84e3e1d96cabc4b93bc87c5d6cbede
1 { stdenv, lib, fetchFromGitHub, fetchpatch, buildPythonPackage, python,
2   config, cudaSupport ? config.cudaSupport, cudaPackages, magma,
3   useSystemNccl ? true,
4   MPISupport ? false, mpi,
5   buildDocs ? false,
7   # Native build inputs
8   cmake, linkFarm, symlinkJoin, which, pybind11, removeReferencesTo,
9   pythonRelaxDepsHook,
11   # Build inputs
12   numactl,
13   Accelerate, CoreServices, libobjc,
15   # Propagated build inputs
16   filelock,
17   jinja2,
18   networkx,
19   sympy,
20   numpy, pyyaml, cffi, click, typing-extensions,
21   # ROCm build and `torch.compile` requires `openai-triton`
22   tritonSupport ? (!stdenv.isDarwin), openai-triton,
24   # Unit tests
25   hypothesis, psutil,
27   # Disable MKLDNN on aarch64-darwin, it negatively impacts performance,
28   # this is also what official pytorch build does
29   mklDnnSupport ? !(stdenv.isDarwin && stdenv.isAarch64),
31   # virtual pkg that consistently instantiates blas across nixpkgs
32   # See https://github.com/NixOS/nixpkgs/pull/83888
33   blas,
35   # ninja (https://ninja-build.org) must be available to run C++ extensions tests,
36   ninja,
38   # dependencies for torch.utils.tensorboard
39   pillow, six, future, tensorboard, protobuf,
41   pythonOlder,
43   # ROCm dependencies
44   rocmSupport ? config.rocmSupport,
45   rocmPackages,
46   gpuTargets ? [ ]
let
  inherit (lib) attrsets lists strings trivial;
  inherit (cudaPackages) cudaFlags cudnn;

  # Some packages are not available on all platforms (nccl is absent e.g. on
  # Jetson — see the buildInputs comment below); fall back to null so the
  # USE_NCCL flag and the buildInputs list can be gated on its presence.
  nccl = cudaPackages.nccl or null;
56   setBool = v: if v then "1" else "0";
  # CUDA compute capabilities this pytorch release accepts; targets outside
  # this list are rejected by the build scripts.
  # https://github.com/pytorch/pytorch/blob/v2.0.1/torch/utils/cpp_extension.py#L1744
  supportedTorchCudaCapabilities =
    let
      real = ["3.5" "3.7" "5.0" "5.2" "5.3" "6.0" "6.1" "6.2" "7.0" "7.2" "7.5" "8.0" "8.6" "8.7" "8.9" "9.0"];
      # Every real architecture may also be targeted in forward-compatible
      # "+PTX" form.
      ptx = lists.map (x: "${x}+PTX") real;
    in
    real ++ ptx;
  # NOTE: The lists.subtractLists function is perhaps a bit unintuitive. It subtracts the elements
  #   of the first list *from* the second list. That means:
  #   lists.subtractLists a b = b - a

  # For CUDA: the capabilities that are both requested (cudaFlags) and usable
  # by this torch release ...
  supportedCudaCapabilities = lists.intersectLists cudaFlags.cudaCapabilities supportedTorchCudaCapabilities;
  # ... and the requested capabilities torch cannot build for (reported by
  # gpuArchWarner below).
  unsupportedCudaCapabilities = lists.subtractLists supportedCudaCapabilities cudaFlags.cudaCapabilities;
  # Abort evaluation (trivial.throwIf — NOT a mere warning, despite the name)
  # when no supported GPU targets remain, listing the unsupported targets that
  # were requested; otherwise return the supported list unchanged.
  gpuArchWarner = supported: unsupported:
    trivial.throwIf (supported == [ ])
      (
        "No supported GPU targets specified. Requested GPU targets: "
        + strings.concatStringsSep ", " unsupported
      )
      supported;
  # Create the gpuTargetString: explicit gpuTargets take priority; otherwise it
  # is derived from whichever GPU backend (CUDA or ROCm) is enabled.  Used for
  # TORCH_CUDA_ARCH_LIST / PYTORCH_ROCM_ARCH in preConfigure.
  gpuTargetString = strings.concatStringsSep ";" (
    if gpuTargets != [ ] then
    # If gpuTargets is specified, it always takes priority.
      gpuTargets
    else if cudaSupport then
      gpuArchWarner supportedCudaCapabilities unsupportedCudaCapabilities
    else if rocmSupport then
      rocmPackages.clr.gpuTargets
    else
      throw "No GPU targets specified"
  );
  # Merge the individual ROCm packages into one tree; pytorch's cmake expects
  # a monolithic /opt/rocm-style installation (see the postPatch substitutions
  # that point hard-coded /opt/rocm paths at this derivation).
  rocmtoolkit_joined = symlinkJoin {
    name = "rocm-merged";

    paths = with rocmPackages; [
      rocm-core clr rccl miopen miopengemm rocrand rocblas
      rocsparse hipsparse rocthrust rocprim hipcub roctracer
      rocfft rocsolver hipfft hipsolver hipblas
      rocminfo rocm-thunk rocm-comgr rocm-device-libs
      rocm-runtime clr.icd hipify
    ];

    # Fix `setuptools` not being found
    postBuild = ''
      rm -rf $out/nix-support
    '';
  };
  # Human-readable reasons this package cannot build; filterAttrs keeps only
  # the conditions that actually hold.  Consumed by meta.broken and exposed
  # via passthru for debugging.
  brokenConditions = attrsets.filterAttrs (_: cond: cond) {
    "CUDA and ROCm are not mutually exclusive" = cudaSupport && rocmSupport;
    "CUDA is not targeting Linux" = cudaSupport && !stdenv.isLinux;
    "Unsupported CUDA version" = cudaSupport && !(builtins.elem cudaPackages.cudaMajorVersion [ "11" "12" ]);
    "MPI cudatoolkit does not match cudaPackages.cudatoolkit" = MPISupport && cudaSupport && (mpi.cudatoolkit != cudaPackages.cudatoolkit);
    "Magma cudaPackages does not match cudaPackages" = cudaSupport && (magma.cudaPackages != cudaPackages);
  };
in buildPythonPackage rec {
  pname = "torch";
  # Don't forget to update torch-bin to the same version.
  version = "2.0.1";
  format = "setuptools";

  disabled = pythonOlder "3.8.0";

  outputs = [
    "out" # output standard python package
    "dev" # output libtorch headers
    "lib" # output libtorch libraries
  ];

  src = fetchFromGitHub {
    owner = "pytorch";
    repo = "pytorch";
    rev = "refs/tags/v${version}";
    # Submodules carry the vendored third_party dependencies (gloo, kineto,
    # pocketfft, ... — patched below).
    fetchSubmodules = true;
    hash = "sha256-xUj77yKz3IQ3gd/G32pI4OhL3LoN1zS7eFg0/0nZp5I=";
  };
  patches = lib.optionals (stdenv.isDarwin && stdenv.isx86_64) [
    # pthreadpool added support for Grand Central Dispatch in April
    # 2020. However, this relies on functionality (DISPATCH_APPLY_AUTO)
    # that is available starting with macOS 10.13. However, our current
    # base is 10.12. Until we upgrade, we can fall back on the older
    # pthread support.
    ./pthreadpool-disable-gcd.diff
  ] ++ lib.optionals stdenv.isLinux [
    # Propagate CUPTI to Kineto by overriding the search path with environment variables.
    # (The CUPTI_INCLUDE_DIR / CUPTI_LIBRARY_DIR exports in preConfigure rely
    # on this patch.)
    # https://github.com/pytorch/pytorch/pull/108847
    ./pytorch-pr-108847.patch
  ];
  # Source fixups that cannot be expressed as plain patch files: rewrite
  # hard-coded ROCm paths to the merged toolkit, soften a fatal NCCL version
  # check, and work around SDK/toolchain quirks.
  postPatch = lib.optionalString rocmSupport ''
    # https://github.com/facebookincubator/gloo/pull/297
    substituteInPlace third_party/gloo/cmake/Hipify.cmake \
      --replace "\''${HIPIFY_COMMAND}" "python \''${HIPIFY_COMMAND}"

    # Replace hard-coded rocm paths
    substituteInPlace caffe2/CMakeLists.txt \
      --replace "/opt/rocm" "${rocmtoolkit_joined}" \
      --replace "hcc/include" "hip/include" \
      --replace "rocblas/include" "include/rocblas" \
      --replace "hipsparse/include" "include/hipsparse"

    # Doesn't pick up the environment variable?
    substituteInPlace third_party/kineto/libkineto/CMakeLists.txt \
      --replace "\''$ENV{ROCM_SOURCE_DIR}" "${rocmtoolkit_joined}" \
      --replace "/opt/rocm" "${rocmtoolkit_joined}"

    # Strangely, this is never set in cmake
    substituteInPlace cmake/public/LoadHIP.cmake \
      --replace "set(ROCM_PATH \$ENV{ROCM_PATH})" \
        "set(ROCM_PATH \$ENV{ROCM_PATH})''\nset(ROCM_VERSION ${lib.concatStrings (lib.intersperse "0" (lib.splitString "." rocmPackages.clr.version))})"
  ''
  # Detection of NCCL version doesn't work particularly well when using the static binary.
  + lib.optionalString cudaSupport ''
    substituteInPlace cmake/Modules/FindNCCL.cmake \
      --replace \
        'message(FATAL_ERROR "Found NCCL header version and library version' \
        'message(WARNING "Found NCCL header version and library version'
  ''
  # TODO(@connorbaker): Remove this patch after 2.1.0 lands.
  # (Adds capability 8.7 — e.g. Jetson Orin — to the list cpp_extension accepts.)
  + lib.optionalString cudaSupport ''
    substituteInPlace torch/utils/cpp_extension.py \
      --replace \
        "'8.6', '8.9'" \
        "'8.6', '8.7', '8.9'"
  ''
  # error: no member named 'aligned_alloc' in the global namespace; did you mean simply 'aligned_alloc'
  # This lib overrided aligned_alloc hence the error message. Tltr: his function is linkable but not in header.
  + lib.optionalString (stdenv.isDarwin && lib.versionOlder stdenv.hostPlatform.darwinSdkVersion "11.0") ''
    substituteInPlace third_party/pocketfft/pocketfft_hdronly.h --replace '#if __cplusplus >= 201703L
    inline void *aligned_alloc(size_t align, size_t size)' '#if __cplusplus >= 201703L && 0
    inline void *aligned_alloc(size_t align, size_t size)'
  '';
  # NOTE(@connorbaker): Though we do not disable Gloo or MPI when building with CUDA support, caution should be taken
  # when using the different backends. Gloo's GPU support isn't great, and MPI and CUDA can't be used at the same time
  # without extreme care to ensure they don't lock each other out of shared resources.
  # For more, see https://github.com/open-mpi/ompi/issues/7733#issuecomment-629806195.
  preConfigure = lib.optionalString cudaSupport ''
    export TORCH_CUDA_ARCH_LIST="${gpuTargetString}"
    export CUDNN_INCLUDE_DIR=${cudnn.dev}/include
    export CUDNN_LIB_DIR=${cudnn.lib}/lib
    export CUPTI_INCLUDE_DIR=${cudaPackages.cuda_cupti.dev}/include
    export CUPTI_LIBRARY_DIR=${cudaPackages.cuda_cupti.lib}/lib
  ''
  # For ROCm, point the build at the merged toolkit and run the upstream
  # hipification script (tools/amd_build) over the source tree.
  + lib.optionalString rocmSupport ''
    export ROCM_PATH=${rocmtoolkit_joined}
    export ROCM_SOURCE_DIR=${rocmtoolkit_joined}
    export PYTORCH_ROCM_ARCH="${gpuTargetString}"
    export CMAKE_CXX_FLAGS="-I${rocmtoolkit_joined}/include -I${rocmtoolkit_joined}/include/rocblas"
    python tools/amd_build/build_amd.py
  '';
  # Use pytorch's custom configurations
  dontUseCmakeConfigure = true;

  # causes possible redefinition of _FORTIFY_SOURCE
  hardeningDisable = [ "fortify3" ];

  BUILD_NAMEDTENSOR = setBool true;
  BUILD_DOCS = setBool buildDocs;

  # We only do an imports check, so do not build tests either.
  BUILD_TEST = setBool false;

  # Unlike MKL, oneDNN (née MKLDNN) is FOSS, so we enable support for
  # it by default. PyTorch currently uses its own vendored version
  # of oneDNN through Intel iDeep.
  USE_MKLDNN = setBool mklDnnSupport;
  USE_MKLDNN_CBLAS = setBool mklDnnSupport;

  # Avoid using pybind11 from git submodule
  # Also avoids pytorch exporting the headers of pybind11
  USE_SYSTEM_PYBIND11 = true;

  # Cap parallelism at the builder's core count, configure via setup.py
  # (cmake only), then drive the actual compilation through cmake directly.
  preBuild = ''
    export MAX_JOBS=$NIX_BUILD_CORES
    ${python.pythonOnBuildForHost.interpreter} setup.py build --cmake-only
    ${cmake}/bin/cmake build
  '';

  # Rewrite the RPATH of every libcaffe2*.so: drop its first two entries
  # (presumably build-tree paths — TODO confirm) and prepend $ORIGIN so
  # sibling libraries resolve relative to the installed location.
  preFixup = ''
    function join_by { local IFS="$1"; shift; echo "$*"; }
    function strip2 {
      IFS=':'
      read -ra RP <<< $(patchelf --print-rpath $1)
      IFS=' '
      RP_NEW=$(join_by : ''${RP[@]:2})
      patchelf --set-rpath \$ORIGIN:''${RP_NEW} "$1"
    }
    for f in $(find ''${out} -name 'libcaffe2*.so')
    do
      strip2 $f
    done
  '';

  # Override the (weirdly) wrong version set by default. See
  # https://github.com/NixOS/nixpkgs/pull/52437#issuecomment-449718038
  # https://github.com/pytorch/pytorch/blob/v1.0.0/setup.py#L267
  PYTORCH_BUILD_VERSION = version;
  PYTORCH_BUILD_NUMBER = 0;

  # nccl is null only when unavailable for the platform (see the `let` block).
  USE_NCCL = setBool (nccl != null);
  USE_SYSTEM_NCCL = setBool useSystemNccl;                  # don't build pytorch's third_party NCCL
  USE_STATIC_NCCL = setBool useSystemNccl;
  # Suppress a weird warning in mkl-dnn, part of ideep in pytorch
  # (upstream seems to have fixed this in the wrong place?)
  # https://github.com/intel/mkl-dnn/commit/8134d346cdb7fe1695a2aa55771071d455fae0bc
  # https://github.com/pytorch/pytorch/issues/22346
  #
  # Also of interest: pytorch ignores CXXFLAGS uses CFLAGS for both C and C++:
  # https://github.com/pytorch/pytorch/blob/v1.11.0/setup.py#L17
  env.NIX_CFLAGS_COMPILE = toString ((lib.optionals (blas.implementation == "mkl") [ "-Wno-error=array-bounds" ]
  # Suppress gcc regression: avx512 math function raises uninitialized variable warning
  # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105593
  # See also: Fails to compile with GCC 12.1.0 https://github.com/pytorch/pytorch/issues/77939
  ++ lib.optionals (stdenv.cc.isGNU && lib.versionAtLeast stdenv.cc.version "12.0.0") [
    "-Wno-error=maybe-uninitialized"
    "-Wno-error=uninitialized"
  ]
  # Since pytorch 2.0:
  # gcc-12.2.0/include/c++/12.2.0/bits/new_allocator.h:158:33: error: ‘void operator delete(void*, std::size_t)’
  # ... called on pointer ‘<unknown>’ with nonzero offset [1, 9223372036854775800] [-Werror=free-nonheap-object]
  ++ lib.optionals (stdenv.cc.isGNU && lib.versions.major stdenv.cc.version == "12" ) [
    "-Wno-error=free-nonheap-object"
  ]
  # .../source/torch/csrc/autograd/generated/python_functions_0.cpp:85:3:
  # error: cast from ... to ... converts to incompatible function type [-Werror,-Wcast-function-type-strict]
  ++ lib.optionals (stdenv.cc.isClang && lib.versionAtLeast stdenv.cc.version "16") [
    "-Wno-error=cast-function-type-strict"
  # Suppresses the most spammy warnings.
  # This is mainly to fix https://github.com/NixOS/nixpkgs/issues/266895.
  ] ++ lib.optionals rocmSupport [
    "-Wno-#warnings"
    "-Wno-cpp"
    "-Wno-unknown-warning-option"
    "-Wno-ignored-attributes"
    "-Wno-deprecated-declarations"
    "-Wno-defaulted-function-deleted"
    "-Wno-pass-failed"
  # Unconditional suppressions, applied for every compiler/backend.
  ] ++ [
    "-Wno-unused-command-line-argument"
    "-Wno-uninitialized"
    "-Wno-array-bounds"
    "-Wno-free-nonheap-object"
    "-Wno-unused-result"
  ] ++ lib.optionals stdenv.cc.isGNU [
    "-Wno-maybe-uninitialized"
    "-Wno-stringop-overflow"
  ]));
  # Build-time-only tooling (runs on the build platform).
  nativeBuildInputs = [
    cmake
    which
    ninja
    pybind11
    pythonRelaxDepsHook
    removeReferencesTo
  ] ++ lib.optionals cudaSupport (with cudaPackages; [
    autoAddOpenGLRunpathHook
    cuda_nvcc
  ])
  ++ lib.optionals rocmSupport [ rocmtoolkit_joined ];
  # Libraries linked into the result; the CUDA set is split into .dev/.lib
  # outputs so only what is needed lands in each closure.
  buildInputs = [ blas blas.provider ]
    ++ lib.optionals cudaSupport (with cudaPackages; [
      cuda_cccl.dev # <thrust/*>
      cuda_cudart # cuda_runtime.h and libraries
      cuda_cupti.dev # For kineto
      cuda_cupti.lib # For kineto
      cuda_nvcc.dev # crt/host_config.h; even though we include this in nativeBuildinputs, it's needed here too
      cuda_nvml_dev.dev # <nvml.h>
      cuda_nvrtc.dev
      cuda_nvrtc.lib
      cuda_nvtx.dev
      cuda_nvtx.lib # -llibNVToolsExt
      cudnn.dev
      cudnn.lib
      libcublas.dev
      libcublas.lib
      libcufft.dev
      libcufft.lib
      libcurand.dev
      libcurand.lib
      libcusolver.dev
      libcusolver.lib
      libcusparse.dev
      libcusparse.lib
    ] ++ lists.optionals (nccl != null) [
      # Some platforms do not support NCCL (i.e., Jetson)
      nccl.dev # Provides nccl.h AND a static copy of NCCL!
    ] ++ lists.optionals (strings.versionOlder cudaVersion "11.8") [
      cuda_nvprof.dev # <cuda_profiler_api.h>
    ] ++ lists.optionals (strings.versionAtLeast cudaVersion "11.8") [
      cuda_profiler_api.dev # <cuda_profiler_api.h>
    ])
    ++ lib.optionals rocmSupport [ rocmPackages.llvm.openmp ]
    ++ lib.optionals (cudaSupport || rocmSupport) [ magma ]
    ++ lib.optionals stdenv.isLinux [ numactl ]
    ++ lib.optionals stdenv.isDarwin [ Accelerate CoreServices libobjc ];
  # Python runtime dependencies of the installed torch package.
  propagatedBuildInputs = [
    cffi
    click
    numpy
    pyyaml

    # From install_requires:
    filelock
    typing-extensions
    sympy
    networkx
    jinja2

    # the following are required for tensorboard support
    pillow six future tensorboard protobuf

    # torch/csrc requires `pybind11` at runtime
    pybind11
  ]
  ++ lib.optionals tritonSupport [ openai-triton ]
  ++ lib.optionals MPISupport [ mpi ]
  ++ lib.optionals rocmSupport [ rocmtoolkit_joined ];
  # Tests take a long time and may be flaky, so just sanity-check imports
  doCheck = false;

  pythonImportsCheck = [
    "torch"
  ];

  # Only used if doCheck is re-enabled (see checkPhase below).
  nativeCheckInputs = [ hypothesis ninja psutil ];
398   checkPhase = with lib.versions; with lib.strings; concatStringsSep " " [
399     "runHook preCheck"
400     "${python.interpreter} test/run_test.py"
401     "--exclude"
402     (concatStringsSep " " [
403       "utils" # utils requires git, which is not allowed in the check phase
405       # "dataloader" # psutils correctly finds and triggers multiprocessing, but is too sandboxed to run -- resulting in numerous errors
406       # ^^^^^^^^^^^^ NOTE: while test_dataloader does return errors, these are acceptable errors and do not interfere with the build
408       # tensorboard has acceptable failures for pytorch 1.3.x due to dependencies on tensorboard-plugins
409       (optionalString (majorMinor version == "1.3" ) "tensorboard")
410     ])
411     "runHook postCheck"
412   ];
  pythonRemoveDeps = [
    # In our dist-info the name is just "triton"
    "pytorch-triton-rocm"
  ];

  # Strip compiler store-path references (keeps stdenv.cc out of the runtime
  # closure), then split libtorch headers into $dev and libraries into $lib,
  # fixing up the cmake config files to match the new layout.
  postInstall = ''
    find "$out/${python.sitePackages}/torch/include" "$out/${python.sitePackages}/torch/lib" -type f -exec remove-references-to -t ${stdenv.cc} '{}' +

    mkdir $dev
    cp -r $out/${python.sitePackages}/torch/include $dev/include
    cp -r $out/${python.sitePackages}/torch/share $dev/share

    # Fix up library paths for split outputs
    substituteInPlace \
      $dev/share/cmake/Torch/TorchConfig.cmake \
      --replace \''${TORCH_INSTALL_PREFIX}/lib "$lib/lib"

    substituteInPlace \
      $dev/share/cmake/Caffe2/Caffe2Targets-release.cmake \
      --replace \''${_IMPORT_PREFIX}/lib "$lib/lib"

    mkdir $lib
    mv $out/${python.sitePackages}/torch/lib $lib/lib
    ln -s $lib/lib $out/${python.sitePackages}/torch/lib
  '' + lib.optionalString rocmSupport ''
    substituteInPlace $dev/share/cmake/Tensorpipe/TensorpipeTargets-release.cmake \
      --replace "\''${_IMPORT_PREFIX}/lib64" "$lib/lib"

    substituteInPlace $dev/share/cmake/ATen/ATenConfig.cmake \
      --replace "/build/source/torch/include" "$dev/include"
  '';
  # Darwin: rewrite the install names of the split-out dylibs (and the
  # @rpath references between them) to absolute $lib paths, since the
  # libraries were moved out of the python package in postInstall.
  postFixup = lib.optionalString stdenv.isDarwin ''
    for f in $(ls $lib/lib/*.dylib); do
        install_name_tool -id $lib/lib/$(basename $f) $f || true
    done

    install_name_tool -change @rpath/libshm.dylib $lib/lib/libshm.dylib $lib/lib/libtorch_python.dylib
    install_name_tool -change @rpath/libtorch.dylib $lib/lib/libtorch.dylib $lib/lib/libtorch_python.dylib
    install_name_tool -change @rpath/libc10.dylib $lib/lib/libc10.dylib $lib/lib/libtorch_python.dylib

    install_name_tool -change @rpath/libc10.dylib $lib/lib/libc10.dylib $lib/lib/libtorch.dylib

    install_name_tool -change @rpath/libtorch.dylib $lib/lib/libtorch.dylib $lib/lib/libshm.dylib
    install_name_tool -change @rpath/libc10.dylib $lib/lib/libc10.dylib $lib/lib/libshm.dylib
  '';
  # Builds in 2+h with 2 cores, and ~15m with a big-parallel builder.
  requiredSystemFeatures = [ "big-parallel" ];

  passthru = {
    inherit cudaSupport cudaPackages;
    # At least for 1.10.2 `torch.fft` is unavailable unless BLAS provider is MKL. This attribute allows for easy detection of its availability.
    blasProvider = blas.provider;
    # To help debug when a package is broken due to CUDA support
    inherit brokenConditions;
    # Capabilities actually compiled in (empty when CUDA is disabled).
    cudaCapabilities = if cudaSupport then supportedCudaCapabilities else [ ];
  };

  meta = with lib; {
    changelog = "https://github.com/pytorch/pytorch/releases/tag/v${version}";
    # keep PyTorch in the description so the package can be found under that name on search.nixos.org
    description = "PyTorch: Tensors and Dynamic neural networks in Python with strong GPU acceleration";
    homepage = "https://pytorch.org/";
    license = licenses.bsd3;
    maintainers = with maintainers; [ teh thoughtpolice tscholak ]; # tscholak esp. for darwin-related builds
    platforms = with platforms; linux ++ lib.optionals (!cudaSupport && !rocmSupport) darwin;
    # Broken when any of the brokenConditions defined in the `let` block holds.
    broken = builtins.any trivial.id (builtins.attrValues brokenConditions);
  };