# pkgs/development/python-modules/torch/default.nix (from NixOS/nixpkgs)
# Provenance: merge of PR #268619 (tweag/lib-descriptions),
# blob f89fed489f84e3e1d96cabc4b93bc87c5d6cbede
1 { stdenv, lib, fetchFromGitHub, fetchpatch, buildPythonPackage, python,
2   config, cudaSupport ? config.cudaSupport, cudaPackages, magma,
3   useSystemNccl ? true,
4   MPISupport ? false, mpi,
5   buildDocs ? false,
7   # Native build inputs
8   cmake, linkFarm, symlinkJoin, which, pybind11, removeReferencesTo,
9   pythonRelaxDepsHook,
11   # Build inputs
12   numactl,
13   Accelerate, CoreServices, libobjc,
15   # Propagated build inputs
16   filelock,
17   jinja2,
18   networkx,
19   sympy,
20   numpy, pyyaml, cffi, click, typing-extensions,
21   # ROCm build and `torch.compile` requires `openai-triton`
22   tritonSupport ? (!stdenv.isDarwin), openai-triton,
24   # Unit tests
25   hypothesis, psutil,
27   # Disable MKLDNN on aarch64-darwin, it negatively impacts performance,
28   # this is also what official pytorch build does
29   mklDnnSupport ? !(stdenv.isDarwin && stdenv.isAarch64),
31   # virtual pkg that consistently instantiates blas across nixpkgs
32   # See https://github.com/NixOS/nixpkgs/pull/83888
33   blas,
35   # ninja (https://ninja-build.org) must be available to run C++ extensions tests,
36   ninja,
38   # dependencies for torch.utils.tensorboard
39   pillow, six, future, tensorboard, protobuf,
41   pythonOlder,
43   # ROCm dependencies
44   rocmSupport ? config.rocmSupport,
45   rocmPackages,
46   gpuTargets ? [ ]
let
  inherit (lib) attrsets lists strings trivial;
  inherit (cudaPackages) cudaFlags cudnn;

  # Some packages are not available on all platforms (nccl is absent e.g. on
  # Jetson — see the buildInputs comment below); fall back to null so the
  # USE_NCCL flag and the buildInputs list can be gated on its presence.
  nccl = cudaPackages.nccl or null;
56   setBool = v: if v then "1" else "0";
  # CUDA compute capabilities this pytorch release accepts; targets outside
  # this list are rejected by the build scripts.
  # https://github.com/pytorch/pytorch/blob/v2.0.1/torch/utils/cpp_extension.py#L1744
  supportedTorchCudaCapabilities =
    let
      real = ["3.5" "3.7" "5.0" "5.2" "5.3" "6.0" "6.1" "6.2" "7.0" "7.2" "7.5" "8.0" "8.6" "8.7" "8.9" "9.0"];
      # Every real architecture may also be targeted in forward-compatible
      # "+PTX" form.
      ptx = lists.map (x: "${x}+PTX") real;
    in
    real ++ ptx;
  # NOTE: The lists.subtractLists function is perhaps a bit unintuitive. It subtracts the elements
  #   of the first list *from* the second list. That means:
  #   lists.subtractLists a b = b - a

  # For CUDA: the capabilities that are both requested (cudaFlags) and usable
  # by this torch release ...
  supportedCudaCapabilities = lists.intersectLists cudaFlags.cudaCapabilities supportedTorchCudaCapabilities;
  # ... and the requested capabilities torch cannot build for (reported by
  # gpuArchWarner below).
  unsupportedCudaCapabilities = lists.subtractLists supportedCudaCapabilities cudaFlags.cudaCapabilities;
  # Abort evaluation (trivial.throwIf — NOT a mere warning, despite the name)
  # when no supported GPU targets remain, listing the unsupported targets that
  # were requested; otherwise return the supported list unchanged.
  gpuArchWarner = supported: unsupported:
    trivial.throwIf (supported == [ ])
      (
        "No supported GPU targets specified. Requested GPU targets: "
        + strings.concatStringsSep ", " unsupported
      )
      supported;
  # Create the gpuTargetString: explicit gpuTargets take priority; otherwise it
  # is derived from whichever GPU backend (CUDA or ROCm) is enabled.  Used for
  # TORCH_CUDA_ARCH_LIST / PYTORCH_ROCM_ARCH in preConfigure.
  gpuTargetString = strings.concatStringsSep ";" (
    if gpuTargets != [ ] then
    # If gpuTargets is specified, it always takes priority.
      gpuTargets
    else if cudaSupport then
      gpuArchWarner supportedCudaCapabilities unsupportedCudaCapabilities
    else if rocmSupport then
      rocmPackages.clr.gpuTargets
    else
      throw "No GPU targets specified"
  );
  # Merge the individual ROCm packages into one tree; pytorch's cmake expects
  # a monolithic /opt/rocm-style installation (see the postPatch substitutions
  # that point hard-coded /opt/rocm paths at this derivation).
  rocmtoolkit_joined = symlinkJoin {
    name = "rocm-merged";

    paths = with rocmPackages; [
      rocm-core clr rccl miopen miopengemm rocrand rocblas
      rocsparse hipsparse rocthrust rocprim hipcub roctracer
      rocfft rocsolver hipfft hipsolver hipblas
      rocminfo rocm-thunk rocm-comgr rocm-device-libs
      rocm-runtime clr.icd hipify
    ];

    # Fix `setuptools` not being found
    postBuild = ''
      rm -rf $out/nix-support
    '';
  };
  # Human-readable reasons this package cannot build; filterAttrs keeps only
  # the conditions that actually hold.  Consumed by meta.broken and exposed
  # via passthru for debugging.
  brokenConditions = attrsets.filterAttrs (_: cond: cond) {
    "CUDA and ROCm are not mutually exclusive" = cudaSupport && rocmSupport;
    "CUDA is not targeting Linux" = cudaSupport && !stdenv.isLinux;
    "Unsupported CUDA version" = cudaSupport && !(builtins.elem cudaPackages.cudaMajorVersion [ "11" "12" ]);
    "MPI cudatoolkit does not match cudaPackages.cudatoolkit" = MPISupport && cudaSupport && (mpi.cudatoolkit != cudaPackages.cudatoolkit);
    "Magma cudaPackages does not match cudaPackages" = cudaSupport && (magma.cudaPackages != cudaPackages);
  };
in buildPythonPackage rec {
  pname = "torch";
  # Don't forget to update torch-bin to the same version.
  version = "2.0.1";
  format = "setuptools";

  disabled = pythonOlder "3.8.0";

  outputs = [
    "out" # output standard python package
    "dev" # output libtorch headers
    "lib" # output libtorch libraries
  ];

  src = fetchFromGitHub {
    owner = "pytorch";
    repo = "pytorch";
    rev = "refs/tags/v${version}";
    # Submodules carry the vendored third_party dependencies (gloo, kineto,
    # pocketfft, ... — patched below).
    fetchSubmodules = true;
    hash = "sha256-xUj77yKz3IQ3gd/G32pI4OhL3LoN1zS7eFg0/0nZp5I=";
  };
  patches = lib.optionals (stdenv.isDarwin && stdenv.isx86_64) [
    # pthreadpool added support for Grand Central Dispatch in April
    # 2020. However, this relies on functionality (DISPATCH_APPLY_AUTO)
    # that is available starting with macOS 10.13. However, our current
    # base is 10.12. Until we upgrade, we can fall back on the older
    # pthread support.
    ./pthreadpool-disable-gcd.diff
  ] ++ lib.optionals stdenv.isLinux [
    # Propagate CUPTI to Kineto by overriding the search path with environment variables.
    # (The CUPTI_INCLUDE_DIR / CUPTI_LIBRARY_DIR exports in preConfigure rely
    # on this patch.)
    # https://github.com/pytorch/pytorch/pull/108847
    ./pytorch-pr-108847.patch
  ];
  # Source fixups that cannot be expressed as plain patch files: rewrite
  # hard-coded ROCm paths to the merged toolkit, soften a fatal NCCL version
  # check, and work around SDK/toolchain quirks.
  postPatch = lib.optionalString rocmSupport ''
    # https://github.com/facebookincubator/gloo/pull/297
    substituteInPlace third_party/gloo/cmake/Hipify.cmake \
      --replace "\''${HIPIFY_COMMAND}" "python \''${HIPIFY_COMMAND}"

    # Replace hard-coded rocm paths
    substituteInPlace caffe2/CMakeLists.txt \
      --replace "/opt/rocm" "${rocmtoolkit_joined}" \
      --replace "hcc/include" "hip/include" \
      --replace "rocblas/include" "include/rocblas" \
      --replace "hipsparse/include" "include/hipsparse"

    # Doesn't pick up the environment variable?
    substituteInPlace third_party/kineto/libkineto/CMakeLists.txt \
      --replace "\''$ENV{ROCM_SOURCE_DIR}" "${rocmtoolkit_joined}" \
      --replace "/opt/rocm" "${rocmtoolkit_joined}"

    # Strangely, this is never set in cmake
    substituteInPlace cmake/public/LoadHIP.cmake \
      --replace "set(ROCM_PATH \$ENV{ROCM_PATH})" \
        "set(ROCM_PATH \$ENV{ROCM_PATH})''\nset(ROCM_VERSION ${lib.concatStrings (lib.intersperse "0" (lib.splitString "." rocmPackages.clr.version))})"
  ''
  # Detection of NCCL version doesn't work particularly well when using the static binary.
  + lib.optionalString cudaSupport ''
    substituteInPlace cmake/Modules/FindNCCL.cmake \
      --replace \
        'message(FATAL_ERROR "Found NCCL header version and library version' \
        'message(WARNING "Found NCCL header version and library version'
  ''
  # TODO(@connorbaker): Remove this patch after 2.1.0 lands.
  # (Adds capability 8.7 — e.g. Jetson Orin — to the list cpp_extension accepts.)
  + lib.optionalString cudaSupport ''
    substituteInPlace torch/utils/cpp_extension.py \
      --replace \
        "'8.6', '8.9'" \
        "'8.6', '8.7', '8.9'"
  ''
  # error: no member named 'aligned_alloc' in the global namespace; did you mean simply 'aligned_alloc'
  # This lib overrided aligned_alloc hence the error message. Tltr: his function is linkable but not in header.
  + lib.optionalString (stdenv.isDarwin && lib.versionOlder stdenv.hostPlatform.darwinSdkVersion "11.0") ''
    substituteInPlace third_party/pocketfft/pocketfft_hdronly.h --replace '#if __cplusplus >= 201703L
    inline void *aligned_alloc(size_t align, size_t size)' '#if __cplusplus >= 201703L && 0
    inline void *aligned_alloc(size_t align, size_t size)'
  '';
  # NOTE(@connorbaker): Though we do not disable Gloo or MPI when building with CUDA support, caution should be taken
  # when using the different backends. Gloo's GPU support isn't great, and MPI and CUDA can't be used at the same time
  # without extreme care to ensure they don't lock each other out of shared resources.
  # For more, see https://github.com/open-mpi/ompi/issues/7733#issuecomment-629806195.
  preConfigure = lib.optionalString cudaSupport ''
    export TORCH_CUDA_ARCH_LIST="${gpuTargetString}"
    export CUDNN_INCLUDE_DIR=${cudnn.dev}/include
    export CUDNN_LIB_DIR=${cudnn.lib}/lib
    export CUPTI_INCLUDE_DIR=${cudaPackages.cuda_cupti.dev}/include
    export CUPTI_LIBRARY_DIR=${cudaPackages.cuda_cupti.lib}/lib
  ''
  # For ROCm, point the build at the merged toolkit and run the upstream
  # hipification script (tools/amd_build) over the source tree.
  + lib.optionalString rocmSupport ''
    export ROCM_PATH=${rocmtoolkit_joined}
    export ROCM_SOURCE_DIR=${rocmtoolkit_joined}
    export PYTORCH_ROCM_ARCH="${gpuTargetString}"
    export CMAKE_CXX_FLAGS="-I${rocmtoolkit_joined}/include -I${rocmtoolkit_joined}/include/rocblas"
    python tools/amd_build/build_amd.py
  '';
  # Use pytorch's custom configurations
  dontUseCmakeConfigure = true;

  # causes possible redefinition of _FORTIFY_SOURCE
  hardeningDisable = [ "fortify3" ];

  BUILD_NAMEDTENSOR = setBool true;
  BUILD_DOCS = setBool buildDocs;

  # We only do an imports check, so do not build tests either.
  BUILD_TEST = setBool false;

  # Unlike MKL, oneDNN (née MKLDNN) is FOSS, so we enable support for
  # it by default. PyTorch currently uses its own vendored version
  # of oneDNN through Intel iDeep.
  USE_MKLDNN = setBool mklDnnSupport;
  USE_MKLDNN_CBLAS = setBool mklDnnSupport;

  # Avoid using pybind11 from git submodule
  # Also avoids pytorch exporting the headers of pybind11
  USE_SYSTEM_PYBIND11 = true;

  # Cap parallelism at the builder's core count, configure via setup.py
  # (cmake only), then drive the actual compilation through cmake directly.
  preBuild = ''
    export MAX_JOBS=$NIX_BUILD_CORES
    ${python.pythonOnBuildForHost.interpreter} setup.py build --cmake-only
    ${cmake}/bin/cmake build
  '';

  # Rewrite the RPATH of every libcaffe2*.so: drop its first two entries
  # (presumably build-tree paths — TODO confirm) and prepend $ORIGIN so
  # sibling libraries resolve relative to the installed location.
  preFixup = ''
    function join_by { local IFS="$1"; shift; echo "$*"; }
    function strip2 {
      IFS=':'
      read -ra RP <<< $(patchelf --print-rpath $1)
      IFS=' '
      RP_NEW=$(join_by : ''${RP[@]:2})
      patchelf --set-rpath \$ORIGIN:''${RP_NEW} "$1"
    }
    for f in $(find ''${out} -name 'libcaffe2*.so')
    do
      strip2 $f
    done
  '';

  # Override the (weirdly) wrong version set by default. See
  # https://github.com/NixOS/nixpkgs/pull/52437#issuecomment-449718038
  # https://github.com/pytorch/pytorch/blob/v1.0.0/setup.py#L267
  PYTORCH_BUILD_VERSION = version;
  PYTORCH_BUILD_NUMBER = 0;

  # nccl is null only when unavailable for the platform (see the `let` block).
  USE_NCCL = setBool (nccl != null);
  USE_SYSTEM_NCCL = setBool useSystemNccl;                  # don't build pytorch's third_party NCCL
  USE_STATIC_NCCL = setBool useSystemNccl;
  # Suppress a weird warning in mkl-dnn, part of ideep in pytorch
  # (upstream seems to have fixed this in the wrong place?)
  # https://github.com/intel/mkl-dnn/commit/8134d346cdb7fe1695a2aa55771071d455fae0bc
  # https://github.com/pytorch/pytorch/issues/22346
  #
  # Also of interest: pytorch ignores CXXFLAGS uses CFLAGS for both C and C++:
  # https://github.com/pytorch/pytorch/blob/v1.11.0/setup.py#L17
  env.NIX_CFLAGS_COMPILE = toString ((lib.optionals (blas.implementation == "mkl") [ "-Wno-error=array-bounds" ]
  # Suppress gcc regression: avx512 math function raises uninitialized variable warning
  # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105593
  # See also: Fails to compile with GCC 12.1.0 https://github.com/pytorch/pytorch/issues/77939
  ++ lib.optionals (stdenv.cc.isGNU && lib.versionAtLeast stdenv.cc.version "12.0.0") [
    "-Wno-error=maybe-uninitialized"
    "-Wno-error=uninitialized"
  ]
  # Since pytorch 2.0:
  # gcc-12.2.0/include/c++/12.2.0/bits/new_allocator.h:158:33: error: ‘void operator delete(void*, std::size_t)’
  # ... called on pointer ‘<unknown>’ with nonzero offset [1, 9223372036854775800] [-Werror=free-nonheap-object]
  ++ lib.optionals (stdenv.cc.isGNU && lib.versions.major stdenv.cc.version == "12" ) [
    "-Wno-error=free-nonheap-object"
  ]
  # .../source/torch/csrc/autograd/generated/python_functions_0.cpp:85:3:
  # error: cast from ... to ... converts to incompatible function type [-Werror,-Wcast-function-type-strict]
  ++ lib.optionals (stdenv.cc.isClang && lib.versionAtLeast stdenv.cc.version "16") [
    "-Wno-error=cast-function-type-strict"
  # Suppresses the most spammy warnings.
  # This is mainly to fix https://github.com/NixOS/nixpkgs/issues/266895.
  ] ++ lib.optionals rocmSupport [
    "-Wno-#warnings"
    "-Wno-cpp"
    "-Wno-unknown-warning-option"
    "-Wno-ignored-attributes"
    "-Wno-deprecated-declarations"
    "-Wno-defaulted-function-deleted"
    "-Wno-pass-failed"
  # Unconditional suppressions, applied for every compiler/backend.
  ] ++ [
    "-Wno-unused-command-line-argument"
    "-Wno-uninitialized"
    "-Wno-array-bounds"
    "-Wno-free-nonheap-object"
    "-Wno-unused-result"
  ] ++ lib.optionals stdenv.cc.isGNU [
    "-Wno-maybe-uninitialized"
    "-Wno-stringop-overflow"
  ]));
  # Build-time-only tooling (runs on the build platform).
  nativeBuildInputs = [
    cmake
    which
    ninja
    pybind11
    pythonRelaxDepsHook
    removeReferencesTo
  ] ++ lib.optionals cudaSupport (with cudaPackages; [
    autoAddOpenGLRunpathHook
    cuda_nvcc
  ])
  ++ lib.optionals rocmSupport [ rocmtoolkit_joined ];
  # Libraries linked into the result; the CUDA set is split into .dev/.lib
  # outputs so only what is needed lands in each closure.
  buildInputs = [ blas blas.provider ]
    ++ lib.optionals cudaSupport (with cudaPackages; [
      cuda_cccl.dev # <thrust/*>
      cuda_cudart # cuda_runtime.h and libraries
      cuda_cupti.dev # For kineto
      cuda_cupti.lib # For kineto
      cuda_nvcc.dev # crt/host_config.h; even though we include this in nativeBuildinputs, it's needed here too
      cuda_nvml_dev.dev # <nvml.h>
      cuda_nvrtc.dev
      cuda_nvrtc.lib
      cuda_nvtx.dev
      cuda_nvtx.lib # -llibNVToolsExt
      cudnn.dev
      cudnn.lib
      libcublas.dev
      libcublas.lib
      libcufft.dev
      libcufft.lib
      libcurand.dev
      libcurand.lib
      libcusolver.dev
      libcusolver.lib
      libcusparse.dev
      libcusparse.lib
    ] ++ lists.optionals (nccl != null) [
      # Some platforms do not support NCCL (i.e., Jetson)
      nccl.dev # Provides nccl.h AND a static copy of NCCL!
    ] ++ lists.optionals (strings.versionOlder cudaVersion "11.8") [
      cuda_nvprof.dev # <cuda_profiler_api.h>
    ] ++ lists.optionals (strings.versionAtLeast cudaVersion "11.8") [
      cuda_profiler_api.dev # <cuda_profiler_api.h>
    ])
    ++ lib.optionals rocmSupport [ rocmPackages.llvm.openmp ]
    ++ lib.optionals (cudaSupport || rocmSupport) [ magma ]
    ++ lib.optionals stdenv.isLinux [ numactl ]
    ++ lib.optionals stdenv.isDarwin [ Accelerate CoreServices libobjc ];
  # Python runtime dependencies of the installed torch package.
  propagatedBuildInputs = [
    cffi
    click
    numpy
    pyyaml

    # From install_requires:
    filelock
    typing-extensions
    sympy
    networkx
    jinja2

    # the following are required for tensorboard support
    pillow six future tensorboard protobuf

    # torch/csrc requires `pybind11` at runtime
    pybind11
  ]
  ++ lib.optionals tritonSupport [ openai-triton ]
  ++ lib.optionals MPISupport [ mpi ]
  ++ lib.optionals rocmSupport [ rocmtoolkit_joined ];
  # Tests take a long time and may be flaky, so just sanity-check imports
  doCheck = false;

  pythonImportsCheck = [
    "torch"
  ];

  # Only used if doCheck is re-enabled (see checkPhase below).
  nativeCheckInputs = [ hypothesis ninja psutil ];
398   checkPhase = with lib.versions; with lib.strings; concatStringsSep " " [
399     "runHook preCheck"
400     "${python.interpreter} test/run_test.py"
401     "--exclude"
402     (concatStringsSep " " [
403       "utils" # utils requires git, which is not allowed in the check phase
405       # "dataloader" # psutils correctly finds and triggers multiprocessing, but is too sandboxed to run -- resulting in numerous errors
406       # ^^^^^^^^^^^^ NOTE: while test_dataloader does return errors, these are acceptable errors and do not interfere with the build
408       # tensorboard has acceptable failures for pytorch 1.3.x due to dependencies on tensorboard-plugins
409       (optionalString (majorMinor version == "1.3" ) "tensorboard")
410     ])
411     "runHook postCheck"
412   ];
  pythonRemoveDeps = [
    # In our dist-info the name is just "triton"
    "pytorch-triton-rocm"
  ];

  # Strip compiler store-path references (keeps stdenv.cc out of the runtime
  # closure), then split libtorch headers into $dev and libraries into $lib,
  # fixing up the cmake config files to match the new layout.
  postInstall = ''
    find "$out/${python.sitePackages}/torch/include" "$out/${python.sitePackages}/torch/lib" -type f -exec remove-references-to -t ${stdenv.cc} '{}' +

    mkdir $dev
    cp -r $out/${python.sitePackages}/torch/include $dev/include
    cp -r $out/${python.sitePackages}/torch/share $dev/share

    # Fix up library paths for split outputs
    substituteInPlace \
      $dev/share/cmake/Torch/TorchConfig.cmake \
      --replace \''${TORCH_INSTALL_PREFIX}/lib "$lib/lib"

    substituteInPlace \
      $dev/share/cmake/Caffe2/Caffe2Targets-release.cmake \
      --replace \''${_IMPORT_PREFIX}/lib "$lib/lib"

    mkdir $lib
    mv $out/${python.sitePackages}/torch/lib $lib/lib
    ln -s $lib/lib $out/${python.sitePackages}/torch/lib
  '' + lib.optionalString rocmSupport ''
    substituteInPlace $dev/share/cmake/Tensorpipe/TensorpipeTargets-release.cmake \
      --replace "\''${_IMPORT_PREFIX}/lib64" "$lib/lib"

    substituteInPlace $dev/share/cmake/ATen/ATenConfig.cmake \
      --replace "/build/source/torch/include" "$dev/include"
  '';
  # Darwin: rewrite the install names of the split-out dylibs (and the
  # @rpath references between them) to absolute $lib paths, since the
  # libraries were moved out of the python package in postInstall.
  postFixup = lib.optionalString stdenv.isDarwin ''
    for f in $(ls $lib/lib/*.dylib); do
        install_name_tool -id $lib/lib/$(basename $f) $f || true
    done

    install_name_tool -change @rpath/libshm.dylib $lib/lib/libshm.dylib $lib/lib/libtorch_python.dylib
    install_name_tool -change @rpath/libtorch.dylib $lib/lib/libtorch.dylib $lib/lib/libtorch_python.dylib
    install_name_tool -change @rpath/libc10.dylib $lib/lib/libc10.dylib $lib/lib/libtorch_python.dylib

    install_name_tool -change @rpath/libc10.dylib $lib/lib/libc10.dylib $lib/lib/libtorch.dylib

    install_name_tool -change @rpath/libtorch.dylib $lib/lib/libtorch.dylib $lib/lib/libshm.dylib
    install_name_tool -change @rpath/libc10.dylib $lib/lib/libc10.dylib $lib/lib/libshm.dylib
  '';
  # Builds in 2+h with 2 cores, and ~15m with a big-parallel builder.
  requiredSystemFeatures = [ "big-parallel" ];

  passthru = {
    inherit cudaSupport cudaPackages;
    # At least for 1.10.2 `torch.fft` is unavailable unless BLAS provider is MKL. This attribute allows for easy detection of its availability.
    blasProvider = blas.provider;
    # To help debug when a package is broken due to CUDA support
    inherit brokenConditions;
    # Capabilities actually compiled in (empty when CUDA is disabled).
    cudaCapabilities = if cudaSupport then supportedCudaCapabilities else [ ];
  };

  meta = with lib; {
    changelog = "https://github.com/pytorch/pytorch/releases/tag/v${version}";
    # keep PyTorch in the description so the package can be found under that name on search.nixos.org
    description = "PyTorch: Tensors and Dynamic neural networks in Python with strong GPU acceleration";
    homepage = "https://pytorch.org/";
    license = licenses.bsd3;
    maintainers = with maintainers; [ teh thoughtpolice tscholak ]; # tscholak esp. for darwin-related builds
    platforms = with platforms; linux ++ lib.optionals (!cudaSupport && !rocmSupport) darwin;
    # Broken when any of the brokenConditions defined in the `let` block holds.
    broken = builtins.any trivial.id (builtins.attrValues brokenConditions);
  };