pkgs/by-name/ol/ollama/package.nix

   # Inputs of the ollama package expression.
   {
     # library, builders and wrapping helpers
     lib,
     stdenv,
     buildGoModule,
     fetchFromGitHub,
     buildEnv,
     linkFarm,
     overrideCC,
     makeWrapper,
     addDriverRunpath,
     autoAddDriverRunpath,
     nix-update-script,

     # build tools and accelerator package sets
     cmake,
     gcc12,
     gitMinimal,
     clblast,
     libdrm,
     rocmPackages,
     cudaPackages,
     darwin,

     # only referenced from `passthru.tests`
     nixosTests,
     testers,
     ollama,
     ollama-rocm,
     ollama-cuda,

     # read for `config.rocmSupport` and `config.cudaSupport`
     config,

     # GPU backend selection, one of `[ null false "rocm" "cuda" ]`:
     #   null   - follow `config.rocmSupport` / `config.cudaSupport`
     #   false  - never enable a GPU backend, whatever `config` says
     #   "rocm" - request the ROCm backend
     #   "cuda" - request the CUDA backend
     acceleration ? null,
   }:
  33
  # Reject unsupported `acceleration` values before anything else is evaluated.
  # `lib.assertOneOf` performs the same membership test as a plain
  # `builtins.elem`, but on failure it throws a message that names the
  # argument, the offending value and the list of accepted values instead of
  # a bare "assertion failed".
  assert lib.assertOneOf "acceleration" acceleration [
    null
    false
    "rocm"
    "cuda"
  ];
  40
  41 let
  # Package identity. Whenever `version` is bumped, every hash below
  # (`vendorHash` and `src.hash`) must be invalidated and recomputed.
  pname = "ollama";
  version = "0.4.7";

  # Handed to the Go builder as `vendorHash`.
  vendorHash = "sha256-xz9v91Im6xTLPzmYoVecdF7XiPKBZk3qou1SGokgPXQ=";

  # Upstream sources at the `v<version>` tag, fetched including submodules.
  src = fetchFromGitHub {
    owner = "ollama";
    repo = "ollama";
    rev = "v${version}";
    fetchSubmodules = true;
    hash = "sha256-X1xGo054+MpThvZzeg/F+IBczY1wn/gTjcq+j1JztDg=";
  };
  55
  # `true` unless both `config.rocmSupport` and `config.cudaSupport` are set.
  # In the conflicting case a warning is emitted and the result is `false`,
  # so neither config flag is used as a fallback.
  validateFallback =
    let
      conflictingConfig = config.rocmSupport && config.cudaSupport;
      conflictWarning =
        "both `nixpkgs.config.rocmSupport` and `nixpkgs.config.cudaSupport` are enabled, "
        + "but they are mutually exclusive; falling back to cpu";
    in
    lib.warnIf conflictingConfig conflictWarning (!conflictingConfig);

  # Decide whether backend `mode` is wanted:
  #   - `acceleration` names it explicitly, or
  #   - `acceleration` is `null` and the matching config flag (`fallback`)
  #     is set without conflicting with the other one.
  shouldEnable =
    mode: fallback:
    if acceleration == mode then true else fallback && acceleration == null && validateFallback;

  # What the user asked for, independent of the host platform.
  rocmRequested = shouldEnable "rocm" config.rocmSupport;
  cudaRequested = shouldEnable "cuda" config.cudaSupport;

  # What is actually built: a requested backend is only enabled on Linux hosts.
  enableRocm = rocmRequested && stdenv.hostPlatform.isLinux;
  enableCuda = cudaRequested && stdenv.hostPlatform.isLinux;
  68
  # ROCm packages used as build inputs and merged into `rocmPath`.
  rocmLibs =
    let
      inherit (rocmPackages)
        clr
        hipblas
        rocblas
        rocsolver
        rocsparse
        rocm-device-libs
        rocm-smi
        ;
    in
    [
      clr
      hipblas
      rocblas
      rocsolver
      rocsparse
      rocm-device-libs
      rocm-smi
    ];

  # Link farm with a single entry, `llvm`, pointing at the ROCm clang package.
  rocmClang = linkFarm "rocm-clang" { llvm = rocmPackages.llvm.clang; };

  # One merged tree holding the ROCm libraries plus the `llvm` link; used for
  # `ROCM_PATH` / `HIP_PATH` and for the wrapper's library path.
  rocmPath = buildEnv {
    name = "rocm-path";
    paths = rocmLibs ++ [ rocmClang ];
  };
  83
  # CUDA packages used as build inputs and exposed on the wrapper's library path.
  cudaLibs =
    let
      inherit (cudaPackages) cuda_cudart libcublas cuda_cccl;
    in
    [
      cuda_cudart
      libcublas
      cuda_cccl
    ];

  # Major version of CUDA, taken from cuda_cudart. e.g. 11 12
  cudaMajorVersion = lib.versions.major cudaPackages.cuda_cudart.version;

  # Merged CUDA tree: the `lib` outputs of `cudaLibs`, the static cudart
  # output and the nvcc binaries (build-host variant when splicing provides one).
  cudaToolkit = buildEnv {
    # ollama hardcodes the major version in the Makefile to support different variants.
    # - https://github.com/ollama/ollama/blob/v0.4.4/llama/Makefile#L17-L18
    name = "cuda-merged-${cudaMajorVersion}";
    paths =
      let
        nvcc = cudaPackages.cuda_nvcc.__spliced.buildHost or cudaPackages.cuda_nvcc;
      in
      map lib.getLib cudaLibs
      ++ [
        (lib.getOutput "static" cudaPackages.cuda_cudart)
        (lib.getBin nvcc)
      ];
  };

  # Path of `cudaToolkit` with the trailing "-<major>" removed.
  # NOTE(review): presumably the Makefile linked above appends the major
  # version itself - confirm against upstream.
  cudaPath = lib.removeSuffix "-${cudaMajorVersion}" cudaToolkit;
 104
 # Apple SDK 11.0 frameworks added to the inputs on Darwin hosts.
 metalFrameworks =
   let
     frameworks = darwin.apple_sdk_11_0.frameworks;
   in
   [
     frameworks.Accelerate
     frameworks.Metal
     frameworks.MetalKit
     frameworks.MetalPerformanceShaders
   ];
 111
 # Flags handed to `wrapProgram` for the GPU-enabled variants.
 #
 # ollama embeds llama-cpp binaries which actually run the ai models.
 # These llama-cpp binaries are unaffected by the ollama binary's DT_RUNPATH,
 # so LD_LIBRARY_PATH is temporarily required to use the gpu
 # until these llama-cpp binaries can have their runpath patched.
 wrapperOptions =
   let
     # always present: the driver library directory
     driverOptions = [
       "--suffix LD_LIBRARY_PATH : '${addDriverRunpath.driverLink}/lib'"
     ];
     rocmOptions = [
       "--suffix LD_LIBRARY_PATH : '${rocmPath}/lib'"
       "--set-default HIP_PATH '${rocmPath}'"
     ];
     cudaOptions = [
       "--suffix LD_LIBRARY_PATH : '${lib.makeLibraryPath (map lib.getLib cudaLibs)}'"
     ];
   in
   driverOptions ++ lib.optionals enableRocm rocmOptions ++ lib.optionals enableCuda cudaOptions;

 # The same flags as one space-separated shell fragment.
 wrapperArgs = lib.concatStringsSep " " wrapperOptions;
 128
 # Builder for the package: plain `buildGoModule`, except that the CUDA
 # variant replaces the stdenv compiler with gcc12.
 goBuild =
   if !enableCuda then
     buildGoModule
   else
     buildGoModule.override { stdenv = overrideCC stdenv gcc12; };

 # shorthands used in `meta`
 inherit (lib) maintainers licenses platforms;
 132 in
 133 goBuild {
 # identity, sources and vendor hash come from the surrounding `let`
 inherit pname version src vendorHash;

 # Toolkit locations exported to the build environment.
 # The set is empty when no GPU backend is enabled.
 env =
   lib.optionalAttrs enableCuda { CUDA_PATH = cudaPath; }
   // lib.optionalAttrs enableRocm {
     ROCM_PATH = rocmPath;
     HIP_PATH = rocmPath;
     CLBlast_DIR = "${clblast}/lib/cmake/CLBlast";
   };
 150
 # Build-time tools. Each group is appended only when its condition holds;
 # the order of the groups is significant for the resulting derivation.
 nativeBuildInputs =
   let
     gpuEnabled = enableRocm || enableCuda;
   in
   lib.concatLists [
     [
       cmake
       gitMinimal
     ]
     # `rocmLibs` is intentionally kept as a nested list element here
     (lib.optionals enableRocm [
       rocmPackages.llvm.bintools
       rocmLibs
     ])
     (lib.optionals enableCuda [ cudaPackages.cuda_nvcc ])
     # wrapping and driver runpath handling are only needed for GPU variants
     (lib.optionals gpuEnabled [
       makeWrapper
       autoAddDriverRunpath
     ])
     (lib.optionals stdenv.hostPlatform.isDarwin metalFrameworks)
   ];

 # Libraries for the selected backend, plus the Metal frameworks on Darwin.
 buildInputs = lib.concatLists [
   (lib.optionals enableRocm (rocmLibs ++ [ libdrm ]))
   (lib.optionals enableCuda cudaLibs)
   (lib.optionals stdenv.hostPlatform.isDarwin metalFrameworks)
 ];
 171
 # ollama's build script is unable to find hipcc
 patches = [ ./rocm.patch ];

 # Stamp the real release number into the Go sources.
 postPatch = ''
   # replace inaccurate version number with actual release version
   substituteInPlace version/version.go --replace-fail 0.0.0 '${version}'
 '';

 # The module fetch derivation must not run the llama.cpp build,
 # so its `preBuild` is blanked out.
 overrideModAttrs = _finalAttrs: _prevAttrs: { preBuild = ""; };

 # Runs `make` before the Go build, using all cores granted to the build.
 preBuild = ''
   # build llama.cpp libraries for ollama
   make -j $NIX_BUILD_CORES
 '';
 193
 # Linux only: install the contents of `dist/*/lib` under `$out/lib`.
 postInstall =
   if stdenv.hostPlatform.isLinux then
     ''
       # copy libggml_*.so and runners into lib
       # https://github.com/ollama/ollama/blob/v0.4.4/llama/make/gpu.make#L90
       mkdir -p $out/lib
       cp -r dist/*/lib/* $out/lib/
     ''
   else
     "";

 # Always hide the `app` binary; wrap `ollama` only for GPU variants
 # (`makeWrapper` is only among the native build inputs in that case).
 postFixup =
   let
     hideApp = ''
       # the app doesn't appear functional at the moment, so hide it
       mv "$out/bin/app" "$out/bin/.ollama-app"
     '';
     wrapOllama = ''
       # expose runtime libraries necessary to use the gpu
       wrapProgram "$out/bin/ollama" ${wrapperArgs}
     '';
   in
   hideApp + lib.optionalString (enableRocm || enableCuda) wrapOllama;
 210
 # Go linker flags: two size-reducing flags followed by `-X` assignments that
 # set the version string and the server mode.
 ldflags =
   let
     stripFlags = [
       "-s"
       "-w"
     ];
     linkerVars = [
       "-X=github.com/ollama/ollama/version.Version=${version}"
       "-X=github.com/ollama/ollama/server.mode=release"
     ];
   in
   stripFlags ++ linkerVars;
 217
 # Tests, plus an update script that is attached only to the variant built
 # without any GPU backend.
 passthru =
   let
     cpuOnly = !(enableRocm || enableCuda);

     # available on every platform
     commonTests = {
       inherit ollama;
       version = testers.testVersion {
         package = ollama;
         inherit version;
       };
     };

     # GPU variants and the NixOS service tests, added on Linux hosts only
     linuxTests = {
       inherit ollama-rocm ollama-cuda;
       service = nixosTests.ollama;
       service-cuda = nixosTests.ollama-cuda;
       service-rocm = nixosTests.ollama-rocm;
     };
   in
   {
     tests = commonTests // lib.optionalAttrs stdenv.hostPlatform.isLinux linuxTests;
   }
   // lib.optionalAttrs cpuOnly { updateScript = nix-update-script { }; };
 234
 # Package metadata. The description and platform list follow the *requested*
 # backend (`rocmRequested` / `cudaRequested`), so requesting a GPU backend
 # restricts `platforms` to Linux.
 meta =
   let
     gpuRequested = rocmRequested || cudaRequested;
     rocmBlurb = lib.optionalString rocmRequested ", using ROCm for AMD GPU acceleration";
     cudaBlurb = lib.optionalString cudaRequested ", using CUDA for NVIDIA GPU acceleration";
   in
   {
     description = "Get up and running with large language models locally" + rocmBlurb + cudaBlurb;
     homepage = "https://github.com/ollama/ollama";
     changelog = "https://github.com/ollama/ollama/releases/tag/v${version}";
     license = licenses.mit;
     mainProgram = "ollama";
     platforms = if gpuRequested then platforms.linux else platforms.unix;
     maintainers = [
       maintainers.abysssol
       maintainers.dit7ya
       maintainers.elohmeier
       maintainers.roydubnium
     ];
   };
 252 }