# one of `[ null false "rocm" "cuda" ]`
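# e.g. `ollama.override { acceleration = "rocm"; }` forces a rocm build,
# while `false` opts out of gpu support even when the nixpkgs config enables it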
assert builtins.elem acceleration [
# don't forget to invalidate all hashes each update
src = fetchFromGitHub {
hash = "sha256-K1FYXEP0bTZa8M+V4/SxI+Q+LWs2rsAMZ/ETJCaO7P8=";
fetchSubmodules = true;
vendorHash = "sha256-hSxcREAujhvzHVNwnRTfhi0MKI3s8HNavER2VLz6SYk=";
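# vendorHash pins the go module dependencies fetched by `buildGoModule`;
# it changes whenever `go.mod`/`go.sum` do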
validateFallback = lib.warnIf (config.rocmSupport && config.cudaSupport) (lib.concatStrings [
"both `nixpkgs.config.rocmSupport` and `nixpkgs.config.cudaSupport` are enabled, "
"but they are mutually exclusive; falling back to cpu"
]) (!(config.rocmSupport && config.cudaSupport));
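# pick an acceleration mode: an explicit `acceleration` argument always wins;
# with `acceleration == null`, fall back to the matching `nixpkgs.config` flag,
# provided the two flags don't contradict each other (checked by validateFallback)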
mode: fallback: (acceleration == mode) || (fallback && acceleration == null && validateFallback);
rocmRequested = shouldEnable "rocm" config.rocmSupport;
cudaRequested = shouldEnable "cuda" config.cudaSupport;
enableRocm = rocmRequested && stdenv.hostPlatform.isLinux;
enableCuda = cudaRequested && stdenv.hostPlatform.isLinux;
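# "requested" drives user-facing metadata (description, platforms) regardless of
# the current system; "enable*" additionally requires linux and gates the actual
# gpu build inputs and wrapper flags below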
rocmPackages.rocsolver
rocmPackages.rocsparse
rocmPackages.rocm-device-libs
rocmClang = linkFarm "rocm-clang" { llvm = rocmPackages.llvm.clang; };
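# hipcc expects to find clang under `$ROCM_PATH/llvm` (an assumption about its
# layout), so the linkFarm above exposes clang under that name; it is merged
# with the rocm libraries into a single tree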
paths = rocmLibs ++ [ rocmClang ];
cudaPackages.cuda_cudart
cudaPackages.libcublas
cudaPackages.cuda_cccl
cudaToolkit = buildEnv {
paths = map lib.getLib cudaLibs ++ [
(lib.getOutput "static" cudaPackages.cuda_cudart)
(lib.getBin (cudaPackages.cuda_nvcc.__spliced.buildHost or cudaPackages.cuda_nvcc))
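# `__spliced.buildHost` picks the nvcc variant that runs on the build platform
# when cross-compiling; plain `cuda_nvcc` is the fallback for native builds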
metalFrameworks = with darwin.apple_sdk_11_0.frameworks; [
MetalPerformanceShaders
# ollama embeds llama-cpp binaries which actually run the ai models
# these llama-cpp binaries are unaffected by the ollama binary's DT_RUNPATH
# LD_LIBRARY_PATH is temporarily required to use the gpu
# until these llama-cpp binaries can have their runpath patched
"--suffix LD_LIBRARY_PATH : '${addDriverRunpath.driverLink}/lib'"
++ lib.optionals enableRocm [
"--suffix LD_LIBRARY_PATH : '${rocmPath}/lib'"
"--set-default HIP_PATH '${rocmPath}'"
++ lib.optionals enableCuda [
"--suffix LD_LIBRARY_PATH : '${lib.makeLibraryPath (map lib.getLib cudaLibs)}'"
wrapperArgs = builtins.concatStringsSep " " wrapperOptions;
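# nvcc only accepts a limited range of host compilers, so cuda builds pin the
# go toolchain's C compiler to gcc12 (assumed to match what this cudaPackages
# set supports)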
if enableCuda then buildGoModule.override { stdenv = overrideCC stdenv gcc12; } else buildGoModule;
inherit (lib) licenses platforms maintainers;
lib.optionalAttrs enableRocm {
ROCM_PATH = rocmPath;
CLBlast_DIR = "${clblast}/lib/cmake/CLBlast";
// lib.optionalAttrs enableCuda { CUDA_LIB_DIR = "${cudaToolkit}/lib"; };
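# ROCM_PATH, CLBlast_DIR and CUDA_LIB_DIR are environment variables read by
# ollama's `go generate` scripts to locate the gpu toolchains at build time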
++ lib.optionals enableRocm [ rocmPackages.llvm.bintools ]
++ lib.optionals enableCuda [ cudaPackages.cuda_nvcc ]
++ lib.optionals (enableRocm || enableCuda) [
++ lib.optionals stdenv.hostPlatform.isDarwin metalFrameworks;
lib.optionals enableRocm (rocmLibs ++ [ libdrm ])
++ lib.optionals enableCuda cudaLibs
++ lib.optionals stdenv.hostPlatform.isDarwin metalFrameworks;
# disable uses of `git` in the `go generate` script
# ollama's build script assumes the source is a git repo, but nix removes the `.git` directory
# this also disables necessary patches contained in `ollama/llm/patches/`
# those patches are applied in `postPatch`
# we provide our own deps at runtime
# replace inaccurate version number with actual release version
substituteInPlace version/version.go --replace-fail 0.0.0 '${version}'
# apply ollama's patches to the `llama.cpp` submodule
for diff in llm/patches/*; do
patch -p1 -d llm/llama.cpp < "$diff"
finalAttrs: prevAttrs: {
# don't run the llama.cpp build in the module fetch phase
# disable uses of `git`, since nix removes the `.git` directory
export OLLAMA_SKIP_PATCHING=true
# build llama.cpp libraries for ollama
# the app doesn't appear functional at the moment, so hide it
mv "$out/bin/app" "$out/bin/.ollama-app"
+ lib.optionalString (enableRocm || enableCuda) ''
# expose runtime libraries necessary to use the gpu
wrapProgram "$out/bin/ollama" ${wrapperArgs}
"-X=github.com/ollama/ollama/version.Version=${version}"
"-X=github.com/ollama/ollama/server.mode=release"
version = testers.testVersion {
// lib.optionalAttrs stdenv.hostPlatform.isLinux {
inherit ollama-rocm ollama-cuda;
service = nixosTests.ollama;
service-cuda = nixosTests.ollama-cuda;
service-rocm = nixosTests.ollama-rocm;
} // lib.optionalAttrs (!enableRocm && !enableCuda) { updateScript = nix-update-script { }; };
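# only the unaccelerated variant carries an updateScript, presumably so
# automated updates are proposed once rather than per gpu variant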
225 "Get up and running with large language models locally"
226 + lib.optionalString rocmRequested ", using ROCm for AMD GPU acceleration"
227 + lib.optionalString cudaRequested ", using CUDA for NVIDIA GPU acceleration";
228 homepage = "https://github.com/ollama/ollama";
229 changelog = "https://github.com/ollama/ollama/releases/tag/v${version}";
230 license = licenses.mit;
231 platforms = if (rocmRequested || cudaRequested) then platforms.linux else platforms.unix;
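# gpu-accelerated variants are linux-only; the default build also targets
# darwin, where the metal frameworks above provide acceleration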
mainProgram = "ollama";
maintainers = with maintainers; [