pkgs/tools/misc/ollama/default.nix

   1 { lib
   2 , buildGo122Module
   3 , fetchFromGitHub
   4 , fetchpatch
   5 , buildEnv
   6 , linkFarm
   7 , overrideCC
   8 , makeWrapper
   9 , stdenv
  10 , nixosTests
  11
  12 , pkgs
  13 , cmake
  14 , gcc12
  15 , clblast
  16 , libdrm
  17 , rocmPackages
  18 , cudaPackages
  19 , linuxPackages
  20 , darwin
  21
  22   # one of `[ null "rocm" "cuda" ]`
  23 , acceleration ? null
  24
  25 , testers
  26 , ollama
  27 }:
  28
  29 let
  30   pname = "ollama";
  31   version = "0.1.29";
  32   src = fetchFromGitHub {
  33     owner = "jmorganca";
  34     repo = "ollama";
  35     rev = "v${version}";
  36     hash = "sha256-M2G53DJF/22ZVCAb4jGjyErKO6q2argehHSV7AEef6w=";
  37     fetchSubmodules = true;
  38   };
  39
  40   validAccel = lib.assertOneOf "ollama.acceleration" acceleration [ null "rocm" "cuda" ];
  41
  42   warnIfNotLinux = api: (lib.warnIfNot stdenv.isLinux
  43     "building ollama with `${api}` is only supported on linux; falling back to cpu"
  44     stdenv.isLinux);
  45   enableRocm = validAccel && (acceleration == "rocm") && (warnIfNotLinux "rocm");
  46   enableCuda = validAccel && (acceleration == "cuda") && (warnIfNotLinux "cuda");
  47
  48   rocmClang = linkFarm "rocm-clang" {
  49     llvm = rocmPackages.llvm.clang;
  50   };
  51   rocmPath = buildEnv {
  52     name = "rocm-path";
  53     paths = [
  54       rocmPackages.clr
  55       rocmPackages.hipblas
  56       rocmPackages.rocblas
  57       rocmPackages.rocsolver
  58       rocmPackages.rocsparse
  59       rocmPackages.rocm-device-libs
  60       rocmClang
  61     ];
  62   };
  63
  64   cudaToolkit = buildEnv {
  65     name = "cuda-toolkit";
  66     ignoreCollisions = true; # FIXME: find a cleaner way to do this without ignoring collisions
  67     paths = [
  68       cudaPackages.cudatoolkit
  69       cudaPackages.cuda_cudart
  70     ];
  71   };
  72
  73   runtimeLibs = lib.optionals enableRocm [
  74     rocmPackages.rocm-smi
  75   ] ++ lib.optionals enableCuda [
  76     linuxPackages.nvidia_x11
  77   ];
  78
  79   appleFrameworks = darwin.apple_sdk_11_0.frameworks;
  80   metalFrameworks = [
  81     appleFrameworks.Accelerate
  82     appleFrameworks.Metal
  83     appleFrameworks.MetalKit
  84     appleFrameworks.MetalPerformanceShaders
  85   ];
  86
  87
  88   goBuild =
  89     if enableCuda then
  90       buildGo122Module.override { stdenv = overrideCC stdenv gcc12; }
  91     else
  92       buildGo122Module;
  93   preparePatch = patch: hash: fetchpatch {
  94     url = "file://${src}/llm/patches/${patch}";
  95     inherit hash;
  96     stripLen = 1;
  97     extraPrefix = "llm/llama.cpp/";
  98   };
  99   inherit (lib) licenses platforms maintainers;
 100 in
 101 goBuild ((lib.optionalAttrs enableRocm {
 102   ROCM_PATH = rocmPath;
 103   CLBlast_DIR = "${clblast}/lib/cmake/CLBlast";
 104 }) // (lib.optionalAttrs enableCuda {
 105   CUDA_LIB_DIR = "${cudaToolkit}/lib";
 106   CUDACXX = "${cudaToolkit}/bin/nvcc";
 107   CUDAToolkit_ROOT = cudaToolkit;
 108 }) // {
 109   inherit pname version src;
 110   vendorHash = "sha256-Lj7CBvS51RqF63c01cOCgY7BCQeCKGu794qzb/S80C0=";
 111
 112   nativeBuildInputs = [
 113     cmake
 114   ] ++ lib.optionals (enableRocm || enableCuda) [
 115     makeWrapper
 116   ] ++ lib.optionals stdenv.isDarwin
 117     metalFrameworks;
 118
 119   buildInputs = lib.optionals enableRocm [
 120     rocmPackages.clr
 121     rocmPackages.hipblas
 122     rocmPackages.rocblas
 123     rocmPackages.rocsolver
 124     rocmPackages.rocsparse
 125     libdrm
 126   ] ++ lib.optionals enableCuda [
 127     cudaPackages.cuda_cudart
 128   ] ++ lib.optionals stdenv.isDarwin
 129     metalFrameworks;
 130
 131   patches = [
 132     # remove uses of `git` in the `go generate` script
 133     # instead use `patch` where necessary
 134     ./remove-git.patch
 135     # replace a hardcoded use of `g++` with `$CXX`
 136     ./replace-gcc.patch
 137
 138     # ollama's patches of llama.cpp's example server
 139     # `ollama/llm/generate/gen_common.sh` -> "apply temporary patches until fix is upstream"
 140     (preparePatch "01-cache.diff" "sha256-VDwu/iK6taBCyscpndQiOJ3eGqonnLVwmS2rJNMBVGU=")
 141     (preparePatch "02-cudaleaks.diff" "sha256-nxsWgrePUMsZBWWQAjqVHWMJPzr1owH1zSJvUU7Q5pA=")
 142     (preparePatch "03-load_exception.diff" "sha256-1DfNahFYYxqlx4E4pwMKQpL+XR0bibYnDFGt6dCL4TM=")
 143     (preparePatch "04-locale.diff" "sha256-r5nHiP6yN/rQObRu2FZIPBKpKP9yByyZ6sSI2SKj6Do=")
 144     (preparePatch "05-fix-clip-free.diff" "sha256-EFZ+QTtZCvstVxYgVdFKHsQqdkL98T0eXOEBOqCrlL4=")
 145   ];
 146   postPatch = ''
 147     # use a patch from the nix store in the `go generate` script
 148     substituteInPlace llm/generate/gen_common.sh \
 149       --subst-var-by cmakeIncludePatch '${./cmake-include.patch}'
 150     # `ollama/llm/generate/gen_common.sh` -> "avoid duplicate main symbols when we link into the cgo binary"
 151     substituteInPlace llm/llama.cpp/examples/server/server.cpp \
 152       --replace-fail 'int main(' 'int __main('
 153     # replace inaccurate version number with actual release version
 154     substituteInPlace version/version.go --replace-fail 0.0.0 '${version}'
 155   '';
 156   preBuild = ''
 157     export OLLAMA_SKIP_PATCHING=true
 158     # build llama.cpp libraries for ollama
 159     go generate ./...
 160   '';
 161   postFixup = ''
 162     # the app doesn't appear functional at the moment, so hide it
 163     mv "$out/bin/app" "$out/bin/.ollama-app"
 164   '' + lib.optionalString (enableRocm || enableCuda) ''
 165     # expose runtime libraries necessary to use the gpu
 166     mv "$out/bin/ollama" "$out/bin/.ollama-unwrapped"
 167     makeWrapper "$out/bin/.ollama-unwrapped" "$out/bin/ollama" \
 168       --suffix LD_LIBRARY_PATH : '/run/opengl-driver/lib:${lib.makeLibraryPath runtimeLibs}' '' + lib.optionalString enableRocm ''\
 169       --set-default HIP_PATH ${pkgs.rocmPackages.meta.rocm-hip-libraries}
 170   '';
 171
 172   ldflags = [
 173     "-s"
 174     "-w"
 175     "-X=github.com/jmorganca/ollama/version.Version=${version}"
 176     "-X=github.com/jmorganca/ollama/server.mode=release"
 177   ];
 178
 179   passthru.tests = {
 180     service = nixosTests.ollama;
 181     rocm = pkgs.ollama.override { acceleration = "rocm"; };
 182     cuda = pkgs.ollama.override { acceleration = "cuda"; };
 183     version = testers.testVersion {
 184       inherit version;
 185       package = ollama;
 186     };
 187   };
 188
 189   meta = {
 190     changelog = "https://github.com/ollama/ollama/releases/tag/v${version}";
 191     description = "Get up and running with large language models locally";
 192     homepage = "https://github.com/jmorganca/ollama";
 193     license = licenses.mit;
 194     platforms = platforms.unix;
 195     mainProgram = "ollama";
 196     maintainers = with maintainers; [ abysssol dit7ya elohmeier ];
 197   };
 198 })