  # needed for audio-to-text
  # apply feature parameter names according to
  # https://github.com/NixOS/rfcs/pull/169
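  # Example (illustrative): consumers can toggle these feature flags through the
  # package's override mechanism, e.g.
  #   local-ai.override { with_cublas = true; with_tts = false; }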
  enable_avx512 ? stdenv.hostPlatform.avx512Support,
  with_openblas ? false,
  with_cublas ? config.cudaSupport,
  with_tinydream ? false, # do not compile with cublas
  with_stablediffusion ? true,
    else if with_cublas then
    else if with_clblas then
  inherit (cudaPackages)
  go-llama = effectiveStdenv.mkDerivation {
    src = fetchFromGitHub {
      repo = "go-llama.cpp";
      rev = "2b57a8ae43e4699d3dc5d1496a1ccd42922993be";
      hash = "sha256-D6SEg5pPcswGyKAmF4QTJP6/Y1vjRr7m7REguag+too=";
      fetchSubmodules = true;
      "BUILD_TYPE=${BUILD_TYPE}"
      ++ lib.optionals with_cublas [
      ++ lib.optionals with_clblas [
      ++ lib.optionals with_openblas [ openblas.dev ];
    nativeBuildInputs = [ cmake ] ++ lib.optionals with_cublas [ cuda_nvcc ];
    dontUseCmakeConfigure = true;
      tar cf - --exclude=build --exclude=CMakeFiles --exclude="*.o" . \
    (llama-cpp-grpc.overrideAttrs (prev: {
      name = "llama-cpp-rpc";
      cmakeFlags = prev.cmakeFlags ++ [
        (lib.cmakeBool "GGML_AVX" false)
        (lib.cmakeBool "GGML_AVX2" false)
        (lib.cmakeBool "GGML_AVX512" false)
        (lib.cmakeBool "GGML_FMA" false)
        (lib.cmakeBool "GGML_F16C" false)
      openclSupport = false;
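  # llama-cpp-rpc is the same gRPC server as llama-cpp-grpc below, rebuilt with all
  # SIMD extensions disabled so the resulting binary also runs on CPUs without AVX.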
    (llama-cpp.overrideAttrs (
        name = "llama-cpp-grpc";
        src = fetchFromGitHub {
          rev = "fc54ef0d1c138133a01933296d50a36a1ab64735";
          hash = "sha256-o87EhrA2Oa98pwyb6GSUgwERY0/GWJiX7kvlxDv4zb4=";
          fetchSubmodules = true;
            cp -r --no-preserve=mode ${src}/backend/cpp/llama grpc-server
            cp llava/clip.* llava/llava.* grpc-server
            printf "\nadd_subdirectory(grpc-server)" >> CMakeLists.txt
            cp ${src}/backend/backend.proto grpc-server
            sed -i grpc-server/CMakeLists.txt \
              -e '/get_filename_component/ s;[.\/]*backend/;;' \
              -e '$a\install(TARGETS ''${TARGET} RUNTIME)'
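            # the sed above rewrites the vendored backend/ path and appends an install
            # rule so the resulting grpc-server binary ends up in $out/bin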
        cmakeFlags = prev.cmakeFlags ++ [
          (lib.cmakeBool "BUILD_SHARED_LIBS" false)
          (lib.cmakeBool "GGML_AVX" enable_avx)
          (lib.cmakeBool "GGML_AVX2" enable_avx2)
          (lib.cmakeBool "GGML_AVX512" enable_avx512)
          (lib.cmakeBool "GGML_FMA" enable_fma)
          (lib.cmakeBool "GGML_F16C" enable_f16c)
        buildInputs = prev.buildInputs ++ [
          protobuf # also provides abseil_cpp as a propagated build input
      cudaSupport = with_cublas;
      openclSupport = with_clblas;
      blasSupport = with_openblas;
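  # The piper text-to-speech stack below (espeak-ng, piper-phonemize, piper-tts and the
  # go-piper binding) is rebuilt from go-piper's vendored submodules so that every
  # component matches the revision pinned by go-piper.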
  espeak-ng' = espeak-ng.overrideAttrs (self: {
    inherit (go-piper) src;
    sourceRoot = "${go-piper.src.name}/espeak";
    nativeBuildInputs = [ cmake ];
    cmakeFlags = (self.cmakeFlags or [ ]) ++ [
      (lib.cmakeBool "BUILD_SHARED_LIBS" true)
      (lib.cmakeBool "USE_ASYNC" false)
      (lib.cmakeBool "USE_MBROLA" false)
      (lib.cmakeBool "USE_LIBPCAUDIO" false)
      (lib.cmakeBool "USE_KLATT" false)
      (lib.cmakeBool "USE_SPEECHPLAYER" false)
      (lib.cmakeBool "USE_LIBSONIC" false)
      (lib.cmakeBool "CMAKE_POSITION_INDEPENDENT_CODE" true)
  piper-phonemize = stdenv.mkDerivation {
    name = "piper-phonemize";
    inherit (go-piper) src;
    sourceRoot = "${go-piper.src.name}/piper-phonemize";
    nativeBuildInputs = [
      (lib.cmakeFeature "ONNXRUNTIME_DIR" "${onnxruntime.dev}")
      (lib.cmakeFeature "ESPEAK_NG_DIR" "${espeak-ng'}")
    passthru.espeak-ng = espeak-ng';
  piper-tts' = (piper-tts.override { inherit piper-phonemize; }).overrideAttrs (self: {
    inherit (go-piper) src;
    sourceRoot = "${go-piper.src.name}/piper";
      cp CMakeFiles/piper.dir/src/cpp/piper.cpp.o $out/piper.o
      mv piper piper_phonemize bin/
      rm -rf cmake pkgconfig espeak-ng-data *.ort
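  # go-piper provides the Go binding (libpiper_binding.a); the piper artifacts built
  # above are copied into its source tree before the binding is compiled.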
  go-piper = stdenv.mkDerivation {
    src = fetchFromGitHub {
      rev = "9d0100873a7dbb0824dfea40e8cec70a1b110759";
      hash = "sha256-Yv9LQkWwGpYdOS0FvtP0vZ0tRyBAx27sdmziBR4U4n8=";
      fetchSubmodules = true;
      cp -r --no-preserve=mode ${piper-tts'}/* source
        -e '/CXXFLAGS *= / s;$; -DSPDLOG_FMT_EXTERNAL=1;'
    buildFlags = [ "libpiper_binding.a" ];
      cp -r --no-preserve=mode $src $out
      mkdir -p $out/piper-phonemize/pi
      cp -r --no-preserve=mode ${piper-phonemize}/share $out/piper-phonemize/pi
  go-rwkv = stdenv.mkDerivation {
    src = fetchFromGitHub {
      repo = "go-rwkv.cpp";
      rev = "661e7ae26d442f5cfebd2a0881b44e8c55949ec6";
      hash = "sha256-byTNZQSnt7qpBMng3ANJmpISh3GJiz+F15UqfXaz6nQ=";
      fetchSubmodules = true;
    buildFlags = [ "librwkv.a" ];
    dontUseCmakeConfigure = true;
    nativeBuildInputs = [ cmake ];
      cp -r --no-preserve=mode $src $out
  # try to merge with openai-whisper-cpp in the future
  whisper-cpp = effectiveStdenv.mkDerivation {
    name = "whisper-cpp";
    src = fetchFromGitHub {
      repo = "whisper.cpp";
      rev = "9e3c5345cd46ea718209db53464e426c3fe7a25e";
      hash = "sha256-JOptyveuaKRLzeZ6GuB3A70IM7dk4we95g5o25XVXJI=";
    nativeBuildInputs = [
    ] ++ lib.optionals with_cublas [ cuda_nvcc ];
      ++ lib.optionals with_cublas [
      ++ lib.optionals with_clblas [
      ++ lib.optionals with_openblas [ openblas.dev ];
      (lib.cmakeBool "WHISPER_CUDA" with_cublas)
      (lib.cmakeBool "WHISPER_CLBLAST" with_clblas)
      (lib.cmakeBool "WHISPER_OPENBLAS" with_openblas)
      (lib.cmakeBool "WHISPER_NO_AVX" (!enable_avx))
      (lib.cmakeBool "WHISPER_NO_AVX2" (!enable_avx2))
      (lib.cmakeBool "WHISPER_NO_FMA" (!enable_fma))
      (lib.cmakeBool "WHISPER_NO_F16C" (!enable_f16c))
      (lib.cmakeBool "BUILD_SHARED_LIBS" false)
      install -Dt $out/bin bin/*
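      # the static libraries built here are later copied into sources/whisper.cpp so
      # LocalAI's Makefile links against them instead of rebuilding whisper.cpp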
  go-bert = stdenv.mkDerivation {
    src = fetchFromGitHub {
      repo = "go-bert.cpp";
      rev = "710044b124545415f555e4260d16b146c725a6e4";
      hash = "sha256-UNrs3unYjvSzCVaVISFFBDD+s37lmN6/7ajmGNcYgrU=";
      fetchSubmodules = true;
    buildFlags = [ "libgobert.a" ];
    dontUseCmakeConfigure = true;
    nativeBuildInputs = [ cmake ];
    env.NIX_CFLAGS_COMPILE = "-Wformat";
      cp -r --no-preserve=mode $src $out
  go-stable-diffusion = stdenv.mkDerivation {
    name = "go-stable-diffusion";
    src = fetchFromGitHub {
      repo = "go-stable-diffusion";
      rev = "4a3cd6aeae6f66ee57eae9a0075f8c58c3a6a38f";
      hash = "sha256-KXUvMP6cDyWib4rG0RmVRm3pgrdsfKXaH3k0v5/mTe8=";
      fetchSubmodules = true;
    buildFlags = [ "libstablediffusion.a" ];
    dontUseCmakeConfigure = true;
    nativeBuildInputs = [ cmake ];
    buildInputs = [ opencv ];
    env.NIX_CFLAGS_COMPILE = " -isystem ${opencv}/include/opencv4";
      tar cf - --exclude=CMakeFiles --exclude="*.o" --exclude="*.so" --exclude="*.so.*" . \
  go-tiny-dream-ncnn = ncnn.overrideAttrs (self: {
    name = "go-tiny-dream-ncnn";
    inherit (go-tiny-dream) src;
    sourceRoot = "${go-tiny-dream.src.name}/ncnn";
    cmakeFlags = self.cmakeFlags ++ [
      (lib.cmakeBool "NCNN_SHARED_LIB" false)
      (lib.cmakeBool "NCNN_OPENMP" false)
      (lib.cmakeBool "NCNN_VULKAN" false)
      (lib.cmakeBool "NCNN_AVX" enable_avx)
      (lib.cmakeBool "NCNN_AVX2" enable_avx2)
      (lib.cmakeBool "NCNN_AVX512" enable_avx512)
      (lib.cmakeBool "NCNN_FMA" enable_fma)
      (lib.cmakeBool "NCNN_F16C" enable_f16c)
  go-tiny-dream = effectiveStdenv.mkDerivation {
    name = "go-tiny-dream";
    src = fetchFromGitHub {
      repo = "go-tiny-dream";
      rev = "c04fa463ace9d9a6464313aa5f9cd0f953b6c057";
      hash = "sha256-uow3vbAI4F/fTGjYOKOLqTpKq7NgGYSZhGlEhn7h6s0=";
      fetchSubmodules = true;
      mkdir -p source/ncnn/build/src
      cp -r --no-preserve=mode ${go-tiny-dream-ncnn}/lib/. ${go-tiny-dream-ncnn}/include/. source/ncnn/build/src
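      # go-tiny-dream expects ncnn to be built in-tree; instead, the headers and static
      # libraries from the separately built go-tiny-dream-ncnn above are copied to the
      # location its Makefile expects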
    buildFlags = [ "libtinydream.a" ];
      tar cf - --exclude="*.o" . \
    meta.broken = lib.versionOlder go-tiny-dream.stdenv.cc.version "13";
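  # Go build tags selecting which of the optional native backends are compiled into the
  # final binary.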
    lib.optional with_tinydream "tinydream"
    ++ lib.optional with_tts "tts"
    ++ lib.optional with_stablediffusion "stablediffusion";
      # It's necessary to consistently use backendStdenv when building with CUDA support;
      # otherwise we get libstdc++ errors downstream.
      cudaPackages.backendStdenv
  src = fetchFromGitHub {
    hash = "sha256-FeZZC0Tg9JT9Yj0e27GOLSdHEtWl17AHK3j7epwPyY8=";
      cp = "cp -r --no-preserve=mode,ownership";
      ${cp} ${go-llama} sources/go-llama.cpp
      ${cp} ${if with_tts then go-piper else go-piper.src} sources/go-piper
      ${cp} ${go-rwkv} sources/go-rwkv.cpp
      ${cp} ${whisper-cpp.src} sources/whisper.cpp
      cp ${whisper-cpp}/lib/lib*.a sources/whisper.cpp
      ${cp} ${go-bert} sources/go-bert.cpp
        if with_stablediffusion then go-stable-diffusion else go-stable-diffusion.src
      } sources/go-stable-diffusion
      ${cp} ${if with_tinydream then go-tiny-dream else go-tiny-dream.src} sources/go-tiny-dream
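      # for disabled features only the upstream source tree is copied, so the Makefile
      # still finds the directories it expects without the backend being prebuilt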
  self = buildGoModule.override { stdenv = effectiveStdenv; } {
    inherit pname version src;
    vendorHash = "sha256-mDxp5frUIECSHKjxaJVqIP7mnIusvdT45Xlxc9+P5tE=";
    env.NIX_CFLAGS_COMPILE = lib.optionalString with_stablediffusion " -isystem ${opencv}/include/opencv4";
          -e '/mod download/ d' \
          -e '/^ALL_GRPC_BACKENDS+=backend-assets\/grpc\/llama-cpp-fallback/ d' \
          -e '/^ALL_GRPC_BACKENDS+=backend-assets\/grpc\/llama-cpp-avx/ d' \
          -e '/^ALL_GRPC_BACKENDS+=backend-assets\/grpc\/llama-cpp-cuda/ d' \
      + lib.optionalString with_cublas ''
          -e '/^CGO_LDFLAGS_WHISPER?=/ s;$;-L${libcufft}/lib -L${cuda_cudart}/lib;'
        mkdir -p backend-assets/grpc
        cp ${llama-cpp-grpc}/bin/grpc-server backend-assets/grpc/llama-cpp-avx2
        cp ${llama-cpp-rpc}/bin/grpc-server backend-assets/grpc/llama-cpp-grpc
        mkdir -p backend/cpp/llama/llama.cpp
        mkdir -p backend-assets/util
        cp ${llama-cpp-rpc}/bin/llama-rpc-server backend-assets/util/llama-cpp-rpc-server
        # avoid rebuild of prebuilt make targets
        touch backend-assets/grpc/* backend-assets/util/* sources/**/lib*.a
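        # (make only rebuilds a target when a prerequisite has a newer mtime, so
        # refreshing the timestamps of the copied artifacts keeps them "up to date")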
      ++ lib.optionals with_cublas [
      ++ lib.optionals with_clblas [
      ++ lib.optionals with_openblas [ openblas.dev ]
      ++ lib.optionals with_stablediffusion go-stable-diffusion.buildInputs
      ++ lib.optionals with_tts go-piper.buildInputs;
    nativeBuildInputs = [
    ] ++ lib.optional enable_upx upx ++ lib.optionals with_cublas [ cuda_nvcc ];
    enableParallelBuilding = false;
    # should be passed as makeFlags, but the build system fails with strings
    env.GO_TAGS = builtins.concatStringsSep " " GO_TAGS;
        "VERSION=v${version}"
        "BUILD_TYPE=${BUILD_TYPE}"
      ++ lib.optional with_cublas "CUDA_LIBPATH=${cuda_cudart}/lib"
      ++ lib.optional with_tts "PIPER_CGO_CXXFLAGS=-DSPDLOG_FMT_EXTERNAL=1";
        ''${enableParallelBuilding:+-j''${NIX_BUILD_CORES}}
      concatTo flagsArray makeFlags makeFlagsArray buildFlags buildFlagsArray
      echoCmd 'build flags' "''${flagsArray[@]}"
      make build "''${flagsArray[@]}"
      install -Dt $out/bin ${pname}
    # patching the rpath with patchelf doesn't work; the executable
    # raises a segmentation fault
          ++ lib.optionals with_cublas [
            # driverLink has to be first to avoid loading the stub version of libcuda.so
            # https://github.com/NixOS/nixpkgs/issues/320145#issuecomment-2190319327
            addDriverRunpath.driverLink
            (lib.getLib libcublas)
          ++ lib.optionals with_clblas [
          ++ lib.optionals with_openblas [ openblas ]
          ++ lib.optionals with_tts [ piper-phonemize ]
          ++ lib.optionals (with_tts && enable_upx) [
        wrapProgram $out/bin/${pname} \
          --prefix LD_LIBRARY_PATH : "${lib.makeLibraryPath LD_LIBRARY_PATH}" \
          --prefix PATH : "${ffmpeg}/bin"
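        # ffmpeg is put on PATH for the audio-to-text endpoint (see the ffmpeg input
        # marked "needed for audio-to-text" above)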
    passthru.local-packages = {
    passthru.features = {
    passthru.tests = callPackages ./tests.nix { inherit self; };
    passthru.lib = callPackages ./lib.nix { };
      description = "OpenAI alternative for running local LLMs and generating images and audio";
      homepage = "https://localai.io";
      license = licenses.mit;
      maintainers = with maintainers; [
      platforms = platforms.linux;