assert lib.elem stdenv.system [ "x86_64-linux" "x86_64-darwin" "aarch64-linux" "aarch64-darwin" ];
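
# `common` below builds one Hadoop release line from per-system release
# metadata. As a sketch (hypothetical values), platformAttrs has the shape:
#
#   {
#     x86_64-linux = { version = "..."; hash = "sha256-..."; srcHash = "sha256-..."; };
#     aarch64-linux = x86_64-linux // { hash = "sha256-..."; };
#   }
#
# `version` picks the mirror tarball, `hash` pins the release tarball, and
# `srcHash` pins the source tree (as passed on to ./containerExecutor.nix).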
let
  common = { pname, platformAttrs, jdk, tests }:
    stdenv.mkDerivation (finalAttrs: {
      inherit pname jdk;
      version = platformAttrs.${stdenv.system}.version or (throw "Unsupported system: ${stdenv.system}");
      src = fetchurl {
        url = "mirror://apache/hadoop/common/hadoop-${finalAttrs.version}/hadoop-${finalAttrs.version}"
              + lib.optionalString stdenv.hostPlatform.isAarch64 "-aarch64" + ".tar.gz";
        inherit (platformAttrs.${stdenv.system} or (throw "Unsupported system: ${stdenv.system}")) hash;
      };

      # Build the container executor binary from source
      # installPhase does not evaluate containerExecutor lazily for some reason,
      # so give it a harmless value on non-Linux platforms
      containerExecutor = if stdenv.hostPlatform.isLinux then (callPackage ./containerExecutor.nix {
        inherit (finalAttrs) version;
        inherit platformAttrs;
      }) else "";

      nativeBuildInputs = [ makeWrapper ]
        ++ lib.optionals stdenv.hostPlatform.isLinux [ autoPatchelfHook ];
      buildInputs = lib.optionals stdenv.hostPlatform.isLinux [ (lib.getLib stdenv.cc.cc) openssl protobuf zlib snappy libtirpc ];

      installPhase = ''
        mkdir $out
        mv * $out/
      '' + lib.optionalString stdenv.hostPlatform.isLinux ''
        for n in $(find ${finalAttrs.containerExecutor}/bin -type f); do
          ln -sf "$n" $out/bin
        done

        # these libraries are loaded at runtime by the JVM
        ln -s ${lib.getLib cyrus_sasl}/lib/libsasl2.so $out/lib/native/libsasl2.so.2
        ln -s ${lib.getLib openssl}/lib/libcrypto.so $out/lib/native/
        ln -s ${lib.getLib zlib}/lib/libz.so.1 $out/lib/native/
        ln -s ${lib.getLib zstd}/lib/libzstd.so.1 $out/lib/native/
        ln -s ${lib.getLib bzip2}/lib/libbz2.so.1 $out/lib/native/
        ln -s ${lib.getLib snappy}/lib/libsnappy.so.1 $out/lib/native/

        # libjvm.so is in different paths for java 8 and 11;
        # libnativetask.so in hadoop 3 and libhdfs.so in hadoop 2 depend on it
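        # (typically jre/lib/<arch>/server on jdk8 vs lib/server on jdk11+,
        # hence the `find` below rather than a hard-coded path)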
        find $out/lib/native/ -name 'libnativetask.so*' -o -name 'libhdfs.so*' | \
          xargs -n1 patchelf --add-rpath $(dirname $(find ${finalAttrs.jdk.home} -name libjvm.so | head -n1))

        # NixOS/nixpkgs#193370
        # This workaround is needed to use protobuf 3.19:
        # hadoop 3.3+ links against protobuf 3.18's SONAME, hadoop 3.2 against 3.8's
        find $out/lib/native -name 'libhdfspp.so*' | \
          xargs -r -n1 patchelf --replace-needed libprotobuf.so.${
            if (lib.versionAtLeast finalAttrs.version "3.3") then "18"
            else "8"
          } libprotobuf.so
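
        # the bundled native binaries also reference OpenSSL 1.1's SONAME;
        # rewrite it to the unversioned libcrypto.so symlinked into
        # $out/lib/native above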
        patchelf --replace-needed libcrypto.so.1.1 libcrypto.so \
          $out/lib/native/{libhdfs{pp,}.so*,examples/{pipes-sort,wordcount-nopipe,wordcount-part,wordcount-simple}}

      '' + ''
        for n in $(find $out/bin -type f ! -name "*.*"); do
          wrapProgram "$n"\
            --set-default JAVA_HOME ${finalAttrs.jdk.home}\
            --set-default HADOOP_HOME $out/\
            --run "test -d /etc/hadoop-conf && export HADOOP_CONF_DIR=\''${HADOOP_CONF_DIR-'/etc/hadoop-conf/'}"\
            --set-default HADOOP_CONF_DIR $out/etc/hadoop/\
            --prefix PATH : "${lib.makeBinPath [ bash coreutils which ]}"\
            --prefix JAVA_LIBRARY_PATH : "${lib.makeLibraryPath finalAttrs.buildInputs}"
        done
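
        # net effect: an explicit HADOOP_CONF_DIR wins, then /etc/hadoop-conf
        # (when that directory exists), then the bundled $out/etc/hadoop default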
      '' + (lib.optionalString sparkSupport ''
        # Add the spark shuffle service jar to YARN
        cp ${spark.src}/yarn/spark-${spark.version}-yarn-shuffle.jar $out/share/hadoop/yarn/
      '');

      passthru = { inherit tests; };
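
      # recursiveUpdate + attrByPath below let a platform entry carry its own
      # meta overrides, e.g. (hypothetical) platformAttrs.x86_64-darwin.meta.broken = true;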
      meta = with lib; recursiveUpdate {
        homepage = "https://hadoop.apache.org/";
        description = "Framework for distributed processing of large data sets across clusters of computers";
        license = licenses.asl20;
        sourceProvenance = with sourceTypes; [ binaryBytecode ];

        longDescription = ''
          The Apache Hadoop software library is a framework that allows for
          the distributed processing of large data sets across clusters of
          computers using a simple programming model. It is designed to
          scale up from single servers to thousands of machines, each
          offering local computation and storage. Rather than rely on
          hardware to deliver high availability, the library itself is
          designed to detect and handle failures at the application layer,
          delivering a highly available service on top of a cluster of
          computers, each of which may be prone to failures.
        '';
        maintainers = with maintainers; [ illustris ];
        platforms = attrNames platformAttrs;
      } (attrByPath [ stdenv.system "meta" ] { } platformAttrs);
    });
in
{
  # Different versions of hadoop support different java runtime versions
  # https://cwiki.apache.org/confluence/display/HADOOP/Hadoop+Java+Versions
  hadoop_3_4 = common rec {
    pname = "hadoop";
    platformAttrs = rec {
      x86_64-linux = {
        hash = "sha256-4xGnhIBBQDD57GNUml1oXmnibyBxA9mr8hpIud0DyGw=";
        srcHash = "sha256-viDF3LdRCZHqFycOYfN7nUQBPHiMCIjmu7jgIAaaK9E=";
      };
      x86_64-darwin = x86_64-linux;
      aarch64-linux = x86_64-linux // {
        hash = "sha256-QWxzKtNyw/AzcHMv0v7kj91pw1HO7VAN9MHO84caFk8=";
      };
      aarch64-darwin = aarch64-linux;
    };
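    # darwin systems reuse the matching linux tarball metadata verbatim; only
    # aarch64 fetches a separate "-aarch64"-suffixed tarball (see `url` above)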
    jdk = jdk11_headless;
    # TODO: Package and add Intel Storage Acceleration Library
    tests = nixosTests.hadoop;
  };

  hadoop_3_3 = common rec {
    pname = "hadoop";
    platformAttrs = rec {
      x86_64-linux = {
        hash = "sha256-9RlQWcDUECrap//xf3sqhd+Qa8tuGZSHFjGfmXhkGgQ=";
        srcHash = "sha256-4OEsVhBNV9CJ+PN4FgCduUCVA9/el5yezSCZ6ko3+bU=";
      };
      x86_64-darwin = x86_64-linux;
      aarch64-linux = x86_64-linux // {
        hash = "sha256-5Lv2uA72BJEva5v2yncyPe5gKNCNOPNsoHffVt6KXQ0=";
      };
      aarch64-darwin = aarch64-linux;
    };
    jdk = jdk11_headless;
    # TODO: Package and add Intel Storage Acceleration Library
    tests = nixosTests.hadoop_3_3;
  };

  hadoop2 = common rec {
    pname = "hadoop";
    platformAttrs.x86_64-linux = {
      hash = "sha256-xhA4zxqIRGNhIeBnJO9dLKf/gx/Bq+uIyyZwsIafEyo=";
      srcHash = "sha256-ucxCyXiJo8aL6aNMhZgKEbn8sGKOoMPVREbMGSfSdAI=";
    };
    jdk = jdk8_headless;
    tests = nixosTests.hadoop2;
  };
}
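
# Usage sketch (names as defined above): pull one release line into a NixOS
# configuration or an ad-hoc shell, e.g.
#   environment.systemPackages = [ pkgs.hadoop_3_4 ];
#   nix-shell -p hadoop_3_4 --run "hadoop version"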