assert lib.elem stdenv.system [
stdenv.mkDerivation (finalAttrs: {
  version = platformAttrs.${stdenv.system}.version or (throw "Unsupported system: ${stdenv.system}");
    "mirror://apache/hadoop/common/hadoop-${finalAttrs.version}/hadoop-${finalAttrs.version}"
    + lib.optionalString stdenv.hostPlatform.isAarch64 "-aarch64"
  inherit (platformAttrs.${stdenv.system} or (throw "Unsupported system: ${stdenv.system}"))
  # Build the container executor binary from source.
  # installPhase does not evaluate containerExecutor lazily for some reason,
  # so it has to be defined even on non-Linux platforms.
    if stdenv.hostPlatform.isLinux then
      (callPackage ./containerExecutor.nix {
        inherit (finalAttrs) version;
        inherit platformAttrs;
  ] ++ lib.optionals stdenv.hostPlatform.isLinux [ autoPatchelfHook ];
  buildInputs = lib.optionals stdenv.hostPlatform.isLinux [
    (lib.getLib stdenv.cc.cc)
  + lib.optionalString stdenv.hostPlatform.isLinux ''
    for n in $(find ${finalAttrs.containerExecutor}/bin -type f); do
    # these libraries are loaded at runtime by the JVM
    ln -s ${lib.getLib cyrus_sasl}/lib/libsasl2.so $out/lib/native/libsasl2.so.2
    ln -s ${lib.getLib openssl}/lib/libcrypto.so $out/lib/native/
    ln -s ${lib.getLib zlib}/lib/libz.so.1 $out/lib/native/
    ln -s ${lib.getLib zstd}/lib/libzstd.so.1 $out/lib/native/
    ln -s ${lib.getLib bzip2}/lib/libbz2.so.1 $out/lib/native/
    ln -s ${lib.getLib snappy}/lib/libsnappy.so.1 $out/lib/native/
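    # (informational: the JVM locates these through java.library.path;
    #  Hadoop's launch scripts typically add lib/native to it, and the
    #  wrapProgram call further down also prepends JAVA_LIBRARY_PATH)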
    # libjvm.so is in different paths for java 8 and 11
    # libnativetask.so in hadoop 3 and libhdfs.so in hadoop 2 depend on it
    find $out/lib/native/ -name 'libnativetask.so*' -o -name 'libhdfs.so*' | \
      xargs -n1 patchelf --add-rpath $(dirname $(find ${finalAttrs.jdk.home} -name libjvm.so | head -n1))
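    # Illustratively (the store path below is made up; the real one comes from
    # the find over the JDK above), each matched library ends up patched like:
    #   patchelf --add-rpath /nix/store/xxxx-openjdk-headless/lib/openjdk/lib/server $out/lib/native/libnativetask.so.1.0.0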
    # NixOS/nixpkgs#193370
    # This workaround is needed to use protobuf 3.19:
    # hadoop 3.3+ depends on protobuf 3.18, hadoop 3.2 depends on protobuf 3.8
    find $out/lib/native -name 'libhdfspp.so*' | \
      xargs -r -n1 patchelf --replace-needed libprotobuf.so.${
        if (lib.versionAtLeast finalAttrs.version "3.4.1") then
        else if (lib.versionAtLeast finalAttrs.version "3.3") then
    patchelf --replace-needed libcrypto.so.1.1 libcrypto.so \
      $out/lib/native/{libhdfs{pp,}.so*,examples/{pipes-sort,wordcount-nopipe,wordcount-part,wordcount-simple}}
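    # A possible manual check (illustrative, not executed by the build):
    #   patchelf --print-needed $out/lib/native/libhdfspp.so*
    # should now report libprotobuf.so and libcrypto.so rather than the
    # versioned sonames replaced above.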
    for n in $(find $out/bin -type f ! -name "*.*"); do
        --set-default JAVA_HOME ${finalAttrs.jdk.home} \
        --set-default HADOOP_HOME $out/ \
        --run "test -d /etc/hadoop-conf && export HADOOP_CONF_DIR=\''${HADOOP_CONF_DIR-'/etc/hadoop-conf/'}" \
        --set-default HADOOP_CONF_DIR $out/etc/hadoop/ \
        --prefix JAVA_LIBRARY_PATH : "${lib.makeLibraryPath finalAttrs.buildInputs}"
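      # Net effect of the flags above (sketch): an explicitly exported
      # HADOOP_CONF_DIR always wins; otherwise /etc/hadoop-conf is used when
      # that directory exists, and the bundled $out/etc/hadoop config is the
      # final fallback. For example:
      #   HADOOP_CONF_DIR=/my/conf hdfs dfs -ls /   # explicit value wins
      #   hdfs dfs -ls /                            # falls back as described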
  + (lib.optionalString sparkSupport ''
    # Add the spark shuffle service jar to YARN
    cp ${spark.src}/yarn/spark-${spark.version}-yarn-shuffle.jar $out/share/hadoop/yarn/
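    # To make YARN actually load the shuffle service, yarn-site.xml has to
    # enable it (a sketch of the standard Spark-on-YARN settings, not
    # something this derivation sets up):
    #   yarn.nodemanager.aux-services = mapreduce_shuffle,spark_shuffle
    #   yarn.nodemanager.aux-services.spark_shuffle.class = org.apache.spark.network.yarn.YarnShuffleService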
  passthru = { inherit tests; };
    homepage = "https://hadoop.apache.org/";
    description = "Framework for distributed processing of large data sets across clusters of computers";
    license = licenses.asl20;
    sourceProvenance = with sourceTypes; [ binaryBytecode ];
      The Apache Hadoop software library is a framework that allows for
      the distributed processing of large data sets across clusters of
      computers using a simple programming model. It is designed to
      scale up from single servers to thousands of machines, each
      offering local computation and storage. Rather than rely on
      hardware to deliver high availability, the library itself is
      designed to detect and handle failures at the application layer,
      delivering a highly available service on top of a cluster of
      computers, each of which may be prone to failures.
    maintainers = with maintainers; [ illustris ];
    platforms = attrNames platformAttrs;
  } (attrByPath [ stdenv.system "meta" ] { } platformAttrs);
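  # (informational: the merge above means a platformAttrs entry may carry its
  #  own meta overrides, e.g. the hypothetical
  #    platformAttrs.aarch64-darwin.meta.broken = true;)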
# Different versions of hadoop support different java runtime versions
# https://cwiki.apache.org/confluence/display/HADOOP/Hadoop+Java+Versions
hadoop_3_4 = common rec {
  platformAttrs = rec {
      hash = "sha256-mtVIeDOZbf5VFOdW9DkQKckFKf0i6NAC/T3QwUwEukY=";
      srcHash = "sha256-lE9uSohy6GWXprFEYbEin2ITqTms2h6EWXe4nEd3U4Y=";
    x86_64-darwin = x86_64-linux;
    aarch64-linux = x86_64-linux // {
      hash = "sha256-QWxzKtNyw/AzcHMv0v7kj91pw1HO7VAN9MHO84caFk8=";
      srcHash = "sha256-viDF3LdRCZHqFycOYfN7nUQBPHiMCIjmu7jgIAaaK9E=";
    aarch64-darwin = aarch64-linux;
  jdk = jdk11_headless;
  # TODO: Package and add Intel Storage Acceleration Library
  tests = nixosTests.hadoop;
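  # (the attached NixOS test can be run from a nixpkgs checkout with
  #  e.g. `nix-build -A nixosTests.hadoop`)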
hadoop_3_3 = common rec {
  platformAttrs = rec {
      hash = "sha256-9RlQWcDUECrap//xf3sqhd+Qa8tuGZSHFjGfmXhkGgQ=";
      srcHash = "sha256-4OEsVhBNV9CJ+PN4FgCduUCVA9/el5yezSCZ6ko3+bU=";
    x86_64-darwin = x86_64-linux;
    aarch64-linux = x86_64-linux // {
      hash = "sha256-5Lv2uA72BJEva5v2yncyPe5gKNCNOPNsoHffVt6KXQ0=";
    aarch64-darwin = aarch64-linux;
  jdk = jdk11_headless;
  # TODO: Package and add Intel Storage Acceleration Library
  tests = nixosTests.hadoop_3_3;
hadoop2 = common rec {
  platformAttrs.x86_64-linux = {
    hash = "sha256-xhA4zxqIRGNhIeBnJO9dLKf/gx/Bq+uIyyZwsIafEyo=";
    srcHash = "sha256-ucxCyXiJo8aL6aNMhZgKEbn8sGKOoMPVREbMGSfSdAI=";
  tests = nixosTests.hadoop2;