{ config, lib, pkgs, ... }:
with lib;
let
  cfg = config.services.hadoop;

  # Config files for hadoop services
  hadoopConf = "${import ./conf.nix { inherit cfg pkgs lib; }}/";

  # Generator for HDFS service options
  hadoopServiceOption = { serviceName, firewallOption ? true, extraOpts ? null }: {
    enable = mkEnableOption serviceName;
    restartIfChanged = mkOption {
      type = types.bool;
      description = ''
        Automatically restart the service on config change.
        This can be set to false to defer restarts on clusters running critical applications.
        Please consider the security implications of inadvertently running an older version,
        and the possibility of unexpected behavior caused by inconsistent versions across a cluster when disabling this option.
      '';
      default = false;
    };
    extraFlags = mkOption {
      type = with types; listOf str;
      default = [ ];
      description = "Extra command line flags to pass to ${serviceName}";
      example = [
        "-Dcom.sun.management.jmxremote"
        "-Dcom.sun.management.jmxremote.port=8010"
      ];
    };
    extraEnv = mkOption {
      type = with types; attrsOf str;
      default = { };
      description = "Extra environment variables for ${serviceName}";
    };
  } // (optionalAttrs firewallOption {
    openFirewall = mkOption {
      type = types.bool;
      default = false;
      description = "Open firewall ports for ${serviceName}.";
    };
  }) // (optionalAttrs (extraOpts != null) extraOpts);
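
  # Illustrative example (not part of the module itself): a call such as
  #   hadoopServiceOption { serviceName = "HDFS NameNode"; }
  # produces `enable`, `restartIfChanged`, `extraFlags`, `extraEnv` and
  # `openFirewall` options, so a deployment could set, for instance:
  #   services.hadoop.hdfs.namenode = {
  #     enable = true;
  #     openFirewall = true;
  #     extraFlags = [ "-Dcom.sun.management.jmxremote" ];
  #     extraEnv.HADOOP_HEAPSIZE = "4096";
  #   };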

  # Generator for HDFS service configs
  hadoopServiceConfig =
    { name
    , serviceOptions ? cfg.hdfs."${toLower name}"
    , description ? "Hadoop HDFS ${name}"
    , User ? "hdfs"
    , allowedTCPPorts ? [ ]
    , preStart ? ""
    , environment ? { }
    , extraConfig ? { }
    }:
    mkIf serviceOptions.enable (mkMerge [{
      systemd.services."hdfs-${toLower name}" = {
        inherit description preStart;
        environment = environment // serviceOptions.extraEnv;
        wantedBy = [ "multi-user.target" ];
        inherit (serviceOptions) restartIfChanged;
        serviceConfig = {
          inherit User;
          SyslogIdentifier = "hdfs-${toLower name}";
          ExecStart = "${cfg.package}/bin/hdfs --config ${hadoopConf} ${toLower name} ${escapeShellArgs serviceOptions.extraFlags}";
          Restart = "always";
        };
      };

      services.hadoop.gatewayRole.enable = true;

      networking.firewall.allowedTCPPorts = mkIf
        ((builtins.hasAttr "openFirewall" serviceOptions) && serviceOptions.openFirewall)
        allowedTCPPorts;
    } extraConfig ]);
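
  # A rough sketch of what this generator expands to, for illustration only:
  # hadoopServiceConfig { name = "NameNode"; } defines a systemd unit
  # `hdfs-namenode` that runs `hdfs --config <confDir> namenode` as the
  # `hdfs` user, enables the gateway role, and opens `allowedTCPPorts`
  # whenever the service's `openFirewall` option is set.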

in
{
  options.services.hadoop.hdfs = {

    namenode = hadoopServiceOption { serviceName = "HDFS NameNode"; } // {
      formatOnInit = mkOption {
        type = types.bool;
        default = false;
        description = ''
          Format HDFS namenode on first start. This is useful for quickly spinning up
          ephemeral HDFS clusters with a single namenode.
          For HA clusters, initialization involves multiple steps across multiple nodes.
          Follow this guide to initialize an HA cluster manually:
          <https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-hdfs/HDFSHighAvailabilityWithQJM.html>
        '';
      };
    };
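
    # For instance (illustrative only), a minimal single-node ephemeral
    # cluster might combine this with the namenode itself:
    #   services.hadoop.hdfs.namenode = {
    #     enable = true;
    #     formatOnInit = true;
    #   };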

    datanode = hadoopServiceOption { serviceName = "HDFS DataNode"; } // {
      dataDirs = mkOption {
        default = null;
        description = "Tier and path definitions for datanode storage.";
        type = with types; nullOr (listOf (submodule {
          options = {
            type = mkOption {
              type = enum [ "SSD" "DISK" "ARCHIVE" "RAM_DISK" ];
              description = ''
                Storage types ([SSD]/[DISK]/[ARCHIVE]/[RAM_DISK]) for HDFS storage policies.
              '';
            };
            path = mkOption {
              type = path;
              example = "/var/lib/hadoop/hdfs/dn";
              description = "Determines where on the local filesystem a data node should store its blocks.";
            };
          };
        }));
      };
    };
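
    # Hypothetical usage example (not taken from the module): tiered storage
    # with one SSD and one spinning-disk directory could look like
    #   services.hadoop.hdfs.datanode.dataDirs = [
    #     { type = "SSD";  path = "/mnt/ssd/dn"; }
    #     { type = "DISK"; path = "/mnt/hdd/dn"; }
    #   ];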

    journalnode = hadoopServiceOption { serviceName = "HDFS JournalNode"; };

    zkfc = hadoopServiceOption {
      serviceName = "HDFS ZooKeeper failover controller";
      firewallOption = false;
    };

    httpfs = hadoopServiceOption { serviceName = "HDFS HTTPFS"; } // {
      tempPath = mkOption {
        type = types.path;
        default = "/tmp/hadoop/httpfs";
        description = "HTTPFS_TEMP path used by HTTPFS.";
      };
    };
  };

  config = mkMerge [
    (hadoopServiceConfig {
      name = "NameNode";
      allowedTCPPorts = [
        9870 # namenode.http-address
        8020 # namenode.rpc-address
        8022 # namenode.servicerpc-address
        8019 # dfs.ha.zkfc.port
      ];
      preStart = (mkIf cfg.hdfs.namenode.formatOnInit
        "${cfg.package}/bin/hdfs --config ${hadoopConf} namenode -format -nonInteractive || true"
      );
    })

    (hadoopServiceConfig {
      name = "DataNode";
      # Port numbers for the datanode changed between Hadoop 2 and 3.
      allowedTCPPorts = if versionAtLeast cfg.package.version "3" then [
        9864 # datanode.http.address
        9866 # datanode.address
        9867 # datanode.ipc.address
      ] else [
        50075 # datanode.http.address
        50010 # datanode.address
        50020 # datanode.ipc.address
      ];
      extraConfig.services.hadoop.hdfsSiteInternal."dfs.datanode.data.dir" = mkIf (cfg.hdfs.datanode.dataDirs != null)
        (concatMapStringsSep "," (x: "[" + x.type + "]file://" + x.path) cfg.hdfs.datanode.dataDirs);
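      # With the hypothetical dataDirs example above, this renders
      # dfs.datanode.data.dir as
      #   "[SSD]file:///mnt/ssd/dn,[DISK]file:///mnt/hdd/dn"
      # i.e. the [TIER]file://path list format Hadoop expects.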
    })

    (hadoopServiceConfig {
      name = "JournalNode";
      allowedTCPPorts = [
        8480 # dfs.journalnode.http-address
        8485 # dfs.journalnode.rpc-address
      ];
    })

    (hadoopServiceConfig {
      name = "zkfc";
      description = "Hadoop HDFS ZooKeeper failover controller";
    })

    (hadoopServiceConfig {
      name = "HTTPFS";
      environment.HTTPFS_TEMP = cfg.hdfs.httpfs.tempPath;
      preStart = "mkdir -p $HTTPFS_TEMP";
      User = "httpfs";
      allowedTCPPorts = [
        14000 # httpfs.http.port
      ];
    })

    (mkIf cfg.gatewayRole.enable {
      users.users.hdfs = {
        description = "Hadoop HDFS user";
        group = "hadoop";
        uid = config.ids.uids.hdfs;
      };
    })

    (mkIf cfg.hdfs.httpfs.enable {
      users.users.httpfs = {
        description = "Hadoop HTTPFS user";
        group = "hadoop";
        isSystemUser = true;
      };
    })
  ];
}