nixos/hadoop: fix yarn, add more service configuration options

Author: illustris, 2021-10-21 02:01:12 +05:30 (committed by Raphael Megzari)
parent ee1fd49ebe
commit 91bb2b7016
4 changed files with 240 additions and 62 deletions

conf.nix

@@ -1,4 +1,4 @@
-{ hadoop, pkgs }:
+{ cfg, pkgs, lib }:
 let
   propertyXml = name: value: ''
     <property>
@@ -13,19 +13,31 @@ let
       ${builtins.concatStringsSep "\n" (pkgs.lib.mapAttrsToList propertyXml properties)}
     </configuration>
   '';
+  cfgLine = name: value: ''
+    ${name}=${builtins.toString value}
+  '';
+  cfgFile = fileName: properties: pkgs.writeTextDir fileName ''
+    # generated by NixOS
+    ${builtins.concatStringsSep "" (pkgs.lib.mapAttrsToList cfgLine properties)}
+  '';
   userFunctions = ''
     hadoop_verify_logdir() {
       echo Skipping verification of log directory
     }
   '';
+  hadoopEnv = ''
+    export HADOOP_LOG_DIR=/tmp/hadoop/$USER
+  '';
 in
-pkgs.buildEnv {
-  name = "hadoop-conf";
-  paths = [
-    (siteXml "core-site.xml" hadoop.coreSite)
-    (siteXml "hdfs-site.xml" hadoop.hdfsSite)
-    (siteXml "mapred-site.xml" hadoop.mapredSite)
-    (siteXml "yarn-site.xml" hadoop.yarnSite)
-    (pkgs.writeTextDir "hadoop-user-functions.sh" userFunctions)
-  ];
-}
+pkgs.runCommand "hadoop-conf" {} ''
+  mkdir -p $out/
+  cp ${siteXml "core-site.xml" cfg.coreSite}/* $out/
+  cp ${siteXml "hdfs-site.xml" cfg.hdfsSite}/* $out/
+  cp ${siteXml "mapred-site.xml" cfg.mapredSite}/* $out/
+  cp ${siteXml "yarn-site.xml" cfg.yarnSite}/* $out/
+  cp ${cfgFile "container-executor.cfg" cfg.containerExecutorCfg}/* $out/
+  cp ${pkgs.writeTextDir "hadoop-user-functions.sh" userFunctions}/* $out/
+  cp ${pkgs.writeTextDir "hadoop-env.sh" hadoopEnv}/* $out/
+  cp ${cfg.log4jProperties} $out/log4j.properties
+  ${lib.concatMapStringsSep "\n" (dir: "cp -r ${dir}/* $out/") cfg.extraConfDirs}
+''
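
Aside (illustrative, not part of the commit): a minimal standalone sketch of how the new cfgLine/cfgFile helpers flatten an attribute set into a key=value file such as container-executor.cfg. The helper definitions are copied from conf.nix above; the file name illustrative-cfg.nix and the sample attribute values are assumptions mirroring the containerExecutorCfg defaults introduced in default.nix below.

# illustrative-cfg.nix (hypothetical file name); build with: nix-build illustrative-cfg.nix
{ pkgs ? import <nixpkgs> {} }:
let
  # Helpers copied from conf.nix: one key=value line per attribute,
  # concatenated into a file placed in its own store directory.
  cfgLine = name: value: ''
    ${name}=${builtins.toString value}
  '';
  cfgFile = fileName: properties: pkgs.writeTextDir fileName ''
    # generated by NixOS
    ${builtins.concatStringsSep "" (pkgs.lib.mapAttrsToList cfgLine properties)}
  '';
in
# Produces a directory containing container-executor.cfg with lines like
#   feature.terminal.enabled=1
#   min.user.id=1000
#   yarn.nodemanager.linux-container-executor.group=hadoop
cfgFile "container-executor.cfg" {
  "yarn.nodemanager.linux-container-executor.group" = "hadoop";
  "min.user.id" = 1000;
  "feature.terminal.enabled" = 1;
}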

default.nix

@@ -1,5 +1,7 @@
 { config, lib, pkgs, ...}:
-
+let
+  cfg = config.services.hadoop;
+in
 with lib;
 {
   imports = [ ./yarn.nix ./hdfs.nix ];
@@ -17,7 +19,9 @@ with lib;
     };
     hdfsSite = mkOption {
-      default = {};
+      default = {
+        "dfs.namenode.rpc-bind-host" = "0.0.0.0";
+      };
       type = types.attrsOf types.anything;
       example = literalExpression ''
         {
@@ -28,27 +32,81 @@
     };
     mapredSite = mkOption {
-      default = {};
+      default = {
+        "mapreduce.framework.name" = "yarn";
+        "yarn.app.mapreduce.am.env" = "HADOOP_MAPRED_HOME=${cfg.package}/lib/${cfg.package.untarDir}";
+        "mapreduce.map.env" = "HADOOP_MAPRED_HOME=${cfg.package}/lib/${cfg.package.untarDir}";
+        "mapreduce.reduce.env" = "HADOOP_MAPRED_HOME=${cfg.package}/lib/${cfg.package.untarDir}";
+      };
       type = types.attrsOf types.anything;
       example = literalExpression ''
-        {
-          "mapreduce.map.cpu.vcores" = "1";
+        options.services.hadoop.mapredSite.default // {
+          "mapreduce.map.java.opts" = "-Xmx900m -XX:+UseParallelGC";
         }
       '';
       description = "Hadoop mapred-site.xml definition";
     };
     yarnSite = mkOption {
-      default = {};
+      default = {
+        "yarn.nodemanager.admin-env" = "PATH=$PATH";
+        "yarn.nodemanager.aux-services" = "mapreduce_shuffle";
+        "yarn.nodemanager.aux-services.mapreduce_shuffle.class" = "org.apache.hadoop.mapred.ShuffleHandler";
+        "yarn.nodemanager.bind-host" = "0.0.0.0";
+        "yarn.nodemanager.container-executor.class" = "org.apache.hadoop.yarn.server.nodemanager.LinuxContainerExecutor";
+        "yarn.nodemanager.env-whitelist" = "JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_HOME,LANG,TZ";
+        "yarn.nodemanager.linux-container-executor.group" = "hadoop";
+        "yarn.nodemanager.linux-container-executor.path" = "/run/wrappers/yarn-nodemanager/bin/container-executor";
+        "yarn.nodemanager.log-dirs" = "/var/log/hadoop/yarn/nodemanager";
+        "yarn.resourcemanager.bind-host" = "0.0.0.0";
+        "yarn.resourcemanager.scheduler.class" = "org.apache.hadoop.yarn.server.resourcemanager.scheduler.fifo.FifoScheduler";
+      };
       type = types.attrsOf types.anything;
       example = literalExpression ''
-        {
-          "yarn.resourcemanager.ha.id" = "resourcemanager1";
+        options.services.hadoop.yarnSite.default // {
+          "yarn.resourcemanager.hostname" = "''${config.networking.hostName}";
         }
       '';
       description = "Hadoop yarn-site.xml definition";
     };
+    log4jProperties = mkOption {
+      default = "${cfg.package}/lib/${cfg.package.untarDir}/etc/hadoop/log4j.properties";
+      type = types.path;
+      example = literalExpression ''
+        "''${pkgs.hadoop}/lib/''${pkgs.hadoop.untarDir}/etc/hadoop/log4j.properties";
+      '';
+      description = "log4j.properties file added to HADOOP_CONF_DIR";
+    };
+    containerExecutorCfg = mkOption {
+      default = {
+        # must be the same as yarn.nodemanager.linux-container-executor.group in yarnSite
+        "yarn.nodemanager.linux-container-executor.group"="hadoop";
+        "min.user.id"=1000;
+        "feature.terminal.enabled"=1;
+      };
+      type = types.attrsOf types.anything;
+      example = literalExpression ''
+        options.services.hadoop.containerExecutorCfg.default // {
+          "feature.terminal.enabled" = 0;
+        }
+      '';
+      description = "Yarn container-executor.cfg definition";
+    };
+    extraConfDirs = mkOption {
+      default = [];
+      type = types.listOf types.path;
+      example = literalExpression ''
+        [
+          ./extraHDFSConfs
+          ./extraYARNConfs
+        ]
+      '';
+      description = "Directories containing additional config files to be added to HADOOP_CONF_DIR";
+    };
     package = mkOption {
       type = types.package;
       default = pkgs.hadoop;
@@ -64,6 +122,12 @@ with lib;
       users.groups.hadoop = {
         gid = config.ids.gids.hadoop;
       };
 
+      environment = {
+        systemPackages = [ cfg.package ];
+        etc."hadoop-conf".source = let
+          hadoopConf = "${import ./conf.nix { inherit cfg pkgs lib; }}/";
+        in "${hadoopConf}";
+      };
     })
   ];
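
Aside (illustrative, not part of the commit): a hedged sketch of how a host configuration might use the options above. The host names are hypothetical; the yarnSite override follows the pattern from the option's own example, extending the new defaults instead of replacing them.

# configuration.nix fragment; host names below are hypothetical
{ config, options, pkgs, ... }:
{
  services.hadoop = {
    coreSite = {
      "fs.defaultFS" = "hdfs://namenode.example:8020";  # hypothetical NameNode address
    };
    # Keep the module's yarnSite defaults and add to them, as in the example above
    yarnSite = options.services.hadoop.yarnSite.default // {
      "yarn.resourcemanager.hostname" = "rm.example";   # hypothetical ResourceManager host
    };
    hdfs.namenode.enabled = true;
    yarn.resourcemanager.enabled = true;
  };
}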

hdfs.nix

@@ -1,24 +1,54 @@
 { config, lib, pkgs, ...}:
+with lib;
 let
   cfg = config.services.hadoop;
-  hadoopConf = import ./conf.nix { hadoop = cfg; pkgs = pkgs; };
+  hadoopConf = "${import ./conf.nix { inherit cfg pkgs lib; }}/";
+  restartIfChanged = mkOption {
+    type = types.bool;
+    description = ''
+      Automatically restart the service on config change.
+      This can be set to false to defer restarts on clusters running critical applications.
+      Please consider the security implications of inadvertently running an older version,
+      and the possibility of unexpected behavior caused by inconsistent versions across a cluster when disabling this option.
+    '';
+    default = false;
+  };
 in
-with lib;
 {
   options.services.hadoop.hdfs = {
-    namenode.enabled = mkOption {
-      type = types.bool;
-      default = false;
-      description = ''
-        Whether to run the Hadoop YARN NameNode
-      '';
+    namenode = {
+      enabled = mkOption {
+        type = types.bool;
+        default = false;
+        description = ''
+          Whether to run the HDFS NameNode
+        '';
+      };
+      inherit restartIfChanged;
+      openFirewall = mkOption {
+        type = types.bool;
+        default = true;
+        description = ''
+          Open firewall ports for namenode
+        '';
+      };
     };
-    datanode.enabled = mkOption {
-      type = types.bool;
-      default = false;
-      description = ''
-        Whether to run the Hadoop YARN DataNode
-      '';
+    datanode = {
+      enabled = mkOption {
+        type = types.bool;
+        default = false;
+        description = ''
+          Whether to run the HDFS DataNode
+        '';
+      };
+      inherit restartIfChanged;
+      openFirewall = mkOption {
+        type = types.bool;
+        default = true;
+        description = ''
+          Open firewall ports for datanode
+        '';
+      };
     };
   };
@@ -27,10 +57,7 @@ with lib;
     systemd.services.hdfs-namenode = {
       description = "Hadoop HDFS NameNode";
       wantedBy = [ "multi-user.target" ];
-      environment = {
-        HADOOP_HOME = "${cfg.package}";
-      };
+      inherit (cfg.hdfs.namenode) restartIfChanged;
       preStart = ''
         ${cfg.package}/bin/hdfs --config ${hadoopConf} namenode -format -nonInteractive || true
@@ -40,24 +67,34 @@ with lib;
         User = "hdfs";
         SyslogIdentifier = "hdfs-namenode";
         ExecStart = "${cfg.package}/bin/hdfs --config ${hadoopConf} namenode";
+        Restart = "always";
       };
     };
+    networking.firewall.allowedTCPPorts = (mkIf cfg.hdfs.namenode.openFirewall [
+      9870 # namenode.http-address
+      8020 # namenode.rpc-address
+    ]);
   })
   (mkIf cfg.hdfs.datanode.enabled {
     systemd.services.hdfs-datanode = {
       description = "Hadoop HDFS DataNode";
       wantedBy = [ "multi-user.target" ];
-      environment = {
-        HADOOP_HOME = "${cfg.package}";
-      };
+      inherit (cfg.hdfs.datanode) restartIfChanged;
       serviceConfig = {
         User = "hdfs";
         SyslogIdentifier = "hdfs-datanode";
         ExecStart = "${cfg.package}/bin/hdfs --config ${hadoopConf} datanode";
+        Restart = "always";
       };
     };
+    networking.firewall.allowedTCPPorts = (mkIf cfg.hdfs.datanode.openFirewall [
+      9864 # datanode.http.address
+      9866 # datanode.address
+      9867 # datanode.ipc.address
+    ]);
   })
   (mkIf (
     cfg.hdfs.namenode.enabled || cfg.hdfs.datanode.enabled
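
Aside (illustrative, not part of the commit): since openFirewall defaults to true, a cluster that manages its own firewall rules can opt out and open ports selectively; the port number below comes from the comments in the hunk above, and the fragment itself is a hypothetical example.

# Hypothetical fragment: firewall managed by hand, only the NameNode RPC port exposed
{
  services.hadoop.hdfs = {
    namenode = { enabled = true; openFirewall = false; };
    datanode = { enabled = true; openFirewall = false; };
  };
  networking.firewall.allowedTCPPorts = [ 8020 ];  # namenode.rpc-address
}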

yarn.nix

@@ -1,24 +1,62 @@
 { config, lib, pkgs, ...}:
+with lib;
 let
   cfg = config.services.hadoop;
-  hadoopConf = import ./conf.nix { hadoop = cfg; pkgs = pkgs; };
+  hadoopConf = "${import ./conf.nix { inherit cfg pkgs lib; }}/";
+  restartIfChanged = mkOption {
+    type = types.bool;
+    description = ''
+      Automatically restart the service on config change.
+      This can be set to false to defer restarts on clusters running critical applications.
+      Please consider the security implications of inadvertently running an older version,
+      and the possibility of unexpected behavior caused by inconsistent versions across a cluster when disabling this option.
+    '';
+    default = false;
+  };
 in
-with lib;
 {
   options.services.hadoop.yarn = {
-    resourcemanager.enabled = mkOption {
-      type = types.bool;
-      default = false;
-      description = ''
-        Whether to run the Hadoop YARN ResourceManager
-      '';
+    resourcemanager = {
+      enabled = mkOption {
+        type = types.bool;
+        default = false;
+        description = ''
+          Whether to run the Hadoop YARN ResourceManager
+        '';
+      };
+      inherit restartIfChanged;
+      openFirewall = mkOption {
+        type = types.bool;
+        default = true;
+        description = ''
+          Open firewall ports for resourcemanager
+        '';
+      };
     };
-    nodemanager.enabled = mkOption {
-      type = types.bool;
-      default = false;
-      description = ''
-        Whether to run the Hadoop YARN NodeManager
-      '';
+    nodemanager = {
+      enabled = mkOption {
+        type = types.bool;
+        default = false;
+        description = ''
+          Whether to run the Hadoop YARN NodeManager
+        '';
+      };
+      inherit restartIfChanged;
+      addBinBash = mkOption {
+        type = types.bool;
+        default = true;
+        description = ''
+          Add /bin/bash. This is needed by the linux container executor's launch script.
+        '';
+      };
+      openFirewall = mkOption {
+        type = types.bool;
+        default = true;
+        description = ''
+          Open firewall ports for nodemanager.
+          Because containers can listen on any ephemeral port, TCP ports 1024-65535 will be opened.
+        '';
+      };
     };
   };
@@ -38,36 +76,63 @@ with lib;
     systemd.services.yarn-resourcemanager = {
       description = "Hadoop YARN ResourceManager";
       wantedBy = [ "multi-user.target" ];
-      environment = {
-        HADOOP_HOME = "${cfg.package}";
-      };
+      inherit (cfg.yarn.resourcemanager) restartIfChanged;
       serviceConfig = {
         User = "yarn";
         SyslogIdentifier = "yarn-resourcemanager";
         ExecStart = "${cfg.package}/bin/yarn --config ${hadoopConf} " +
                     " resourcemanager";
+        Restart = "always";
       };
     };
+    networking.firewall.allowedTCPPorts = (mkIf cfg.yarn.resourcemanager.openFirewall [
+      8088 # resourcemanager.webapp.address
+      8030 # resourcemanager.scheduler.address
+      8031 # resourcemanager.resource-tracker.address
+      8032 # resourcemanager.address
+    ]);
   })
   (mkIf cfg.yarn.nodemanager.enabled {
+    # Needed because yarn hardcodes /bin/bash in container start scripts
+    # These scripts can't be patched, they are generated at runtime
+    systemd.tmpfiles.rules = [
+      (mkIf cfg.yarn.nodemanager.addBinBash "L /bin/bash - - - - /run/current-system/sw/bin/bash")
+    ];
     systemd.services.yarn-nodemanager = {
       description = "Hadoop YARN NodeManager";
       wantedBy = [ "multi-user.target" ];
-      environment = {
-        HADOOP_HOME = "${cfg.package}";
-      };
+      inherit (cfg.yarn.nodemanager) restartIfChanged;
+      preStart = ''
+        # create log dir
+        mkdir -p /var/log/hadoop/yarn/nodemanager
+        chown yarn:hadoop /var/log/hadoop/yarn/nodemanager
+        # set up setuid container executor binary
+        rm -rf /run/wrappers/yarn-nodemanager/ || true
+        mkdir -p /run/wrappers/yarn-nodemanager/{bin,etc/hadoop}
+        cp ${cfg.package}/lib/${cfg.package.untarDir}/bin/container-executor /run/wrappers/yarn-nodemanager/bin/
+        chgrp hadoop /run/wrappers/yarn-nodemanager/bin/container-executor
+        chmod 6050 /run/wrappers/yarn-nodemanager/bin/container-executor
+        cp ${hadoopConf}/container-executor.cfg /run/wrappers/yarn-nodemanager/etc/hadoop/
+      '';
       serviceConfig = {
         User = "yarn";
         SyslogIdentifier = "yarn-nodemanager";
+        PermissionsStartOnly = true;
         ExecStart = "${cfg.package}/bin/yarn --config ${hadoopConf} " +
                     " nodemanager";
+        Restart = "always";
      };
    };
+    networking.firewall.allowedTCPPortRanges = [
+      (mkIf (cfg.yarn.nodemanager.openFirewall) {from = 1024; to = 65535;})
+    ];
   })
 ];
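
Aside (illustrative, not from the commit): the new restartIfChanged options default to false so that a rebuild does not restart daemons on a production cluster; on a throwaway test host the automatic-restart behaviour can be restored per service. The fragment below is a hypothetical example.

# Hypothetical test-host fragment: restart YARN daemons automatically on rebuild
{
  services.hadoop.yarn = {
    resourcemanager = { enabled = true; restartIfChanged = true; };
    nodemanager = { enabled = true; restartIfChanged = true; };
  };
}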