mirror of
https://github.com/SebastianWendel/nixpkgs.git
synced 2024-09-20 20:39:04 +02:00
nixos/hadoop: fix yarn, add more service configuration options
This commit is contained in:
parent
ee1fd49ebe
commit
91bb2b7016
|
@ -1,4 +1,4 @@
|
|||
{ hadoop, pkgs }:
|
||||
{ cfg, pkgs, lib }:
|
||||
let
|
||||
propertyXml = name: value: ''
|
||||
<property>
|
||||
|
@ -13,19 +13,31 @@ let
|
|||
${builtins.concatStringsSep "\n" (pkgs.lib.mapAttrsToList propertyXml properties)}
|
||||
</configuration>
|
||||
'';
|
||||
cfgLine = name: value: ''
|
||||
${name}=${builtins.toString value}
|
||||
'';
|
||||
cfgFile = fileName: properties: pkgs.writeTextDir fileName ''
|
||||
# generated by NixOS
|
||||
${builtins.concatStringsSep "" (pkgs.lib.mapAttrsToList cfgLine properties)}
|
||||
'';
|
||||
userFunctions = ''
|
||||
hadoop_verify_logdir() {
|
||||
echo Skipping verification of log directory
|
||||
}
|
||||
'';
|
||||
hadoopEnv = ''
|
||||
export HADOOP_LOG_DIR=/tmp/hadoop/$USER
|
||||
'';
|
||||
in
|
||||
pkgs.buildEnv {
|
||||
name = "hadoop-conf";
|
||||
paths = [
|
||||
(siteXml "core-site.xml" hadoop.coreSite)
|
||||
(siteXml "hdfs-site.xml" hadoop.hdfsSite)
|
||||
(siteXml "mapred-site.xml" hadoop.mapredSite)
|
||||
(siteXml "yarn-site.xml" hadoop.yarnSite)
|
||||
(pkgs.writeTextDir "hadoop-user-functions.sh" userFunctions)
|
||||
];
|
||||
}
|
||||
pkgs.runCommand "hadoop-conf" {} ''
|
||||
mkdir -p $out/
|
||||
cp ${siteXml "core-site.xml" cfg.coreSite}/* $out/
|
||||
cp ${siteXml "hdfs-site.xml" cfg.hdfsSite}/* $out/
|
||||
cp ${siteXml "mapred-site.xml" cfg.mapredSite}/* $out/
|
||||
cp ${siteXml "yarn-site.xml" cfg.yarnSite}/* $out/
|
||||
cp ${cfgFile "container-executor.cfg" cfg.containerExecutorCfg}/* $out/
|
||||
cp ${pkgs.writeTextDir "hadoop-user-functions.sh" userFunctions}/* $out/
|
||||
cp ${pkgs.writeTextDir "hadoop-env.sh" hadoopEnv}/* $out/
|
||||
cp ${cfg.log4jProperties} $out/log4j.properties
|
||||
${lib.concatMapStringsSep "\n" (dir: "cp -r ${dir}/* $out/") cfg.extraConfDirs}
|
||||
''
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
{ config, lib, pkgs, ...}:
|
||||
|
||||
let
|
||||
cfg = config.services.hadoop;
|
||||
in
|
||||
with lib;
|
||||
{
|
||||
imports = [ ./yarn.nix ./hdfs.nix ];
|
||||
|
@ -17,7 +19,9 @@ with lib;
|
|||
};
|
||||
|
||||
hdfsSite = mkOption {
|
||||
default = {};
|
||||
default = {
|
||||
"dfs.namenode.rpc-bind-host" = "0.0.0.0";
|
||||
};
|
||||
type = types.attrsOf types.anything;
|
||||
example = literalExpression ''
|
||||
{
|
||||
|
@ -28,27 +32,81 @@ with lib;
|
|||
};
|
||||
|
||||
mapredSite = mkOption {
|
||||
default = {};
|
||||
default = {
|
||||
"mapreduce.framework.name" = "yarn";
|
||||
"yarn.app.mapreduce.am.env" = "HADOOP_MAPRED_HOME=${cfg.package}/lib/${cfg.package.untarDir}";
|
||||
"mapreduce.map.env" = "HADOOP_MAPRED_HOME=${cfg.package}/lib/${cfg.package.untarDir}";
|
||||
"mapreduce.reduce.env" = "HADOOP_MAPRED_HOME=${cfg.package}/lib/${cfg.package.untarDir}";
|
||||
};
|
||||
type = types.attrsOf types.anything;
|
||||
example = literalExpression ''
|
||||
{
|
||||
"mapreduce.map.cpu.vcores" = "1";
|
||||
options.services.hadoop.mapredSite.default // {
|
||||
"mapreduce.map.java.opts" = "-Xmx900m -XX:+UseParallelGC";
|
||||
}
|
||||
'';
|
||||
description = "Hadoop mapred-site.xml definition";
|
||||
};
|
||||
|
||||
yarnSite = mkOption {
|
||||
default = {};
|
||||
default = {
|
||||
"yarn.nodemanager.admin-env" = "PATH=$PATH";
|
||||
"yarn.nodemanager.aux-services" = "mapreduce_shuffle";
|
||||
"yarn.nodemanager.aux-services.mapreduce_shuffle.class" = "org.apache.hadoop.mapred.ShuffleHandler";
|
||||
"yarn.nodemanager.bind-host" = "0.0.0.0";
|
||||
"yarn.nodemanager.container-executor.class" = "org.apache.hadoop.yarn.server.nodemanager.LinuxContainerExecutor";
|
||||
"yarn.nodemanager.env-whitelist" = "JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_HOME,LANG,TZ";
|
||||
"yarn.nodemanager.linux-container-executor.group" = "hadoop";
|
||||
"yarn.nodemanager.linux-container-executor.path" = "/run/wrappers/yarn-nodemanager/bin/container-executor";
|
||||
"yarn.nodemanager.log-dirs" = "/var/log/hadoop/yarn/nodemanager";
|
||||
"yarn.resourcemanager.bind-host" = "0.0.0.0";
|
||||
"yarn.resourcemanager.scheduler.class" = "org.apache.hadoop.yarn.server.resourcemanager.scheduler.fifo.FifoScheduler";
|
||||
};
|
||||
type = types.attrsOf types.anything;
|
||||
example = literalExpression ''
|
||||
{
|
||||
"yarn.resourcemanager.ha.id" = "resourcemanager1";
|
||||
options.services.hadoop.yarnSite.default // {
|
||||
"yarn.resourcemanager.hostname" = "''${config.networking.hostName}";
|
||||
}
|
||||
'';
|
||||
description = "Hadoop yarn-site.xml definition";
|
||||
};
|
||||
|
||||
log4jProperties = mkOption {
|
||||
default = "${cfg.package}/lib/${cfg.package.untarDir}/etc/hadoop/log4j.properties";
|
||||
type = types.path;
|
||||
example = literalExpression ''
|
||||
"''${pkgs.hadoop}/lib/''${pkgs.hadoop.untarDir}/etc/hadoop/log4j.properties";
|
||||
'';
|
||||
description = "log4j.properties file added to HADOOP_CONF_DIR";
|
||||
};
|
||||
|
||||
containerExecutorCfg = mkOption {
|
||||
default = {
|
||||
# must be the same as yarn.nodemanager.linux-container-executor.group in yarnSite
|
||||
"yarn.nodemanager.linux-container-executor.group"="hadoop";
|
||||
"min.user.id"=1000;
|
||||
"feature.terminal.enabled"=1;
|
||||
};
|
||||
type = types.attrsOf types.anything;
|
||||
example = literalExpression ''
|
||||
options.services.hadoop.containerExecutorCfg.default // {
|
||||
"feature.terminal.enabled" = 0;
|
||||
}
|
||||
'';
|
||||
description = "Yarn container-executor.cfg definition";
|
||||
};
|
||||
|
||||
extraConfDirs = mkOption {
|
||||
default = [];
|
||||
type = types.listOf types.path;
|
||||
example = literalExpression ''
|
||||
[
|
||||
./extraHDFSConfs
|
||||
./extraYARNConfs
|
||||
]
|
||||
'';
|
||||
description = "Directories containing additional config files to be added to HADOOP_CONF_DIR";
|
||||
};
|
||||
|
||||
package = mkOption {
|
||||
type = types.package;
|
||||
default = pkgs.hadoop;
|
||||
|
@ -64,6 +122,12 @@ with lib;
|
|||
users.groups.hadoop = {
|
||||
gid = config.ids.gids.hadoop;
|
||||
};
|
||||
environment = {
|
||||
systemPackages = [ cfg.package ];
|
||||
etc."hadoop-conf".source = let
|
||||
hadoopConf = "${import ./conf.nix { inherit cfg pkgs lib; }}/";
|
||||
in "${hadoopConf}";
|
||||
};
|
||||
})
|
||||
|
||||
];
|
||||
|
|
|
@ -1,24 +1,54 @@
|
|||
{ config, lib, pkgs, ...}:
|
||||
with lib;
|
||||
let
|
||||
cfg = config.services.hadoop;
|
||||
hadoopConf = import ./conf.nix { hadoop = cfg; pkgs = pkgs; };
|
||||
hadoopConf = "${import ./conf.nix { inherit cfg pkgs lib; }}/";
|
||||
restartIfChanged = mkOption {
|
||||
type = types.bool;
|
||||
description = ''
|
||||
Automatically restart the service on config change.
|
||||
This can be set to false to defer restarts on clusters running critical applications.
|
||||
Please consider the security implications of inadvertently running an older version,
|
||||
and the possibility of unexpected behavior caused by inconsistent versions across a cluster when disabling this option.
|
||||
'';
|
||||
default = false;
|
||||
};
|
||||
in
|
||||
with lib;
|
||||
{
|
||||
options.services.hadoop.hdfs = {
|
||||
namenode.enabled = mkOption {
|
||||
type = types.bool;
|
||||
default = false;
|
||||
description = ''
|
||||
Whether to run the Hadoop YARN NameNode
|
||||
'';
|
||||
namenode = {
|
||||
enabled = mkOption {
|
||||
type = types.bool;
|
||||
default = false;
|
||||
description = ''
|
||||
Whether to run the HDFS NameNode
|
||||
'';
|
||||
};
|
||||
inherit restartIfChanged;
|
||||
openFirewall = mkOption {
|
||||
type = types.bool;
|
||||
default = true;
|
||||
description = ''
|
||||
Open firewall ports for namenode
|
||||
'';
|
||||
};
|
||||
};
|
||||
datanode.enabled = mkOption {
|
||||
type = types.bool;
|
||||
default = false;
|
||||
description = ''
|
||||
Whether to run the Hadoop YARN DataNode
|
||||
'';
|
||||
datanode = {
|
||||
enabled = mkOption {
|
||||
type = types.bool;
|
||||
default = false;
|
||||
description = ''
|
||||
Whether to run the HDFS DataNode
|
||||
'';
|
||||
};
|
||||
inherit restartIfChanged;
|
||||
openFirewall = mkOption {
|
||||
type = types.bool;
|
||||
default = true;
|
||||
description = ''
|
||||
Open firewall ports for datanode
|
||||
'';
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
|
@ -27,10 +57,7 @@ with lib;
|
|||
systemd.services.hdfs-namenode = {
|
||||
description = "Hadoop HDFS NameNode";
|
||||
wantedBy = [ "multi-user.target" ];
|
||||
|
||||
environment = {
|
||||
HADOOP_HOME = "${cfg.package}";
|
||||
};
|
||||
inherit (cfg.hdfs.namenode) restartIfChanged;
|
||||
|
||||
preStart = ''
|
||||
${cfg.package}/bin/hdfs --config ${hadoopConf} namenode -format -nonInteractive || true
|
||||
|
@ -40,24 +67,34 @@ with lib;
|
|||
User = "hdfs";
|
||||
SyslogIdentifier = "hdfs-namenode";
|
||||
ExecStart = "${cfg.package}/bin/hdfs --config ${hadoopConf} namenode";
|
||||
Restart = "always";
|
||||
};
|
||||
};
|
||||
|
||||
networking.firewall.allowedTCPPorts = (mkIf cfg.hdfs.namenode.openFirewall [
|
||||
9870 # namenode.http-address
|
||||
8020 # namenode.rpc-address
|
||||
]);
|
||||
})
|
||||
(mkIf cfg.hdfs.datanode.enabled {
|
||||
systemd.services.hdfs-datanode = {
|
||||
description = "Hadoop HDFS DataNode";
|
||||
wantedBy = [ "multi-user.target" ];
|
||||
|
||||
environment = {
|
||||
HADOOP_HOME = "${cfg.package}";
|
||||
};
|
||||
inherit (cfg.hdfs.datanode) restartIfChanged;
|
||||
|
||||
serviceConfig = {
|
||||
User = "hdfs";
|
||||
SyslogIdentifier = "hdfs-datanode";
|
||||
ExecStart = "${cfg.package}/bin/hdfs --config ${hadoopConf} datanode";
|
||||
Restart = "always";
|
||||
};
|
||||
};
|
||||
|
||||
networking.firewall.allowedTCPPorts = (mkIf cfg.hdfs.datanode.openFirewall [
|
||||
9864 # datanode.http.address
|
||||
9866 # datanode.address
|
||||
9867 # datanode.ipc.address
|
||||
]);
|
||||
})
|
||||
(mkIf (
|
||||
cfg.hdfs.namenode.enabled || cfg.hdfs.datanode.enabled
|
||||
|
|
|
@ -1,24 +1,62 @@
|
|||
{ config, lib, pkgs, ...}:
|
||||
with lib;
|
||||
let
|
||||
cfg = config.services.hadoop;
|
||||
hadoopConf = import ./conf.nix { hadoop = cfg; pkgs = pkgs; };
|
||||
hadoopConf = "${import ./conf.nix { inherit cfg pkgs lib; }}/";
|
||||
restartIfChanged = mkOption {
|
||||
type = types.bool;
|
||||
description = ''
|
||||
Automatically restart the service on config change.
|
||||
This can be set to false to defer restarts on clusters running critical applications.
|
||||
Please consider the security implications of inadvertently running an older version,
|
||||
and the possibility of unexpected behavior caused by inconsistent versions across a cluster when disabling this option.
|
||||
'';
|
||||
default = false;
|
||||
};
|
||||
in
|
||||
with lib;
|
||||
{
|
||||
options.services.hadoop.yarn = {
|
||||
resourcemanager.enabled = mkOption {
|
||||
type = types.bool;
|
||||
default = false;
|
||||
description = ''
|
||||
Whether to run the Hadoop YARN ResourceManager
|
||||
'';
|
||||
resourcemanager = {
|
||||
enabled = mkOption {
|
||||
type = types.bool;
|
||||
default = false;
|
||||
description = ''
|
||||
Whether to run the Hadoop YARN ResourceManager
|
||||
'';
|
||||
};
|
||||
inherit restartIfChanged;
|
||||
openFirewall = mkOption {
|
||||
type = types.bool;
|
||||
default = true;
|
||||
description = ''
|
||||
Open firewall ports for resourcemanager
|
||||
'';
|
||||
};
|
||||
};
|
||||
nodemanager.enabled = mkOption {
|
||||
type = types.bool;
|
||||
default = false;
|
||||
description = ''
|
||||
Whether to run the Hadoop YARN NodeManager
|
||||
'';
|
||||
nodemanager = {
|
||||
enabled = mkOption {
|
||||
type = types.bool;
|
||||
default = false;
|
||||
description = ''
|
||||
Whether to run the Hadoop YARN NodeManager
|
||||
'';
|
||||
};
|
||||
inherit restartIfChanged;
|
||||
addBinBash = mkOption {
|
||||
type = types.bool;
|
||||
default = true;
|
||||
description = ''
|
||||
Add /bin/bash. This is needed by the linux container executor's launch script.
|
||||
'';
|
||||
};
|
||||
openFirewall = mkOption {
|
||||
type = types.bool;
|
||||
default = true;
|
||||
description = ''
|
||||
Open firewall ports for nodemanager.
|
||||
Because containers can listen on any ephemeral port, TCP ports 1024–65535 will be opened.
|
||||
'';
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
|
@ -38,36 +76,63 @@ with lib;
|
|||
systemd.services.yarn-resourcemanager = {
|
||||
description = "Hadoop YARN ResourceManager";
|
||||
wantedBy = [ "multi-user.target" ];
|
||||
|
||||
environment = {
|
||||
HADOOP_HOME = "${cfg.package}";
|
||||
};
|
||||
inherit (cfg.yarn.resourcemanager) restartIfChanged;
|
||||
|
||||
serviceConfig = {
|
||||
User = "yarn";
|
||||
SyslogIdentifier = "yarn-resourcemanager";
|
||||
ExecStart = "${cfg.package}/bin/yarn --config ${hadoopConf} " +
|
||||
" resourcemanager";
|
||||
Restart = "always";
|
||||
};
|
||||
};
|
||||
networking.firewall.allowedTCPPorts = (mkIf cfg.yarn.resourcemanager.openFirewall [
|
||||
8088 # resourcemanager.webapp.address
|
||||
8030 # resourcemanager.scheduler.address
|
||||
8031 # resourcemanager.resource-tracker.address
|
||||
8032 # resourcemanager.address
|
||||
]);
|
||||
})
|
||||
|
||||
(mkIf cfg.yarn.nodemanager.enabled {
|
||||
# Needed because yarn hardcodes /bin/bash in container start scripts
|
||||
# These scripts can't be patched, they are generated at runtime
|
||||
systemd.tmpfiles.rules = [
|
||||
(mkIf cfg.yarn.nodemanager.addBinBash "L /bin/bash - - - - /run/current-system/sw/bin/bash")
|
||||
];
|
||||
|
||||
systemd.services.yarn-nodemanager = {
|
||||
description = "Hadoop YARN NodeManager";
|
||||
wantedBy = [ "multi-user.target" ];
|
||||
inherit (cfg.yarn.nodemanager) restartIfChanged;
|
||||
|
||||
environment = {
|
||||
HADOOP_HOME = "${cfg.package}";
|
||||
};
|
||||
preStart = ''
|
||||
# create log dir
|
||||
mkdir -p /var/log/hadoop/yarn/nodemanager
|
||||
chown yarn:hadoop /var/log/hadoop/yarn/nodemanager
|
||||
|
||||
# set up setuid container executor binary
|
||||
rm -rf /run/wrappers/yarn-nodemanager/ || true
|
||||
mkdir -p /run/wrappers/yarn-nodemanager/{bin,etc/hadoop}
|
||||
cp ${cfg.package}/lib/${cfg.package.untarDir}/bin/container-executor /run/wrappers/yarn-nodemanager/bin/
|
||||
chgrp hadoop /run/wrappers/yarn-nodemanager/bin/container-executor
|
||||
chmod 6050 /run/wrappers/yarn-nodemanager/bin/container-executor
|
||||
cp ${hadoopConf}/container-executor.cfg /run/wrappers/yarn-nodemanager/etc/hadoop/
|
||||
'';
|
||||
|
||||
serviceConfig = {
|
||||
User = "yarn";
|
||||
SyslogIdentifier = "yarn-nodemanager";
|
||||
PermissionsStartOnly = true;
|
||||
ExecStart = "${cfg.package}/bin/yarn --config ${hadoopConf} " +
|
||||
" nodemanager";
|
||||
Restart = "always";
|
||||
};
|
||||
};
|
||||
|
||||
networking.firewall.allowedTCPPortRanges = [
|
||||
(mkIf (cfg.yarn.nodemanager.openFirewall) {from = 1024; to = 65535;})
|
||||
];
|
||||
})
|
||||
|
||||
];
|
||||
|
|
Loading…
Reference in a new issue