[英]Cloudformation template to create EMR cluster
我正在嘗試借助 CloudFormation 模板創建包含 Hadoop、livy、Spark、ZooKeeper 和 Hive 等應用程序的 EMR-5.30.1 集群。 但是這個模板的問題是我只能使用上述應用程序列表中的一個應用程序來創建集群。
下面是 CloudFormation 模板
{
"AWSTemplateFormatVersion": "2010-09-09",
"Description": "Best Practice EMR Cluster for Spark or S3 backed Hbase",
"Parameters": {
"EMRClusterName": {
"Description": "Name of the cluster",
"Type": "String",
"Default": "emrcluster"
},
"KeyName": {
"Description": "Must be an existing Keyname",
"Type": "String",
"Default": "keyfilename"
},
"MasterInstanceType": {
"Description": "Instance type to be used for the master instance.",
"Type": "String",
"Default": "m5.xlarge"
},
"CoreInstanceType": {
"Description": "Instance type to be used for core instances.",
"Type": "String",
"Default": "m5.xlarge"
},
"NumberOfCoreInstances": {
"Description": "Must be a valid number",
"Type": "Number",
"Default": 1
},
"SubnetID": {
"Description": "Must be Valid public subnet ID",
"Default": "subnet-ee15b3e0",
"Type": "String"
},
"LogUri": {
"Description": "Must be a valid S3 URL",
"Default": "s3://aws/elasticmapreduce/",
"Type": "String"
},
"S3DataUri": {
"Description": "Must be a valid S3 bucket URL ",
"Default": "s3://aws/elasticmapreduce/",
"Type": "String"
},
"ReleaseLabel": {
"Description": "Must be a valid EMR release version",
"Default": "emr-5.30.1",
"Type": "String"
},
"Applications": {
"Description": "Please select which application will be installed on the cluster this would be either Ganglia and spark, or Ganglia and s3 backed Hbase",
"Type": "String",
"AllowedValues": [
"Spark",
"Hbase",
"Hive",
"Livy",
"ZooKeeper"
]
}
},
"Mappings": {},
"Conditions": {
"Spark": {
"Fn::Equals": [
{
"Ref": "Applications"
},
"Spark"
]
},
"Hbase": {
"Fn::Equals": [
{
"Ref": "Applications"
},
"Hbase"
]
},
"Hive": {
"Fn::Equals": [
{
"Ref": "Applications"
},
"Hive"
]
},
"Livy": {
"Fn::Equals": [
{
"Ref": "Applications"
},
"Livy"
]
},
"ZooKeeper": {
"Fn::Equals": [
{
"Ref": "Applications"
},
"ZooKeeper"
]
}
},
"Resources": {
"EMRCluster": {
"DependsOn": [
"EMRClusterServiceRole",
"EMRClusterinstanceProfileRole",
"EMRClusterinstanceProfile"
],
"Type": "AWS::EMR::Cluster",
"Properties": {
"Applications": [
{
"Name": "Ganglia"
},
{
"Fn::If": [
"Spark",
{
"Name": "Spark"
},
{
"Ref": "AWS::NoValue"
}
]
},
{
"Fn::If": [
"Hbase",
{
"Name": "Hbase"
},
{
"Ref": "AWS::NoValue"
}
]
},
{
"Fn::If": [
"Hive",
{
"Name": "Hive"
},
{
"Ref": "AWS::NoValue"
}
]
},
{
"Fn::If": [
"Livy",
{
"Name": "Livy"
},
{
"Ref": "AWS::NoValue"
}
]
},
{
"Fn::If": [
"ZooKeeper",
{
"Name": "ZooKeeper"
},
{
"Ref": "AWS::NoValue"
}
]
}
],
"Configurations": [
{
"Classification": "hbase-site",
"ConfigurationProperties": {
"hbase.rootdir":{"Ref":"S3DataUri"}
}
},
{
"Classification": "hbase",
"ConfigurationProperties": {
"hbase.emr.storageMode": "s3"
}
}
],
"Instances": {
"Ec2KeyName": {
"Ref": "KeyName"
},
"Ec2SubnetId": {
"Ref": "SubnetID"
},
"MasterInstanceGroup": {
"InstanceCount": 1,
"InstanceType": {
"Ref": "MasterInstanceType"
},
"Market": "ON_DEMAND",
"Name": "Master"
},
"CoreInstanceGroup": {
"InstanceCount": {
"Ref": "NumberOfCoreInstances"
},
"InstanceType": {
"Ref": "CoreInstanceType"
},
"Market": "ON_DEMAND",
"Name": "Core"
},
"TerminationProtected": false
},
"VisibleToAllUsers": true,
"JobFlowRole": {
"Ref": "EMRClusterinstanceProfile"
},
"ReleaseLabel": {
"Ref": "ReleaseLabel"
},
"LogUri": {
"Ref": "LogUri"
},
"Name": {
"Ref": "EMRClusterName"
},
"AutoScalingRole": "EMR_AutoScaling_DefaultRole",
"ServiceRole": {
"Ref": "EMRClusterServiceRole"
}
}
},
"EMRClusterServiceRole": {
"Type": "AWS::IAM::Role",
"Properties": {
"AssumeRolePolicyDocument": {
"Version": "2012-10-17",
"Statement": [
{
"Effect": "Allow",
"Principal": {
"Service": [
"elasticmapreduce.amazonaws.com"
]
},
"Action": [
"sts:AssumeRole"
]
}
]
},
"ManagedPolicyArns": [
"arn:aws:iam::aws:policy/service-role/AmazonElasticMapReduceRole"
],
"Path": "/"
}
},
"EMRClusterinstanceProfileRole": {
"Type": "AWS::IAM::Role",
"Properties": {
"AssumeRolePolicyDocument": {
"Version": "2012-10-17",
"Statement": [
{
"Effect": "Allow",
"Principal": {
"Service": [
"ec2.amazonaws.com"
]
},
"Action": [
"sts:AssumeRole"
]
}
]
},
"ManagedPolicyArns": [
"arn:aws:iam::aws:policy/service-role/AmazonElasticMapReduceforEC2Role"
],
"Path": "/"
}
},
"EMRClusterinstanceProfile": {
"Type": "AWS::IAM::InstanceProfile",
"Properties": {
"Path": "/",
"Roles": [
{
"Ref": "EMRClusterinstanceProfileRole"
}
]
}
}
},
"Outputs": {}
}
另外,我也想在這個模板中添加一個引導腳本,任何人都可以幫我解決這個問題。
您可以使用以下方式:-
如果您設置“ReleaseLabel”,則無需提及應用程序的版本
“應用程序”:[{“名稱”:“Hive”},{“名稱”:“Presto”},{“名稱”:“Spark”}]
對於引導程序:-
"BootstrapActions": [{ "Name": "setup", "ScriptBootstrapAction": { "Path": "s3://bucket/key/Bootstrap.sh" } }]
像這樣定義一次創建所有應用程序。
{
"Type": "AWS::EMR::Cluster",
"Properties": {
"Applications": [
{
"Name": "Ganglia"
},
{
"Name": "Spark"
},
{
"Name": "Livy"
},
{
"Name": "ZooKeeper"
},
{
"Name": "JupyterHub"
}
]
}
}
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.